diff --git a/src/main/java/fr/inria/corese/core/next/api/FormatSerializer.java b/src/main/java/fr/inria/corese/core/next/api/FormatSerializer.java deleted file mode 100644 index 4741a9712..000000000 --- a/src/main/java/fr/inria/corese/core/next/api/FormatSerializer.java +++ /dev/null @@ -1,22 +0,0 @@ -package fr.inria.corese.core.next.api; - -import java.io.Writer; - -import fr.inria.corese.core.next.impl.exception.SerializationException; - -public interface FormatSerializer { - - /** - * A serializer that converts a {@link Model} instance - * into a specific output format and writes it to a character stream. - * - * Implementations may follow standard RDF serialization formats - * (e.g., Turtle, N-Triples, JSON-LD), or define custom formats. - * - * @param writer the destination {@link Writer} for the serialized - * output - * @throws SerializationException if an error occurs during the serialization - * process - */ - void write(Writer writer) throws SerializationException; -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/api/RdfSerializer.java b/src/main/java/fr/inria/corese/core/next/api/RdfSerializer.java new file mode 100644 index 000000000..39d1f2dbc --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/RdfSerializer.java @@ -0,0 +1,31 @@ +package fr.inria.corese.core.next.api; + +import fr.inria.corese.core.next.impl.exception.SerializationException; + +import java.io.Writer; + +/** + * Writes RDF data to a character stream in one concrete serialization format. + * Instances are typically obtained from a {@link SerializerFactory}, which + * is given the desired + * {@link fr.inria.corese.core.next.impl.common.serialization.RdfFormat}, a {@link Model} to be serialized, and + * a {@link SerializationConfig}. 
+ * This interface exposes a single operation, {@link #write(Writer)}, + * which takes only the destination stream, so calling code does not + * depend on the concrete serializer class. + */ +public interface RdfSerializer { + + /** + * Serializes a {@link Model} instance into this serializer's + * output format and writes it to a character stream. + * Implementations may follow standard RDF serialization formats + * (e.g., Turtle, N-Triples, JSON-LD, TriG, RDF/XML), or define custom formats. + * + * @param writer the destination {@link Writer} for the serialized + * output + * @throws SerializationException if an error occurs during the serialization + * process + */ + void write(Writer writer) throws SerializationException; +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/api/SerializationConfig.java b/src/main/java/fr/inria/corese/core/next/api/SerializationConfig.java new file mode 100644 index 000000000..47b9f8cbe --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/SerializationConfig.java @@ -0,0 +1,14 @@ +package fr.inria.corese.core.next.api; + + +/** + * Marker interface for configuration objects used in RDF serialization. + * This interface provides a common type for all serialization configuration + * implementations, promoting abstraction and flexibility in how serialization + * options are defined and provided. + * Implementations of this interface (e.g., SerializerConfig) will define the + * specific parameters and settings relevant to a particular serialization process. 
+ */ +public interface SerializationConfig { + +} diff --git a/src/main/java/fr/inria/corese/core/next/api/SerializerFactory.java b/src/main/java/fr/inria/corese/core/next/api/SerializerFactory.java new file mode 100644 index 000000000..b30177181 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/api/SerializerFactory.java @@ -0,0 +1,19 @@ +package fr.inria.corese.core.next.api; + +import fr.inria.corese.core.next.impl.common.serialization.RdfFormat; + +/** + * Factory interface for creating {@link RdfSerializer} instances. + * This interface defines a contract for classes that are responsible + * for providing appropriate RDF serializers based on the desired + * {@link RdfFormat}, a {@link Model} to be serialized, and + * {@link SerializationConfig}. + * Implementations of this factory can manage the instantiation + * and configuration of various RDF serializers, promoting + * loose coupling and extensibility in the serialization process. + */ +public interface SerializerFactory { + + + RdfSerializer createSerializer(RdfFormat format, Model model, SerializationConfig config); +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/DefaultSerializerFactory.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/DefaultSerializerFactory.java new file mode 100644 index 000000000..166badef4 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/DefaultSerializerFactory.java @@ -0,0 +1,122 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.RdfSerializer; +import fr.inria.corese.core.next.api.SerializationConfig; +import fr.inria.corese.core.next.api.SerializerFactory; +import fr.inria.corese.core.next.impl.common.serialization.config.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import 
java.util.Objects; +import java.util.function.BiFunction; + +/** + * Default implementation of {@link SerializerFactory}. + * This factory is responsible for creating instances of {@link RdfSerializer} + * based on the requested {@link RdfFormat}. It uses a registry pattern + * to map each format to its corresponding serializer constructor, + * providing a flexible and extensible way to manage serializer instances. + * + *

It adapts the generic {@link SerializationConfig} provided to the specific + * configuration type expected by each serializer in the hierarchy, with a fallback + * to default configurations if an incompatible type is provided.

+ */ +public class DefaultSerializerFactory implements SerializerFactory { + + private static final Logger logger = LoggerFactory.getLogger(DefaultSerializerFactory.class); + + private final Map<RdfFormat, BiFunction<Model, SerializationConfig, RdfSerializer>> registry; + + /** + * Constructs a {@code DefaultSerializerFactory} and populates its registry + * with constructors for all known {@link RdfFormat} implementations. + * Each constructor attempts to cast the generic {@link SerializationConfig} to the + * specific configuration type required by the serializer. If the cast is not possible, + * it logs a warning and falls back to the format's default configuration. + */ + public DefaultSerializerFactory() { + Map<RdfFormat, BiFunction<Model, SerializationConfig, RdfSerializer>> tempRegistry = new HashMap<>(); + + tempRegistry.put(RdfFormat.TURTLE, (model, genericConfig) -> { + if (genericConfig instanceof TurtleConfig specificConfig) { + return new TurtleSerializer(model, specificConfig); + } else { + logger.warn("Provided config for TURTLE is not TurtleConfig (was {}). Using default TurtleConfig.", + genericConfig.getClass().getSimpleName()); + return new TurtleSerializer(model, TurtleConfig.defaultConfig()); + } + }); + + tempRegistry.put(RdfFormat.NTRIPLES, (model, genericConfig) -> { + if (genericConfig instanceof NTriplesConfig specificConfig) { + return new NTriplesSerializer(model, specificConfig); + } else { + logger.warn("Provided config for NTRIPLES is not NTriplesConfig (was {}). Using default NTriplesConfig.", + genericConfig.getClass().getSimpleName()); + return new NTriplesSerializer(model, NTriplesConfig.defaultConfig()); + } + }); + + tempRegistry.put(RdfFormat.NQUADS, (model, genericConfig) -> { + if (genericConfig instanceof NQuadsConfig specificConfig) { + return new NQuadsSerializer(model, specificConfig); + } else { + logger.warn("Provided config for NQUADS is not NQuadsConfig (was {}). 
Using default NQuadsConfig.", + genericConfig.getClass().getSimpleName()); + return new NQuadsSerializer(model, NQuadsConfig.defaultConfig()); + } + }); + + tempRegistry.put(RdfFormat.TRIG, (model, genericConfig) -> { + if (genericConfig instanceof TriGConfig specificConfig) { + return new TriGSerializer(model, specificConfig); + } else { + logger.warn("Provided config for TRIG is not TriGConfig (was {}). Using default TriGConfig.", + genericConfig.getClass().getSimpleName()); + return new TriGSerializer(model, TriGConfig.defaultConfig()); + } + }); + + tempRegistry.put(RdfFormat.RDFXML, (model, genericConfig) -> { + if (genericConfig instanceof XmlConfig specificConfig) { + return new XmlSerializer(model, specificConfig); + } else { + logger.warn("Provided config for RDFXML is not XmlConfig (was {}). Using default XmlConfig.", + genericConfig.getClass().getSimpleName()); + return new XmlSerializer(model, XmlConfig.defaultConfig()); + } + }); + + this.registry = Collections.unmodifiableMap(tempRegistry); + } + + /** + * Creates an {@link RdfSerializer} instance for the specified format, model, and configuration. + * + * @param format the {@link RdfFormat} for which to create the serializer. Must not be null. + * @param model the {@link Model} to be serialized. Must not be null. + * @param config the {@link SerializationConfig} to apply during serialization. Must not be null. + * @return a new instance of {@link RdfSerializer} configured for the specified format. + * @throws NullPointerException if any of the arguments (format, model, config) are null. + * @throws IllegalArgumentException if the provided format is not supported by this factory. 
+ */ + @Override + public RdfSerializer createSerializer(RdfFormat format, Model model, SerializationConfig config) { + + Objects.requireNonNull(format, "RdfFormat cannot be null"); + Objects.requireNonNull(model, "Model cannot be null"); + Objects.requireNonNull(config, "SerializationConfig cannot be null"); + + BiFunction<Model, SerializationConfig, RdfSerializer> constructor = registry.get(format); + + if (constructor == null) { + throw new IllegalArgumentException("Unsupported RdfFormat: " + format.getName()); + } + + return constructor.apply(model, config); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/FormatConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/FormatConfig.java deleted file mode 100644 index 0e85acff7..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/FormatConfig.java +++ /dev/null @@ -1,69 +0,0 @@ -package fr.inria.corese.core.next.impl.common.serialization; - -import java.util.Objects; - -/** - * Configuration options for the {@link NTriplesFormat} serializer. - * Use {@link FormatConfig # Builder} to create instances. - */ -public class FormatConfig { - - private final String blankNodePrefix; - - - /** - * Private constructor to enforce usage of the Builder. - * - * @param builder The builder instance. - */ - private FormatConfig(Builder builder) { - this.blankNodePrefix = builder.blankNodePrefix; - - } - - /** - * Returns the prefix to use for blank nodes. - * - * @return The blank node prefix. - */ - public String getBlankNodePrefix() { - return blankNodePrefix; - } - - /** - * Builder class for {@link FormatConfig}. - */ - public static class Builder { - - private String blankNodePrefix = "_:"; - - /** - * Default constructor for the Builder. - * Initializes fields with default values. - */ - Builder() { - - } - - /** - * Sets the prefix to use for blank nodes. Default is "_:". - * - * @param blankNodePrefix The desired blank node prefix. - * @return The builder instance. 
- */ - public Builder blankNodePrefix(String blankNodePrefix) { - this.blankNodePrefix = Objects.requireNonNull(blankNodePrefix, "Blank node prefix cannot be null"); - return this; - } - - - /** - * Builds a new {@link FormatConfig} instance. - * - * @return A new NFormatConfig instance. - */ - public FormatConfig build() { - return new FormatConfig(this); - } - } -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsFormat.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsFormat.java deleted file mode 100644 index daa921cf0..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsFormat.java +++ /dev/null @@ -1,304 +0,0 @@ -package fr.inria.corese.core.next.impl.common.serialization; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.impl.common.util.SerializationConstants; -import fr.inria.corese.core.next.impl.exception.SerializationException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.io.Writer; -import java.util.Objects; - -/** - * Serializes a Corese {@link Model} into N-Quads format. - * This class provides a method to write the statements (quads) of a model to a given {@link Writer} - * according to the N-Quads specification, including support for named graphs (contexts). - */ -public class NQuadsFormat implements FormatSerializer { - - /** - * Logger for this class, used for logging potential issues or information during serialization. - */ - private static final Logger logger = LoggerFactory.getLogger(NQuadsFormat.class); - - private final Model model; - private final FormatConfig config; - - /** - * Constructs a new {@code NQuadsFormat} instance with the specified model and default configuration. - * - * @param model the {@link Model} to be serialized. Must not be null. 
- * @throws NullPointerException if the provided model is null. - */ - public NQuadsFormat(Model model) { - this(model, new FormatConfig.Builder().build()); - } - - /** - * Constructs a new {@code NQuadsFormat} instance with the specified model and custom configuration. - * - * @param model the {@link Model} to be serialized. Must not be null. - * @param config the {@link FormatConfig} to use for serialization. Must not be null. - * @throws NullPointerException if the provided model or config is null. - */ - public NQuadsFormat(Model model, FormatConfig config) { - this.model = Objects.requireNonNull(model, "Model cannot be null"); - this.config = Objects.requireNonNull(config, "Configuration cannot be null"); - } - - /** - * Writes the model to the given writer in N-Quads format. - * Each statement (quad) in the model is written on a new line, terminated by a dot and a newline character. - * - * @param writer the {@link Writer} to which the N-Quads output will be written. - * @throws SerializationException if an I/O error occurs during writing or if invalid data is encountered. - */ - @Override - public void write(Writer writer) throws SerializationException { - try { - for (Statement stmt : model) { - writeStatement(writer, stmt); - } - writer.flush(); - } catch (IOException e) { - throw new SerializationException("Failed to write", "NQuads", e); - } catch (IllegalArgumentException e) { - throw new SerializationException("Invalid data: " + e.getMessage(), "NQuads", e); - } - } - - /** - * Writes a single {@link Statement} (quad) to the writer in N-Quads format. - * The statement is written as "$subject $predicate $object $context ." if a context is present, - * or "$subject $predicate $object ." if no context is present (default graph). - * - * @param writer the {@link Writer} to which the statement will be written. - * @param stmt the {@link Statement} to write. - * @throws IOException if an I/O error occurs. 
- */ - private void writeStatement(Writer writer, Statement stmt) throws IOException { - writeValue(writer, stmt.getSubject()); - writer.write(SerializationConstants.SPACE); - writeValue(writer, stmt.getPredicate()); - writer.write(SerializationConstants.SPACE); - writeValue(writer, stmt.getObject()); - - Resource context = stmt.getContext(); - if (context != null) { - writer.write(SerializationConstants.SPACE); - writeValue(writer, context); - } - - writer.write(SerializationConstants.SPACE_POINT); - } - - /** - * Writes a single {@link Value} to the writer. - * Handles literals, blank nodes, and IRIs. - * - * @param writer the {@link Writer} to which the value will be written. - * @param value the {@link Value} to write. - * @throws IOException if an I/O error occurs. - * @throws IllegalArgumentException if the provided value is null or an unsupported type. - */ - private void writeValue(Writer writer, Value value) throws IOException { - validateValue(value); - - if (value.isLiteral()) { - writeLiteral(writer, (Literal) value); - } else if (value.isResource()) { - if (value.isIRI()) { - writeIRI(writer, (IRI) value); - } else if (value.isBNode()) { - writeBlankNode(writer, (Resource) value); - } else { - throw new IllegalArgumentException("Unsupported resource type for N-Quads serialization: " + value.getClass().getName()); - } - } else { - throw new IllegalArgumentException("Unsupported value type for N-Quads serialization: " + value.getClass().getName()); - } - } - - /** - * Writes a {@link Literal} to the writer in N-Quads format. - * Handles plain literals, language-tagged literals, and typed literals. - * - * @param writer the {@link Writer} to which the literal will be written. - * @param literal the {@link Literal} to write. - * @throws IOException if an I/O error occurs. 
- */ - private void writeLiteral(Writer writer, Literal literal) throws IOException { - writer.write(SerializationConstants.QUOTE); - writer.write(escapeLiteral(literal.stringValue())); - writer.write(SerializationConstants.QUOTE); - - // Gestion du langage - literal.getLanguage().ifPresent(lang -> { - try { - writer.write(SerializationConstants.AT_SIGN + lang); - } catch (IOException e) { - throw new UncheckedIOException("Error writing language tag", e); - } - }); - - if (!literal.getLanguage().isPresent()) { - IRI datatype = literal.getDatatype(); - if (datatype != null && !datatype.stringValue().equals(SerializationConstants.XSD_STRING)) { - writer.write(SerializationConstants.DATATYPE_SEPARATOR); - writeIRI(writer, datatype); - } - } - } - - /** - * Writes an {@link IRI} to the writer. - * The IRI's string representation must be enclosed in angle brackets for N-Quads. - * - * @param writer the {@link Writer} to which the IRI will be written. - * @param iri the {@link IRI} to write. - * @throws IOException if an I/O error occurs. - */ - private void writeIRI(Writer writer, IRI iri) throws IOException { - writer.write(SerializationConstants.LT); - writer.write(escapeIRI(iri.stringValue())); - writer.write(SerializationConstants.GT); - } - - /** - * Writes a blank node to the writer. - * Blank nodes are prefixed with "_:", and the identifier is appended. - * - * @param writer the {@link Writer} to which the blank node will be written. - * @param blankNode the {@link Resource} representing the blank node. - * @throws IOException if an I/O error occurs. - */ - private void writeBlankNode(Writer writer, Resource blankNode) throws IOException { - writer.write(config.getBlankNodePrefix()); - writer.write(blankNode.stringValue()); - } - - /** - * Validates and potentially escapes an IRI string. - * Throws an {@link IllegalArgumentException} if the IRI contains characters - * that are not allowed in N-Quads unescaped form (like spaces, quotes, angle brackets). 
- * - * @param iri The string value of the IRI to validate and escape. - * @return The validated and potentially escaped IRI string. - * @throws IllegalArgumentException if the IRI string is invalid. - */ - private String escapeIRI(String iri) { - - if (iri.contains(SerializationConstants.SPACE) || iri.contains(SerializationConstants.QUOTE) || - iri.contains(SerializationConstants.LT) || iri.contains(SerializationConstants.GT)) { - throw new IllegalArgumentException("Invalid IRI: contains illegal characters for N-Quads unescaped form: " + iri); - } - return iri; - } - - /** - * Escape special characters in N-Quads string literals. - * Handles backslash, double quote, and common control characters. - * Unicode escape sequences are used for unprintable characters. - * - * @param value The string value of the literal to escape. - * @return The escaped string suitable for N-Quads literal. - */ - private String escapeLiteral(String value) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < value.length(); i++) { - char c = value.charAt(i); - switch (c) { - case '\n': - sb.append(SerializationConstants.BACK_SLASH).append('n'); - break; - case '\r': - sb.append(SerializationConstants.BACK_SLASH).append('r'); - break; - case '\t': - sb.append(SerializationConstants.BACK_SLASH).append('t'); - break; - case '\b': - sb.append(SerializationConstants.BACK_SLASH).append('b'); - break; - case '\f': - sb.append(SerializationConstants.BACK_SLASH).append('f'); - break; - case '"': - sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.QUOTE); - break; - case '\\': - sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.BACK_SLASH); - break; - default: - if (c <= 0x1F || c == 0x7F) { - sb.append(String.format("\\u%04X", (int) c)); - } else { - sb.append(c); - } - } - } - return sb.toString(); - } - - /** - * Validates RDF values before serialization to ensure they conform to N-Quads rules. 
- * - * @param value The {@link Value} to validate. - * @throws IllegalArgumentException if the value is null or invalid. - */ - private void validateValue(Value value) { - if (value == null) { - logger.warn("Encountered a null value where a non-null value was expected for N-Quads serialization."); - throw new IllegalArgumentException("Value cannot be null in N-Quads format"); - } - - if (value.isLiteral()) { - validateLiteral((Literal) value); - } else if (value.isIRI()) { - validateIRI((IRI) value); - } - } - - /** - * Validates a {@link Literal} to ensure it conforms to RDF/N-Quads rules. - * Specifically checks for consistency between language tags and the rdf:langString datatype. - * - * @param literal The {@link Literal} to validate. - * @throws IllegalArgumentException if the literal is invalid (e.g., language tag with wrong datatype, - * or rdf:langString literal missing a language tag). - */ - private void validateLiteral(Literal literal) { - IRI datatype = literal.getDatatype(); - - - if (literal.getLanguage().isPresent()) { - - if (datatype == null || !datatype.stringValue().equals(SerializationConstants.RDF_LANGSTRING)) { - throw new IllegalArgumentException( - "Literal with language tag must use rdf:langString datatype. Found: " + (datatype != null ? datatype.stringValue() : "null")); - } - } else { - - if (datatype != null && datatype.stringValue().equals(SerializationConstants.RDF_LANGSTRING)) { - throw new IllegalArgumentException( - "rdf:langString literal must have a language tag."); - } - } - } - - /** - * Validates an {@link IRI} to ensure it conforms to N-Quads rules. - * Checks if the IRI string contains characters that are not allowed in N-Quads - * unescaped form, such as spaces. - * - * @param iri The {@link IRI} to validate. - * @throws IllegalArgumentException if the IRI contains spaces or is otherwise invalid. 
- */ - private void validateIRI(IRI iri) { - if (iri.stringValue().contains(SerializationConstants.SPACE)) { - throw new IllegalArgumentException("IRI contains spaces, which is not allowed in N-Quads unescaped form: " + iri.stringValue()); - } - } -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsSerializer.java new file mode 100644 index 000000000..af2a67c66 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsSerializer.java @@ -0,0 +1,82 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.impl.common.serialization.base.AbstractLineBasedSerializer; +import fr.inria.corese.core.next.impl.common.serialization.config.NQuadsConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Writer; +import java.util.Objects; + +/** + * Serializes a Corese {@link Model} into N-Quads format. + * This class extends {@link AbstractLineBasedSerializer} to provide + * N-Quads specific serialization behavior. + */ +public class NQuadsSerializer extends AbstractLineBasedSerializer { + + /** + * Logger for this class, used for logging potential issues or information during serialization. + */ + private static final Logger logger = LoggerFactory.getLogger(NQuadsSerializer.class); + + /** + * Constructs a new {@code NQuadsSerializer} instance with the specified model and default N-Quads configuration. + * The default configuration is obtained from {@link NQuadsConfig#defaultConfig()}. + * + * @param model the {@link Model} to be serialized. Must not be null. 
+ * @throws NullPointerException if the provided model is null. + */ + public NQuadsSerializer(Model model) { + this(model, NQuadsConfig.defaultConfig()); + } + + /** + * Constructs a new {@code NQuadsSerializer} instance with the specified model and custom configuration. + * + * @param model the {@link Model} to be serialized. Must not be null. + * @param config the {@link NQuadsConfig} to use for serialization. Must not be null. + * This config object should be an instance of {@code NQuadsConfig} or a subclass thereof. + * @throws NullPointerException if the provided model or config is null. + */ + public NQuadsSerializer(Model model, NQuadsConfig config) { + // Null-check inside the super(...) call: no statement may precede it, and checking afterwards is too late. + super(model, Objects.requireNonNull(config, "NQuadsConfig cannot be null")); + } + + /** + * Returns the format name for error messages and logging. + * + * @return "N-Quads" + */ + @Override + protected String getFormatName() { + return "N-Quads"; + } + + /** + * Writes the context (named graph) part of a statement. + * For N-Quads, the context is written as the fourth component if present + * and {@code config.includeContext()} is true. + * + * @param writer the {@link Writer} to which the context will be written. + * @param stmt the {@link Statement} whose context should be written. + * @throws IOException if an I/O error occurs. 
+ */ + @Override + protected void writeContext(Writer writer, Statement stmt) throws IOException { + Resource context = stmt.getContext(); + if (context != null && config.includeContext()) { + writer.write(SerializationConstants.SPACE); + writeValue(writer, context); + } else if (context != null && logger.isWarnEnabled()) { + logger.warn("Context '{}' will be ignored for statement: {} because includeContext is false in configuration.", + context.stringValue(), stmt); + } + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesFormat.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesFormat.java deleted file mode 100644 index fc20c0e92..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesFormat.java +++ /dev/null @@ -1,305 +0,0 @@ -package fr.inria.corese.core.next.impl.common.serialization; - -import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.impl.common.util.SerializationConstants; -import fr.inria.corese.core.next.impl.exception.SerializationException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.io.Writer; -import java.util.Objects; - -/** - * Serializes a Corese {@link Model} into N-Triples format. - * This class provides a method to write the statements of a model to a given {@link Writer} - * according to the N-Triples specification. - */ -public class NTriplesFormat implements FormatSerializer { - - /** - * Logger for this class, used for logging potential issues or information during serialization. - */ - private static final Logger logger = LoggerFactory.getLogger(NTriplesFormat.class); - - private final Model model; - private final FormatConfig config; - - /** - * Constructs a new {@code NTriplesFormat} instance with the specified model and default configuration. - * - * @param model the {@link Model} to be serialized. 
Must not be null. - * @throws NullPointerException if the provided model is null. - */ - public NTriplesFormat(Model model) { - this(model, new FormatConfig.Builder().build()); - } - - /** - * Constructs a new {@code NTriplesFormat} instance with the specified model and custom configuration. - * - * @param model the {@link Model} to be serialized. Must not be null. - * @param config the {@link FormatConfig} to use for serialization. Must not be null. - * @throws NullPointerException if the provided model or config is null. - */ - public NTriplesFormat(Model model, FormatConfig config) { - this.model = Objects.requireNonNull(model, "Model cannot be null"); - this.config = Objects.requireNonNull(config, "Configuration cannot be null"); - } - - /** - * Writes the model to the given writer in N-Triples format. - * Each statement in the model is written on a new line, terminated by a dot and a newline character. - * - * @param writer the {@link Writer} to which the N-Triples output will be written. - * @throws SerializationException if an I/O error occurs during writing or if invalid data is encountered. - */ - @Override - public void write(Writer writer) throws SerializationException { - try { - for (Statement stmt : model) { - writeStatement(writer, stmt); - } - writer.flush(); - } catch (IOException e) { - throw new SerializationException("Failed to write", "NTriples", e); - } catch (IllegalArgumentException e) { - throw new SerializationException("Invalid data: " + e.getMessage(), "NTriples", e); - } - } - - /** - * Writes a single {@link Statement} to the writer in N-Triples format. - * The statement is written as "$subject $predicate $object ." - * N-Triples does not support contexts (named graphs). If a context is present, it's ignored and a warning is logged. - * - * @param writer the {@link Writer} to which the statement will be written. - * @param stmt the {@link Statement} to write. - * @throws IOException if an I/O error occurs. 
- */ - private void writeStatement(Writer writer, Statement stmt) throws IOException { - writeValue(writer, stmt.getSubject()); - writer.write(SerializationConstants.SPACE); - writeValue(writer, stmt.getPredicate()); - writer.write(SerializationConstants.SPACE); - writeValue(writer, stmt.getObject()); - - Resource context = stmt.getContext(); - if (context != null && logger.isWarnEnabled()) { - logger.warn("N-Triples format does not support named graphs. Context '{}' will be ignored for statement: {}", - context.stringValue(), stmt); - - } - - writer.write(SerializationConstants.SPACE_POINT); - } - - /** - * Writes a single {@link Value} to the writer. - * Handles literals, blank nodes, and IRIs. - * - * @param writer the {@link Writer} to which the value will be written. - * @param value the {@link Value} to write. - * @throws IOException if an I/O error occurs. - * @throws IllegalArgumentException if the provided value is null or an unsupported type. - */ - private void writeValue(Writer writer, Value value) throws IOException { - validateValue(value); - - if (value.isLiteral()) { - writeLiteral(writer, (Literal) value); - } else if (value.isResource()) { - if (value.isIRI()) { - writeIRI(writer, (IRI) value); - } else if (value.isBNode()) { - writeBlankNode(writer, (Resource) value); - } else { - throw new IllegalArgumentException("Unsupported resource type for N-Triples serialization: " + value.getClass().getName()); - } - } else { - throw new IllegalArgumentException("Unsupported value type for N-Triples serialization: " + value.getClass().getName()); - } - } - - /** - * Writes a {@link Literal} to the writer in N-Triples format. - * Handles plain literals, language-tagged literals, and typed literals. - * - * @param writer the {@link Writer} to which the literal will be written. - * @param literal the {@link Literal} to write. - * @throws IOException if an I/O error occurs. 
- */ - private void writeLiteral(Writer writer, Literal literal) throws IOException { - writer.write(SerializationConstants.QUOTE); - writer.write(escapeLiteral(literal.stringValue())); - writer.write(SerializationConstants.QUOTE); - - literal.getLanguage().ifPresent(lang -> { - try { - writer.write(SerializationConstants.AT_SIGN + lang); - } catch (IOException e) { - - throw new UncheckedIOException("Error writing language tag to stream", e); - } - }); - - if (!literal.getLanguage().isPresent()) { - IRI datatype = literal.getDatatype(); - if (datatype != null && !datatype.stringValue().equals(SerializationConstants.XSD_STRING)) { - writer.write(SerializationConstants.DATATYPE_SEPARATOR); - writeIRI(writer, datatype); - } - } - } - - /** - * Writes an {@link IRI} to the writer. - * The IRI's string representation must be enclosed in angle brackets for N-Triples. - * - * @param writer the {@link Writer} to which the IRI will be written. - * @param iri the {@link IRI} to write. - * @throws IOException if an I/O error occurs. - */ - private void writeIRI(Writer writer, IRI iri) throws IOException { - writer.write(SerializationConstants.LT); - writer.write(escapeIRI(iri.stringValue())); - writer.write(SerializationConstants.GT); - } - - /** - * Writes a blank node to the writer. - * Blank nodes are prefixed with "_:", and the identifier is appended. - * - * @param writer the {@link Writer} to which the blank node will be written. - * @param blankNode the {@link Resource} representing the blank node. - * @throws IOException if an I/O error occurs. - */ - private void writeBlankNode(Writer writer, Resource blankNode) throws IOException { - writer.write(config.getBlankNodePrefix()); - writer.write(blankNode.stringValue()); - } - - /** - * Validates and potentially escapes an IRI string. - * Throws an {@link IllegalArgumentException} if the IRI contains characters - * that are not allowed in N-Triples unescaped form (like spaces, quotes, angle brackets). 
- * - * @param iri The string value of the IRI to validate and escape. - * @return The validated and potentially escaped IRI string. - * @throws IllegalArgumentException if the IRI string is invalid. - */ - private String escapeIRI(String iri) { - - if (iri.contains(SerializationConstants.SPACE) || iri.contains(SerializationConstants.QUOTE) || iri.contains(SerializationConstants.LT) || iri.contains(SerializationConstants.GT)) { - throw new IllegalArgumentException("Invalid IRI: contains illegal characters for N-Triples unescaped form: " + iri); - } - return iri; - } - - /** - * Escape special characters in N-Triples string literals. - * Handles backslash, double quote, and common control characters. - * Unicode escape sequences are used for unprintable characters. - * - * @param value The string value of the literal to escape. - * @return The escaped string suitable for N-Triples literal. - */ - private String escapeLiteral(String value) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < value.length(); i++) { - char c = value.charAt(i); - switch (c) { - case '\n': - sb.append(SerializationConstants.BACK_SLASH).append('n'); - break; - case '\r': - sb.append(SerializationConstants.BACK_SLASH).append('r'); - break; - case '\t': - sb.append(SerializationConstants.BACK_SLASH).append('t'); - break; - case '\b': // backspace - sb.append(SerializationConstants.BACK_SLASH).append('b'); - break; - case '\f': // form feed - sb.append(SerializationConstants.BACK_SLASH).append('f'); - break; - case '"': - sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.QUOTE); - break; - case '\\': - sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.BACK_SLASH); - break; - default: - if (c <= 0x1F || c == 0x7F) { - sb.append(String.format("\\u%04X", (int) c)); - } else { - sb.append(c); - } - } - } - return sb.toString(); - } - - /** - * Validates RDF values before serialization to ensure they conform to N-Triples rules. 
- * - * @param value The {@link Value} to validate. - * @throws IllegalArgumentException if the value is null or invalid. - */ - private void validateValue(Value value) { - if (value == null) { - logger.warn("Encountered a null value where a non-null value was expected for N-Triples serialization."); - throw new IllegalArgumentException("Value cannot be null in N-Triples format"); - } - - if (value.isLiteral()) { - validateLiteral((Literal) value); - } else if (value.isIRI()) { - validateIRI((IRI) value); - } - - } - - /** - * Validates a {@link Literal} to ensure it conforms to RDF/N-Triples rules. - * Specifically checks for consistency between language tags and the rdf:langString datatype. - * - * @param literal The {@link Literal} to validate. - * @throws IllegalArgumentException if the literal is invalid (e.g., language tag with wrong datatype, - * or rdf:langString literal missing a language tag). - */ - private void validateLiteral(Literal literal) { - IRI datatype = literal.getDatatype(); - - - if (literal.getLanguage().isPresent()) { - - if (datatype == null || !datatype.stringValue().equals(SerializationConstants.RDF_LANGSTRING)) { - throw new IllegalArgumentException( - "Literal with language tag must use rdf:langString datatype. Found: " + (datatype != null ? datatype.stringValue() : "null")); - } - } else { - - if (datatype != null && datatype.stringValue().equals(SerializationConstants.RDF_LANGSTRING)) { - throw new IllegalArgumentException( - "rdf:langString literal must have a language tag."); - } - } - } - - /** - * Validates an {@link IRI} to ensure it conforms to N-Triples rules. - * Checks if the IRI string contains characters that are not allowed in N-Triples - * unescaped form, such as spaces. - * - * @param iri The {@link IRI} to validate. - * @throws IllegalArgumentException if the IRI contains spaces or is otherwise invalid. 
- */ - private void validateIRI(IRI iri) { - if (iri.stringValue().contains(SerializationConstants.SPACE)) { - throw new IllegalArgumentException("IRI contains spaces, which is not allowed in N-Triples unescaped form: " + iri.stringValue()); - } - } -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesSerializer.java new file mode 100644 index 000000000..aaefc3f4f --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesSerializer.java @@ -0,0 +1,79 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.impl.common.serialization.base.AbstractLineBasedSerializer; +import fr.inria.corese.core.next.impl.common.serialization.config.NTriplesConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Writer; +import java.util.Objects; + +/** + * Serializes a Corese {@link Model} into N-Triples format. + * This class extends {@link AbstractLineBasedSerializer} to provide + * N-Triples specific serialization behavior. + */ +public class NTriplesSerializer extends AbstractLineBasedSerializer { + + /** + * Logger for this class, used for logging potential issues or information during serialization. + */ + private static final Logger logger = LoggerFactory.getLogger(NTriplesSerializer.class); + + /** + * Constructs a new {@code NTriplesSerializer} instance with the specified model and default configuration. + * The default configuration is obtained from {@link NTriplesConfig#defaultConfig()}. + * + * @param model the {@link Model} to be serialized. Must not be null. + * @throws NullPointerException if the provided model is null. 
+ */ + public NTriplesSerializer(Model model) { + this(model, NTriplesConfig.defaultConfig()); + } + + /** + * Constructs a new {@code NTriplesSerializer} instance with the specified model and custom configuration. + * + * @param model the {@link Model} to be serialized. Must not be null. + * @param config the {@link NTriplesConfig} to use for serialization. Must not be null. + * This config object should be an instance of {@code NTriplesConfig} or a subclass thereof. + * @throws NullPointerException if the provided model or config is null. + */ + public NTriplesSerializer(Model model, NTriplesConfig config) { + super(model, config); + Objects.requireNonNull(config, "NTriplesConfig cannot be null"); + } + + /** + * Returns the format name for error messages and logging. + * + * @return "N-Triples" + */ + @Override + protected String getFormatName() { + return "N-Triples"; + } + + /** + * Writes the context (named graph) part of a statement. + * For N-Triples, contexts are not supported, so this method logs a warning + * if a context is present and does nothing. + * + * @param writer the {@link Writer} to which the context will be written. + * @param stmt the {@link Statement} whose context should be written. + * @throws IOException if an I/O error occurs. + */ + @Override + protected void writeContext(Writer writer, Statement stmt) throws IOException { + Resource context = stmt.getContext(); + + if (context != null && logger.isWarnEnabled()) { + logger.warn("N-Triples format does not support named graphs. 
Context '{}' will be ignored for statement: {}", + context.stringValue(), stmt); + } + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormat.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormat.java index fbea74d93..98dc9171d 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormat.java +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormat.java @@ -52,6 +52,13 @@ public class RdfFormat extends FileFormat { true, false); + public static final RdfFormat TRIG = new RdfFormat( + "TriG", + List.of("trig"), + List.of("application/trig"), + true, + true); + /** * Constructs a new RDF format. * @@ -138,7 +145,7 @@ public static Optional byMimeType(String mimeType) { * @return An unmodifiable List of all RdfFormat constants. */ public static List all() { - return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML); + return List.of(TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG); } @Override diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/Serializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/Serializer.java deleted file mode 100644 index 37427b832..000000000 --- a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/Serializer.java +++ /dev/null @@ -1,55 +0,0 @@ -package fr.inria.corese.core.next.impl.common.serialization; - -import fr.inria.corese.core.next.api.FormatSerializer; -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.impl.exception.SerializationException; - -import java.io.Writer; -import java.util.Objects; - -public class Serializer { - - private final Model model; - private final FormatConfig config; - - public Serializer(Model model) { - this(model, new FormatConfig.Builder().build()); - } - - public Serializer(Model model, FormatConfig config) { - this.model = Objects.requireNonNull(model, "Model cannot be null for serialization"); 
- this.config = Objects.requireNonNull(config, "FormatConfig cannot be null for serialization"); - } - - /** - * Serializes the RDF model to the given writer in the specified {@link RdfFormat}. - * - * @param writer the Writer to write the serialized data to. - * @param format the {@link RdfFormat} describing the desired serialization format. - * @throws SerializationException if an error occurs during serialization or if the format is not currently supported by an implementation. - */ - public void serialize(Writer writer, RdfFormat format) throws SerializationException { - Objects.requireNonNull(writer, "Writer cannot be null"); - Objects.requireNonNull(format, "RdfFormat cannot be null"); - - FormatSerializer formatSerializer; - - - if (format.equals(RdfFormat.NTRIPLES)) { - formatSerializer = new NTriplesFormat(model, config); - } else if (format.equals( RdfFormat.NQUADS)) { - formatSerializer = new NQuadsFormat(model, config); - } else if (format.equals( RdfFormat.TURTLE)) { - - throw new UnsupportedOperationException("Serialization to " + format.getName() + " format is not yet implemented."); - } else if (format.equals( RdfFormat.JSONLD)) { - throw new UnsupportedOperationException("Serialization to " + format.getName() + " format is not yet implemented."); - } else if (format.equals( RdfFormat.RDFXML)) { - throw new UnsupportedOperationException("Serialization to " + format.getName() + " format is not yet implemented."); - } else { - throw new IllegalArgumentException("Unknown or unsupported RdfFormat: " + format.getName()); - } - - formatSerializer.write(writer); - } -} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/TriGSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/TriGSerializer.java new file mode 100644 index 000000000..d68303ff2 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/TriGSerializer.java @@ -0,0 +1,292 @@ 
+package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Resource; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.impl.common.serialization.base.AbstractGraphSerializer; +import fr.inria.corese.core.next.impl.common.serialization.config.TriGConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; + +import java.io.IOException; +import java.io.Writer; +import java.util.*; + +/** + * Serializes a {@link Model} to TriG format with comprehensive syntax support. + * This class provides a method to write the declarations of a model to a {@link Writer} + * in accordance with the TriG specification, taking into account configuration options. + * + *

This implementation handles:

+ *
    + *
  • Declaration and usage of prefixes for IRIs, including auto-declaration and sorting.
  • + *
  • The 'a' shortcut for 'rdf:type'.
  • + *
  • Escaping of special characters in literals (single-line and multi-lines) and IRIs.
  • + *
  • Basic pretty-printing (indentation, end-of-line dots).
  • + *
  • Management of literal datatype policies (minimal or always typed).
  • + *
  • Serialization of compact triples (semicolons, commas) to group subjects and predicates.
  • + *
  • Serialization of nested blank nodes using the '[]' syntax.
  • + *
  • Serialization of RDF collections (lists) using the '()' syntax.
  • + *
  • Detection and prevention of infinite loops during serialization of nested blank nodes and lists.
  • + *
  • Sorting of subjects and predicates if configured.
  • + *
  • Serialization of named graphs using the `{}` syntax for TriG.
  • + *
+ *

Advanced features such as strict adherence to maximum line length + * and generation of stable blank node identifiers are not fully implemented in this version.

+ */ +public class TriGSerializer extends AbstractGraphSerializer { + + /** + * Constructs a new {@code TriGSerializer} instance with the specified model and default configuration. + * The default configuration is returned by {@link TriGConfig#defaultConfig()}. + * + * @param model the {@link Model} to serialize. Must not be null. + * @throws NullPointerException if the provided model is null. + */ + public TriGSerializer(Model model) { + this(model, TriGConfig.defaultConfig()); + } + + /** + * Constructs a new {@code TriGSerializer} instance with the specified model and custom configuration. + * + * @param model the {@link Model} to serialize. Must not be null. + * @param config the {@link TriGConfig} to use for serialization. Must not be null. + * This config object should be an instance of {@code TriGConfig} or a subclass thereof. + * @throws NullPointerException if the provided model or configuration is null. + */ + public TriGSerializer(Model model, TriGConfig config) { + super(model, config); + Objects.requireNonNull(config, "TriGConfig cannot be null"); + } + + /** + * Returns the format name for error messages and logging. + * + * @return "TriG" + */ + @Override + protected String getFormatName() { + return "TriG"; + } + + /** + * Helper method to safely cast the generic config to TriGConfig. + * This should be called before accessing any methods specific to TriGConfig. + * + * @return The config cast to TriGConfig. + * @throws IllegalStateException if the config is not an instance of TriGConfig. + */ + private TriGConfig getTriGConfig() { + if (!(config instanceof TriGConfig)) { + throw new IllegalStateException("Current serializer configuration is not an instance of TriGConfig. " + + "TriGSerializer requires a TriGConfig instance."); + } + return (TriGConfig) config; + } + + /** + * Implements the main statement writing logic for the TriG format. + * Handles the serialization of named graphs. 
+ * + * @param writer the {@link Writer} to which the statements will be written. + * @throws IOException if an I/O error occurs. + */ + @Override + protected void doWriteStatements(Writer writer) throws IOException { + TriGConfig trigConfig = getTriGConfig(); + + if (trigConfig.includeContext()) { + writeStatementsWithContext(writer); + } else if (trigConfig.useCompactTriples() && trigConfig.groupBySubject()) { + writeOptimizedStatements(writer); + } else { + writeSimpleStatements(writer); + } + } + + /** + * Serializes statements, grouping them by named graph context. + * Statements without a context are considered part of the default graph. + * This method is used when {@code includeContext} is true. + * + * @param writer the {@link Writer} to which the statements will be written. + * @throws IOException if an I/O error occurs. + */ + private void writeStatementsWithContext(Writer writer) throws IOException { + TriGConfig trigConfig = getTriGConfig(); + + Map> byContext = new LinkedHashMap<>(); + model.stream() + .filter(stmt -> !isConsumed(stmt.getSubject())) + .forEach(stmt -> byContext.computeIfAbsent(stmt.getContext(), k -> new ArrayList<>()).add(stmt)); + + for (Map.Entry> contextEntry : byContext.entrySet()) { + Resource context = contextEntry.getKey(); + List statementsInContext = contextEntry.getValue(); + + String initialIndent = ""; + String graphIndent = trigConfig.prettyPrint() ? trigConfig.getIndent() : ""; + + if (context != null) { + if (context.isIRI()) { + writeIRI(writer, (IRI) context); + } else if (context.isBNode()) { + writeValue(writer, context); + } + writer.write(SerializationConstants.SPACE); + writer.write(SerializationConstants.OPEN_BRACE); + writer.write(trigConfig.getLineEnding()); + initialIndent = graphIndent; + } + + Map> bySubject = trigConfig.sortSubjects() ? 
+ new TreeMap<>(Comparator.comparing(Resource::stringValue)) : + new LinkedHashMap<>(); + + statementsInContext.forEach(stmt -> bySubject.computeIfAbsent(stmt.getSubject(), k -> new ArrayList<>()).add(stmt)); + + for (Map.Entry> subjectEntry : bySubject.entrySet()) { + writer.write(initialIndent); + writeValue(writer, subjectEntry.getKey()); + writer.write(SerializationConstants.SPACE); + + Map> byPredicate = trigConfig.sortPredicates() ? + new TreeMap<>(Comparator.comparing(IRI::stringValue)) : + new LinkedHashMap<>(); + + subjectEntry.getValue().forEach(stmt -> byPredicate.computeIfAbsent(stmt.getPredicate(), k -> new ArrayList<>()).add(stmt)); + + boolean firstPredicate = true; + for (Map.Entry> predicateEntry : byPredicate.entrySet()) { + if (!firstPredicate) { + writer.write(SerializationConstants.SEMICOLON); + if (trigConfig.prettyPrint()) { + writer.write(trigConfig.getLineEnding() + initialIndent + trigConfig.getIndent()); + } else { + writer.write(SerializationConstants.SPACE); + } + } + firstPredicate = false; + + writePredicate(writer, predicateEntry.getKey()); + writer.write(SerializationConstants.SPACE); + + boolean firstObject = true; + for (Statement stmt : predicateEntry.getValue()) { + if (!firstObject) { + writer.write(SerializationConstants.COMMA); + if (trigConfig.prettyPrint()) { + writer.write(trigConfig.getLineEnding() + initialIndent + trigConfig.getIndent() + trigConfig.getIndent()); + } else { + writer.write(SerializationConstants.SPACE); + } + } + firstObject = false; + writeValue(writer, stmt.getObject()); + } + } + writer.write(SerializationConstants.SPACE + SerializationConstants.POINT); + writer.write(trigConfig.getLineEnding()); + } + + if (context != null) { + writer.write(SerializationConstants.CLOSE_BRACE); + writer.write(SerializationConstants.SPACE); + writer.write(SerializationConstants.POINT); + writer.write(trigConfig.getLineEnding()); + } + writer.write(trigConfig.getLineEnding()); + } + } + + /** + * Escapes characters in 
an IRI string for TriG output. + * This method primarily focuses on control characters and problematic characters within angle brackets. + * + * @param iri The IRI to escape. + * @return The escaped IRI. + */ + @Override + protected String escapeIRIString(String iri) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < iri.length(); i++) { + char c = iri.charAt(i); + if (c < 0x20 || c == 0x7F || c == SerializationConstants.LT.charAt(0) || c == SerializationConstants.GT.charAt(0) || c == SerializationConstants.QUOTE.charAt(0) || c == '{' || c == '}' || c == '|' || c == '^' || c == '`' || c == SerializationConstants.BACK_SLASH.charAt(0)) { + sb.append(String.format("\\u%04X", (int) c)); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Escapes special characters in TriG string literals. + * Handles backslashes, double quotes, and common control characters. + * Unicode escape sequences are used for unprintable characters if `escapeUnicode` is true. + * + * @param value The string value of the literal to escape. + * @return The escaped string suitable for a TriG literal. 
+ */ + @Override + protected String escapeLiteralString(String value) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + switch (c) { + case '\n': + sb.append(SerializationConstants.BACK_SLASH).append("n"); + break; + case '\r': + sb.append(SerializationConstants.BACK_SLASH).append("r"); + break; + case '\t': + sb.append(SerializationConstants.BACK_SLASH).append("t"); + break; + case '\b': + sb.append(SerializationConstants.BACK_SLASH).append("b"); + break; + case '\f': + sb.append(SerializationConstants.BACK_SLASH).append("f"); + break; + case '"': + sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.QUOTE); + break; + case '\\': + sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.BACK_SLASH); + break; + default: + if (config.escapeUnicode() && (c <= 0x1F || c == 0x7F || (c >= 0x80 && c <= 0xFFFF))) { + sb.append(String.format("\\u%04X", (int) c)); + } else if (Character.isHighSurrogate(c)) { + int codePoint = value.codePointAt(i); + if (Character.isValidCodePoint(codePoint)) { + sb.append(String.format("\\U%08X", codePoint)); + i++; + } else { + sb.append(c); + } + } else { + sb.append(c); + } + } + } + return sb.toString(); + } + + /** + * Escapes special characters in multi-line literals (triple-quotes). + * Primarily used to escape occurrences of `"""` within the literal. + * + * @param value The string value of the literal to escape. + * @return The escaped string suitable for a TriG multi-line literal. 
+ */ + @Override + protected String escapeMultilineLiteralString(String value) { + return value.replace(SerializationConstants.QUOTE + SerializationConstants.QUOTE + SerializationConstants.QUOTE, + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE + + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE + + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/TurtleSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/TurtleSerializer.java new file mode 100644 index 000000000..d1316920f --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/TurtleSerializer.java @@ -0,0 +1,200 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.serialization.base.AbstractGraphSerializer; +import fr.inria.corese.core.next.impl.common.serialization.config.TurtleConfig; +import fr.inria.corese.core.next.impl.common.serialization.config.AbstractTFamilyConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Writer; +import java.util.Objects; + +/** + * Serializes a {@link Model} to Turtle format with comprehensive syntax support. + * This class provides a method to write the declarations of a model to a {@link Writer} + * in accordance with the Turtle specification, taking into account configuration options. + * + *

This implementation handles:

+ *
    + *
  • Declaration and usage of prefixes for IRIs, including auto-declaration and sorting.
  • + *
  • The 'a' shortcut for 'rdf:type'.
  • + *
  • Escaping of special characters in literals (single-line and multi-line) and IRIs.
  • + *
  • Basic pretty-printing (indentation, end-of-line dots).
  • + *
  • Management of literal datatype policies (minimal or always typed).
  • + *
  • Serialization of compact triples (semicolons, commas) to group subjects and predicates.
  • + *
  • Serialization of nested blank nodes using the '[]' syntax.
  • + *
  • Serialization of RDF collections (lists) using the '()' syntax.
  • + *
  • Detection and prevention of infinite loops during serialization of nested blank nodes and lists.
  • + *
  • Sorting of subjects and predicates if configured.
  • + *
+ *

Advanced features such as strict adherence to maximum line length + * and generation of stable blank node identifiers are not fully implemented in this version.

+ */ +public class TurtleSerializer extends AbstractGraphSerializer { + + private static final Logger logger = LoggerFactory.getLogger(TurtleSerializer.class); + + /** + * Constructs a new {@code TurtleSerializer} instance with the specified model and default configuration. + * The default configuration is returned by {@link TurtleConfig#defaultConfig()}. + * + * @param model the {@link Model} to serialize. Must not be null. + * @throws NullPointerException if the provided model is null. + */ + public TurtleSerializer(Model model) { + this(model, TurtleConfig.defaultConfig()); + } + + /** + * Constructs a new {@code TurtleSerializer} instance with the specified model and custom configuration. + * + * @param model the {@link Model} to serialize. Must not be null. + * @param config the {@link TurtleConfig} to use for serialization. Must not be null. + * @throws NullPointerException if the provided model or configuration is null. + */ + public TurtleSerializer(Model model, TurtleConfig config) { + super(model, config); + Objects.requireNonNull(config, "TurtleConfig cannot be null"); + } + + /** + * Returns the format name for error messages and logging. + * + * @return "Turtle" + */ + @Override + protected String getFormatName() { + return "Turtle"; + } + + /** + * Implements the main statement writing logic for the Turtle format. + * Turtle does not support named graphs, so this method handles the serialization + * of simple or optimized triples. + * + * @param writer the {@link Writer} to which the statements will be written. + * @throws IOException if an I/O error occurs. + */ + @Override + protected void doWriteStatements(Writer writer) throws IOException { + AbstractTFamilyConfig tFamilyConfig = (AbstractTFamilyConfig) config; + + if (tFamilyConfig.useCompactTriples() && tFamilyConfig.groupBySubject()) { + writeOptimizedStatements(writer); + } else { + writeSimpleStatements(writer); + } + } + + /** + * Escapes characters in an IRI string for Turtle output. 
+ * This method primarily focuses on control characters and problematic characters within angle brackets. + * + * @param iri The IRI to escape. + * @return The escaped IRI. + */ + @Override + protected String escapeIRIString(String iri) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < iri.length(); i++) { + char c = iri.charAt(i); + if (c < 0x20 || c == 0x7F || c == SerializationConstants.LT.charAt(0) || c == SerializationConstants.GT.charAt(0) || c == SerializationConstants.QUOTE.charAt(0) || c == '{' || c == '}' || c == '|' || c == '^' || c == '`' || c == SerializationConstants.BACK_SLASH.charAt(0)) { + sb.append(String.format("\\u%04X", (int) c)); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Escapes special characters in Turtle string literals. + * Handles backslashes, double quotes, and common control characters. + * Unicode escape sequences are used for unprintable characters if `escapeUnicode` is true. + * + * @param value The string value of the literal to escape. + * @return The escaped string suitable for a Turtle literal. 
+ */ + @Override + protected String escapeLiteralString(String value) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + switch (c) { + case '\n': + sb.append(SerializationConstants.BACK_SLASH).append("n"); + break; + case '\r': + sb.append(SerializationConstants.BACK_SLASH).append("r"); + break; + case '\t': + sb.append(SerializationConstants.BACK_SLASH).append("t"); + break; + case '\b': + sb.append(SerializationConstants.BACK_SLASH).append("b"); + break; + case '\f': + sb.append(SerializationConstants.BACK_SLASH).append("f"); + break; + case '"': + sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.QUOTE); + break; + case '\\': + sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.BACK_SLASH); + break; + default: + if (config.escapeUnicode() && (c <= 0x1F || c == 0x7F || (c >= 0x80 && c <= 0xFFFF))) { + sb.append(String.format("\\u%04X", (int) c)); + } else if (Character.isHighSurrogate(c)) { + int codePoint = value.codePointAt(i); + if (Character.isValidCodePoint(codePoint)) { + sb.append(String.format("\\U%08X", codePoint)); + i++; + } else { + sb.append(c); + } + } else { + sb.append(c); + } + } + } + return sb.toString(); + } + + /** + * Escapes special characters in multi-line literals (triple-quotes). + * Primarily used to escape occurrences of `"""` within the literal. + * + * @param value The string value of the literal to escape. + * @return The escaped string suitable for a Turtle multi-line literal. 
+ */ + @Override + protected String escapeMultilineLiteralString(String value) { + return value.replace(SerializationConstants.QUOTE + SerializationConstants.QUOTE + SerializationConstants.QUOTE, + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE + + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE + + SerializationConstants.BACK_SLASH + SerializationConstants.QUOTE); + } + + /** + * Logs a warning if a context (named graph) is present in a statement, + * as the Turtle format does not support named graphs. + * + * @param stmt The statement to check. + */ + private void logContextWarning(Statement stmt) { + if (stmt.getContext() != null && logger.isWarnEnabled()) { + logger.warn("Turtle format does not support named graphs. Context '{}' will be ignored for statement: {}", + stmt.getContext().stringValue(), stmt); + } + } + + @Override + protected void writeStatement(Writer writer, Statement stmt) throws IOException { + super.writeStatement(writer, stmt); + logContextWarning(stmt); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/XmlSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/XmlSerializer.java new file mode 100644 index 000000000..b79e9d3d1 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/XmlSerializer.java @@ -0,0 +1,570 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.serialization.config.LiteralDatatypePolicyEnum; +import fr.inria.corese.core.next.impl.common.serialization.config.PrefixOrderingEnum; +import fr.inria.corese.core.next.impl.common.serialization.config.XmlConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.io.BufferedWriter; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.util.*; +import java.util.stream.Collectors; + +/** + * Serializes a {@link Model} to RDF/XML format. + * This class provides a method to write the statements of a model to a {@link Writer} + * in accordance with the RDF/XML specification, considering configuration options. + * + *

<p>This implementation handles:</p>
+ * <ul>
+ * <li>Declaration and usage of XML namespaces for IRIs.</li>
+ * <li>Basic pretty-printing (indentation).</li>
+ * <li>Serialization of triples as rdf:Description elements with properties.</li>
+ * <li>Serialization of blank nodes using rdf:nodeID or nested elements.</li>
+ * <li>Serialization of literals with language tags or datatypes.</li>
+ * </ul>
+ * <p>Advanced features such as handling XML schemata, specific RDF/XML graph structures (e.g., rdf:Bag, rdf:Seq, rdf:Alt),
+ * and full blank node syntax optimization are simplified in this version.</p>

+ */ +public class XmlSerializer implements RdfSerializer { + + private static final Logger logger = LoggerFactory.getLogger(XmlSerializer.class); + + private final Model model; + private final XmlConfig config; + private final Map iriToPrefixMapping; + private final Map prefixToIriMapping; + private final Map blankNodeIds; + private int blankNodeCounter = 0; + private List cachedStatements; + + /** + * Constructs a new {@code XmlSerializer} instance with the specified model and default configuration. + * The default configuration is obtained from {@link XmlConfig#defaultConfig()}. + * + * @param model the {@link Model} to serialize. Must not be null. + * @throws NullPointerException if the provided model is null. + */ + public XmlSerializer(Model model) { + this(model, XmlConfig.defaultConfig()); + } + + /** + * Constructs a new {@code XmlSerializer} instance with the specified model and custom configuration. + * + * @param model the {@link Model} to serialize. Must not be null. + * @param config the {@link XmlConfig} to use for serialization. Must not be null. + * @throws NullPointerException if the provided model or configuration is null. + */ + public XmlSerializer(Model model, XmlConfig config) { + this.model = Objects.requireNonNull(model, "Model cannot be null"); + this.config = Objects.requireNonNull(config, "Configuration cannot be null"); + this.iriToPrefixMapping = new HashMap<>(); + this.prefixToIriMapping = new HashMap<>(); + this.blankNodeIds = new HashMap<>(); + initializePrefixes(); + } + + /** + * Initializes prefix mappings by adding custom prefixes from the configuration. + * The custom prefixes map in XmlConfig is expected to be {prefix: namespaceURI}. + */ + private void initializePrefixes() { + if (config.usePrefixes()) { + for (Map.Entry entry : config.getCustomPrefixes().entrySet()) { + addPrefixMapping(entry.getValue(), entry.getKey()); + } + } + } + + /** + * Writes the model to the given writer in RDF/XML format. 
+ * + * @param writer the {@link Writer} to which the RDF/XML output will be written. + * @throws SerializationException if an I/O error occurs during writing or if invalid data is encountered. + */ + @Override + public void write(Writer writer) throws SerializationException { + try (Writer bufferedWriter = new BufferedWriter(writer)) { + this.cachedStatements = model.stream().toList(); + + writeXmlDeclaration(bufferedWriter); + writeRdfRootElement(bufferedWriter); + } catch (IOException e) { + throw new SerializationException("Failed to write RDF/XML output", "RDF/XML", e); + } catch (IllegalArgumentException e) { + throw new SerializationException("Invalid data for RDF/XML format: " + e.getMessage(), "RDF/XML", e); + } + } + + /** + * Writes the XML declaration at the beginning of the document. + * + * @param writer the {@link Writer} to which the declaration will be written. + * @throws IOException if an I/O error occurs. + */ + private void writeXmlDeclaration(Writer writer) throws IOException { + writer.write(SerializationConstants.XML_DECLARATION_START); + writer.write(config.getLineEnding()); + } + + /** + * Writes the root `` element and its contents. + * This includes namespace declarations and all statements. + * + * @param writer the {@link Writer} to which the root element will be written. + * @throws IOException if an I/O error occurs. 
+ */ + private void writeRdfRootElement(Writer writer) throws IOException { + if (config.usePrefixes() && config.autoDeclarePrefixes()) { + collectUsedNamespaces(); + } + + writer.write(SerializationConstants.RDF_ROOT_START); + writeNamespaceAttributes(writer); + writer.write(">"); + writer.write(config.getLineEnding()); + + Map> statementsBySubject = cachedStatements.stream() + .collect(Collectors.groupingBy(Statement::getSubject)); + + + List sortedSubjects = new ArrayList<>(statementsBySubject.keySet()); + if (config.sortSubjects()) { + Collections.sort(sortedSubjects, Comparator.comparing(Value::stringValue)); + } + + for (Resource subject : sortedSubjects) { + writeDescriptionElement(writer, subject, statementsBySubject.get(subject), config.getIndent()); + } + + writer.write(SerializationConstants.RDF_ROOT_END); + writer.write(config.getLineEnding()); + } + + /** + * Writes the namespace attributes (`xmlns:prefix="uri"`) for the `` element. + * + * @param writer the {@link Writer} to which attributes will be written. + * @throws IOException if an I/O error occurs. + */ + private void writeNamespaceAttributes(Writer writer) throws IOException { + if (!iriToPrefixMapping.containsKey(SerializationConstants.RDF_NS)) { + addPrefixMapping(SerializationConstants.RDF_NS, "rdf"); + } + + List prefixes = new ArrayList<>(prefixToIriMapping.keySet()); + if (config.getPrefixOrdering() == PrefixOrderingEnum.ALPHABETICAL) { + Collections.sort(prefixes); + } + + for (String prefix : prefixes) { + String namespaceURI = prefixToIriMapping.get(prefix); + writer.write(String.format(" %s%s=\"%s\"", SerializationConstants.XMLNS_PREFIX, prefix, escapeXmlAttribute(namespaceURI))); + } + } + + /** + * Collects all namespaces used in the model (subjects, predicates, objects, contexts) + * and attempts to assign prefixes if auto-declaration is enabled and they are not already mapped. 
+ */ + private void collectUsedNamespaces() { + Set namespaces = this.cachedStatements.stream() + .flatMap(stmt -> Arrays.asList( + stmt.getSubject(), + stmt.getPredicate(), + stmt.getObject() + ).stream()) + .filter(Value::isIRI) + .map(v -> getNamespace(v.stringValue())) + .collect(Collectors.toSet()); + + + namespaces.forEach(namespace -> { + if (!iriToPrefixMapping.containsKey(namespace)) { + String prefix = getSuggestedPrefix(namespace); + if (prefix != null) { + addPrefixMapping(namespace, prefix); + } + } + }); + } + + + /** + * Retrieves the prefixed name for a given IRI string. + * This method now prioritizes the longest matching namespace to ensure correct prefix application. + * + * @param iriString The full IRI. + * @return The prefixed name (e.g., "foaf:name") or null if no suitable prefix is found. + */ + private String getPrefixedNameInternal(String iriString) { + String longestMatchingNamespace = null; + String correspondingPrefix = null; + int longestMatchLength = -1; + + for (Map.Entry entry : iriToPrefixMapping.entrySet()) { + String namespace = entry.getKey(); + String prefix = entry.getValue(); + + if (iriString.startsWith(namespace)) { + if (namespace.length() > longestMatchLength) { + longestMatchLength = namespace.length(); + longestMatchingNamespace = namespace; + correspondingPrefix = prefix; + } + } + } + + if (longestMatchingNamespace != null) { + String localName = iriString.substring(longestMatchingNamespace.length()); + + if (localName.isEmpty()) { + return correspondingPrefix + SerializationConstants.COLON; + } + return correspondingPrefix + SerializationConstants.COLON + localName; + } + return null; + } + + /** + * Adds a prefix-namespace URI mapping to the internal mappings. + * Handles potential conflicts to ensure uniqueness. + * + * @param namespaceURI The namespace URI. + * @param prefix The associated prefix. 
+ */ + private void addPrefixMapping(String namespaceURI, String prefix) { + if (iriToPrefixMapping.containsKey(namespaceURI)) { + if (iriToPrefixMapping.get(namespaceURI).equals(prefix)) { + return; + } else { + + if (logger.isWarnEnabled()) { + logger.warn("Namespace URI '{}' is already mapped to prefix '{}'. Cannot map to new prefix '{}'. " + + "Existing mapping for this namespace will be retained.", + namespaceURI, iriToPrefixMapping.get(namespaceURI), prefix); + } + return; + } + } + + String effectivePrefix = prefix; + if (prefixToIriMapping.containsKey(prefix)) { + if (!prefixToIriMapping.get(prefix).equals(namespaceURI)) { + if (logger.isWarnEnabled()) { + logger.warn("Prefix '{}' is already mapped to namespace '{}'. Cannot map to new namespace '{}'. " + + "A new unique prefix will be generated for '{}'.", + prefix, prefixToIriMapping.get(prefix), namespaceURI, namespaceURI); + } + effectivePrefix = generateUniquePrefix(prefix); + } + } + + iriToPrefixMapping.put(namespaceURI, effectivePrefix); + prefixToIriMapping.put(effectivePrefix, namespaceURI); + } + + /** + * Generates a unique prefix based on a given base string, ensuring it's not already in use. + * This method appends numbers to the base prefix until a unique one is found. + * + * @param basePrefix The desired base prefix (e.g., "foaf"). + * @return A unique prefix (e.g., "foaf", "foaf1", "foaf2"). + */ + private String generateUniquePrefix(String basePrefix) { + String candidate = basePrefix; + int i = 0; + while (prefixToIriMapping.containsKey(candidate)) { + candidate = basePrefix + (++i); + } + return candidate; + } + + /** + * Writes an `` element for a given subject. + * This element contains all properties (predicates and objects) for that subject. + * + * @param writer the {@link Writer} to which the element will be written. + * @param subject the {@link Resource} representing the subject. + * @param statements the list of statements with this subject. 
+ * @param currentIndent the current indentation string. + * @throws IOException if an I/O error occurs. + */ + private void writeDescriptionElement(Writer writer, Resource subject, List statements, String currentIndent) throws IOException { + String nextIndent = currentIndent + config.getIndent(); + + writer.write(currentIndent); + if (subject.isIRI()) { + writer.write(String.format("%s %s=\"%s\">", SerializationConstants.RDF_DESCRIPTION_START, SerializationConstants.RDF_ABOUT_ATTRIBUTE, escapeXmlAttribute(subject.stringValue()))); + } else if (subject.isBNode()) { + writer.write(String.format("%s %s=\"%s\">", SerializationConstants.RDF_DESCRIPTION_START, SerializationConstants.RDF_NODEID_ATTRIBUTE, getBlankNodeId(subject))); + } + writer.write(config.getLineEnding()); + + Map> statementsByPredicate = statements.stream() + .collect(Collectors.groupingBy(Statement::getPredicate)); + + List sortedPredicates = new ArrayList<>(statementsByPredicate.keySet()); + if (config.sortPredicates()) { + Collections.sort(sortedPredicates, Comparator.comparing(Value::stringValue)); + } + + for (IRI predicate : sortedPredicates) { + for (Statement stmt : statementsByPredicate.get(predicate)) { + writePropertyElement(writer, stmt.getPredicate(), stmt.getObject(), nextIndent); + } + } + + writer.write(currentIndent); + writer.write(SerializationConstants.RDF_DESCRIPTION_END); + writer.write(config.getLineEnding()); + } + + /** + * Writes a property element (e.g., `objectValue`) for a triple. + * + * @param writer the {@link Writer} to which the element will be written. + * @param predicate the {@link IRI} representing the predicate. + * @param object the {@link Value} representing the object. + * @param currentIndent the current indentation string. + * @throws IOException if an I/O error occurs. 
+ */ + private void writePropertyElement(Writer writer, IRI predicate, Value object, String currentIndent) throws IOException { + String predicateString = predicate.stringValue(); + String prefixedPredicateName = getPrefixedNameInternal(predicateString); + String elementName; + + if (prefixedPredicateName != null && !prefixedPredicateName.endsWith(SerializationConstants.COLON)) { + elementName = prefixedPredicateName; + } else { + elementName = predicateString; + logger.warn("Predicate IRI '{}' cannot be expressed as a valid prefixed element name. Using full IRI as element name in RDF/XML.", predicateString); + } + + writer.write(currentIndent); + writer.write(String.format("<%s", elementName)); + + if (object.isIRI()) { + writer.write(String.format(" %s=\"%s\"/>", SerializationConstants.RDF_RESOURCE_ATTRIBUTE, escapeXmlAttribute(object.stringValue()))); + writer.write(config.getLineEnding()); + } else if (object.isBNode()) { + writer.write(String.format(" %s=\"%s\"/>", SerializationConstants.RDF_NODEID_ATTRIBUTE, getBlankNodeId((Resource) object))); + writer.write(config.getLineEnding()); + } else if (object.isLiteral()) { + Literal literal = (Literal) object; + + literal.getLanguage().ifPresent(lang -> { + try { + writer.write(String.format(" %s=\"%s\">", SerializationConstants.XML_LANG_ATTRIBUTE, escapeXmlAttribute(lang))); + } catch (IOException e) { + throw new UncheckedIOException("Failed to write xml:lang attribute", e); + } + }); + + if (!literal.getLanguage().isPresent() && shouldWriteDatatype(literal)) { + String datatypeUri = literal.getDatatype().stringValue(); + String prefixedDatatype = getPrefixedNameInternal(datatypeUri); + writer.write(String.format(" %s=\"%s\">", SerializationConstants.RDF_DATATYPE_ATTRIBUTE, escapeXmlAttribute(prefixedDatatype != null ? 
prefixedDatatype : datatypeUri))); + } else if (!literal.getLanguage().isPresent()) { + writer.write(">"); + } + + if (config.useMultilineLiterals() && (literal.stringValue().contains(SerializationConstants.LINE_FEED) || literal.stringValue().contains(SerializationConstants.CARRIAGE_RETURN))) { + + writer.write(escapeXmlContent(literal.stringValue())); + } else { + writer.write(escapeXmlContent(literal.stringValue())); + } + + writer.write(String.format("", elementName)); + writer.write(config.getLineEnding()); + } else { + throw new IllegalArgumentException("Unsupported value type for RDF/XML serialization: " + object.getClass().getName()); + } + } + + /** + * Retrieves or generates a stable blank node ID. + * + * @param bNode the blank node. + * @return a stable ID for the blank node. + */ + private String getBlankNodeId(Resource bNode) { + return blankNodeIds.computeIfAbsent(bNode, k -> { + if (config.stableBlankNodeIds()) { + return "b" + (blankNodeCounter++); + } else { + return bNode.stringValue().substring(2); + } + }); + } + + /** + * Determines if a literal's datatype should be written based on the configuration. + * + * @param literal the {@link Literal} to check. + * @return {@code true} if the datatype should be written, {@code false} otherwise. + */ + private boolean shouldWriteDatatype(Literal literal) { + if (literal.getLanguage().isPresent()) { + return false; + } + + IRI datatype = literal.getDatatype(); + if (datatype == null) { + return false; + } + + return config.getLiteralDatatypePolicy() == LiteralDatatypePolicyEnum.ALWAYS_TYPED || + (!datatype.stringValue().equals(SerializationConstants.XSD_STRING) && + config.getLiteralDatatypePolicy() == LiteralDatatypePolicyEnum.MINIMAL); + } + + + /** + * Extracts the namespace URI part from an IRI string. + * This is a common heuristic for RDF IRIs. + * + * @param iriString The full IRI. + * @return The namespace URI part. 
+ */ + private String getNamespace(String iriString) { + int hashIdx = iriString.lastIndexOf(SerializationConstants.HASH); + int slashIdx = iriString.lastIndexOf(SerializationConstants.SLASH); + + if (hashIdx > -1) { + return iriString.substring(0, hashIdx + 1); + } else if (slashIdx > -1 && slashIdx < iriString.length() - 1) { + int dotIdx = iriString.lastIndexOf(SerializationConstants.POINT); + if (dotIdx > slashIdx) { + return iriString.substring(0, slashIdx + 1); + } + return iriString.substring(0, slashIdx + 1); + } + return iriString; + } + + /** + * Suggests a prefix for a given namespace URI. + * Attempts to derive a meaningful prefix or generates a unique one. + * + * @param namespace The namespace URI. + * @return A suggested prefix, or null if suggestion is not possible. + */ + private String getSuggestedPrefix(String namespace) { + + if (namespace.equals(SerializationConstants.RDF_NS)) return "rdf"; + if (namespace.equals(SerializationConstants.RDFS_NS)) return "rdfs"; + if (namespace.equals(SerializationConstants.XSD_NS)) return "xsd"; + if (namespace.equals(SerializationConstants.OWL_NS)) return "owl"; + if (namespace.equals(SerializationConstants.FOAF_NS)) return "foaf"; + + + String base = namespace; + if (base.endsWith(SerializationConstants.HASH) || base.endsWith(SerializationConstants.SLASH)) { + base = base.substring(0, base.length() - 1); + } + int lastSlash = base.lastIndexOf(SerializationConstants.SLASH); + int lastHash = base.lastIndexOf(SerializationConstants.HASH); + int lastSegmentStart = Math.max(lastSlash, lastHash); + if (lastSegmentStart != -1) { + base = base.substring(lastSegmentStart + 1); + } + + if (base.isEmpty()) { + try { + java.net.URI uri = new java.net.URI(namespace); + base = uri.getHost(); + if (base != null) { + base = base.replace(SerializationConstants.POINT, SerializationConstants.EMPTY_STRING); + } else { + base = "p"; + } + } catch (java.net.URISyntaxException e) { + logger.warn("Malformed URI encountered while 
suggesting prefix: {}", namespace, e); + base = "p"; + } + } + + base = base.replaceAll("[^a-zA-Z0-9]", SerializationConstants.EMPTY_STRING).toLowerCase(); + if (base.isEmpty()) base = "p"; + + String candidate = base; + int i = 0; + while (prefixToIriMapping.containsKey(candidate) && !prefixToIriMapping.get(candidate).equals(namespace)) { + candidate = base + (++i); + } + return candidate; + } + + /** + * Escapes a string for use as an XML attribute value. + * Replaces characters like '&', '<', '>', '"', "'" with their XML entity equivalents. + * + * @param value The string to escape. + * @return The escaped string. + */ + private String escapeXmlAttribute(String value) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + switch (c) { + case '&': + sb.append(SerializationConstants.AMP_ENTITY); + break; + case '<': + sb.append(SerializationConstants.LT_ENTITY); + break; + case '>': + sb.append(SerializationConstants.GT_ENTITY); + break; + case '"': + sb.append(SerializationConstants.QUOT_ENTITY); + break; + case '\'': + sb.append(SerializationConstants.APOS_ENTITY); + break; + default: + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Escapes a string for use as XML element content. + * Replaces characters like '&', '<', '>' with their XML entity equivalents. + * + * @param value The string to escape. + * @return The escaped string. 
+ */ + private String escapeXmlContent(String value) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + switch (c) { + case '&': + sb.append(SerializationConstants.AMP_ENTITY); + break; + case '<': + sb.append(SerializationConstants.LT_ENTITY); + break; + case '>': + sb.append(SerializationConstants.GT_ENTITY); + break; + default: + sb.append(c); + } + } + return sb.toString(); + } + + +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/base/AbstractGraphSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/base/AbstractGraphSerializer.java new file mode 100644 index 000000000..2c091dd79 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/base/AbstractGraphSerializer.java @@ -0,0 +1,944 @@ +package fr.inria.corese.core.next.impl.common.serialization.base; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.RDF; +import fr.inria.corese.core.next.impl.common.serialization.config.*; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.*; +import java.util.stream.Collectors; + +/** + * Abstract base class for RDF serializers based on TriG and Turtle syntax. + * This class contains the common logic for serializing RDF models + * into formats that support prefixes, nested blank nodes, + * RDF collections, and compact triple serialization. + * Subclasses must implement format-specific methods + * (context handling, specific escaping rules). + * + *

<p>Note: Many features related to compact syntax, pretty-printing, and advanced
+ * prefix management are specific to the Turtle and TriG formats and require the
+ * provided {@link AbstractSerializerConfig} to be an instance of
+ * {@link AbstractTFamilyConfig} at runtime. An {@link IllegalStateException}
+ * will be thrown if an incompatible configuration is used for such features.</p>

+ */ +public abstract class AbstractGraphSerializer implements RdfSerializer { + + /** + * Logger for this class, used to log potential issues or information during serialization. + */ + protected static final Logger logger = LoggerFactory.getLogger(AbstractGraphSerializer.class); + + protected final Model model; + protected final AbstractSerializerConfig config; + protected final Map iriToPrefixMapping; + protected final Map prefixToIriMapping; + protected final Set consumedBlankNodes; + protected final Set currentlyWritingBlankNodes; + + /** + * Constructs a new abstract TriG/Turtle serializer instance. + * + * @param model the {@link Model} to serialize. Must not be null. + * @param config the {@link AbstractSerializerConfig} to use for serialization. Must not be null. + * @throws NullPointerException if the provided model or configuration is null. + */ + protected AbstractGraphSerializer(Model model, AbstractSerializerConfig config) { + this.model = Objects.requireNonNull(model, "The model cannot be null"); + this.config = Objects.requireNonNull(config, "The configuration cannot be null"); + this.iriToPrefixMapping = new HashMap<>(); + this.prefixToIriMapping = new HashMap<>(); + this.consumedBlankNodes = new HashSet<>(); + this.currentlyWritingBlankNodes = new HashSet<>(); + initializePrefixes(); + } + + /** + * Helper method to safely cast the generic config to AbstractTFamilyConfig. + * This should be called before accessing any methods specific to AbstractTFamilyConfig. + * + * @return The config cast to AbstractTFamilyConfig. + * @throws IllegalStateException if the config is not an instance of AbstractTFamilyConfig. + */ + private AbstractTFamilyConfig getTFamilyConfig() { + if (!(config instanceof AbstractTFamilyConfig)) { + throw new IllegalStateException("Current serializer configuration is not an instance of AbstractTFamilyConfig. 
" + + "Features like prefixes, compact syntax, and pretty-printing are only available for T-Family formats."); + } + return (AbstractTFamilyConfig) config; + } + + /** + * Initializes prefix mappings by adding custom prefixes from the configuration. + */ + private void initializePrefixes() { + if (config instanceof AbstractTFamilyConfig && getTFamilyConfig().usePrefixes()) { + for (Map.Entry entry : getTFamilyConfig().getCustomPrefixes().entrySet()) { + addPrefixMapping(entry.getValue(), entry.getKey()); + } + } + } + + /** + * Writes the model to the given writer in the specific format. + * + * @param writer the {@link Writer} to which the output will be written. + * @throws SerializationException if an I/O error occurs during writing or if invalid data is encountered. + */ + @Override + public void write(Writer writer) throws SerializationException { + try (Writer bufferedWriter = new BufferedWriter(writer)) { + writeHeader(bufferedWriter); + + Set precomputedInlineBlankNodes = precomputeInlineBlankNodesAndLists(); + consumedBlankNodes.addAll(precomputedInlineBlankNodes); + + doWriteStatements(bufferedWriter); + + bufferedWriter.flush(); + } catch (IOException e) { + throw new SerializationException("Failed to write to stream for format " + getFormatName(), getFormatName(), e); + } catch (IllegalArgumentException e) { + throw new SerializationException("Invalid data for format " + getFormatName() + ": " + e.getMessage(), getFormatName(), e); + } + } + + /** + * Returns the format name for error messages and logging. + * + * @return the format name (e.g., "TriG", "Turtle"). + */ + protected abstract String getFormatName(); + + /** + * Abstract method for the main statement writing, + * to be implemented by subclasses to handle format-specific details. + * + * @param writer the {@link Writer} to which the statements will be written. + * @throws IOException if an I/O error occurs. + * @throws SerializationException if a format-specific serialization error occurs. 
+ */ + protected abstract void doWriteStatements(Writer writer) throws IOException, SerializationException; + + /** + * Writes the document header, including base IRI declaration and prefixes. + * + * @param writer the {@link Writer} to which the header will be written. + * @throws IOException if an I/O error occurs. + */ + protected void writeHeader(Writer writer) throws IOException { + if (config.getBaseIRI() != null) { + writer.write(String.format("@base <%s> .%s", + config.getBaseIRI(), + config.getLineEnding())); + } + + if (config instanceof AbstractTFamilyConfig + && getTFamilyConfig().usePrefixes() + && getTFamilyConfig().autoDeclarePrefixes()) { + collectUsedNamespaces(); + } + + writePrefixDeclarations(writer); + } + + /** + * Collects all namespaces used in the model and attempts to assign prefixes to them + * if auto-declaration is enabled and they are not already mapped. + */ + protected void collectUsedNamespaces() { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + Set namespaces = model.stream() + .flatMap(stmt -> { + List values = new ArrayList<>(Arrays.asList( + stmt.getSubject(), + stmt.getPredicate(), + stmt.getObject() + )); + if (stmt.getContext() != null) { + values.add(stmt.getContext()); + } + return values.stream(); + }) + .filter(Objects::nonNull) + .filter(Value::isIRI) + .map(v -> getNamespace(v.stringValue())) + .collect(Collectors.toSet()); + + namespaces.forEach(namespace -> { + if (!iriToPrefixMapping.containsKey(namespace)) { + String prefix = getSuggestedPrefix(namespace); + if (prefix != null) { + addPrefixMapping(namespace, prefix); + } + } + }); + } + + /** + * Writes prefix declarations to the writer, sorted if configured. + * + * @param writer the {@link Writer} to which prefixes will be written. + * @throws IOException if an I/O error occurs. 
+ */ + protected void writePrefixDeclarations(Writer writer) throws IOException { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + List prefixes = new ArrayList<>(prefixToIriMapping.keySet()); + + if (tFamilyConfig.getPrefixOrdering() == PrefixOrderingEnum.ALPHABETICAL) { + Collections.sort(prefixes); + } + + for (String prefix : prefixes) { + writer.write(String.format("@prefix %s: <%s> .%s", + prefix, + prefixToIriMapping.get(prefix), + config.getLineEnding())); + } + + if (!prefixes.isEmpty() || config.getBaseIRI() != null) { + writer.write(config.getLineEnding()); + } + } + + /** + * Serializes the model's statements in a simple manner, one per line, without grouping. + * Triples already "consumed" by inline serialization are ignored. + * + * @param writer the {@link Writer} to which the statements will be written. + * @throws IOException if an I/O error occurs. + */ + protected void writeSimpleStatements(Writer writer) throws IOException { + for (Statement stmt : model) { + if (!isConsumed(stmt.getSubject())) { + writeStatement(writer, stmt); + writer.write(config.getLineEnding()); + } + } + } + + /** + * Writes a single {@link Statement} to the writer. + * This method is designed to be called by statement writing methods (simple or optimized). + * + * @param writer the {@link Writer} to which the statement will be written. + * @param stmt the {@link Statement} to write. + * @throws IOException if an I/O error occurs. + */ + protected void writeStatement(Writer writer, Statement stmt) throws IOException { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + String indent = tFamilyConfig.prettyPrint() ? 
tFamilyConfig.getIndent() : SerializationConstants.EMPTY_STRING; + writer.write(indent); + + // Subject + writeValue(writer, stmt.getSubject()); + writer.write(SerializationConstants.SPACE); + + // Predicate + writePredicate(writer, stmt.getPredicate()); + writer.write(SerializationConstants.SPACE); + + // Object + writeValue(writer, stmt.getObject()); + + writer.write(SerializationConstants.SPACE); + writer.write(SerializationConstants.POINT); + } + + /** + * Writes the predicate to the writer, using the 'a' shortcut if configured and applicable. + * + * @param writer the {@link Writer} to which the predicate will be written. + * @param predicate the {@link Value} representing the predicate. + * @throws IOException if an I/O error occurs. + */ + protected void writePredicate(Writer writer, Value predicate) throws IOException { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + if (tFamilyConfig.useRdfTypeShortcut() && predicate.stringValue().equals(SerializationConstants.RDF_TYPE)) { + writer.write(SerializationConstants.RDF_TYPE_SHORTCUT); + } else { + writeValue(writer, predicate); + } + } + + /** + * Writes a single {@link Value} to the writer. + * Handles literals, blank nodes, and IRIs. + * This is the entry point for serializing nested blank nodes and lists. + * + * @param writer the {@link Writer} to which the value will be written. + * @param value the {@link Value} to write. + * @throws IOException if an I/O error occurs. + * @throws IllegalArgumentException if the provided value is null or of an unsupported type. 
+ */ + protected void writeValue(Writer writer, Value value) throws IOException { + validateValue(value); + + if (value.isIRI()) { + writeIRI(writer, (IRI) value); + } else if (value.isLiteral()) { + writeLiteral(writer, (Literal) value); + } else if (value.isBNode()) { + Resource bNode = (Resource) value; + + if (currentlyWritingBlankNodes.contains(bNode)) { + writer.write(SerializationConstants.BNODE_PREFIX + bNode.stringValue()); + return; + } + + currentlyWritingBlankNodes.add(bNode); + + boolean handled = false; + if (config instanceof AbstractTFamilyConfig && getTFamilyConfig().useCollections() && bNode.isBNode()) { + handled = writeRDFList(writer, bNode); + } + + if (!handled && config instanceof AbstractTFamilyConfig && getTFamilyConfig().getBlankNodeStyle() == BlankNodeStyleEnum.ANONYMOUS && bNode.isBNode()) { // getBlankNodeStyle is on AbstractTFamilyConfig + List properties = model.stream() + .filter(stmt -> stmt.getSubject().equals(bNode)) + .toList(); + + if (!properties.isEmpty()) { + writeInlineBlankNode(writer, properties); + handled = true; + } + } + + if (!handled) { + writer.write(SerializationConstants.BNODE_PREFIX + bNode.stringValue()); + } + + currentlyWritingBlankNodes.remove(bNode); + } else { + throw new IllegalArgumentException("Unsupported value type for " + getFormatName() + " serialization: " + value.getClass().getName()); + } + } + + /** + * Writes an {@link IRI} to the writer. + * Attempts to use a prefixed name if possible, otherwise writes the full IRI in angle brackets. + * + * @param writer the {@link Writer} to which the IRI will be written. + * @param iri the {@link IRI} to write. + * @throws IOException if an I/O error occurs. 
+ */ + protected void writeIRI(Writer writer, IRI iri) throws IOException { + if (config.isStrictMode() && config.validateURIs()) { + validateIRI(iri); + } + + String prefixed = null; + if (config instanceof AbstractTFamilyConfig && getTFamilyConfig().usePrefixes()) { + prefixed = getPrefixedName(iri.stringValue()); + } + + + if (prefixed != null) { + writer.write(prefixed); + } else { + writer.write(String.format("<%s>", escapeIRIString(iri.stringValue()))); + } + } + + /** + * Writes a {@link Literal} to the writer. + * Applies escaping and datatype/language tag rules based on configuration. + * + * @param writer the {@link Writer} to which the literal will be written. + * @param literal the {@link Literal} to write. + * @throws IOException if an I/O error occurs. + */ + protected void writeLiteral(Writer writer, Literal literal) throws IOException { + String value = literal.stringValue(); + + boolean useTripleQuotes = false; + if (config instanceof AbstractTFamilyConfig) { + useTripleQuotes = getTFamilyConfig().shouldUseTripleQuotes(value); + } + + + if (useTripleQuotes) { + writer.write(String.format("\"\"\"%s\"\"\"", escapeMultilineLiteralString(value))); + } else { + writer.write(String.format("\"%s\"", escapeLiteralString(value))); + } + + literal.getLanguage().ifPresent(lang -> { + try { + writer.write(SerializationConstants.AT_SIGN + lang); + } catch (IOException e) { + throw new UncheckedIOException("Error writing language tag to stream", e); + } + }); + + writeDatatype(writer, literal); + } + + /** + * Writes the datatype of a literal if the configured datatype policy allows it. + * + * @param writer the {@link Writer} to which the datatype will be written. + * @param literal the {@link Literal} whose datatype is to be written. + * @throws IOException if an I/O error occurs. 
+ */ + protected void writeDatatype(Writer writer, Literal literal) throws IOException { + IRI datatype = literal.getDatatype(); + if (shouldWriteDatatype(literal)) { + writer.write(SerializationConstants.DATATYPE_SEPARATOR); + writeIRI(writer, datatype); + } + } + + /** + * Determines if a literal's datatype should be written based on the configuration. + * + * @param literal the {@link Literal} to check. + * @return {@code true} if the datatype should be written, {@code false} otherwise. + */ + protected boolean shouldWriteDatatype(Literal literal) { + if (literal.getLanguage().isPresent()) { + return false; + } + + IRI datatype = literal.getDatatype(); + if (datatype == null) { + return false; + } + + return config.getLiteralDatatypePolicy() == LiteralDatatypePolicyEnum.ALWAYS_TYPED || + (!datatype.stringValue().equals(SerializationConstants.XSD_STRING) && + config.getLiteralDatatypePolicy() == LiteralDatatypePolicyEnum.MINIMAL); + } + + /** + * Writes an inline blank node using the '[]' syntax. + * The blank node's properties are serialized inside the brackets. + * + * @param writer the {@link Writer} to which the blank node will be written. + * @param properties the list of statements where the blank node is the subject. + * @throws IOException if an I/O error occurs. + */ + protected void writeInlineBlankNode(Writer writer, List properties) throws IOException { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + String currentIndent = tFamilyConfig.prettyPrint() ? tFamilyConfig.getIndent() : SerializationConstants.EMPTY_STRING; + String propIndent = tFamilyConfig.prettyPrint() ? 
currentIndent + tFamilyConfig.getIndent() : ""; + + writer.write(SerializationConstants.BLANK_NODE_START); + + boolean firstProperty = true; + for (Statement stmt : properties) { + if (stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST) || + stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)) { + continue; + } + + if (!firstProperty) { + writer.write(SerializationConstants.SEMICOLON); + } + firstProperty = false; + + if (tFamilyConfig.prettyPrint()) { + writer.write(config.getLineEnding() + propIndent); + } else { + writer.write(SerializationConstants.SPACE); + } + + writePredicate(writer, stmt.getPredicate()); + writer.write(SerializationConstants.SPACE); + writeValue(writer, stmt.getObject()); + } + + if (tFamilyConfig.prettyPrint() && !properties.isEmpty() && !firstProperty) { + writer.write(config.getLineEnding() + currentIndent); + } + + writer.write(SerializationConstants.BLANK_NODE_END); + } + + /** + * Serializes the model's statements by grouping triples by subject, then by predicate, + * using compact syntax (semicolons and commas) if configured. + * Triples already "consumed" by inline serialization are ignored. + * + * @param writer the {@link Writer} to which the optimized statements will be written. + * @throws IOException if an I/O error occurs. + */ + protected void writeOptimizedStatements(Writer writer) throws IOException { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + Map> bySubject = tFamilyConfig.sortSubjects() ? + new TreeMap<>(Comparator.comparing(Resource::stringValue)) : + new LinkedHashMap<>(); + + model.stream() + .filter(stmt -> !isConsumed(stmt.getSubject())) + .forEach(stmt -> bySubject.computeIfAbsent(stmt.getSubject(), k -> new ArrayList<>()).add(stmt)); + + for (Map.Entry> subjectEntry : bySubject.entrySet()) { + String indent = tFamilyConfig.prettyPrint() ? 
tFamilyConfig.getIndent() : SerializationConstants.EMPTY_STRING; + writer.write(indent); + writeValue(writer, subjectEntry.getKey()); + writer.write(SerializationConstants.SPACE); + + Map> byPredicate = tFamilyConfig.sortPredicates() ? + new TreeMap<>(Comparator.comparing(IRI::stringValue)) : + new LinkedHashMap<>(); + + subjectEntry.getValue().forEach(stmt -> byPredicate.computeIfAbsent(stmt.getPredicate(), k -> new ArrayList<>()).add(stmt)); + + boolean firstPredicate = true; + for (Map.Entry> predicateEntry : byPredicate.entrySet()) { + if (!firstPredicate) { + writer.write(SerializationConstants.SEMICOLON); + if (tFamilyConfig.prettyPrint()) { + writer.write(config.getLineEnding() + indent + tFamilyConfig.getIndent()); + } else { + writer.write(SerializationConstants.SPACE); + } + } + firstPredicate = false; + + writePredicate(writer, predicateEntry.getKey()); + writer.write(SerializationConstants.SPACE); + + boolean firstObject = true; + for (Statement stmt : predicateEntry.getValue()) { + if (!firstObject) { + writer.write(SerializationConstants.COMMA); + if (tFamilyConfig.prettyPrint()) { + writer.write(config.getLineEnding() + indent + tFamilyConfig.getIndent() + tFamilyConfig.getIndent()); + } else { + writer.write(SerializationConstants.SPACE); + } + } + firstObject = false; + + writeValue(writer, stmt.getObject()); + } + } + + writer.write(SerializationConstants.SPACE + SerializationConstants.POINT); + writer.write(config.getLineEnding()); + } + } + + /** + * Attempts to serialize an RDF list if the given blank node is its head. + * Marks all blank nodes in the list as consumed. + * + * @param writer the {@link Writer} to which the list will be written. + * @param listHead the blank node that might be the head of an RDF list. + * @return {@code true} if an RDF list was serialized, {@code false} otherwise. + * @throws IOException if an I/O error occurs. 
+ */ + protected boolean writeRDFList(Writer writer, Resource listHead) throws IOException { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + List items = new ArrayList<>(); + Resource current = listHead; + Set listBlankNodes = new HashSet<>(); + + if (currentlyWritingBlankNodes.contains(listHead)) { + return false; + } + currentlyWritingBlankNodes.add(listHead); + + while (current != null && current.isBNode() && !currentlyWritingBlankNodes.contains(current)) { + listBlankNodes.add(current); + currentlyWritingBlankNodes.add(current); + + final Resource finalCurrentForLambda = current; + List statements = model.stream() + .filter(stmt -> stmt.getSubject().equals(finalCurrentForLambda)) + .toList(); + + if (statements.size() != 2) { + current = null; + break; + } + + Optional first = statements.stream() + .filter(stmt -> stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST)) + .map(Statement::getObject) + .findFirst(); + + Optional rest = statements.stream() + .filter(stmt -> stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)) + .map(Statement::getObject) + .findFirst(); + + if (!first.isPresent() || !rest.isPresent()) { + current = null; + break; + } + + items.add(first.get()); + + if (rest.get().stringValue().equals(SerializationConstants.RDF_NIL)) { + current = null; + } else if (rest.get().isBNode()) { + current = (Resource) rest.get(); + } else { + current = null; + break; + } + } + currentlyWritingBlankNodes.remove(listHead); + + if (items.isEmpty() || current != null) { + listBlankNodes.forEach(currentlyWritingBlankNodes::remove); + return false; + } + + consumedBlankNodes.addAll(listBlankNodes); + + writer.write(SerializationConstants.OPEN_PARENTHESIS); + boolean firstItem = true; + for (Value item : items) { + if (!firstItem) writer.write(SerializationConstants.SPACE); + firstItem = false; + writeValue(writer, item); + } + writer.write(SerializationConstants.CLOSE_PARENTHESIS); + return true; + } + + 
/** + * Determines if a value (subject, predicate, object) is a blank node that has already been + * serialized inline (within a '[]' or '()') and should be ignored during top-level serialization. + * + * @param value the {@link Value} to check. + * @return {@code true} if the value is a consumed blank node, {@code false} otherwise. + */ + protected boolean isConsumed(Value value) { + return value.isBNode() && consumedBlankNodes.contains(value); + } + + /** + * Identifies and returns a set of blank nodes that can be serialized inline (either as '[]' or as '()' for lists). + * These nodes will then be "consumed" to prevent their serialization as top-level triples. + * + * @return A {@link Set} of {@link Resource} representing the blank nodes that will be serialized inline. + */ + protected Set precomputeInlineBlankNodesAndLists() { + AbstractTFamilyConfig tFamilyConfig = getTFamilyConfig(); + + Set precomputed = new HashSet<>(); + for (Statement stmt : model) { + if (stmt.getSubject().isBNode()) { + Resource bNodeSubject = stmt.getSubject(); + if (tFamilyConfig.useCollections() && isRDFListHead(bNodeSubject)) { + Resource current = bNodeSubject; + Set listNodes = new HashSet<>(); + Set visitedInPrecomp = new HashSet<>(); + boolean isList = true; + while (current != null && current.isBNode() && !visitedInPrecomp.contains(current)) { + visitedInPrecomp.add(current); + listNodes.add(current); + final Resource finalCurrentForLambda = current; + List listProps = model.stream() + .filter(s -> s.getSubject().equals(finalCurrentForLambda)) + .toList(); + + if (listProps.size() != 2) { + isList = false; + break; + } + + Optional first = listProps.stream() + .filter(s -> s.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST)) + .map(Statement::getObject) + .findFirst(); + + Optional rest = listProps.stream() + .filter(s -> s.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)) + .map(Statement::getObject) + .findFirst(); + + if 
(!first.isPresent() || !rest.isPresent()) { + isList = false; + break; + } + + if (rest.get().stringValue().equals(SerializationConstants.RDF_NIL)) { + current = null; + } else if (rest.get().isBNode()) { + current = (Resource) rest.get(); + } else { + isList = false; + break; + } + } + if (isList && current == null) { + precomputed.addAll(listNodes); + } + } + if (tFamilyConfig.getBlankNodeStyle() == BlankNodeStyleEnum.ANONYMOUS) { + List properties = model.stream() + .filter(s -> s.getSubject().equals(bNodeSubject)) + .toList(); + + boolean isPartOfList = properties.stream().anyMatch(s -> + s.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST) || + s.getPredicate().stringValue().equals(SerializationConstants.RDF_REST) + ); + + if (!properties.isEmpty() && !isPartOfList) { + precomputed.add(bNodeSubject); + } + } + } + } + return precomputed; + } + + /** + * Checks if a given blank node is the head of an RDF list. + * + * @param bNode the blank node to check. + * @return true if it's the head of an RDF list, false otherwise. + */ + protected boolean isRDFListHead(Resource bNode) { + boolean hasFirstAndRest = model.stream() + .filter(stmt -> stmt.getSubject().equals(bNode)) + .anyMatch(stmt -> stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_FIRST)) + && + model.stream() + .filter(stmt -> stmt.getSubject().equals(bNode)) + .anyMatch(stmt -> stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)); + + boolean isObjectOfRest = model.stream() + .filter(stmt -> stmt.getPredicate().stringValue().equals(SerializationConstants.RDF_REST)) + .anyMatch(stmt -> stmt.getObject().equals(bNode)); + + return hasFirstAndRest && !isObjectOfRest; + } + + + /** + * Adds a prefix-namespace URI mapping to the internal mappings. + * Handles potential conflicts to ensure uniqueness. + * + * @param namespaceURI The namespace URI. + * @param prefix The associated prefix. 
+ */ + protected void addPrefixMapping(String namespaceURI, String prefix) { + if (iriToPrefixMapping.containsKey(namespaceURI)) { + if (logger.isWarnEnabled() && !iriToPrefixMapping.get(namespaceURI).equals(prefix)) { + logger.warn("Namespace URI '{}' is already mapped to prefix '{}'. Cannot map to new prefix '{}'.", + namespaceURI, iriToPrefixMapping.get(namespaceURI), prefix); + } + return; + } + + if (prefixToIriMapping.containsKey(prefix)) { + if (logger.isWarnEnabled() && !prefixToIriMapping.get(prefix).equals(namespaceURI)) { + String originalNamespace = prefixToIriMapping.get(prefix); + logger.warn("Prefix '{}' is already mapped to namespace '{}'. Cannot map to new namespace '{}'. " + + "A new unique prefix will be generated for '{}'.", + prefix, originalNamespace, namespaceURI, namespaceURI); + } + return; + } + + iriToPrefixMapping.put(namespaceURI, prefix); + prefixToIriMapping.put(prefix, namespaceURI); + } + + /** + * Extracts the namespace URI part from an IRI string. + * This is a common heuristic for RDF IRIs. + * + * @param iriString The full IRI. + * @return The namespace URI part. + */ + protected String getNamespace(String iriString) { + int hashIdx = iriString.lastIndexOf(SerializationConstants.HASH); + int slashIdx = iriString.lastIndexOf(SerializationConstants.SLASH); + + if (hashIdx > -1) { + return iriString.substring(0, hashIdx + 1); + } else if (slashIdx > -1 && slashIdx < iriString.length() - 1) { + int dotIdx = iriString.lastIndexOf(SerializationConstants.POINT); + if (dotIdx > slashIdx) { + return iriString.substring(0, slashIdx + 1); + } + return iriString.substring(0, slashIdx + 1); + } + return iriString; + } + + /** + * Attempts to find a prefixed name for an IRI from existing mappings. + * + * @param iriString The full IRI. + * @return The prefixed name (e.g., "ex:someResource") or null if no suitable prefix is found. 
+ */ + protected String getPrefixedName(String iriString) { + for (Map.Entry entry : iriToPrefixMapping.entrySet()) { + String namespace = entry.getKey(); + String prefix = entry.getValue(); + + if (iriString.startsWith(namespace)) { + String localName = iriString.substring(namespace.length()); + if (localName.isEmpty()) { + if (!prefix.isEmpty()) { + return prefix + SerializationConstants.COLON; + } else { + continue; + } + } + return prefix + SerializationConstants.COLON + localName; + } + } + return null; + } + + /** + * Suggests a prefix for a given namespace URI. + * Attempts to derive a meaningful prefix or generates a unique one. + * + * @param namespace The namespace URI. + * @return A suggested prefix, or null if suggestion is not possible. + */ + protected String getSuggestedPrefix(String namespace) { + if (namespace.equals(SerializationConstants.RDF_NS)) return "rdf"; + if (namespace.equals(SerializationConstants.RDFS_NS)) return "rdfs"; + if (namespace.equals(SerializationConstants.XSD_NS)) return "xsd"; + if (namespace.equals(SerializationConstants.OWL_NS)) return "owl"; + if (namespace.equals(SerializationConstants.FOAF_NS)) return "foaf"; + + String base = namespace; + if (base.endsWith(SerializationConstants.HASH) || base.endsWith(SerializationConstants.SLASH)) { + base = base.substring(0, base.length() - 1); + } + int lastSlash = base.lastIndexOf(SerializationConstants.SLASH); + int lastHash = base.lastIndexOf(SerializationConstants.HASH); + int lastSegmentStart = Math.max(lastSlash, lastHash); + if (lastSegmentStart != -1) { + base = base.substring(lastSegmentStart + 1); + } + + if (base.isEmpty()) { + try { + URI uri = new URI(namespace); + base = uri.getHost().replace(SerializationConstants.POINT, SerializationConstants.EMPTY_STRING); + } catch (URISyntaxException e) { + logger.warn("Malformed URI encountered while suggesting prefix: {}", namespace, e); + base = "p"; + } + } + + base = base.replaceAll("[^a-zA-Z0-9]", 
SerializationConstants.EMPTY_STRING).toLowerCase(); + if (base.isEmpty()) base = "p"; + + String candidate = base; + int i = 0; + + while (prefixToIriMapping.containsKey(candidate) && !prefixToIriMapping.get(candidate).equals(namespace)) { + candidate = base + (++i); + } + return candidate; + } + + + /** + * Abstract method to escape special characters in an IRI string for the specific format. + * + * @param iri The IRI to escape. + * @return The escaped IRI. + */ + protected abstract String escapeIRIString(String iri); + + /** + * Abstract method to escape special characters in string literals. + * + * @param value The string value of the literal to escape. + * @return The escaped string. + */ + protected abstract String escapeLiteralString(String value); + + /** + * Abstract method to escape special characters in multi-line literals (triple-quotes). + * + * @param value The string value of the literal to escape. + * @return The escaped string. + */ + protected abstract String escapeMultilineLiteralString(String value); + + /** + * Validates RDF values before serialization. + * Called only if strictMode is enabled. + * + * @param value The {@link Value} to validate. + * @throws IllegalArgumentException if the value is null or invalid according to strict rules. + */ + protected void validateValue(Value value) { + if (value == null) { + logger.warn("Null value encountered where a non-null value was expected for {} serialization. This will lead to an IllegalArgumentException if strict mode is enabled.", getFormatName()); + throw new IllegalArgumentException("Value cannot be null in {} format when strictMode is enabled." + getFormatName()); + } + + if (config.isStrictMode() && value.isLiteral()) { + validateLiteral((Literal) value); + } + } + + /** + * Validates a {@link Literal} to ensure it conforms to RDF/format rules. + * Specifically checks for consistency between language tags and the rdf:langString datatype. + * Called only if strictMode is enabled. 
+ * + * @param literal The {@link Literal} to validate. + * @throws IllegalArgumentException if the literal is invalid (e.g., language tag with wrong datatype, + * or rdf:langString literal without language tag). + */ + protected void validateLiteral(Literal literal) { + IRI datatype = literal.getDatatype(); + + if (literal.getLanguage().isPresent()) { + if (datatype == null || !datatype.stringValue().equals(RDF.LANGSTRING.getIRI().stringValue())) { + throw new IllegalArgumentException( + "A literal with a language tag must use the rdf:langString datatype. Found: " + (datatype != null ? datatype.stringValue() : "null")); + } + } else { + if (datatype != null && datatype.stringValue().equals(RDF.LANGSTRING.getIRI().stringValue())) { + throw new IllegalArgumentException( + "An rdf:langString literal must have a language tag."); + } + } + } + + /** + * Validates an {@link IRI} to ensure it conforms to format rules. + * Checks if the IRI string contains characters not allowed in unescaped form + * within angle brackets (e.g., control characters, space). + * Called only if strictMode and validateURIs are enabled. + * + * @param iri The {@link IRI} to validate. + * @throws IllegalArgumentException if the IRI contains invalid characters. 
+ */ + protected void validateIRI(IRI iri) { + String iriString = iri.stringValue(); + + if (iriString.contains(SerializationConstants.SPACE) || + iriString.contains(SerializationConstants.QUOTE) || + iriString.contains(SerializationConstants.LT) || + iriString.contains(SerializationConstants.GT)) { + throw new IllegalArgumentException("IRI contains illegal characters (space, quotes, angle brackets) for the unescaped form of " + getFormatName() + ": " + iriString); + } + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/base/AbstractLineBasedSerializer.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/base/AbstractLineBasedSerializer.java new file mode 100644 index 000000000..264501d26 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/base/AbstractLineBasedSerializer.java @@ -0,0 +1,375 @@ +package fr.inria.corese.core.next.impl.common.serialization.base; + +import fr.inria.corese.core.next.api.*; +import fr.inria.corese.core.next.impl.common.literal.RDF; +// Changed import from SerializerConfig to AbstractSerializerConfig +import fr.inria.corese.core.next.impl.common.serialization.config.AbstractSerializerConfig; +import fr.inria.corese.core.next.impl.common.serialization.config.LiteralDatatypePolicyEnum; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.io.Writer; +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +/** + * Base class for line-based RDF serializers (N-Triples, N-Quads). + * Contains all the common logic for writing statements line by line. + * Subclasses only need to implement how to handle the context part. 
+ */ +public abstract class AbstractLineBasedSerializer implements RdfSerializer { + + /** + * Logger for this class, used for logging potential issues or information during serialization. + */ + private static final Logger logger = LoggerFactory.getLogger(AbstractLineBasedSerializer.class); + + protected final Model model; + protected final AbstractSerializerConfig config; + + /** + * Constructs a new line-based serializer. + * + * @param model the {@link Model} to be serialized. Must not be null. + * @param config the {@link AbstractSerializerConfig} to use for serialization. Must not be null. + * @throws NullPointerException if the provided model or config is null. + */ + protected AbstractLineBasedSerializer(Model model, AbstractSerializerConfig config) { + this.model = Objects.requireNonNull(model, "Model cannot be null"); + this.config = Objects.requireNonNull(config, "Configuration cannot be null"); + } + + /** + * Writes the model to the given writer. + * Each statement in the model is written on a new line, terminated by a dot and a newline character. + * + * @param writer the {@link Writer} to which the output will be written. + * @throws SerializationException if an I/O error occurs during writing or if invalid data is encountered. + */ + @Override + public void write(Writer writer) throws SerializationException { + try (BufferedWriter bufferedWriter = new BufferedWriter(writer)) { + Set processedNodes = preprocessModel(); + + for (Statement stmt : model) { + if (shouldProcess(stmt, processedNodes)) { + writeStatement(bufferedWriter, stmt); + } + } + + } catch (IOException e) { + throw new SerializationException(getFormatName() + " serialization failed", getFormatName(), e); + } catch (IllegalArgumentException e) { + throw new SerializationException("Invalid " + getFormatName() + " data: " + e.getMessage(), getFormatName(), e); + } + } + + /** + * Returns the name of the format for error messages. 
+ */ + protected abstract String getFormatName(); + + /** + * Handles writing the context part of a statement. + * This is where N-Triples and N-Quads differ. + * + * @param writer the writer to write to + * @param stmt the statement containing the context + * @throws IOException if an I/O error occurs + */ + protected abstract void writeContext(Writer writer, Statement stmt) throws IOException; + + private Set preprocessModel() { + return Collections.emptySet(); + } + + private boolean shouldProcess(Statement stmt, Set processedNodes) { + return !processedNodes.contains(stmt.getSubject()); + } + + /** + * Writes a single {@link Statement} to the writer. + * The statement is written as "$subject $predicate $object" followed by context handling. + * + * @param writer the {@link Writer} to which the statement will be written. + * @param stmt the {@link Statement} to write. + * @throws IOException if an I/O error occurs. + */ + private void writeStatement(Writer writer, Statement stmt) throws IOException { + writeValue(writer, stmt.getSubject()); + writer.write(SerializationConstants.SPACE); + writeValue(writer, stmt.getPredicate()); + writer.write(SerializationConstants.SPACE); + writeValue(writer, stmt.getObject()); + + // Let subclass handle the context + writeContext(writer, stmt); + + if (config.trailingDot()) { + writer.write(SerializationConstants.SPACE); + writer.write(SerializationConstants.POINT); + } + + writer.write(config.getLineEnding()); + } + + /** + * Writes a single {@link Value} to the writer. + * Handles literals, blank nodes, and IRIs. + * + * @param writer the {@link Writer} to which the value will be written. + * @param value the {@link Value} to write. + * @throws IOException if an I/O error occurs. + * @throws IllegalArgumentException if the provided value is null or an unsupported type. 
+ */ + protected void writeValue(Writer writer, Value value) throws IOException { + validateValue(value); + + if (value.isLiteral()) { + writeLiteral(writer, (Literal) value); + } else if (value.isResource()) { + if (value.isIRI()) { + writeIRI(writer, (IRI) value); + } else if (value.isBNode()) { + writeBlankNode(writer, (Resource) value); + } else { + throw new IllegalArgumentException("Unsupported resource type for " + getFormatName() + " serialization: " + value.getClass().getName()); + } + } else { + throw new IllegalArgumentException("Unsupported value type for " + getFormatName() + " serialization: " + value.getClass().getName()); + } + } + + /** + * Writes a {@link Literal} to the writer. + * Applies escaping and datatype/language tag rules based on configuration. + * + * @param writer the {@link Writer} to which the literal will be written. + * @param literal the {@link Literal} to write. + * @throws IOException if an I/O error occurs. + */ + protected void writeLiteral(Writer writer, Literal literal) throws IOException { + writer.write(SerializationConstants.QUOTE); + writer.write(escapeLiteral(literal.stringValue())); + writer.write(SerializationConstants.QUOTE); + + literal.getLanguage().ifPresent(lang -> { + try { + writer.write(SerializationConstants.AT_SIGN + lang); + } catch (IOException e) { + throw new UncheckedIOException("Error writing language tag to stream", e); + } + }); + + IRI datatype = literal.getDatatype(); + if (!literal.getLanguage().isPresent() && datatype != null && + (config.getLiteralDatatypePolicy() == LiteralDatatypePolicyEnum.ALWAYS_TYPED || + (config.getLiteralDatatypePolicy() == LiteralDatatypePolicyEnum.MINIMAL && !datatype.stringValue().equals(SerializationConstants.XSD_STRING)))) { + writer.write(SerializationConstants.DATATYPE_SEPARATOR); + writeIRI(writer, datatype); + } + } + + /** + * Writes an {@link IRI} to the writer. + * The IRI's string representation must be enclosed in angle brackets. 
+ * Applies URI validation based on configuration. + * + * @param writer the {@link Writer} to which the IRI will be written. + * @param iri the {@link IRI} to write. + * @throws IOException if an I/O error occurs. + * @throws IllegalArgumentException if the IRI is invalid (e.g., contains spaces) and strict mode/URI validation is enabled. + */ + protected void writeIRI(Writer writer, IRI iri) throws IOException { + if (config.isStrictMode() && config.validateURIs()) { + validateIRI(iri); + } + writer.write(SerializationConstants.LT); + writer.write(escapeIRI(iri.stringValue())); + writer.write(SerializationConstants.GT); + } + + /** + * Writes a blank node to the writer. + * Blank nodes are prefixed with "_:", and the identifier is appended. + * + * @param writer the {@link Writer} to which the blank node will be written. + * @param blankNode the {@link Resource} representing the blank node. + * @throws IOException if an I/O error occurs. + */ + protected void writeBlankNode(Writer writer, Resource blankNode) throws IOException { + writer.write(SerializationConstants.BNODE_PREFIX); + writer.write(blankNode.stringValue()); + } + + /** + * Validates and potentially escapes an IRI string. + * Throws an {@link IllegalArgumentException} if the IRI contains characters + * that are not allowed in unescaped form (like spaces, quotes, angle brackets). + * This method is called if strictMode and validateURIs are enabled. + * + * @param iri The string value of the IRI to validate and escape. + * @return The validated and potentially escaped IRI string. + * @throws IllegalArgumentException if the IRI string is invalid. 
+ */ + protected String escapeIRI(String iri) { + if (iri.contains(SerializationConstants.SPACE) || + iri.contains(SerializationConstants.QUOTE) || + iri.contains(SerializationConstants.LT) || + iri.contains(SerializationConstants.GT)) { + + throw new IllegalArgumentException("Invalid IRI for " + getFormatName() + " (contains illegal characters inside '<>'): " + iri); + } + + return config.escapeUnicode() ? escapeUnicodeString(iri) : iri; + } + + /** + * Escape special characters in string literals. + * Handles backslash, double quote, and common control characters. + * Unicode escape sequences are used for unprintable characters if `escapeUnicode` is true. + * + * @param value The string value of the literal to escape. + * @return The escaped string suitable for literal. + */ + protected String escapeLiteral(String value) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < value.length(); i++) { + char c = value.charAt(i); + switch (c) { + case '\n': + sb.append(SerializationConstants.BACK_SLASH).append('n'); + break; + case '\r': + sb.append(SerializationConstants.BACK_SLASH).append('r'); + break; + case '\t': + sb.append(SerializationConstants.BACK_SLASH).append('t'); + break; + case '\b': // backspace + sb.append(SerializationConstants.BACK_SLASH).append('b'); + break; + case '\f': // form feed + sb.append(SerializationConstants.BACK_SLASH).append('f'); + break; + case '"': + sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.QUOTE); + break; + case '\\': + sb.append(SerializationConstants.BACK_SLASH).append(SerializationConstants.BACK_SLASH); + break; + default: + if (config.escapeUnicode()) { + if (c <= 0x1F || c == 0x7F) { + sb.append(String.format("\\u%04X", (int) c)); + } else { + sb.append(c); + } + } else { + sb.append(c); + } + } + } + return sb.toString(); + } + + /** + * Escapes non-ASCII and control characters into Unicode escape sequences. 
+ * This is a helper for `escapeIRI` and potentially `escapeLiteral` + * if `escapeUnicode` is true in config. + * + * @param value The string to escape. + * @return The string with Unicode characters escaped. + */ + protected String escapeUnicodeString(String value) { + StringBuilder sb = new StringBuilder(); + int len = value.length(); // Cache length for invariant stop condition + for (int i = 0; i < len; i++) { + char c = value.charAt(i); + if (c <= 0x1F || c == 0x7F || (c >= 0x80 && c <= 0xFFFF)) { // Basic Multilingual Plane characters and control characters + sb.append(String.format("\\u%04X", (int) c)); + } else if (Character.isHighSurrogate(c)) { // Supplementary characters + int codePoint = value.codePointAt(i); + if (Character.isValidCodePoint(codePoint)) { + sb.append(String.format("\\U%08X", codePoint)); + i++; // Skip the low surrogate char + } else { + sb.append(c); // Append invalid surrogate char directly + } + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Validates RDF values before serialization to ensure they conform to rules. + * Only called if strictMode is enabled. + * + * @param value The {@link Value} to validate. + * @throws IllegalArgumentException if the value is null or invalid based on rules. + */ + protected void validateValue(Value value) { + if (value == null) { + logger.warn("Encountered a null value where a non-null value was expected for " + getFormatName() + " serialization. This will result in an IllegalArgumentException if strict mode is enabled."); + throw new IllegalArgumentException("Value cannot be null in " + getFormatName() + " format when strictMode is enabled."); + } + + if (value.isLiteral()) { + validateLiteral((Literal) value); + } else if (value.isIRI()) { + validateIRI((IRI) value); + } + } + + /** + * Validates a {@link Literal} to ensure it conforms to RDF rules. + * Specifically checks for consistency between language tags and the rdf:langString datatype. 
+ * Only called if strictMode is enabled. + * + * @param literal The {@link Literal} to validate. + * @throws IllegalArgumentException if the literal is invalid (e.g., language tag with wrong datatype, + * or rdf:langString literal missing a language tag). + */ + protected void validateLiteral(Literal literal) { + IRI datatype = literal.getDatatype(); + + if (literal.getLanguage().isPresent()) { + if (datatype == null || !datatype.stringValue().equals(RDF.LANGSTRING.getIRI().stringValue())) { + throw new IllegalArgumentException( + "Literal with language tag must use rdf:langString datatype. Found: " + (datatype != null ? datatype.stringValue() : "null")); + } + } else { + if (datatype != null && datatype.stringValue().equals(RDF.LANGSTRING.getIRI().stringValue())) { + throw new IllegalArgumentException( + "rdf:langString literal must have a language tag."); + } + } + } + + /** + * Validates an {@link IRI} to ensure it conforms to rules. + * Checks if the IRI string contains characters that are not allowed + * unescaped form, such as spaces, quotes, or angle brackets. + * Only called if strictMode and validateURIs are enabled. + * + * @param iri The {@link IRI} to validate. + * @throws IllegalArgumentException if the IRI contains invalid characters. 
+     */
+    protected void validateIRI(IRI iri) {
+        String iriString = iri.stringValue();
+        if (iriString.contains(SerializationConstants.SPACE) ||
+                iriString.contains(SerializationConstants.QUOTE) ||
+                iriString.contains(SerializationConstants.LT) ||
+                iriString.contains(SerializationConstants.GT)) {
+            throw new IllegalArgumentException("IRI contains illegal characters (space, quote, angle brackets) for " + getFormatName() + " unescaped form: " + iriString);
+        }
+    }
+}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractNFamilyConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractNFamilyConfig.java
new file mode 100644
index 000000000..34f6f3d61
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractNFamilyConfig.java
@@ -0,0 +1,62 @@
+package fr.inria.corese.core.next.impl.common.serialization.config;
+
+/**
+ * An abstract base class for serialization configurations of N-Family RDF formats (e.g., N-Triples, N-Quads).
+ * This class extends {@link AbstractSerializerConfig} and provides a common foundation
+ * for formats that typically have simpler, line-based structures and specific default behaviors
+ * regarding literal datatypes and character escaping.
+ *
+ * <p>It enforces the use of the Builder pattern for construction through its
+ * nested {@link AbstractNFamilyBuilder}. Subclasses are expected to extend this
+ * configuration and its builder to add format-specific options.</p>
+ */
+public abstract class AbstractNFamilyConfig extends AbstractSerializerConfig {
+
+    /**
+     * Protected constructor to be used by concrete builder implementations.
+     * Initializes the N-Family serialization configuration options, calling the superclass
+     * constructor for common options.
+     *
+     * @param builder The builder instance containing the desired configuration values.
+     */
+    protected AbstractNFamilyConfig(AbstractNFamilyBuilder<?> builder) {
+        super(builder);
+    }
+
+    /**
+     * An abstract base builder for {@link AbstractNFamilyConfig}.
+     * This builder provides methods for setting N-Family serialization configuration options.
+     * It extends {@link AbstractSerializerConfig.AbstractBuilder} and uses a recursive type
+     * parameter ({@code S}) to allow concrete subclass builders to return their own specific type,
+     * enabling fluent API chaining.
+     *
+     * <p>By default, it sets {@code literalDatatypePolicy} to {@link LiteralDatatypePolicyEnum#ALWAYS_TYPED}
+     * and {@code escapeUnicode} to {@code true}, which are common characteristics of N-Family formats.</p>
+     *
+     * @param <S> The type of the concrete builder extending this abstract builder.
+     */
+    public abstract static class AbstractNFamilyBuilder<S extends AbstractNFamilyBuilder<S>>
+            extends AbstractSerializerConfig.AbstractBuilder<S> {
+
+        /**
+         * Default constructor for the builder.
+         * Initializes common N-Family specific defaults:
+         * <ul>
+         * <li>{@code literalDatatypePolicy} is set to {@link LiteralDatatypePolicyEnum#ALWAYS_TYPED}.</li>
+         * <li>{@code escapeUnicode} is set to {@code true}.</li>
+         * </ul>
+         */
+        protected AbstractNFamilyBuilder() {
+            super.literalDatatypePolicy = LiteralDatatypePolicyEnum.ALWAYS_TYPED;
+            super.escapeUnicode = true;
+        }
+
+        /**
+         * Builds and returns a new {@link AbstractNFamilyConfig} instance with the current builder settings.
+         * This method must be implemented by concrete builder subclasses to return their specific configuration type.
+         *
+         * @return A new {@code AbstractNFamilyConfig} instance or a subclass instance.
+         */
+        public abstract AbstractNFamilyConfig build();
+    }
+}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractSerializerConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractSerializerConfig.java
new file mode 100644
index 000000000..6ed99d68e
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractSerializerConfig.java
@@ -0,0 +1,310 @@
+package fr.inria.corese.core.next.impl.common.serialization.config;
+
+import fr.inria.corese.core.next.api.SerializationConfig;
+import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants;
+
+import java.util.Objects;
+
+/**
+ * An abstract base class for all RDF serialization configurations.
+ * This class defines common configuration parameters that are applicable across various
+ * RDF serialization formats (e.g., N-Triples, Turtle, RDF/XML).
+ *
+ * <p>It enforces the use of the Builder pattern for construction through its
+ * nested {@link AbstractBuilder}. Subclasses are expected to extend this
+ * configuration and its builder to add format-specific options.</p>
+ */
+public abstract class AbstractSerializerConfig implements SerializationConfig {
+
+    /**
+     * The policy for how literal datatypes are printed.
+     * This determines whether datatypes are always explicit, minimal, or follow specific rules.
+     */
+    protected final LiteralDatatypePolicyEnum literalDatatypePolicy;
+    /**
+     * Whether non-ASCII characters should be escaped using Unicode escape sequences (e.g., {@code \ u00E9}).
+     * This ensures compatibility with systems that might not handle UTF-8 correctly, but makes output less human-readable.
+     */
+    protected final boolean escapeUnicode;
+    /**
+     * Whether a dot ({@code .}) should be added at the end of each triple block or statement.
+     * This is a syntax requirement for some RDF serialization formats (e.g., Turtle, N-Triples).
+     */
+    protected final boolean trailingDot;
+    /**
+     * The base IRI to be used for the serialization.
+     * This allows relative IRIs to be resolved and can shorten the output by avoiding full IRIs.
+     * Can be {@code null} if no base IRI is specified.
+     */
+    protected final String baseIRI;
+    /**
+     * Whether deterministic blank node IDs (e.g., {@code _:b0}, {@code _:b1}) should be generated.
+     * This is crucial for reproducible outputs, especially in testing environments, as blank node IDs are typically random.
+     */
+    protected final boolean stableBlankNodeIds;
+    /**
+     * The string used for line endings (e.g., {@code "\n"} for Unix, {@code "\r\n"} for Windows).
+     * This ensures that the generated file has correct line endings for the target operating system.
+     */
+    protected final String lineEnding;
+
+    /**
+     * General strictness setting for validation during serialization.
+     * Enabling this can catch errors or non-standard RDF constructs but might reject valid, less common patterns.
+     */
+    protected final boolean strictMode;
+    /**
+     * Whether URIs should be validated for compliance with RDF/Turtle/N-Triples rules.
+     * This ensures that generated URIs are valid and will be correctly parsed by other tools.
+     */
+    protected final boolean validateURIs;
+    /**
+     * Whether context information (named graphs) should be included in the serialization output.
+     * This is essential for formats like N-Quads or TriG which support named graphs.
+     */
+    protected final boolean includeContext;
+
+    /**
+     * Protected constructor to be used by concrete builder implementations.
+     * Copies every common serialization option from the builder into the final fields.
+     *
+     * @param builder The builder instance containing the desired configuration values.
+     * @throws NullPointerException if the builder's literal datatype policy or line ending is null.
+     */
+    protected AbstractSerializerConfig(AbstractBuilder<?> builder) {
+        this.literalDatatypePolicy = Objects.requireNonNull(builder.literalDatatypePolicy, "Literal datatype policy cannot be null");
+        this.escapeUnicode = builder.escapeUnicode;
+        this.trailingDot = builder.trailingDot;
+        this.baseIRI = builder.baseIRI;
+        this.stableBlankNodeIds = builder.stableBlankNodeIds;
+        this.lineEnding = Objects.requireNonNull(builder.lineEnding, "Line ending cannot be null");
+
+        this.strictMode = builder.strictMode;
+        this.validateURIs = builder.validateURIs;
+        this.includeContext = builder.includeContext;
+    }
+
+    // --- Getters for common properties ---
+
+    /**
+     * Returns the policy for how literal datatypes are printed.
+     *
+     * @return The {@link LiteralDatatypePolicyEnum} indicating the literal datatype serialization policy.
+     */
+    public LiteralDatatypePolicyEnum getLiteralDatatypePolicy() {
+        return literalDatatypePolicy;
+    }
+
+    /**
+     * Checks if non-ASCII characters should be escaped using Unicode escape sequences.
+     *
+     * @return {@code true} if Unicode escaping is enabled, {@code false} otherwise.
+     */
+    public boolean escapeUnicode() {
+        return escapeUnicode;
+    }
+
+    /**
+     * Checks if a dot ({@code .}) should be added at the end of each triple block or statement.
+     *
+     * @return {@code true} if a trailing dot is required, {@code false} otherwise.
+     */
+    public boolean trailingDot() {
+        return trailingDot;
+    }
+
+    /**
+     * Returns the base IRI to be used for the serialization.
+     *
+     * @return The base IRI string, or {@code null} if no base IRI is specified.
+     */
+    public String getBaseIRI() {
+        return baseIRI;
+    }
+
+    /**
+     * Checks if deterministic blank node IDs should be generated.
+     *
+     * @return {@code true} if stable blank node IDs are enabled, {@code false} otherwise.
+     */
+    public boolean stableBlankNodeIds() {
+        return stableBlankNodeIds;
+    }
+
+    /**
+     * Returns the string used for line endings.
+     *
+     * @return The line ending string (e.g., {@code "\n"} for Unix, {@code "\r\n"} for Windows); never {@code null}.
+     */
+    public String getLineEnding() {
+        return lineEnding;
+    }
+
+    /**
+     * Checks if strict mode for validation is enabled.
+     *
+     * @return {@code true} if strict mode is enabled, {@code false} otherwise.
+     */
+    public boolean isStrictMode() {
+        return strictMode;
+    }
+
+    /**
+     * Checks if URIs should be validated for compliance with RDF/serialization rules.
+     *
+     * @return {@code true} if URI validation is enabled, {@code false} otherwise.
+     */
+    public boolean validateURIs() {
+        return validateURIs;
+    }
+
+    /**
+     * Checks if context information (named graphs) should be included in the serialization output.
+     *
+     * @return {@code true} if context inclusion is enabled, {@code false} otherwise.
+     */
+    public boolean includeContext() {
+        return includeContext;
+    }
+
+    /**
+     * An abstract base builder for {@link AbstractSerializerConfig}.
+     * This builder provides methods for setting common serialization configuration options.
+     * It uses a recursive type parameter ({@code S}) to allow concrete subclass builders
+     * to return their own specific type, enabling fluent API chaining.
+     *
+     * @param <S> The type of the concrete builder extending this abstract builder.
+     */
+    public abstract static class AbstractBuilder<S extends AbstractBuilder<S>> {
+        protected LiteralDatatypePolicyEnum literalDatatypePolicy = LiteralDatatypePolicyEnum.MINIMAL;
+        protected boolean escapeUnicode = false;
+        protected boolean trailingDot = true;
+        protected String baseIRI = null;
+        protected boolean stableBlankNodeIds = false;
+        protected String lineEnding = SerializationConstants.DEFAULT_LINE_ENDING;
+
+        protected boolean strictMode = true;
+        protected boolean validateURIs = true;
+        protected boolean includeContext = false;
+
+        /**
+         * Sets the policy for how literal datatypes are printed.
+         *
+         * @param policy The {@link LiteralDatatypePolicyEnum} to set. Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if the provided policy is null.
+         */
+        public S literalDatatypePolicy(LiteralDatatypePolicyEnum policy) {
+            this.literalDatatypePolicy = Objects.requireNonNull(policy);
+            return self();
+        }
+
+        /**
+         * Sets whether non-ASCII characters should be escaped using Unicode escape sequences.
+         *
+         * @param escape {@code true} to enable Unicode escaping, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S escapeUnicode(boolean escape) {
+            this.escapeUnicode = escape;
+            return self();
+        }
+
+        /**
+         * Sets whether a dot ({@code .}) should be added at the end of each triple block or statement.
+         *
+         * @param trailing {@code true} to require a trailing dot, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S trailingDot(boolean trailing) {
+            this.trailingDot = trailing;
+            return self();
+        }
+
+        /**
+         * Sets the base IRI to be used for the serialization.
+         *
+         * @param base The base IRI string. Can be {@code null}.
+         * @return The builder instance for fluent chaining.
+         */
+        public S baseIRI(String base) {
+            this.baseIRI = base;
+            return self();
+        }
+
+        /**
+         * Sets whether deterministic blank node IDs should be generated.
+         *
+         * @param stable {@code true} to enable stable blank node IDs, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S stableBlankNodeIds(boolean stable) {
+            this.stableBlankNodeIds = stable;
+            return self();
+        }
+
+        /**
+         * Sets the string used for line endings.
+         *
+         * @param lineEnding The line ending string (e.g., {@code "\n"} for Unix, {@code "\r\n"} for Windows). Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if the provided line ending is null.
+         */
+        public S lineEnding(String lineEnding) {
+            this.lineEnding = Objects.requireNonNull(lineEnding);
+            return self();
+        }
+
+        /**
+         * Sets the general strictness setting for validation during serialization.
+         *
+         * @param strict {@code true} to enable strict mode, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S strictMode(boolean strict) {
+            this.strictMode = strict;
+            return self();
+        }
+
+        /**
+         * Sets whether URIs should be validated for compliance with RDF/serialization rules.
+         *
+         * @param validate {@code true} to enable URI validation, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S validateURIs(boolean validate) {
+            this.validateURIs = validate;
+            return self();
+        }
+
+        /**
+         * Sets whether context information (named graphs) should be included in the serialization output.
+         *
+         * @param include {@code true} to include context information, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S includeContext(boolean include) {
+            this.includeContext = include;
+            return self();
+        }
+
+        /**
+         * Builds and returns a new {@link AbstractSerializerConfig} instance with the current builder settings.
+         * This method must be implemented by concrete builder subclasses to return their specific configuration type.
+         *
+         * @return A new {@code AbstractSerializerConfig} instance or a subclass instance.
+         */
+        public abstract AbstractSerializerConfig build();
+
+        /**
+         * Helper method to return the concrete builder instance for fluent API chaining.
+         * This method is used internally by the builder methods to ensure that method calls
+         * return the correct subclass type, allowing for method chaining.
+         *
+         * @return This builder, cast to the concrete builder type {@code S}.
+         */
+        protected final S self() {
+            return (S) this; // unchecked cast; assumes each concrete builder binds S to its own type
+        }
+    }
+}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractTFamilyConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractTFamilyConfig.java
new file mode 100644
index 000000000..4c22d8805
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/AbstractTFamilyConfig.java
@@ -0,0 +1,525 @@
+package fr.inria.corese.core.next.impl.common.serialization.config;
+
+import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * An abstract base class for serialization configurations of Turtle/TriG-family RDF formats (e.g., Turtle, TriG).
+ * This class extends {@link AbstractSerializerConfig} and introduces parameters specific to
+ * formats that utilize syntax sugar, pretty-printing, and collection syntax.
+ *
+ * <p>It enforces the use of the Builder pattern for construction through its
+ * nested {@link AbstractTFamilyBuilder}. Subclasses are expected to extend this
+ * configuration and its builder to add format-specific options.</p>
+ */
+public abstract class AbstractTFamilyConfig extends AbstractSerializerConfig {
+
+    /**
+     * Whether prefix declarations (e.g., {@code @prefix}, {@code PREFIX}) should be used for compact IRIs.
+     * This is crucial for human-readable formats like Turtle but not for N-Triples.
+     */
+    protected final boolean usePrefixes;
+    /**
+     * Whether the serializer should automatically discover and declare prefixes used in the graph.
+     * This avoids manual prefix configuration but can lead to more prefixes than strictly needed.
+     */
+    protected final boolean autoDeclarePrefixes;
+    /**
+     * The policy for ordering prefix declarations (e.g., alphabetically, by usage, or custom).
+     * This impacts the determinism and readability of the prefix block.
+     */
+    protected final PrefixOrderingEnum prefixOrdering;
+    /**
+     * A map of custom URI prefixes (prefix name to namespace URI) to be used for serialization, in addition to or instead of
+     * auto-declared prefixes. Useful for enforcing specific prefix names or when {@code autoDeclarePrefixes} is false.
+     */
+    protected final Map<String, String> customPrefixes; // Used for CUSTOM ordering or if autoDeclarePrefixes=false
+    /**
+     * Whether compact triple syntax (e.g., using ';' for subject/predicate reuse and ',' for object lists)
+     * should be used. This significantly reduces file size and improves readability for formats like Turtle.
+     */
+    protected final boolean useCompactTriples; // Includes comma-separated objects and subject/predicate reuse via ';'
+    /**
+     * Whether the {@code a} shortcut should be used for {@code rdf:type} predicates.
+     * This is a common Turtle shorthand that improves conciseness and readability.
+     */
+    protected final boolean useRdfTypeShortcut; // 'a' instead of 'rdf:type'
+    /**
+     * Whether Turtle collection syntax {@code ( item1 item2 )} should be used for {@code rdf:List} structures.
+     * This provides a more idiomatic and readable representation of lists in Turtle.
+     */
+    protected final boolean useCollections; // Turtle collection syntax ( )
+    /**
+     * The preferred style for serializing blank nodes (e.g., {@code []} vs {@code _:id}).
+     * This affects both the conciseness and the identifiability of blank nodes in the output.
+     */
+    protected final BlankNodeStyleEnum blankNodeStyle; // [] vs _:id
+    /**
+     * Whether multi-line literal syntax (triple quotes {@code """..."""}) should be used for literals
+     * containing newline characters.
+     */
+    protected final boolean useMultilineLiterals;
+
+    // --- Pretty-Printing Options ---
+    /**
+     * Whether human-readable formatting with indentation and newlines (pretty-printing) is enabled.
+     * This makes the output easier for humans to read and debug, but increases file size slightly.
+     */
+    protected final boolean prettyPrint;
+    /**
+     * The string used for indentation (e.g., " ", "\t").
+     * This defines the visual spacing for nested structures when pretty-printing.
+     */
+    protected final String indent;
+    /**
+     * The maximum desired line length before the serializer attempts to break lines.
+     * This helps ensure readability by preventing very long lines in the output.
+     */
+    protected final int maxLineLength;
+    /**
+     * Whether triples should be grouped by subject in the output (e.g., using ';' and '.').
+     * This organizes the output logically around subjects, improving readability.
+     */
+    protected final boolean groupBySubject; // Group triples by subject using ; and .
+    /**
+     * Whether subjects should be sorted alphabetically in the output.
+     * This ensures a consistent and reproducible order of subjects, useful for diffing or testing.
+     */
+    protected final boolean sortSubjects; // Sort subjects alphabetically
+    /**
+     * Whether predicates should be sorted alphabetically within a subject group.
+     * This ensures a consistent and reproducible order of properties for a given subject.
+     */
+    protected final boolean sortPredicates; // Sort predicates alphabetically within a subject group
+
+    /**
+     * Protected constructor to be used by concrete builder implementations.
+     * Initializes the Turtle/TriG serialization configuration options, calling the superclass
+     * constructor for common options. The custom prefixes are copied into an unmodifiable map.
+     *
+     * @param builder The builder instance containing the desired configuration values.
+     * @throws NullPointerException if any required field from the builder is null.
+     * @throws IllegalArgumentException if incompatible options (e.g., escapeUnicode and useMultilineLiterals) are enabled.
+     */
+    protected AbstractTFamilyConfig(AbstractTFamilyBuilder<?> builder) {
+        super(builder);
+
+        this.usePrefixes = builder.usePrefixes;
+        this.autoDeclarePrefixes = builder.autoDeclarePrefixes;
+        this.prefixOrdering = Objects.requireNonNull(builder.prefixOrdering, "Prefix ordering cannot be null");
+        this.customPrefixes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(builder.customPrefixes, "Custom prefixes map cannot be null")));
+        this.useCompactTriples = builder.useCompactTriples;
+        this.useRdfTypeShortcut = builder.useRdfTypeShortcut;
+        this.useCollections = builder.useCollections;
+        this.blankNodeStyle = Objects.requireNonNull(builder.blankNodeStyle, "Blank node style cannot be null");
+        this.useMultilineLiterals = builder.useMultilineLiterals;
+
+        this.prettyPrint = builder.prettyPrint;
+        this.indent = Objects.requireNonNull(builder.indent, "Indentation string cannot be null");
+        this.maxLineLength = builder.maxLineLength;
+        this.groupBySubject = builder.groupBySubject;
+        this.sortSubjects = builder.sortSubjects;
+        this.sortPredicates = builder.sortPredicates;
+
+        if (this.escapeUnicode && this.useMultilineLiterals) { // read the final field: the getter is overridable and must not run during construction
+            throw new IllegalArgumentException("Cannot enable both escapeUnicode and useMultilineLiterals in Turtle TriG configs.");
+        }
+    }
+
+
+    /**
+     * Checks if prefix declarations should be used for compact IRIs.
+     *
+     * @return {@code true} if prefixes are used, {@code false} otherwise.
+     */
+    public boolean usePrefixes() {
+        return usePrefixes;
+    }
+
+    /**
+     * Checks if the serializer should automatically discover and declare prefixes.
+     *
+     * @return {@code true} if auto-declaration is enabled, {@code false} otherwise.
+     */
+    public boolean autoDeclarePrefixes() {
+        return autoDeclarePrefixes;
+    }
+
+    /**
+     * Returns the policy for ordering prefix declarations.
+     *
+     * @return The {@link PrefixOrderingEnum} for prefix ordering.
+     */
+    public PrefixOrderingEnum getPrefixOrdering() {
+        return prefixOrdering;
+    }
+
+    /**
+     * Returns an unmodifiable map of custom URI prefixes.
+     *
+     * @return A map where keys are prefix names and values are namespace URIs.
+     */
+    public Map<String, String> getCustomPrefixes() {
+        return customPrefixes;
+    }
+
+    /**
+     * Checks if compact triple syntax (using ';' and ',') should be used.
+     *
+     * @return {@code true} if compact triples are enabled, {@code false} otherwise.
+     */
+    public boolean useCompactTriples() {
+        return useCompactTriples;
+    }
+
+    /**
+     * Checks if the {@code a} shortcut should be used for {@code rdf:type} predicates.
+     *
+     * @return {@code true} if the {@code a} shortcut is enabled, {@code false} otherwise.
+     */
+    public boolean useRdfTypeShortcut() {
+        return useRdfTypeShortcut;
+    }
+
+    /**
+     * Checks if Turtle collection syntax {@code ( item1 item2 )} should be used for {@code rdf:List} structures.
+     *
+     * @return {@code true} if collection syntax is enabled, {@code false} otherwise.
+     */
+    public boolean useCollections() {
+        return useCollections;
+    }
+
+    /**
+     * Returns the preferred style for serializing blank nodes.
+     *
+     * @return The {@link BlankNodeStyleEnum} for blank node serialization.
+     */
+    public BlankNodeStyleEnum getBlankNodeStyle() {
+        return blankNodeStyle;
+    }
+
+    /**
+     * Checks if multi-line literal syntax (triple quotes) should be used.
+     *
+     * @return {@code true} if multi-line literals are enabled, {@code false} otherwise.
+     */
+    public boolean useMultilineLiterals() {
+        return useMultilineLiterals;
+    }
+
+    /**
+     * Checks if human-readable formatting (pretty-printing) is enabled.
+     *
+     * @return {@code true} if pretty-printing is enabled, {@code false} otherwise.
+     */
+    public boolean prettyPrint() {
+        return prettyPrint;
+    }
+
+    /**
+     * Returns the string used for indentation when pretty-printing.
+     *
+     * @return The indentation string; never {@code null}.
+     */
+    public String getIndent() {
+        return indent;
+    }
+
+    /**
+     * Returns the maximum desired line length before the serializer attempts to break lines.
+     *
+     * @return The maximum line length, exactly as supplied to the builder.
+     */
+    public int getMaxLineLength() {
+        return maxLineLength;
+    }
+
+    /**
+     * Checks if triples should be grouped by subject in the output.
+     *
+     * @return {@code true} if grouping by subject is enabled, {@code false} otherwise.
+     */
+    public boolean groupBySubject() {
+        return groupBySubject;
+    }
+
+    /**
+     * Checks if subjects should be sorted alphabetically in the output.
+     *
+     * @return {@code true} if subject sorting is enabled, {@code false} otherwise.
+     */
+    public boolean sortSubjects() {
+        return sortSubjects;
+    }
+
+    /**
+     * Checks if predicates should be sorted alphabetically within a subject group.
+     *
+     * @return {@code true} if predicate sorting is enabled, {@code false} otherwise.
+     */
+    public boolean sortPredicates() {
+        return sortPredicates;
+    }
+
+    /**
+     * Determines if triple quotes should be used for a given literal value.
+     * This is true only if multi-line literals are enabled and the value contains a line feed or carriage return.
+     *
+     * @param literalValue The string value of the literal.
+     * @return {@code true} if triple quotes should be used, {@code false} otherwise.
+     */
+    public boolean shouldUseTripleQuotes(String literalValue) {
+        return useMultilineLiterals && (literalValue.contains(SerializationConstants.LINE_FEED) || literalValue.contains(SerializationConstants.CARRIAGE_RETURN));
+    }
+
+    /**
+     * Checks if output optimization features (compact triples, subject grouping, pretty-printing) are enabled.
+     *
+     * @return {@code true} if any optimization feature is enabled, {@code false} otherwise.
+     */
+    public boolean shouldOptimizeOutput() {
+        return useCompactTriples || groupBySubject || prettyPrint;
+    }
+
+    /**
+     * Checks if inline blank node syntax ({@code []}) should be used.
+     * This is true only if the anonymous blank node style is chosen and compact triples are enabled.
+     *
+     * @return {@code true} if inline blank nodes should be used, {@code false} otherwise.
+     */
+    public boolean shouldUseInlineBlankNodes() {
+        return blankNodeStyle == BlankNodeStyleEnum.ANONYMOUS && useCompactTriples;
+    }
+
+    /**
+     * An abstract base builder for {@link AbstractTFamilyConfig}.
+     * This builder provides methods for setting Turtle/TriG serialization configuration options.
+     * It extends {@link AbstractSerializerConfig.AbstractBuilder} and uses a recursive type
+     * parameter ({@code S}) to allow concrete subclass builders to return their own specific type,
+     * enabling fluent API chaining.
+     *
+     * @param <S> The type of the concrete builder extending this abstract builder.
+     */
+    public abstract static class AbstractTFamilyBuilder<S extends AbstractTFamilyBuilder<S>>
+            extends AbstractSerializerConfig.AbstractBuilder<S> {
+
+        protected boolean usePrefixes = true;
+        protected boolean autoDeclarePrefixes = true;
+        protected PrefixOrderingEnum prefixOrdering = PrefixOrderingEnum.ALPHABETICAL;
+        protected final Map<String, String> customPrefixes = new HashMap<>();
+
+        protected boolean useCompactTriples = true;
+        protected boolean useRdfTypeShortcut = true;
+        // Default to false for complexity, specific formats can override
+        protected boolean useCollections = false;
+        // Default to NAMED (safer for initial impl), specific formats can override
+        protected BlankNodeStyleEnum blankNodeStyle = BlankNodeStyleEnum.NAMED;
+        protected boolean useMultilineLiterals = true;
+
+        // Pretty-Printing Defaults
+        protected boolean prettyPrint = true;
+        protected String indent = SerializationConstants.DEFAULT_INDENTATION;
+        protected int maxLineLength = 80;
+        protected boolean groupBySubject = true;
+        protected boolean sortSubjects = false;
+        protected boolean sortPredicates = false;
+
+        /**
+         * Sets whether prefix declarations should be used for compact IRIs.
+         *
+         * @param usePrefixes {@code true} to use prefixes, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S usePrefixes(boolean usePrefixes) {
+            this.usePrefixes = usePrefixes;
+            return self();
+        }
+
+        /**
+         * Sets whether the serializer should automatically discover and declare prefixes.
+         *
+         * @param autoDeclarePrefixes {@code true} to enable auto-declaration, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S autoDeclarePrefixes(boolean autoDeclarePrefixes) {
+            this.autoDeclarePrefixes = autoDeclarePrefixes;
+            return self();
+        }
+
+        /**
+         * Sets the policy for ordering prefix declarations.
+         *
+         * @param prefixOrdering The {@link PrefixOrderingEnum} to set. Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if the provided policy is null.
+         */
+        public S prefixOrdering(PrefixOrderingEnum prefixOrdering) {
+            this.prefixOrdering = Objects.requireNonNull(prefixOrdering);
+            return self();
+        }
+
+        /**
+         * Adds a custom prefix mapping to be used for serialization.
+         *
+         * @param prefix    The prefix name (e.g., "ex"). Must not be null.
+         * @param namespace The namespace URI (e.g., "http://example.org/"). Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if prefix or namespace is null.
+         */
+        public S addCustomPrefix(String prefix, String namespace) {
+            Objects.requireNonNull(prefix, "Prefix name cannot be null");
+            Objects.requireNonNull(namespace, "Namespace URI cannot be null");
+            this.customPrefixes.put(prefix, namespace);
+            return self();
+        }
+
+        /**
+         * Adds multiple custom prefix mappings from a map.
+         *
+         * @param prefixes A map of prefix names to namespace URIs. Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if the provided map is null.
+         */
+        public S addCustomPrefixes(Map<String, String> prefixes) {
+            Objects.requireNonNull(prefixes, "Prefixes map cannot be null");
+            this.customPrefixes.putAll(prefixes);
+            return self();
+        }
+
+        /**
+         * Sets whether compact triple syntax (using ';' and ',') should be used.
+         *
+         * @param useCompactTriples {@code true} to enable compact triples, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S useCompactTriples(boolean useCompactTriples) {
+            this.useCompactTriples = useCompactTriples;
+            return self();
+        }
+
+        /**
+         * Sets whether the {@code a} shortcut should be used for {@code rdf:type} predicates.
+         *
+         * @param useRdfTypeShortcut {@code true} to enable the {@code a} shortcut, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S useRdfTypeShortcut(boolean useRdfTypeShortcut) {
+            this.useRdfTypeShortcut = useRdfTypeShortcut;
+            return self();
+        }
+
+        /**
+         * Sets whether Turtle collection syntax {@code ( item1 item2 )} should be used for {@code rdf:List} structures.
+         *
+         * @param useCollections {@code true} to enable collection syntax, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S useCollections(boolean useCollections) {
+            this.useCollections = useCollections;
+            return self();
+        }
+
+        /**
+         * Sets the preferred style for serializing blank nodes.
+         *
+         * @param blankNodeStyle The {@link BlankNodeStyleEnum} to set. Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if the provided style is null.
+         */
+        public S blankNodeStyle(BlankNodeStyleEnum blankNodeStyle) {
+            this.blankNodeStyle = Objects.requireNonNull(blankNodeStyle);
+            return self();
+        }
+
+        /**
+         * Sets whether multi-line literal syntax (triple quotes) should be used.
+         *
+         * @param useMultilineLiterals {@code true} to enable multi-line literals, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S useMultilineLiterals(boolean useMultilineLiterals) {
+            this.useMultilineLiterals = useMultilineLiterals;
+            return self();
+        }
+
+        /**
+         * Sets whether human-readable formatting (pretty-printing) is enabled.
+         *
+         * @param prettyPrint {@code true} to enable pretty-printing, {@code false} otherwise.
+         * @return The builder instance for fluent chaining.
+         */
+        public S prettyPrint(boolean prettyPrint) {
+            this.prettyPrint = prettyPrint;
+            return self();
+        }
+
+        /**
+         * Sets the string used for indentation when pretty-printing.
+         *
+         * @param indent The indentation string. Must not be null.
+         * @return The builder instance for fluent chaining.
+         * @throws NullPointerException if the provided indent string is null.
+ */ + public S indent(String indent) { + this.indent = Objects.requireNonNull(indent); + return self(); + } + + /** + * Sets the maximum desired line length before the serializer attempts to break lines. + * + * @param maxLineLength The maximum line length. + * @return The builder instance for fluent chaining. + */ + public S maxLineLength(int maxLineLength) { + this.maxLineLength = maxLineLength; + return self(); + } + + /** + * Sets whether triples should be grouped by subject in the output. + * + * @param groupBySubject {@code true} to enable grouping by subject, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public S groupBySubject(boolean groupBySubject) { + this.groupBySubject = groupBySubject; + return self(); + } + + /** + * Sets whether subjects should be sorted alphabetically in the output. + * + * @param sortSubjects {@code true} to enable subject sorting, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public S sortSubjects(boolean sortSubjects) { + this.sortSubjects = sortSubjects; + return self(); + } + + /** + * Sets whether predicates should be sorted alphabetically within a subject group. + * + * @param sortPredicates {@code true} to enable predicate sorting, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public S sortPredicates(boolean sortPredicates) { + this.sortPredicates = sortPredicates; + return self(); + } + + /** + * Builds and returns a new {@link AbstractTFamilyConfig} instance with the current builder settings. + * This method must be implemented by concrete builder subclasses to return their specific configuration type. + * + * @return A new {@code AbstractTFamilyConfig} instance or a subclass instance. 
+ */ + public abstract AbstractTFamilyConfig build(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/BlankNodeStyleEnum.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/BlankNodeStyleEnum.java new file mode 100644 index 000000000..ef04f0744 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/BlankNodeStyleEnum.java @@ -0,0 +1,15 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +/** + * Defines the style for serializing blank nodes. + */ +public enum BlankNodeStyleEnum { + /** + * Use the compact '[]' or '[ predicate object ; ... ]' shorthand syntax where possible. + */ + ANONYMOUS, + /** + * Use named blank nodes with generated IDs (e.g., '_:b1', '_:b2'). + */ + NAMED +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/LiteralDatatypePolicyEnum.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/LiteralDatatypePolicyEnum.java new file mode 100644 index 000000000..b79a60eb0 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/LiteralDatatypePolicyEnum.java @@ -0,0 +1,19 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +/** + * Defines the policy for serializing literal datatypes. + */ +public enum LiteralDatatypePolicyEnum { + /** + * Only show datatype if it's not xsd:string and not rdf:langString. + */ + MINIMAL, + /** + * Always show the full datatype, even for xsd:string. + */ + ALWAYS_TYPED, + /** + * Only show explicit datatype for XSD types (non-XSD datatypes might be omitted or full URI). 
+ */ + XSD_TYPED +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/NQuadsConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/NQuadsConfig.java new file mode 100644 index 000000000..f39c1e630 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/NQuadsConfig.java @@ -0,0 +1,66 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +/** + * Configuration for N-Quads serialization format. + * This class extends {@link AbstractNFamilyConfig} and provides specific defaults + * and options tailored for N-Quads, which extends N-Triples with named graphs. + * + *

Use the {@link Builder} class to create instances of {@code NQuadsConfig}. + * A predefined default configuration is available via {@link #defaultConfig()}.

+ */ +public class NQuadsConfig extends AbstractNFamilyConfig { + + /** + * Protected constructor to be used by the {@link Builder}. + * + * @param builder The builder instance containing the desired configuration values. + */ + protected NQuadsConfig(Builder builder) { + super(builder); + } + + /** + * Public Builder for {@link NQuadsConfig}. + * Provides a fluent API for constructing {@code NQuadsConfig} instances with default values + * specific to the N-Quads format. + */ + public static class Builder extends AbstractNFamilyConfig.AbstractNFamilyBuilder { + /** + * Default constructor initializes all options with their default values for N-Quads. + */ + public Builder() { + includeContext(true); + } + + /** + * Builds and returns a new {@link NQuadsConfig} instance with the current builder settings. + * + * @return A new {@code NQuadsConfig} instance. + */ + @Override + public NQuadsConfig build() { + return new NQuadsConfig(this); + } + } + + /** + * Returns a default configuration suitable for N-Quads serialization. + * This provides a convenient way to get a standard N-Quads configuration without + * manually building it. + * + * @return A {@code NQuadsConfig} instance with default settings. + */ + public static NQuadsConfig defaultConfig() { + return new Builder().build(); + } + + /** + * Returns a new builder instance for {@link NQuadsConfig}. + * This allows for fluent construction of custom N-Quads configurations. + * + * @return A new {@code Builder} instance. 
+ */ + public static Builder builder() { + return new Builder(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/NTriplesConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/NTriplesConfig.java new file mode 100644 index 000000000..b3818ed50 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/NTriplesConfig.java @@ -0,0 +1,67 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +/** + * Configuration for N-Triples serialization format. + * This class extends {@link AbstractNFamilyConfig} and provides specific defaults + * and options tailored for N-Triples, which is a simple, line-oriented format. + * + *

Use the {@link Builder} class to create instances of {@code NTriplesConfig}. + * A predefined default configuration is available via {@link #defaultConfig()}.

+ */ +public class NTriplesConfig extends AbstractNFamilyConfig { + + /** + * Protected constructor to be used by the {@link Builder}. + * + * @param builder The builder instance containing the desired configuration values. + */ + protected NTriplesConfig(Builder builder) { + super(builder); + } + + /** + * Public Builder for {@link NTriplesConfig}. + * Provides a fluent API for constructing {@code NTriplesConfig} instances with default values + * specific to the N-Triples format. + */ + public static class Builder extends AbstractNFamilyConfig.AbstractNFamilyBuilder { + /** + * Default constructor initializes all options with their default values for N-Triples. + */ + public Builder() { + + includeContext(false); + } + + /** + * Builds and returns a new {@link NTriplesConfig} instance with the current builder settings. + * + * @return A new {@code NTriplesConfig} instance. + */ + @Override + public NTriplesConfig build() { + return new NTriplesConfig(this); + } + } + + /** + * Returns a default configuration suitable for N-Triples serialization. + * This provides a convenient way to get a standard N-Triples configuration without + * manually building it. + * + * @return A {@code NTriplesConfig} instance with default settings. + */ + public static NTriplesConfig defaultConfig() { + return new Builder().build(); + } + + /** + * Returns a new builder instance for {@link NTriplesConfig}. + * This allows for fluent construction of custom N-Triples configurations. + * + * @return A new {@code Builder} instance. 
+ */ + public static NTriplesConfig.Builder builder() { + return new NTriplesConfig.Builder(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/PrefixOrderingEnum.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/PrefixOrderingEnum.java new file mode 100644 index 000000000..76f0aa6c1 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/PrefixOrderingEnum.java @@ -0,0 +1,19 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +/** + * Defines the ordering policy for prefix declarations. + */ +public enum PrefixOrderingEnum { + /** + * Prefixes are sorted alphabetically by their namespace URI. + */ + ALPHABETICAL, + /** + * Prefixes are declared in the order they are first encountered/used in the graph. + */ + USAGE_ORDER, + /** + * A custom order defined by the user through the customPrefixes map. + */ + CUSTOM +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/TriGConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/TriGConfig.java new file mode 100644 index 000000000..aa0a2821c --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/TriGConfig.java @@ -0,0 +1,81 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; + +import java.util.HashMap; +import java.util.Map; + +/** + * Configuration for TriG serialization format. + * This class extends {@link AbstractTFamilyConfig} and provides specific defaults + * and options tailored for TriG, which extends Turtle with named graphs. + * + *

Use the {@link Builder} class to create instances of {@code TriGConfig}. + * A predefined default configuration is available via {@link #defaultConfig()}.

+ */ +public class TriGConfig extends AbstractTFamilyConfig { + + /** + * Protected constructor to be used by the {@link Builder}. + * + * @param builder The builder instance containing the desired configuration values. + */ + protected TriGConfig(Builder builder) { + super(builder); + } + + /** + * Public Builder for {@link TriGConfig}. + * Provides a fluent API for constructing {@code TriGConfig} instances with default values + * specific to the TriG format. + */ + public static class Builder extends AbstractTFamilyConfig.AbstractTFamilyBuilder { + /** + * Default constructor initializes all options with their default values for TriG. + */ + public Builder() { + includeContext(true); + blankNodeStyle(BlankNodeStyleEnum.NAMED); + useCollections(false); + + Map commonTriGPrefixes = new HashMap<>(); + commonTriGPrefixes.put("rdf", SerializationConstants.RDF_NS); + commonTriGPrefixes.put("rdfs", SerializationConstants.RDFS_NS); + commonTriGPrefixes.put("xsd", SerializationConstants.XSD_NS); + commonTriGPrefixes.put("owl", SerializationConstants.OWL_NS); + addCustomPrefixes(commonTriGPrefixes); + + } + + /** + * Builds and returns a new {@link TriGConfig} instance with the current builder settings. + * + * @return A new {@code TriGConfig} instance. + */ + @Override + public TriGConfig build() { + return new TriGConfig(this); + } + } + + /** + * Returns a default configuration suitable for TriG serialization. + * This provides a convenient way to get a standard TriG configuration without + * manually building it. + * + * @return A {@code TriGConfig} instance with default settings. + */ + public static TriGConfig defaultConfig() { + return new Builder().build(); + } + + /** + * Returns a new builder instance for {@link TriGConfig}. + * This allows for fluent construction of custom TriG configurations. + * + * @return A new {@code Builder} instance. 
+ */ + public static TriGConfig.Builder builder() { + return new TriGConfig.Builder(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/TurtleConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/TurtleConfig.java new file mode 100644 index 000000000..739169e9c --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/TurtleConfig.java @@ -0,0 +1,84 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; + +import java.util.HashMap; +import java.util.Map; + +/** + * Configuration for Turtle serialization format. + * This class extends {@link AbstractTFamilyConfig} and provides specific defaults + * and options tailored for Turtle, such as using collections and anonymous blank nodes. + * + *

Use the {@link Builder} class to create instances of {@code TurtleConfig}. + * A predefined default configuration is available via {@link #defaultConfig()}.

+ */ +public class TurtleConfig extends AbstractTFamilyConfig { + + /** + * Protected constructor to be used by the {@link Builder}. + * + * @param builder The builder instance containing the desired configuration values. + */ + protected TurtleConfig(Builder builder) { + super(builder); + } + + /** + * Public Builder for {@link TurtleConfig}. + * Provides a fluent API for constructing {@code TurtleConfig} instances with default values + * specific to the Turtle format. + */ + public static class Builder extends AbstractTFamilyConfig.AbstractTFamilyBuilder { + /** + * Default constructor initializes all options with their default values for Turtle. + */ + public Builder() { + lineEnding(System.lineSeparator()); + validateURIs(false); + useCollections(true); + blankNodeStyle(BlankNodeStyleEnum.ANONYMOUS); + + Map commonTurtlePrefixes = new HashMap<>(); + commonTurtlePrefixes.put("rdf", SerializationConstants.RDF_NS); + commonTurtlePrefixes.put("rdfs", SerializationConstants.RDFS_NS); + commonTurtlePrefixes.put("xsd", SerializationConstants.XSD_NS); + commonTurtlePrefixes.put("owl", SerializationConstants.OWL_NS); + addCustomPrefixes(commonTurtlePrefixes); + + + } + + /** + * Builds and returns a new {@link TurtleConfig} instance with the current builder settings. + * + * @return A new {@code TurtleConfig} instance. + */ + @Override + public TurtleConfig build() { + return new TurtleConfig(this); + } + } + + /** + * Returns a default configuration suitable for Turtle serialization. + * This provides a convenient way to get a standard Turtle configuration without + * manually building it. + * + * @return A {@code TurtleConfig} instance with default settings. + */ + public static TurtleConfig defaultConfig() { + return new Builder().build(); + } + + + /** + * Returns a new builder instance for {@link TurtleConfig}. + * This allows for fluent construction of custom Turtle configurations. + * + * @return A new {@code Builder} instance. 
+ */ + public static TurtleConfig.Builder builder() { + return new TurtleConfig.Builder(); + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/XmlConfig.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/XmlConfig.java new file mode 100644 index 000000000..37aba7e28 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/config/XmlConfig.java @@ -0,0 +1,370 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Configuration for RDF/XML serialization format. + * This class extends {@link AbstractSerializerConfig} directly as RDF/XML has + * distinct serialization characteristics not shared by the Turtle or N-Family formats. + * + *

Use the {@link Builder} class to create instances of {@code XmlConfig}. + * A predefined default configuration is available via {@link #defaultConfig()}.

+ */ +public class XmlConfig extends AbstractSerializerConfig { + + /** + * Whether prefix declarations (e.g., `xmlns:prefix="uri"`) should be used for compact IRIs. + * This is crucial for human-readable formats like RDF/XML. + */ + protected final boolean usePrefixes; + /** + * Whether the serializer should automatically discover and declare prefixes used in the graph. + * This avoids manual prefix configuration but can lead to more prefixes than strictly needed. + */ + protected final boolean autoDeclarePrefixes; + /** + * The policy for ordering prefix declarations (e.g., alphabetically, by usage, or custom). + * This impacts the determinism and readability of the prefix block. + */ + protected final PrefixOrderingEnum prefixOrdering; + /** + * A map of custom URI prefixes to be used for serialization, in addition to or instead of + * auto-declared prefixes. Useful for enforcing specific prefix names or when {@code autoDeclarePrefixes} is false. + * Keys are prefixes, values are namespace URIs. + */ + protected final Map customPrefixes; + /** + * Whether human-readable formatting with indentation and newlines (pretty-printing) is enabled. + * This makes the output easier for humans to read and debug, but increases file size slightly. + */ + protected final boolean prettyPrint; + /** + * The string used for indentation (e.g., " ", "\t"). + * This defines the visual spacing for nested structures when pretty-printing. + */ + protected final String indent; + /** + * The maximum desired line length before the serializer attempts to break lines. + * This helps ensure readability by preventing very long lines in the output. + */ + protected final int maxLineLength; + /** + * Whether subjects should be sorted alphabetically in the output. + * This ensures a consistent and reproducible order of subjects, useful for diffing or testing. + */ + protected final boolean sortSubjects; + /** + * Whether predicates should be sorted alphabetically within a subject group. 
+ * This ensures a consistent and reproducible order of properties for a given subject. + */ + protected final boolean sortPredicates; + /** + * Whether multi-line literal syntax (e.g., CDATA sections or direct text nodes with newlines) + * should be used for literals containing newline characters. + */ + protected final boolean useMultilineLiterals; + + + /** + * Protected constructor to be used by the {@link Builder}. + * + * @param builder The builder instance containing the desired configuration values. + */ + protected XmlConfig(Builder builder) { + super(builder); + + this.usePrefixes = builder.usePrefixes; + this.autoDeclarePrefixes = builder.autoDeclarePrefixes; + this.prefixOrdering = Objects.requireNonNull(builder.prefixOrdering, "Prefix ordering cannot be null"); + this.customPrefixes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(builder.customPrefixes, "Custom prefixes map cannot be null"))); + this.prettyPrint = builder.prettyPrint; + this.indent = Objects.requireNonNull(builder.indent, "Indentation string cannot be null"); + this.maxLineLength = builder.maxLineLength; + this.sortSubjects = builder.sortSubjects; + this.sortPredicates = builder.sortPredicates; + this.useMultilineLiterals = builder.useMultilineLiterals; + + } + + /** + * Checks if prefix declarations should be used for compact IRIs. + * + * @return {@code true} if prefixes are used, {@code false} otherwise. + */ + public boolean usePrefixes() { + return usePrefixes; + } + + /** + * Checks if the serializer should automatically discover and declare prefixes. + * + * @return {@code true} if auto-declaration is enabled, {@code false} otherwise. + */ + public boolean autoDeclarePrefixes() { + return autoDeclarePrefixes; + } + + /** + * Returns the policy for ordering prefix declarations. + * + * @return The {@link PrefixOrderingEnum} for prefix ordering. 
+ */ + public PrefixOrderingEnum getPrefixOrdering() { + return prefixOrdering; + } + + /** + * Returns an unmodifiable map of custom URI prefixes. + * + * @return A map where keys are prefix names and values are namespace URIs. + */ + public Map getCustomPrefixes() { + return customPrefixes; + } + + /** + * Checks if human-readable formatting (pretty-printing) is enabled. + * + * @return {@code true} if pretty-printing is enabled, {@code false} otherwise. + */ + public boolean prettyPrint() { + return prettyPrint; + } + + /** + * Returns the string used for indentation when pretty-printing. + * + * @return The indentation string. + */ + public String getIndent() { + return indent; + } + + /** + * Returns the maximum desired line length before the serializer attempts to break lines. + * + * @return The maximum line length. + */ + public int getMaxLineLength() { + return maxLineLength; + } + + /** + * Checks if subjects should be sorted alphabetically in the output. + * + * @return {@code true} if subject sorting is enabled, {@code false} otherwise. + */ + public boolean sortSubjects() { + return sortSubjects; + } + + /** + * Checks if predicates should be sorted alphabetically within a subject group. + * + * @return {@code true} if predicate sorting is enabled, {@code false} otherwise. + */ + public boolean sortPredicates() { + return sortPredicates; + } + + /** + * Checks if multi-line literal syntax should be used. + * + * @return {@code true} if multi-line literals are enabled, {@code false} otherwise. + */ + public boolean useMultilineLiterals() { + return useMultilineLiterals; + } + + + /** + * Public Builder for {@link XmlConfig}. + * Provides a fluent API for constructing {@code XmlConfig} instances with default values + * specific to the RDF/XML format. 
+ */ + public static class Builder extends AbstractSerializerConfig.AbstractBuilder { + protected boolean usePrefixes = true; + protected boolean autoDeclarePrefixes = true; + protected PrefixOrderingEnum prefixOrdering = PrefixOrderingEnum.ALPHABETICAL; + protected final Map customPrefixes = new HashMap<>(); + protected boolean prettyPrint = true; + protected String indent = SerializationConstants.DEFAULT_INDENTATION; + protected int maxLineLength = 0; + protected boolean sortSubjects = false; + protected boolean sortPredicates = false; + protected boolean useMultilineLiterals = true; + + + /** + * Default constructor initializes all options with their default values for RDF/XML. + */ + public Builder() { + // Call superclass builder methods for common properties + literalDatatypePolicy(LiteralDatatypePolicyEnum.ALWAYS_TYPED); // RDF/XML typically types all literals + trailingDot(false); // No trailing dot in RDF/XML + stableBlankNodeIds(true); // Good for reproducible RDF/XML outputs + escapeUnicode(false); // Usually direct UTF-8 for RDF/XML, not unicode escapes + + addCustomPrefix("rdf", SerializationConstants.RDF_NS); + addCustomPrefix("rdfs", SerializationConstants.RDFS_NS); + addCustomPrefix("xsd", SerializationConstants.XSD_NS); + addCustomPrefix("owl", SerializationConstants.OWL_NS); + } + + + /** + * Sets whether prefix declarations should be used for compact IRIs. + * + * @param usePrefixes {@code true} to use prefixes, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public Builder usePrefixes(boolean usePrefixes) { + this.usePrefixes = usePrefixes; + return self(); + } + + /** + * Sets whether the serializer should automatically discover and declare prefixes. + * + * @param autoDeclarePrefixes {@code true} to enable auto-declaration, {@code false} otherwise. + * @return The builder instance for fluent chaining. 
+ */ + public Builder autoDeclarePrefixes(boolean autoDeclarePrefixes) { + this.autoDeclarePrefixes = autoDeclarePrefixes; + return self(); + } + + /** + * Sets the policy for ordering prefix declarations. + * + * @param prefixOrdering The {@link PrefixOrderingEnum} to set. Must not be null. + * @return The builder instance for fluent chaining. + * @throws NullPointerException if the provided policy is null. + */ + public Builder prefixOrdering(PrefixOrderingEnum prefixOrdering) { + this.prefixOrdering = Objects.requireNonNull(prefixOrdering); + return self(); + } + + /** + * Adds a custom prefix mapping to be used for serialization. + * + * @param prefix The prefix name (e.g., "ex"). Must not be null. + * @param namespace The namespace URI. Must not be null. + * @return The builder instance for fluent chaining. + * @throws NullPointerException if prefix or namespace is null. + */ + public Builder addCustomPrefix(String prefix, String namespace) { + Objects.requireNonNull(prefix, "Prefix name cannot be null"); + Objects.requireNonNull(namespace, "Namespace URI cannot be null"); + this.customPrefixes.put(prefix, namespace); + return self(); + } + + /** + * Adds multiple custom prefix mappings from a map. + * + * @param prefixes A map of prefix names to namespace URIs. Must not be null. + * @return The builder instance for fluent chaining. + * @throws NullPointerException if the provided map is null. + */ + public Builder addCustomPrefixes(Map prefixes) { + Objects.requireNonNull(prefixes, "Prefixes map cannot be null"); + this.customPrefixes.putAll(prefixes); + return self(); + } + + /** + * Sets whether human-readable formatting (pretty-printing) is enabled. + * + * @param prettyPrint {@code true} to enable pretty-printing, {@code false} otherwise. + * @return The builder instance for fluent chaining. 
+ */ + public Builder prettyPrint(boolean prettyPrint) { + this.prettyPrint = prettyPrint; + return self(); + } + + /** + * Sets the string used for indentation when pretty-printing. + * + * @param indent The indentation string. Must not be null. + * @return The builder instance for fluent chaining. + * @throws NullPointerException if the provided indent string is null. + */ + public Builder indent(String indent) { + this.indent = Objects.requireNonNull(indent); + return self(); + } + + /** + * Sets the maximum desired line length before the serializer attempts to break lines. + * + * @param maxLineLength The maximum line length. + * @return The builder instance for fluent chaining. + */ + public Builder maxLineLength(int maxLineLength) { + this.maxLineLength = maxLineLength; + return self(); + } + + /** + * Sets whether subjects should be sorted alphabetically in the output. + * + * @param sortSubjects {@code true} to enable subject sorting, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public Builder sortSubjects(boolean sortSubjects) { + this.sortSubjects = sortSubjects; + return self(); + } + + /** + * Sets whether predicates should be sorted alphabetically within a subject group. + * + * @param sortPredicates {@code true} to enable predicate sorting, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public Builder sortPredicates(boolean sortPredicates) { + this.sortPredicates = sortPredicates; + return self(); + } + + /** + * Sets whether multi-line literal syntax should be used. + * + * @param useMultilineLiterals {@code true} to enable multi-line literals, {@code false} otherwise. + * @return The builder instance for fluent chaining. + */ + public Builder useMultilineLiterals(boolean useMultilineLiterals) { + this.useMultilineLiterals = useMultilineLiterals; + return self(); + } + + /** + * Builds and returns a new {@link XmlConfig} instance with the current builder settings. 
+ * + * @return A new {@code XmlConfig} instance. + */ + @Override + public XmlConfig build() { + return new XmlConfig(this); + } + } + + /** + * Returns a default configuration suitable for RDF/XML serialization. + * This provides a convenient way to get a standard RDF/XML configuration without + * manually building it. + * + * @return A {@code XmlConfig} instance with default settings. + */ + public static XmlConfig defaultConfig() { + return new Builder().build(); + } +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/common/serialization/util/SerializationConstants.java b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/util/SerializationConstants.java new file mode 100644 index 000000000..ef560f804 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/common/serialization/util/SerializationConstants.java @@ -0,0 +1,108 @@ +package fr.inria.corese.core.next.impl.common.serialization.util; + +import fr.inria.corese.core.next.impl.common.vocabulary.OWL; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.vocabulary.RDFS; +import fr.inria.corese.core.next.impl.common.vocabulary.XSD; + + +/** + * Provides common constants used throughout the RDF serialization process. + * This includes URIs for common RDF, RDFS, XSD, and OWL vocabularies, + * as well as various special characters and strings used in serialization formats + * like Turtle, N-Triples, and N-Quads. 
+ */ +public final class SerializationConstants { + + private SerializationConstants() { + // Private constructor to prevent instantiation + } + + // --- Standard RDF/RDFS/XSD/OWL URIs --- + public static final String RDF_NS = RDF.HTML.getNamespace(); + public static final String RDF_TYPE = RDF.type.getIRI().stringValue(); + public static final String RDF_FIRST = RDF.first.getIRI().stringValue(); + public static final String RDF_REST = RDF.rest.getIRI().stringValue(); + public static final String RDF_NIL = RDF.nil.getIRI().stringValue(); + + + public static final String RDFS_NS = RDFS.Resource.getNamespace(); + + public static final String XSD_NS = XSD.xsdString.getNamespace(); + + public static final String OWL_NS = OWL.NS; + + public static final String XSD_STRING = XSD_NS + "string"; + public static final String XSD_INTEGER = XSD_NS + "integer"; + public static final String XSD_DECIMAL = XSD_NS + "decimal"; + public static final String XSD_DOUBLE = XSD_NS + "double"; + public static final String XSD_BOOLEAN = XSD_NS + "boolean"; + public static final String XSD_DATETIME = XSD_NS + "dateTime"; + + // FOAF namespace + public static final String FOAF_NS = "http://xmlns.com/foaf/0.1/"; + + + // --- Common Delimiters and Special Characters in Serialization --- + public static final String SPACE = " "; + public static final String TAB = "\t"; + public static final String LINE_FEED = "\n"; + public static final String CARRIAGE_RETURN = "\r"; + public static final String NEWLINE = LINE_FEED; + + public static final String POINT = "."; + public static final String SEMICOLON = ";"; + public static final String COMMA = ","; + public static final String AT_SIGN = "@"; + public static final String CARET = "^"; + public static final String LT = "<"; // Less than + public static final String GT = ">"; // Greater than + public static final String QUOTE = "\""; + public static final String COLON = ":"; + public static final String BACK_SLASH = "\\"; + + // Nouveaux 
délimiteurs + public static final String HASH = "#"; + public static final String SLASH = "/"; + + + // Turtle-specific + public static final String RDF_TYPE_SHORTCUT = "a"; + public static final String BNODE_PREFIX = "_:"; + public static final String DATATYPE_SEPARATOR = "^^"; + public static final String BLANK_NODE_START = "["; + public static final String BLANK_NODE_END = "]"; + + public static final String OPEN_PARENTHESIS = "("; + public static final String CLOSE_PARENTHESIS = ")"; + + // --- Default Values for Configuration --- + public static final String DEFAULT_INDENTATION = " "; // Two spaces + public static final String DEFAULT_LINE_ENDING = "\n"; // Unix-style + + public static final String EMPTY_STRING = ""; + + // TriG-specific + public static final String OPEN_BRACE = "{"; + public static final String CLOSE_BRACE = "}"; + + // XML-specific constants + public static final String XML_DECLARATION_START = ""; + public static final String RDF_ROOT_START = "OWL Class + */ + Class("Class"), + /** + * @see OWL Ontology + */ + Ontology("Ontology"), + /** + * @see OWL ObjectProperty + */ + ObjectProperty("ObjectProperty"), + /** + * @see OWL DatatypeProperty + */ + DatatypeProperty("DatatypeProperty"), + /** + * @see OWL AnnotationProperty + */ + AnnotationProperty("AnnotationProperty"), + /** + * @see OWL NamedIndividual + */ + NamedIndividual("NamedIndividual"), + /** + * @see OWL Restriction + */ + Restriction("Restriction"), + /** + * @see OWL equivalentClass + */ + equivalentClass("equivalentClass"), + /** + * @see OWL sameAs + */ + sameAs("sameAs"), + /** + * @see OWL differentFrom + */ + differentFrom("differentFrom") + ; + + private final IRI iri; + + public static final String NS = "http://www.w3.org/2002/07/owl#"; + + OWL(String localName) { + this.iri = new BasicIRI(getNamespace(), localName); + } + + @Override + public IRI getIRI() { + return this.iri; + } + + @Override + public String getNamespace() { + return NS; // Referencing the directly 
defined static NS + } + + @Override + public String getPreferredPrefix() { + return "owl"; + } +} diff --git a/src/main/java/fr/inria/corese/core/next/impl/exception/SerializationException.java b/src/main/java/fr/inria/corese/core/next/impl/exception/SerializationException.java index 710ba0c93..ee622a195 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/exception/SerializationException.java +++ b/src/main/java/fr/inria/corese/core/next/impl/exception/SerializationException.java @@ -1,19 +1,39 @@ package fr.inria.corese.core.next.impl.exception; /** - * Exception levée lors d'échecs de sérialisation/désérialisation RDF. - * Peut contenir des détails spécifiques au format (NTriples, JSON-LD, etc.). + * Exception thrown during RDF serialization or deserialization failures. + * This exception can carry format-specific details (e.g., NTriples, JSON-LD, XML, etc.), + * as well as information about the location of the error within the data stream. */ public class SerializationException extends Exception { private final String formatName; private final int lineNumber; private final int columnNumber; + /** + * Constructs a new {@code SerializationException} with the specified detail message, + * format name, and cause. Line and column numbers are set to -1 (unknown). + * + * @param message the detail message (which is saved for later retrieval by the {@link #getMessage()} method). + * @param formatName the name of the RDF format being processed when the error occurred. + * Use "unknown" if the format is not applicable or cannot be determined. + * @param cause the cause (which is saved for later retrieval by the {@link #getCause()} method). + * (A {@code null} value is permitted, and indicates that the cause is nonexistent or unknown.) 
+ */ public SerializationException(String message, String formatName, Throwable cause) { this(message, formatName, -1, -1, cause); } - + /** + * Constructs a new {@code SerializationException} with the specified detail message, + * format name, line number, column number, and cause. + * + * @param message the detail message. + * @param formatName the name of the RDF format being processed. + * @param lineNumber the line number where the error occurred, or -1 if unknown. + * @param columnNumber the column number where the error occurred, or -1 if unknown. + * @param cause the cause of the exception. + */ public SerializationException(String message, String formatName, int lineNumber, int columnNumber, Throwable cause) { super(buildMessage(message, formatName, lineNumber, columnNumber), cause); this.formatName = formatName; @@ -21,6 +41,16 @@ public SerializationException(String message, String formatName, int lineNumber, this.columnNumber = columnNumber; } + /** + * Builds the complete exception message by incorporating the base message, + * format name, and line/column numbers if available. + * + * @param base the base detail message. + * @param format the name of the RDF format. + * @param line the line number. + * @param col the column number. + * @return the formatted exception message string. + */ private static String buildMessage(String base, String format, int line, int col) { StringBuilder sb = new StringBuilder(base); if (!"unknown".equals(format)) { @@ -35,15 +65,29 @@ private static String buildMessage(String base, String format, int line, int col return sb.toString(); } - + /** + * Returns the name of the RDF format that was being processed when the error occurred. + * + * @return the format name, or "unknown" if not specified. + */ public String getFormatName() { return formatName; } + /** + * Returns the line number where the error occurred. + * + * @return the line number, or -1 if unknown. 
+ */ public int getLineNumber() { return lineNumber; } + /** + * Returns the column number where the error occurred. + * + * @return the column number, or -1 if unknown. + */ public int getColumnNumber() { return columnNumber; } diff --git a/src/main/java/fr/inria/corese/core/sparql/datatype/function/StringHelper.java b/src/main/java/fr/inria/corese/core/sparql/datatype/function/StringHelper.java index ad8d95821..87581a0bc 100644 --- a/src/main/java/fr/inria/corese/core/sparql/datatype/function/StringHelper.java +++ b/src/main/java/fr/inria/corese/core/sparql/datatype/function/StringHelper.java @@ -358,7 +358,7 @@ public static int indexOfWordIgnoreCaseAccentAndPlurial(String string1, String s * qualifier, scientific notation and numbers marked with a type * qualifier (e.g. 123L). * - * @see org.apache.commons.lang.math.NumberUtils + * @see org.apache.commons.lang3.math.NumberUtils * @param str the string to check * @return true if the string denotes a correctly formatted number, false otherwise. 
*/ diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/DefaultSerializerFactoryTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/DefaultSerializerFactoryTest.java new file mode 100644 index 000000000..b6ad13c7e --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/DefaultSerializerFactoryTest.java @@ -0,0 +1,118 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.RdfSerializer; +import fr.inria.corese.core.next.api.SerializationConfig; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.mockito.MockedConstruction; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockConstruction; + +/** + * Unit tests for the {@link DefaultSerializerFactory} class. + * This class verifies that the factory correctly creates instances of + * various {@link RdfSerializer} implementations based on the provided + * {@link RdfFormat} and handles null inputs gracefully. 
+ */ +class DefaultSerializerFactoryTest { + + private DefaultSerializerFactory factory; + private Model mockModel; + private SerializationConfig mockConfig; + + + @BeforeEach + void setUp() { + factory = new DefaultSerializerFactory(); + mockModel = mock(Model.class); + mockConfig = mock(SerializationConfig.class); + } + + @Test + @DisplayName("createSerializer should return TurtleSerializer for TURTLE format") + void createSerializer_shouldReturnTurtleSerializer_forTurtleFormat() { + try (MockedConstruction mockedConstruction = mockConstruction(TurtleSerializer.class)) { + RdfSerializer serializer = factory.createSerializer(RdfFormat.TURTLE, mockModel, mockConfig); + + assertNotNull(serializer); + assertTrue(serializer instanceof TurtleSerializer); + assertEquals(1, mockedConstruction.constructed().size(), "TurtleSerializer constructor should be called once"); + } + } + + @Test + @DisplayName("createSerializer should return NTriplesSerializer for NTRIPLES format") + void createSerializer_shouldReturnNTriplesSerializer_forNTriplesFormat() { + try (MockedConstruction mockedConstruction = mockConstruction(NTriplesSerializer.class)) { + RdfSerializer serializer = factory.createSerializer(RdfFormat.NTRIPLES, mockModel, mockConfig); + + assertNotNull(serializer); + assertTrue(serializer instanceof NTriplesSerializer); + assertEquals(1, mockedConstruction.constructed().size(), "NTriplesSerializer constructor should be called once"); + } + } + + @Test + @DisplayName("createSerializer should return NQuadsSerializer for NQUADS format") + void createSerializer_shouldReturnNQuadsSerializer_forNQuadsFormat() { + try (MockedConstruction mockedConstruction = mockConstruction(NQuadsSerializer.class)) { + RdfSerializer serializer = factory.createSerializer(RdfFormat.NQUADS, mockModel, mockConfig); + + assertNotNull(serializer); + assertTrue(serializer instanceof NQuadsSerializer); + assertEquals(1, mockedConstruction.constructed().size(), "NQuadsSerializer constructor should be 
called once"); + } + } + + @Test + @DisplayName("createSerializer should return TriGSerializer for TRIG format") + void createSerializer_shouldReturnTriGSerializer_forTriGFormat() { + try (MockedConstruction mockedConstruction = mockConstruction(TriGSerializer.class)) { + RdfSerializer serializer = factory.createSerializer(RdfFormat.TRIG, mockModel, mockConfig); + + assertNotNull(serializer); + assertTrue(serializer instanceof TriGSerializer); + assertEquals(1, mockedConstruction.constructed().size(), "TriGSerializer constructor should be called once"); + } + } + + @Test + @DisplayName("createSerializer should return XmlSerializer for RDFXML format") + void createSerializer_shouldReturnXmlSerializer_forRdfXmlFormat() { + try (MockedConstruction mockedConstruction = mockConstruction(XmlSerializer.class)) { + RdfSerializer serializer = factory.createSerializer(RdfFormat.RDFXML, mockModel, mockConfig); + + assertNotNull(serializer); + assertTrue(serializer instanceof XmlSerializer); + assertEquals(1, mockedConstruction.constructed().size(), "XmlSerializer constructor should be called once"); + } + } + + @Test + @DisplayName("createSerializer should throw NullPointerException for a null format") + void createSerializer_shouldThrowNPE_forNullFormat() { + assertThrows(NullPointerException.class, + () -> factory.createSerializer(null, mockModel, mockConfig), + "Should throw NullPointerException for null RdfFormat"); + } + + @Test + @DisplayName("createSerializer should throw NullPointerException for a null model") + void createSerializer_shouldThrowNPE_forNullModel() { + assertThrows(NullPointerException.class, + () -> factory.createSerializer(RdfFormat.TURTLE, null, mockConfig), + "Should throw NullPointerException for null Model"); + } + + @Test + @DisplayName("createSerializer should throw NullPointerException for a null config") + void createSerializer_shouldThrowNPE_forNullConfig() { + assertThrows(NullPointerException.class, + () -> 
factory.createSerializer(RdfFormat.TURTLE, mockModel, null), + "Should throw NullPointerException for null SerializationConfig"); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/FormatConfigTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/FormatConfigTest.java deleted file mode 100644 index c1ee05f95..000000000 --- a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/FormatConfigTest.java +++ /dev/null @@ -1,56 +0,0 @@ -package fr.inria.corese.core.next.impl.common.serialization; - -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; - -class FormatConfigTest { - - @Test - @DisplayName("Builder should create FormatConfig with default blank node prefix") - void builderShouldCreateWithDefaultBlankNodePrefix() { - - FormatConfig config = new FormatConfig.Builder().build(); - - - assertNotNull(config, "FormatConfig should not be null"); - assertEquals("_:", config.getBlankNodePrefix(), "Default blank node prefix should be '_:'"); - } - - @Test - @DisplayName("Builder should create FormatConfig with custom blank node prefix") - void builderShouldCreateWithCustomBlankNodePrefix() { - String customPrefix = "genid-"; - - - FormatConfig config = new FormatConfig.Builder() - .blankNodePrefix(customPrefix) - .build(); - - - assertNotNull(config, "FormatConfig should not be null"); - assertEquals(customPrefix, config.getBlankNodePrefix(), "Blank node prefix should match the custom value"); - } - - @Test - @DisplayName("blankNodePrefix method in Builder should throw NullPointerException for null prefix") - void blankNodePrefixShouldThrowForNull() { - - FormatConfig.Builder builder = new FormatConfig.Builder(); - - - assertThrows(NullPointerException.class, () -> 
builder.blankNodePrefix(null), - "Setting a null blank node prefix should throw NullPointerException"); - } - - @Test - @DisplayName("FormatConfig constructor should be private and only accessible via builder") - void constructorIsPrivateAndAccessibleViaBuilder() { - - FormatConfig config = new FormatConfig.Builder().build(); - assertNotNull(config); - } -} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsFormatTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsSerializerTest.java similarity index 55% rename from src/test/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsFormatTest.java rename to src/test/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsSerializerTest.java index 5d1a3636d..2feaa385b 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NQuadsSerializerTest.java @@ -1,7 +1,7 @@ package fr.inria.corese.core.next.impl.common.serialization; import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; +import fr.inria.corese.core.next.impl.common.serialization.config.NQuadsConfig; import fr.inria.corese.core.next.impl.exception.SerializationException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; @@ -13,27 +13,23 @@ import java.io.StringWriter; import java.io.Writer; import java.util.Iterator; -import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.*; -class NQuadsFormatTest { +class NQuadsSerializerTest { private Model model; - private FormatConfig config; - private NQuadsFormat nQuadsFormat; + private NQuadsConfig config; + private NQuadsSerializer nQuadsSerializer; + private TestStatementFactory factory; private Resource mockExPerson; 
private IRI mockExName; - private IRI mockExKnows; - private IRI mockExContext; - private final String lexJohn = "John Doe"; - private final String hello = "Hello"; private Literal mockLiteralJohn; @@ -44,40 +40,37 @@ class NQuadsFormatTest { @BeforeEach void setUp() { model = mock(Model.class); - config = new FormatConfig.Builder().build(); - nQuadsFormat = new NQuadsFormat(model, config); - - mockExPerson = createIRI("http://example.org/Person"); - mockExName = createIRI("http://example.org/name"); - - mockExKnows = createIRI("http://example.org/knows"); + config = NQuadsConfig.defaultConfig(); + nQuadsSerializer = new NQuadsSerializer(model, config); + factory = new TestStatementFactory(); + mockExPerson = factory.createIRI("http://example.org/Person"); + mockExName = factory.createIRI("http://example.org/name"); + mockExKnows = factory.createIRI("http://example.org/knows"); - mockLiteralJohn = createLiteral(lexJohn, null, null); - mockLiteralHelloEn = createLiteral(hello, null, "en"); + mockLiteralJohn = factory.createLiteral(lexJohn, null, null); + mockLiteralHelloEn = factory.createLiteral(hello, null, "en"); - mockBNode1 = createBlankNode("b1"); - mockBNode2 = createBlankNode("b2"); - mockExContext = createIRI("http://example.org/myGraph"); + mockBNode1 = factory.createBlankNode("b1"); + mockBNode2 = factory.createBlankNode("b2"); } @Test @DisplayName("Constructor should throw NullPointerException for null model") void constructorShouldThrowForNullModel() { - assertThrows(NullPointerException.class, () -> new NQuadsFormat(null), "Model cannot be null"); - assertThrows(NullPointerException.class, () -> new NQuadsFormat(null, config), "Model cannot be null"); + assertThrows(NullPointerException.class, () -> new NQuadsSerializer(null), "Model cannot be null"); } @Test - @DisplayName("Constructor should throw NullPointerException for null config") + @DisplayName("Constructor should throw NullPointerException for null configuration") void 
constructorShouldThrowForNullConfig() { - assertThrows(NullPointerException.class, () -> new NQuadsFormat(model, null), "Configuration cannot be null"); + assertThrows(NullPointerException.class, () -> new NQuadsSerializer(model, null), "Configuration cannot be null"); } @Test - @DisplayName("Write should serialize simple statement correctly (default graph)") + @DisplayName("Write should serialize a simple statement correctly (default graph)") void writeShouldSerializeSimpleStatement() throws SerializationException { - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, mockLiteralJohn @@ -85,8 +78,7 @@ void writeShouldSerializeSimpleStatement() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nQuadsFormat.write(writer); - + nQuadsSerializer.write(writer); String expected = String.format("<%s> <%s> \"%s\"", mockExPerson.stringValue(), @@ -96,34 +88,10 @@ void writeShouldSerializeSimpleStatement() throws SerializationException { assertEquals(expected, writer.toString()); } - @Test - @DisplayName("Write should serialize statement with context (named graph)") - void writeShouldSerializeStatementWithContext() throws SerializationException { - Statement stmt = createStatement( - mockExPerson, - mockExName, - mockLiteralJohn, - mockExContext - ); - when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); - - StringWriter writer = new StringWriter(); - nQuadsFormat.write(writer); - - - String expected = String.format("<%s> <%s> \"%s\" <%s>", - mockExPerson.stringValue(), - mockExName.stringValue(), - escapeNQuadsString(lexJohn), - mockExContext.stringValue()) + " .\n"; - - assertEquals(expected, writer.toString()); - } - @Test @DisplayName("Write should handle blank nodes with default prefix") void writeShouldHandleBlankNodes() throws SerializationException { - Statement stmt = createStatement( + Statement stmt = 
factory.createStatement( mockBNode1, mockExKnows, mockBNode2 @@ -131,7 +99,7 @@ void writeShouldHandleBlankNodes() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nQuadsFormat.write(writer); + nQuadsSerializer.write(writer); String expected = String.format("_:%s <%s> _:%s", mockBNode1.stringValue(), @@ -144,8 +112,8 @@ void writeShouldHandleBlankNodes() throws SerializationException { @Test @DisplayName("Write should handle blank nodes in context with default prefix") void writeShouldHandleBlankNodesInContext() throws SerializationException { - Resource blankNodeContext = createBlankNode("b3"); - Statement stmt = createStatement( + Resource blankNodeContext = factory.createBlankNode("b3"); + Statement stmt = factory.createStatement( mockBNode1, mockExKnows, mockExPerson, @@ -154,7 +122,7 @@ void writeShouldHandleBlankNodesInContext() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nQuadsFormat.write(writer); + nQuadsSerializer.write(writer); String expected = String.format("_:%s <%s> <%s> _:%s", mockBNode1.stringValue(), @@ -165,36 +133,10 @@ void writeShouldHandleBlankNodesInContext() throws SerializationException { assertEquals(expected, writer.toString()); } - @Test - @DisplayName("Write should handle blank nodes with custom prefix") - void writeShouldHandleBlankNodesWithCustomPrefix() throws SerializationException { - FormatConfig customConfig = new FormatConfig.Builder().blankNodePrefix("genid-").build(); - NQuadsFormat customSerializer = new NQuadsFormat(model, customConfig); - - Statement stmt = createStatement( - mockBNode1, - mockExKnows, - mockBNode2, - createBlankNode("b3") - ); - when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); - - StringWriter writer = new StringWriter(); - customSerializer.write(writer); - - String expected = 
String.format("genid-%s <%s> genid-%s genid-%s", - mockBNode1.stringValue(), - mockExKnows.stringValue(), - mockBNode2.stringValue(), - createBlankNode("b3").stringValue()) + " .\n"; - - assertEquals(expected, writer.toString()); - } - @Test @DisplayName("Write should throw SerializationException on IO error") void writeShouldThrowOnIOException() throws IOException { - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, mockLiteralJohn @@ -202,13 +144,16 @@ void writeShouldThrowOnIOException() throws IOException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); Writer faultyWriter = mock(Writer.class); - doThrow(new IOException("Simulated IO error")).when(faultyWriter).write(anyString()); + doThrow(new IOException("Simulated IO error during write")).when(faultyWriter).write(anyString()); + doThrow(new IOException("Simulated IO error (char array)")).when(faultyWriter).write(any(char[].class), anyInt(), anyInt()); + doThrow(new IOException("Simulated IO error (close)")).when(faultyWriter).close(); - assertThrows(SerializationException.class, () -> nQuadsFormat.write(faultyWriter)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nQuadsSerializer.write(faultyWriter)); + assertEquals("N-Quads serialization failed [Format: N-Quads]", thrown.getMessage()); } @Test - @DisplayName("Write should throw SerializationException on null subject value from Statement") + @DisplayName("Write should throw SerializationException for null subject value in strict mode") void writeShouldThrowOnNullSubjectValue() { Statement stmt = mock(Statement.class); when(stmt.getSubject()).thenReturn(null); @@ -218,11 +163,12 @@ void writeShouldThrowOnNullSubjectValue() { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - assertThrows(SerializationException.class, () -> nQuadsFormat.write(writer)); + SerializationException thrown = 
assertThrows(SerializationException.class, () -> nQuadsSerializer.write(writer)); + assertEquals("Invalid N-Quads data: Value cannot be null in N-Quads format when strictMode is enabled. [Format: N-Quads]", thrown.getMessage()); } @Test - @DisplayName("Write should throw SerializationException on null predicate value from Statement") + @DisplayName("Write should throw SerializationException for null predicate value in strict mode") void writeShouldThrowOnNullPredicateValue() { Statement stmt = mock(Statement.class); when(stmt.getSubject()).thenReturn(mockExPerson); @@ -232,27 +178,28 @@ void writeShouldThrowOnNullPredicateValue() { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - assertThrows(SerializationException.class, () -> nQuadsFormat.write(writer)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nQuadsSerializer.write(writer)); + assertEquals("Invalid N-Quads data: Value cannot be null in N-Quads format when strictMode is enabled. [Format: N-Quads]", thrown.getMessage()); } @Test - @DisplayName("Write should throw SerializationException on null object value from Statement") + @DisplayName("Write should throw SerializationException for null object value in strict mode") void writeShouldThrowOnNullObjectValue() { Statement stmt = mock(Statement.class); when(stmt.getSubject()).thenReturn(mockExPerson); when(stmt.getPredicate()).thenReturn(mockExName); when(stmt.getObject()).thenReturn(null); - when(stmt.getContext()).thenReturn(null); when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - assertThrows(SerializationException.class, () -> nQuadsFormat.write(writer)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nQuadsSerializer.write(writer)); + assertEquals("Invalid N-Quads data: Value cannot be null in N-Quads format when strictMode is enabled. 
[Format: N-Quads]", thrown.getMessage()); } @Test - @DisplayName("Write should correctly handle null context (default graph)") + @DisplayName("Write should handle null context correctly (default graph)") void writeShouldHandleNullContext() throws SerializationException { - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, mockLiteralJohn, @@ -261,8 +208,7 @@ void writeShouldHandleNullContext() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nQuadsFormat.write(writer); - + nQuadsSerializer.write(writer); String expected = String.format("<%s> <%s> \"%s\"", mockExPerson.stringValue(), @@ -283,12 +229,11 @@ void writeShouldHandleNullContext() throws SerializationException { "literal with \u0001 (SOH)", "literal with \u007F (DEL)" }) - @DisplayName("Write should handle various literal values with proper escaping") + @DisplayName("Write should handle various literal values with appropriate escaping (including Unicode)") void writeShouldHandleVariousLiterals(String literalValue) throws SerializationException { + Literal literalMock = factory.createLiteral(literalValue, null, null); - Literal literalMock = createLiteral(literalValue, null, null); - - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, literalMock @@ -296,8 +241,7 @@ void writeShouldHandleVariousLiterals(String literalValue) throws SerializationE when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nQuadsFormat.write(writer); - + nQuadsSerializer.write(writer); String expectedEscapedLiteral = escapeNQuadsString(literalValue); String expectedOutput = String.format("<%s> <%s> \"%s\"", @@ -308,65 +252,54 @@ void writeShouldHandleVariousLiterals(String literalValue) throws SerializationE assertEquals(expectedOutput, writer.toString()); } - @Test 
@DisplayName("Should handle literals with language tags") void shouldHandleLiteralsWithLanguageTags() throws SerializationException { - Statement stmt = createStatement(mockExPerson, createIRI("http://example.org/greeting"), mockLiteralHelloEn); + Statement stmt = factory.createStatement(mockExPerson, factory.createIRI("http://example.org/greeting"), mockLiteralHelloEn); Model currentTestModel = mock(Model.class); when(currentTestModel.iterator()).thenReturn(new MockStatementIterator(stmt)); Writer writer = new StringWriter(); - NQuadsFormat serializer = new NQuadsFormat(currentTestModel); + + NQuadsSerializer serializer = new NQuadsSerializer(currentTestModel, NQuadsConfig.defaultConfig()); serializer.write(writer); String expectedOutput = String.format("<%s> <%s> \"%s\"@%s", mockExPerson.stringValue(), - createIRI("http://example.org/greeting").stringValue(), + factory.createIRI("http://example.org/greeting").stringValue(), escapeNQuadsString(hello), mockLiteralHelloEn.getLanguage().get()) + " .\n"; assertEquals(expectedOutput, writer.toString()); } + @Test + @DisplayName("Should handle literals with custom datatypes") + void shouldHandleLiteralsWithCustomDatatypes() throws SerializationException { + IRI customDatatype = factory.createIRI("http://example.org/myDataType"); + Literal customLiteral = factory.createLiteral("123", customDatatype, null); - /** - * Creates a mocked Literal object. - * Important: The `lexicalForm` is the *raw string value* of the literal, - * without N-Quads specific quotes, lang tags, or datatype URIs. - * The `NQuadsFormat` class is responsible for adding those. - * - * @param lexicalForm The raw string value of the literal (e.g., "hello", "123"). - * @param dataTypeIRI The IRI of the literal's datatype (e.g., XSD.INTEGER.getIRI()), or null for plain/lang-tagged. - * @param langTag The language tag (e.g., "en"), or null if not language-tagged. - * @return A mocked Literal instance. 
- */ - private Literal createLiteral(String lexicalForm, IRI dataTypeIRI, String langTag) { - Literal literal = mock(Literal.class); - when(literal.isLiteral()).thenReturn(true); - when(literal.isResource()).thenReturn(false); - when(literal.stringValue()).thenReturn(lexicalForm); - - if (langTag != null && !langTag.isEmpty()) { - when(literal.getLanguage()).thenReturn(Optional.of(langTag)); - - when(literal.getDatatype()).thenReturn(RDF.langString.getIRI()); - } else { - when(literal.getLanguage()).thenReturn(Optional.empty()); - when(literal.getDatatype()).thenReturn(dataTypeIRI); - } - return literal; + Statement stmt = factory.createStatement(mockExPerson, factory.createIRI("http://example.org/value"), customLiteral); + + Model currentTestModel = mock(Model.class); + when(currentTestModel.iterator()).thenReturn(new MockStatementIterator(stmt)); + + NQuadsSerializer serializer = new NQuadsSerializer(currentTestModel, NQuadsConfig.defaultConfig()); + + StringWriter writer = new StringWriter(); + serializer.write(writer); + + String expectedOutput = String.format("<%s> <%s> \"%s\"^^<%s>", + mockExPerson.stringValue(), + factory.createIRI("http://example.org/value").stringValue(), + escapeNQuadsString("123"), + customDatatype.stringValue()) + " .\n"; + + assertEquals(expectedOutput, writer.toString()); } - /** - * Escapes a string according to N-Quads literal escaping rules. - * This helper is used in tests to construct the *expected* output strings. - * It mimics the behavior of NQuadsFormat's internal escapeLiteral method. - * - * @param s The string to escape. - * @return The escaped string. 
- */ + private String escapeNQuadsString(String s) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < s.length(); i++) { @@ -394,8 +327,18 @@ private String escapeNQuadsString(String s) { sb.append("\\\\"); break; default: - if (c >= '\u0000' && c <= '\u001F' || c == '\u007F') { + if (c <= 0x1F || c == 0x7F) { + sb.append(String.format("\\u%04X", (int) c)); + } else if (c >= 0x80 && c <= 0xFFFF) { sb.append(String.format("\\u%04X", (int) c)); + } else if (Character.isHighSurrogate(c)) { + int codePoint = s.codePointAt(i); + if (Character.isValidCodePoint(codePoint)) { + sb.append(String.format("\\U%08X", codePoint)); + i++; + } else { + sb.append(c); + } } else { sb.append(c); } @@ -404,7 +347,6 @@ private String escapeNQuadsString(String s) { return sb.toString(); } - private static class MockStatementIterator implements Iterator { private final Statement[] statements; private int index = 0; @@ -423,35 +365,4 @@ public Statement next() { return statements[index++]; } } - - private Statement createStatement(Resource subject, IRI predicate, Value object) { - return createStatement(subject, predicate, object, null); - } - - private Statement createStatement(Resource subject, IRI predicate, Value object, Resource context) { - Statement stmt = mock(Statement.class); - when(stmt.getSubject()).thenReturn(subject); - when(stmt.getPredicate()).thenReturn(predicate); - when(stmt.getObject()).thenReturn(object); - when(stmt.getContext()).thenReturn(context); - return stmt; - } - - private Resource createBlankNode(String id) { - Resource blankNode = mock(Resource.class); - when(blankNode.isResource()).thenReturn(true); - when(blankNode.isBNode()).thenReturn(true); - when(blankNode.isIRI()).thenReturn(false); - when(blankNode.stringValue()).thenReturn(id); - return blankNode; - } - - private IRI createIRI(String uri) { - IRI iri = mock(IRI.class); - when(iri.isResource()).thenReturn(true); - when(iri.isIRI()).thenReturn(true); - 
when(iri.isBNode()).thenReturn(false); - when(iri.stringValue()).thenReturn(uri); - return iri; - } -} \ No newline at end of file +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesFormatTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesSerializerTest.java similarity index 54% rename from src/test/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesFormatTest.java rename to src/test/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesSerializerTest.java index e00d89d41..aa51f1536 100644 --- a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/NTriplesSerializerTest.java @@ -1,8 +1,7 @@ package fr.inria.corese.core.next.impl.common.serialization; import fr.inria.corese.core.next.api.*; -import fr.inria.corese.core.next.impl.common.vocabulary.RDF; - +import fr.inria.corese.core.next.impl.common.serialization.config.NTriplesConfig; import fr.inria.corese.core.next.impl.exception.SerializationException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; @@ -14,17 +13,17 @@ import java.io.StringWriter; import java.io.Writer; import java.util.Iterator; -import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.Mockito.*; -class NTriplesFormatTest { +class NTriplesSerializerTest { private Model model; - private FormatConfig config; - private NTriplesFormat nTriplesFormat; + private NTriplesConfig config; + private NTriplesSerializer nTriplesSerializer; + private TestStatementFactory factory; private Resource mockExPerson; private IRI mockExName; @@ -34,7 +33,6 @@ class NTriplesFormatTest { private final String hello = "Hello"; private Literal mockLiteralJohn; - private Literal mockLiteralHelloEn; private Resource 
mockBNode1; private Resource mockBNode2; @@ -42,41 +40,37 @@ class NTriplesFormatTest { @BeforeEach void setUp() { model = mock(Model.class); - config = new FormatConfig.Builder().build(); - nTriplesFormat = new NTriplesFormat(model, config); - - - mockExPerson = createIRI("http://example.org/Person"); - mockExName = createIRI("http://example.org/name"); - - mockExKnows = createIRI("http://example.org/knows"); - + config = NTriplesConfig.defaultConfig(); + nTriplesSerializer = new NTriplesSerializer(model, config); + factory = new TestStatementFactory(); - mockLiteralJohn = createLiteral(lexJohn, null, null); + mockExPerson = factory.createIRI("http://example.org/Person"); + mockExName = factory.createIRI("http://example.org/name"); + mockExKnows = factory.createIRI("http://example.org/knows"); - mockLiteralHelloEn = createLiteral(hello, null, "en"); + mockLiteralJohn = factory.createLiteral(lexJohn, null, null); + mockLiteralHelloEn = factory.createLiteral(hello, null, "en"); - mockBNode1 = createBlankNode("b1"); - mockBNode2 = createBlankNode("b2"); + mockBNode1 = factory.createBlankNode("b1"); + mockBNode2 = factory.createBlankNode("b2"); } @Test @DisplayName("Constructor should throw NullPointerException for null model") void constructorShouldThrowForNullModel() { - assertThrows(NullPointerException.class, () -> new NTriplesFormat(null), "Model cannot be null"); - assertThrows(NullPointerException.class, () -> new NTriplesFormat(null, config), "Model cannot be null"); + assertThrows(NullPointerException.class, () -> new NTriplesSerializer(null), "Model cannot be null"); } @Test - @DisplayName("Constructor should throw NullPointerException for null config") + @DisplayName("Constructor should throw NullPointerException for null configuration") void constructorShouldThrowForNullConfig() { - assertThrows(NullPointerException.class, () -> new NTriplesFormat(model, null), "Configuration cannot be null"); + assertThrows(NullPointerException.class, () -> new 
NTriplesSerializer(model, null), "Configuration cannot be null"); } @Test - @DisplayName("Write should serialize simple statement correctly") + @DisplayName("Write should serialize a simple statement correctly (default graph)") void writeShouldSerializeSimpleStatement() throws SerializationException { - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, mockLiteralJohn @@ -84,8 +78,7 @@ void writeShouldSerializeSimpleStatement() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nTriplesFormat.write(writer); - + nTriplesSerializer.write(writer); String expected = String.format("<%s> <%s> \"%s\"", mockExPerson.stringValue(), @@ -96,10 +89,10 @@ void writeShouldSerializeSimpleStatement() throws SerializationException { } @Test - @DisplayName("Write should serialize statement with context but ignore it (N-Triples)") + @DisplayName("Write should serialize a statement with context but ignore it (N-Triples)") void writeShouldSerializeStatementWithContext() throws SerializationException { - IRI mockContext = createIRI("http://example.org/ctx"); - Statement stmt = createStatement( + IRI mockContext = factory.createIRI("http://example.org/ctx"); + Statement stmt = factory.createStatement( mockExPerson, mockExName, mockLiteralJohn, @@ -108,7 +101,7 @@ void writeShouldSerializeStatementWithContext() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nTriplesFormat.write(writer); + nTriplesSerializer.write(writer); String expected = String.format("<%s> <%s> \"%s\"", mockExPerson.stringValue(), @@ -119,9 +112,9 @@ void writeShouldSerializeStatementWithContext() throws SerializationException { } @Test - @DisplayName("Write should handle blank nodes with default prefix") + @DisplayName("Write should handle blank nodes with default N-Triples 
prefix (_:)") void writeShouldHandleBlankNodes() throws SerializationException { - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockBNode1, mockExKnows, mockBNode2 @@ -129,7 +122,7 @@ void writeShouldHandleBlankNodes() throws SerializationException { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nTriplesFormat.write(writer); + nTriplesSerializer.write(writer); String expected = String.format("_:%s <%s> _:%s", mockBNode1.stringValue(), @@ -139,34 +132,10 @@ void writeShouldHandleBlankNodes() throws SerializationException { assertEquals(expected, writer.toString()); } - @Test - @DisplayName("Write should handle blank nodes with custom prefix") - void writeShouldHandleBlankNodesWithCustomPrefix() throws SerializationException { - FormatConfig customConfig = new FormatConfig.Builder().blankNodePrefix("genid-").build(); - NTriplesFormat customSerializer = new NTriplesFormat(model, customConfig); - - Statement stmt = createStatement( - mockBNode1, - mockExKnows, - mockBNode2 - ); - when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); - - StringWriter writer = new StringWriter(); - customSerializer.write(writer); - - String expected = String.format("genid-%s <%s> genid-%s", - mockBNode1.stringValue(), - mockExKnows.stringValue(), - mockBNode2.stringValue()) + " .\n"; - - assertEquals(expected, writer.toString()); - } - @Test @DisplayName("Write should throw SerializationException on IO error") void writeShouldThrowOnIOException() throws IOException { - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, mockLiteralJohn @@ -175,39 +144,49 @@ void writeShouldThrowOnIOException() throws IOException { Writer faultyWriter = mock(Writer.class); - doThrow(new IOException("Simulated IO error")).when(faultyWriter).write(anyString()); + doThrow(new IOException("Simulated IO error during 
write")).when(faultyWriter).write(anyString()); + doThrow(new IOException("Simulated IO error (char array)")).when(faultyWriter).write(any(char[].class), anyInt(), anyInt()); + doThrow(new IOException("Simulated IO error (close)")).when(faultyWriter).close(); - assertThrows(SerializationException.class, () -> nTriplesFormat.write(faultyWriter)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nTriplesSerializer.write(faultyWriter)); + + assertEquals("N-Triples serialization failed [Format: N-Triples]", thrown.getMessage()); } + @Test - @DisplayName("Write should throw SerializationException on null subject value from Statement") + @DisplayName("Write should throw SerializationException for null subject value in strict mode") void writeShouldThrowOnNullSubjectValue() { Statement stmt = mock(Statement.class); when(stmt.getSubject()).thenReturn(null); when(stmt.getPredicate()).thenReturn(mockExName); when(stmt.getObject()).thenReturn(mockLiteralJohn); + when(stmt.getContext()).thenReturn(null); when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - assertThrows(SerializationException.class, () -> nTriplesFormat.write(writer)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nTriplesSerializer.write(writer)); + + assertEquals("Invalid N-Triples data: Value cannot be null in N-Triples format when strictMode is enabled. 
[Format: N-Triples]", thrown.getMessage()); } @Test - @DisplayName("Write should throw SerializationException on null predicate value from Statement") + @DisplayName("Write should throw SerializationException for null predicate value in strict mode") void writeShouldThrowOnNullPredicateValue() { Statement stmt = mock(Statement.class); when(stmt.getSubject()).thenReturn(mockExPerson); when(stmt.getPredicate()).thenReturn(null); when(stmt.getObject()).thenReturn(mockLiteralJohn); + when(stmt.getContext()).thenReturn(null); when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - assertThrows(SerializationException.class, () -> nTriplesFormat.write(writer)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nTriplesSerializer.write(writer)); + assertEquals("Invalid N-Triples data: Value cannot be null in N-Triples format when strictMode is enabled. [Format: N-Triples]", thrown.getMessage()); } @Test - @DisplayName("Write should throw SerializationException on null object value from Statement") + @DisplayName("Write should throw SerializationException for null object value in strict mode") void writeShouldThrowOnNullObjectValue() { Statement stmt = mock(Statement.class); when(stmt.getSubject()).thenReturn(mockExPerson); @@ -216,7 +195,32 @@ void writeShouldThrowOnNullObjectValue() { when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - assertThrows(SerializationException.class, () -> nTriplesFormat.write(writer)); + SerializationException thrown = assertThrows(SerializationException.class, () -> nTriplesSerializer.write(writer)); + assertEquals("Invalid N-Triples data: Value cannot be null in N-Triples format when strictMode is enabled. 
[Format: N-Triples]", thrown.getMessage()); + } + + + @Test + @DisplayName("Should handle null context correctly (default graph)") + void writeShouldHandleNullContext() throws SerializationException { + Statement stmt = factory.createStatement( + mockExPerson, + mockExName, + mockLiteralJohn, + null + ); + when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); + + StringWriter writer = new StringWriter(); + nTriplesSerializer.write(writer); + + + String expected = String.format("<%s> <%s> \"%s\"", + mockExPerson.stringValue(), + mockExName.stringValue(), + escapeNTriplesString(lexJohn)) + " .\n"; + + assertEquals(expected, writer.toString()); } @ParameterizedTest @@ -230,11 +234,11 @@ void writeShouldThrowOnNullObjectValue() { "literal with \u0001 (SOH)", "literal with \u007F (DEL)" }) - @DisplayName("Write should handle various literal values with proper escaping") + @DisplayName("Write should handle various literal values with appropriate escaping (including Unicode)") void writeShouldHandleVariousLiterals(String literalValue) throws SerializationException { - Literal literalMock = createLiteral(literalValue, null, null); + Literal literalMock = factory.createLiteral(literalValue, null, null); - Statement stmt = createStatement( + Statement stmt = factory.createStatement( mockExPerson, mockExName, literalMock @@ -242,7 +246,8 @@ void writeShouldHandleVariousLiterals(String literalValue) throws SerializationE when(model.iterator()).thenReturn(new MockStatementIterator(stmt)); StringWriter writer = new StringWriter(); - nTriplesFormat.write(writer); + nTriplesSerializer.write(writer); + String expectedEscapedLiteral = escapeNTriplesString(literalValue); String expectedOutput = String.format("<%s> <%s> \"%s\"", @@ -253,67 +258,56 @@ void writeShouldHandleVariousLiterals(String literalValue) throws SerializationE assertEquals(expectedOutput, writer.toString()); } + @Test - @DisplayName("Write should handle multiple statements") - void 
writeShouldHandleMultipleStatements() throws SerializationException { - Statement stmt1 = createStatement( - mockExPerson, - mockExName, - createLiteral("o1", null, null) - ); - Statement stmt2 = createStatement( - mockBNode1, - mockExKnows, - mockExPerson, - createIRI("http://example.org/ctx") - ); - when(model.iterator()).thenReturn(new MockStatementIterator(stmt1, stmt2)); + @DisplayName("Should handle literals with language tags") + void shouldHandleLiteralsWithLanguageTags() throws SerializationException { + Statement stmt = factory.createStatement(mockExPerson, factory.createIRI("http://example.org/greeting"), mockLiteralHelloEn); - StringWriter writer = new StringWriter(); - nTriplesFormat.write(writer); + Model currentTestModel = mock(Model.class); + when(currentTestModel.iterator()).thenReturn(new MockStatementIterator(stmt)); - String expectedOutput = String.format("<%s> <%s> \"%s\"", + Writer writer = new StringWriter(); + + NTriplesSerializer serializer = new NTriplesSerializer(currentTestModel, NTriplesConfig.defaultConfig()); + serializer.write(writer); + + String expectedOutput = String.format("<%s> <%s> \"%s\"@%s", mockExPerson.stringValue(), - mockExName.stringValue(), - escapeNTriplesString("o1")) + " .\n" + - String.format("_:%s <%s> <%s>", - mockBNode1.stringValue(), - mockExKnows.stringValue(), - mockExPerson.stringValue()) + " .\n"; + factory.createIRI("http://example.org/greeting").stringValue(), + escapeNTriplesString(hello), + mockLiteralHelloEn.getLanguage().get()) + " .\n"; assertEquals(expectedOutput, writer.toString()); } + @Test - @DisplayName("Should handle literals with language tags") - void shouldHandleLiteralsWithLanguageTags() throws SerializationException { - Statement stmt = createStatement(mockExPerson, createIRI("http://example.org/greeting"), mockLiteralHelloEn); + @DisplayName("Should handle literals with custom datatypes") + void shouldHandleLiteralsWithCustomDatatypes() throws SerializationException { + IRI customDatatype 
= factory.createIRI("http://example.org/myDataType"); + Literal customLiteral = factory.createLiteral("123", customDatatype, null); + + Statement stmt = factory.createStatement(mockExPerson, factory.createIRI("http://example.org/value"), customLiteral); Model currentTestModel = mock(Model.class); when(currentTestModel.iterator()).thenReturn(new MockStatementIterator(stmt)); - Writer writer = new StringWriter(); - NTriplesFormat serializer = new NTriplesFormat(currentTestModel); + NTriplesSerializer serializer = new NTriplesSerializer(currentTestModel, NTriplesConfig.defaultConfig()); + + StringWriter writer = new StringWriter(); serializer.write(writer); - String expectedOutput = String.format("<%s> <%s> \"%s\"@%s", + String expectedOutput = String.format("<%s> <%s> \"%s\"^^<%s>", mockExPerson.stringValue(), - createIRI("http://example.org/greeting").stringValue(), - escapeNTriplesString(hello), - mockLiteralHelloEn.getLanguage().get()) + " .\n"; + factory.createIRI("http://example.org/value").stringValue(), + escapeNTriplesString("123"), + customDatatype.stringValue()) + " .\n"; assertEquals(expectedOutput, writer.toString()); } - /** - * Escapes a string according to N-Triples literal escaping rules. - * This helper is used in tests to construct the *expected* output strings. - * It mimics the behavior of NTriplesFormat's internal escapeLiteral method. - * - * @param s The string to escape. - * @return The escaped string. 
- */ private String escapeNTriplesString(String s) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < s.length(); i++) { @@ -341,7 +335,7 @@ private String escapeNTriplesString(String s) { sb.append("\\\\"); break; default: - if (c >= '\u0000' && c <= '\u001F' || c == '\u007F') { + if (c <= 0x1F || c == 0x7F) { sb.append(String.format("\\u%04X", (int) c)); } else { sb.append(c); @@ -351,7 +345,6 @@ private String escapeNTriplesString(String s) { return sb.toString(); } - private static class MockStatementIterator implements Iterator { private final Statement[] statements; private int index = 0; @@ -370,65 +363,4 @@ public Statement next() { return statements[index++]; } } - - - /** - * Creates a mocked Literal object. - * Important: The `lexicalForm` is the *raw string value* of the literal, - * without N-Triples specific quotes, lang tags, or datatype URIs. - * The `NTriplesFormat` class is responsible for adding those. - * - * @param lexicalForm The raw string value of the literal (e.g., "hello", "123"). - * @param dataTypeIRI The IRI of the literal's datatype (e.g., XSD.INTEGER.getIRI()), or null for plain/lang-tagged. - * @param langTag The language tag (e.g., "en"), or null if not language-tagged. - * @return A mocked Literal instance. 
- */ - private Literal createLiteral(String lexicalForm, IRI dataTypeIRI, String langTag) { - Literal literal = mock(Literal.class); - when(literal.isLiteral()).thenReturn(true); - when(literal.isResource()).thenReturn(false); - when(literal.stringValue()).thenReturn(lexicalForm); - - if (langTag != null && !langTag.isEmpty()) { - when(literal.getLanguage()).thenReturn(Optional.of(langTag)); - - - when(literal.getDatatype()).thenReturn(RDF.langString.getIRI()); - } else { - when(literal.getLanguage()).thenReturn(Optional.empty()); - when(literal.getDatatype()).thenReturn(dataTypeIRI); - } - return literal; - } - - private Statement createStatement(Resource subject, IRI predicate, Value object) { - return createStatement(subject, predicate, object, null); - } - - private Statement createStatement(Resource subject, IRI predicate, Value object, Resource context) { - Statement stmt = mock(Statement.class); - when(stmt.getSubject()).thenReturn(subject); - when(stmt.getPredicate()).thenReturn(predicate); - when(stmt.getObject()).thenReturn(object); - when(stmt.getContext()).thenReturn(context); - return stmt; - } - - private Resource createBlankNode(String id) { - Resource blankNode = mock(Resource.class); - when(blankNode.isResource()).thenReturn(true); - when(blankNode.isBNode()).thenReturn(true); - when(blankNode.isIRI()).thenReturn(false); - when(blankNode.stringValue()).thenReturn(id); - return blankNode; - } - - private IRI createIRI(String uri) { - IRI iri = mock(IRI.class); - when(iri.isResource()).thenReturn(true); - when(iri.isIRI()).thenReturn(true); - when(iri.isBNode()).thenReturn(false); - when(iri.stringValue()).thenReturn(uri); - return iri; - } -} \ No newline at end of file +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormatTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormatTest.java index 8f0df66d0..28f5da4b1 100644 --- 
a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormatTest.java +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/RdfFormatTest.java @@ -219,6 +219,18 @@ void rdfXmlConstant() { assertFalse(rdfxml.supportsNamedGraphs()); } + @Test + @DisplayName("TRIG constant should be correctly defined") + void trigConstant() { + RdfFormat trig = RdfFormat.TRIG; + + assertNotNull(trig, "TRIG constant should not be null"); + assertEquals("TriG", trig.getName()); + assertTrue(trig.getExtensions().contains("trig")); + assertTrue(trig.getMimeTypes().contains("application/trig")); + assertTrue(trig.supportsNamespaces()); + assertTrue(trig.supportsNamedGraphs()); + } @Test @DisplayName("byName() should find existing format by name (case-insensitive)") @@ -286,13 +298,14 @@ void allFormats() { List allFormats = RdfFormat.all(); assertNotNull(allFormats, "List of all formats should not be null"); - assertEquals(5, allFormats.size(), "List should contain 5 predefined formats"); // TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML + assertEquals(6, allFormats.size(), "List should contain 5 predefined formats"); // TURTLE, NTRIPLES, NQUADS, JSONLD, RDFXML, TRIG assertTrue(allFormats.contains(RdfFormat.TURTLE)); assertTrue(allFormats.contains(RdfFormat.NTRIPLES)); assertTrue(allFormats.contains(RdfFormat.NQUADS)); assertTrue(allFormats.contains(RdfFormat.JSONLD)); assertTrue(allFormats.contains(RdfFormat.RDFXML)); + assertTrue(allFormats.contains(RdfFormat.TRIG)); assertThrows(UnsupportedOperationException.class, () -> allFormats.add(RdfFormat.TURTLE), "The list returned by all() should be unmodifiable"); diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/SerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/SerializerTest.java deleted file mode 100644 index 0e8700810..000000000 --- a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/SerializerTest.java +++ /dev/null @@ -1,98 
+0,0 @@ -package fr.inria.corese.core.next.impl.common.serialization; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.Mockito.mockConstruction; -import static org.mockito.Mockito.verify; - -import java.io.Writer; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; -import org.mockito.Mock; -import org.mockito.MockedConstruction; -import org.mockito.MockitoAnnotations; - -import fr.inria.corese.core.next.api.Model; -import fr.inria.corese.core.next.impl.exception.SerializationException; - -class SerializerTest { - - private Serializer serializer; - - @Mock - private Model mockModel; - @Mock - private FormatConfig mockConfig; - @Mock - private Writer mockWriter; - - @BeforeEach - void setUp() { - MockitoAnnotations.openMocks(this); - serializer = new Serializer(mockModel, mockConfig); - } - - // --- Constructor tests --- - - @Test - @DisplayName("Constructor should throw NullPointerException for null model") - void constructorShouldThrowForNullModel() { - assertThrows(NullPointerException.class, () -> new Serializer(null), "Model cannot be null"); - assertThrows(NullPointerException.class, () -> new Serializer(null, mockConfig), "Model cannot be null"); - } - - @Test - @DisplayName("Constructor should throw NullPointerException for null config") - void constructorShouldThrowForNullConfig() { - assertThrows(NullPointerException.class, () -> new Serializer(mockModel, null), "FormatConfig cannot be null"); - } - - // --- Tests for the arguments of the serialize method --- - - @Test - @DisplayName("serialize should throw NullPointerException for null writer") - void serializeShouldThrowForNullWriter() { - assertThrows(NullPointerException.class, () -> serializer.serialize(null, RdfFormat.NTRIPLES), - "Writer cannot be null"); - } - - @Test - @DisplayName("serialize should throw NullPointerException 
for null format") - void serializeShouldThrowForNullFormat() { - assertThrows(NullPointerException.class, () -> serializer.serialize(mockWriter, null), - "RdfFormat cannot be null"); - } - - // --- Serialization delegation tests --- - - @Test - @DisplayName("serialize should delegate to NTriplesFormat for NTRIPLES format") - void serializeShouldDelegateToNTriplesFormat() throws SerializationException { - try (MockedConstruction mockedNtConstructor = mockConstruction(NTriplesFormat.class)) { - serializer.serialize(mockWriter, RdfFormat.NTRIPLES); - - assertEquals(1, mockedNtConstructor.constructed().size(), - "NTriplesFormat constructor should be called once"); - - NTriplesFormat createdNtSerializer = mockedNtConstructor.constructed().get(0); - - verify(createdNtSerializer).write(mockWriter); - } - } - - @Test - @DisplayName("serialize should delegate to NQuadsFormat for NQUADS format") - void serializeShouldDelegateToNQuadsFormat() throws SerializationException { - try (MockedConstruction mockedNqConstructor = mockConstruction(NQuadsFormat.class)) { - serializer.serialize(mockWriter, RdfFormat.NQUADS); - - assertEquals(1, mockedNqConstructor.constructed().size(), "NQuadsFormat constructor should be called once"); - NQuadsFormat createdNqSerializer = mockedNqConstructor.constructed().get(0); - - verify(createdNqSerializer).write(mockWriter); - } - } - -} \ No newline at end of file diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TestStatementFactory.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TestStatementFactory.java new file mode 100644 index 000000000..3fd8dc034 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TestStatementFactory.java @@ -0,0 +1,106 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Literal; +import fr.inria.corese.core.next.api.Resource; +import 
fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.api.Value; +import fr.inria.corese.core.next.impl.common.vocabulary.RDF; + +import java.util.Optional; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * A factory class to create mocked RDF components (Statements, IRIs, Literals, Blank Nodes) + * for use in unit tests. This centralizes the creation logic and reduces duplication + * across various serializer test classes. + */ +public class TestStatementFactory { + + /** + * Creates a mocked Statement object with a subject, predicate, and object. + * The context (named graph) is set to null. + * + * @param subject The mocked Resource representing the subject. + * @param predicate The mocked IRI representing the predicate. + * @param object The mocked Value representing the object (IRI, Literal, or Blank Node). + * @return A mocked Statement instance. + */ + public Statement createStatement(Resource subject, IRI predicate, Value object) { + return createStatement(subject, predicate, object, null); + } + + /** + * Creates a mocked Statement object with a subject, predicate, object, and an optional context. + * + * @param subject The mocked Resource representing the subject. + * @param predicate The mocked IRI representing the predicate. + * @param object The mocked Value representing the object (IRI, Literal, or Blank Node). + * @param context The mocked Resource representing the context (named graph), or null if none. + * @return A mocked Statement instance. + */ + public Statement createStatement(Resource subject, IRI predicate, Value object, Resource context) { + Statement stmt = mock(Statement.class); + when(stmt.getSubject()).thenReturn(subject); + when(stmt.getPredicate()).thenReturn(predicate); + when(stmt.getObject()).thenReturn(object); + when(stmt.getContext()).thenReturn(context); + return stmt; + } + + /** + * Creates a mocked Blank Node (Resource) object with a given ID. 
+ * + * @param id The string identifier for the blank node (e.g., "b1"). + * @return A mocked Resource instance representing a blank node. + */ + public Resource createBlankNode(String id) { + Resource blankNode = mock(Resource.class); + when(blankNode.isResource()).thenReturn(true); + when(blankNode.isBNode()).thenReturn(true); + when(blankNode.isIRI()).thenReturn(false); + when(blankNode.stringValue()).thenReturn(id); + return blankNode; + } + + /** + * Creates a mocked IRI object with a given URI string. + * + * @param uri The string URI for the IRI (e.g., "http://example.org/Person"). + * @return A mocked IRI instance. + */ + public IRI createIRI(String uri) { + IRI iri = mock(IRI.class); + when(iri.isResource()).thenReturn(true); + when(iri.isIRI()).thenReturn(true); + when(iri.isBNode()).thenReturn(false); + when(iri.stringValue()).thenReturn(uri); + return iri; + } + + /** + * Creates a mocked Literal object. + * + * @param lexicalForm The raw string value of the literal (e.g., "hello", "123"). + * @param dataTypeIRI The IRI of the literal's datatype (e.g., XSD.INTEGER.getIRI()), or null for plain/lang-tagged. + * @param langTag The language tag (e.g., "en"), or null if not language-tagged. + * @return A mocked Literal instance. 
+ */ + public Literal createLiteral(String lexicalForm, IRI dataTypeIRI, String langTag) { + Literal literal = mock(Literal.class); + when(literal.isLiteral()).thenReturn(true); + when(literal.isResource()).thenReturn(false); + when(literal.stringValue()).thenReturn(lexicalForm); + + if (langTag != null && !langTag.isEmpty()) { + when(literal.getLanguage()).thenReturn(Optional.of(langTag)); + when(literal.getDatatype()).thenReturn(RDF.langString.getIRI()); + } else { + when(literal.getLanguage()).thenReturn(Optional.empty()); + when(literal.getDatatype()).thenReturn(dataTypeIRI); + } + return literal; + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TriGSerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TriGSerializerTest.java new file mode 100644 index 000000000..cda7322cc --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TriGSerializerTest.java @@ -0,0 +1,487 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.impl.common.literal.RDF; +import fr.inria.corese.core.next.impl.common.serialization.config.LiteralDatatypePolicyEnum; +import fr.inria.corese.core.next.impl.common.serialization.config.TriGConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.Collections; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.*; + +/** + * Test class for {@link TriGSerializer} 
using Mockito to verify serialization behavior + * under various configurations and RDF graph structures. + */ +class TriGSerializerTest { + + private Model mockModel; + private TriGConfig defaultConfig; + private TestStatementFactory factory; + + @BeforeEach + void setUp() { + mockModel = mock(Model.class); + defaultConfig = TriGConfig.defaultConfig(); + factory = new TestStatementFactory(); + } + + /** + * Tests basic TriG serialization of a simple triple. + * Verifies that the subject, predicate, and object are correctly formatted + * and that standard prefixes are declared and used. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. + */ + @Test + void testBasicTriGSerialization() throws SerializationException, IOException { + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/ns/person1"), + factory.createIRI("http://example.org/ns/hasName"), + factory.createLiteral("John Doe", null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + StringWriter writer = new StringWriter(); + + TriGSerializer triGSerializer = new TriGSerializer(mockModel, defaultConfig); + + + triGSerializer.write(writer); + + verify(mockModel, times(2)).stream(); + + String expected = """ + @prefix ns: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + ns:person1 ns:hasName "John Doe" . + + """; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests the `rdf:type` shortcut (using `a`). + * Verifies that `rdf:type` is serialized as `a` when the option is enabled. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testRdfTypeShortcut() throws SerializationException, IOException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/ns/person1"), + factory.createIRI(SerializationConstants.RDF_TYPE), + factory.createIRI("http://xmlns.com/foaf/0.1/Person"), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + StringWriter writer = new StringWriter(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, defaultConfig); + + + triGSerializer.write(writer); + + + verify(mockModel, times(2)).stream(); + + String expected = """ + @prefix foaf: . + @prefix ns: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + ns:person1 a foaf:Person . + + """; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of a literal with a language tag. + * Verifies that the language tag is appended correctly. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testLiteralWithLanguageTag() throws SerializationException, IOException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/data/book1"), + factory.createIRI("http://purl.org/dc/elements/1.1/title"), + factory.createLiteral("The Odyssey", null, "en"), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + + TriGConfig customConfig = new TriGConfig.Builder() + .strictMode(false) + .build(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, customConfig); + + + triGSerializer.write(writer); + + + verify(mockModel, times(2)).stream(); /* NOTE(review): the expected output below pins the auto-generated prefix label "11"; Turtle/TriG prefix labels (PN_PREFIX) must start with a letter, so "11:" looks unparseable - confirm how the serializer derives labels for namespaces ending in a numeric segment */ + String expected = """ + @prefix 11: . + @prefix data: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + data:book1 11:title "The Odyssey"@en . + + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of a literal with an explicit `xsd:string` datatype. + * Verifies that the datatype is printed when `ALWAYS_TYPED` policy is used. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testLiteralWithExplicitXsdStringType() throws SerializationException, IOException { + IRI mockDatatype = factory.createIRI(SerializationConstants.XSD_STRING); + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/data/book2"), + factory.createIRI("http://purl.org/dc/elements/1.1/creator"), + factory.createLiteral("Homer", mockDatatype, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + + TriGConfig customConfig = new TriGConfig.Builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.ALWAYS_TYPED) + .build(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, customConfig); + + + triGSerializer.write(writer); + + + verify(mockModel, times(2)).stream(); + String expected = """ + @prefix 11: . + @prefix data: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + data:book2 11:creator "Homer"^^xsd:string . + + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + + /** + * Tests serialization with a base IRI defined. + * Verifies that the `@base` directive is included in the output. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testBaseIRI() throws SerializationException, IOException { + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/base/resource1"), + factory.createIRI("http://example.org/base/prop"), + factory.createLiteral("Test", null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + + TriGConfig configWithBase = new TriGConfig.Builder() + .baseIRI("http://example.org/base/") + .build(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, configWithBase); + + triGSerializer.write(writer); + + verify(mockModel, times(2)).stream(); + String expected = """ + @base . + @prefix base: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + base:resource1 base:prop "Test" . + + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of an empty model. + * Verifies that only prefix declarations (if auto-declared) are written, with no statements. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. + */ + @Test + void testEmptyModel() throws SerializationException, IOException { + + Model emptyModel = mock(Model.class); + when(emptyModel.iterator()).thenAnswer(invocation -> Collections.emptyList().iterator()); + when(emptyModel.stream()) + .thenReturn(Stream.empty()) + .thenReturn(Stream.empty()); + + + StringWriter writer = new StringWriter(); + TriGSerializer triGSerializer = new TriGSerializer(emptyModel, defaultConfig); + + + triGSerializer.write(writer); + + + verify(emptyModel, times(2)).stream(); + + String expected = """ + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . 
+ + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests strict mode validation for an invalid literal (rdf:langString without language tag). + * Verifies that a {@link SerializationException} is thrown. + * + * @throws SerializationException (expected) if a serialization error occurs due to strict mode. + */ + @Test + void testStrictModeInvalidLiteral() throws SerializationException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/s"), + factory.createIRI("http://example.org/p"), + factory.createLiteral("invalid", RDF.LANGSTRING.getIRI(), null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + TriGConfig strictConfig = new TriGConfig.Builder().strictMode(true).build(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, strictConfig); + + + SerializationException thrown = assertThrows(SerializationException.class, () -> { + triGSerializer.write(writer); + }); + + assertEquals("TriG", thrown.getFormatName()); + + assertEquals("Invalid data for format TriG: An rdf:langString literal must have a language tag. [Format: TriG]", thrown.getMessage()); + } + + /** + * Tests strict mode validation for an IRI containing invalid characters (e.g., space). + * Verifies that a {@link SerializationException} is thrown. + * + * @throws SerializationException (expected) if a serialization error occurs due to strict mode. 
+ */ + @Test + void testStrictModeInvalidIRICharacters() throws SerializationException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/s"), + factory.createIRI("http://example.org/p"), + factory.createIRI("http://example.org/invalid iri"), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + TriGConfig strictConfig = new TriGConfig.Builder().strictMode(true).validateURIs(true).build(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, strictConfig); + + + SerializationException thrown = assertThrows(SerializationException.class, () -> { + triGSerializer.write(writer); + }); + + assertEquals("TriG", thrown.getFormatName()); + + assertEquals("Invalid data for format TriG: IRI contains illegal characters (space, quotes, angle brackets) for the unescaped form of TriG: http://example.org/invalid iri [Format: TriG]", thrown.getMessage()); + } + + /** + * Tests serialization of a literal containing multiple lines. + * Verifies that the literal is wrapped in triple quotes `"""` when `useMultilineLiterals` is true. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testMultilineLiteralSerialization() throws SerializationException, IOException { + + String multilineText = "This is the first line.\nThis is the second line."; + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/book/1"), + factory.createIRI("http://example.org/properties/description"), + factory.createLiteral(multilineText, null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + StringWriter writer = new StringWriter(); + TriGConfig customConfig = new TriGConfig.Builder() + .useMultilineLiterals(true) + .prettyPrint(true) + .build(); + TriGSerializer triGSerializer = new TriGSerializer(mockModel, customConfig); + + + triGSerializer.write(writer); + + + verify(mockModel, times(2)).stream(); + + String expected = """ + @prefix book: . + @prefix owl: . + @prefix properties: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + book:1 properties:description\s""" + "\"\"\"" + multilineText + "\"\"\"" + " .\n\n"; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests basic TriG serialization with a named graph. + * Verifies that the graph name and graph block are correctly formatted. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testBasicTrigSerializationWithNamedGraph() throws SerializationException, IOException { + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/data/person1"), + factory.createIRI("http://example.org/data/name"), + factory.createLiteral("Alice", null, null), + factory.createIRI("http://example.org/graph/g1") + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); /* NOTE(review): a third stream is stubbed here, but the verify below asserts exactly two stream() calls, so it is never consumed - confirm whether it can be dropped */ + StringWriter writer = new StringWriter(); + + TriGSerializer triGSerializer = new TriGSerializer(mockModel, defaultConfig); + + triGSerializer.write(writer); + + verify(mockModel, times(2)).stream(); /* NOTE(review): the expected block below closes the graph with "} ."; the TriG grammar (wrappedGraph) has no "." after the closing brace - confirm this serializer output is intended */ + String expected = """ + @prefix data: . + @prefix graph: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + graph:g1 { + data:person1 data:name "Alice" . + } . 
+ + """; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TurtleSerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TurtleSerializerTest.java new file mode 100644 index 000000000..053e111ef --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/TurtleSerializerTest.java @@ -0,0 +1,498 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.IRI; +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.impl.common.literal.RDF; +import fr.inria.corese.core.next.impl.common.serialization.config.LiteralDatatypePolicyEnum; +import fr.inria.corese.core.next.impl.common.serialization.config.TurtleConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.Collections; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.*; + +/** + * Test class for {@link TurtleSerializer} using Mockito to verify serialization behavior + * under various configurations and RDF graph structures. 
+ */ +class TurtleSerializerTest { + + private Model mockModel; + private TurtleConfig defaultConfig; + private TestStatementFactory factory; + + @BeforeEach + void setUp() { + mockModel = mock(Model.class); + defaultConfig = TurtleConfig.defaultConfig(); + factory = new TestStatementFactory(); + } + + /** + * Tests basic Turtle serialization of a simple triple. + * Verifies that the subject, predicate, and object are correctly formatted + * and that standard prefixes are declared and used. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. + */ + @Test + void testBasicTurtleSerialization() throws SerializationException, IOException { + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/ns/person1"), + factory.createIRI("http://example.org/ns/hasName"), + factory.createLiteral("John Doe", null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + StringWriter writer = new StringWriter(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, defaultConfig); + + turtleSerializer.write(writer); + + verify(mockModel, times(2)).stream(); + + String expected = """ + @prefix ns: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + ns:person1 ns:hasName "John Doe" . + """; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests the `rdf:type` shortcut (using `a`). + * Verifies that `rdf:type` is serialized as `a` when the option is enabled. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testRdfTypeShortcut() throws SerializationException, IOException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/ns/person1"), + factory.createIRI(SerializationConstants.RDF_TYPE), + factory.createIRI("http://xmlns.com/foaf/0.1/Person"), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + StringWriter writer = new StringWriter(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, defaultConfig); + + + turtleSerializer.write(writer); + + verify(mockModel, times(2)).stream(); + + String expected = """ + @prefix foaf: . + @prefix ns: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + ns:person1 a foaf:Person . + """; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of a literal with a language tag. + * Verifies that the language tag is appended correctly. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testLiteralWithLanguageTag() throws SerializationException, IOException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/data/book1"), + factory.createIRI("http://purl.org/dc/elements/1.1/title"), + factory.createLiteral("The Odyssey", null, "en"), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + + TurtleConfig config = new TurtleConfig.Builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.MINIMAL) + .useRdfTypeShortcut(true) + .useCollections(true) + .groupBySubject(true) + .prettyPrint(true) + .indent(" ") + .lineEnding("\n") + .autoDeclarePrefixes(true) + .trailingDot(true) + .strictMode(false) + .build(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, config); + + + turtleSerializer.write(writer); + + + verify(mockModel, times(2)).stream(); /* NOTE(review): the expected output below pins the auto-generated prefix label "11"; Turtle prefix labels (PN_PREFIX) must start with a letter, so "11:" looks unparseable - confirm how the serializer derives labels for namespaces ending in a numeric segment */ + String expected = """ + @prefix 11: . + @prefix data: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + data:book1 11:title "The Odyssey"@en . + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of a literal with an explicit `xsd:string` datatype. + * Verifies that the datatype is printed when `ALWAYS_TYPED` policy is used. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + @DisplayName("Should serialize literal with xsd:string datatype (minimal policy)") + void testLiteralWithExplicitXsdStringType() throws SerializationException, IOException { + + IRI mockDatatype = factory.createIRI(SerializationConstants.XSD_STRING); + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/data/book2"), + factory.createIRI("http://purl.org/dc/elements/1.1/creator"), + factory.createLiteral("Homer", mockDatatype, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + + + TurtleConfig config = new TurtleConfig.Builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.ALWAYS_TYPED) + .usePrefixes(true) + .autoDeclarePrefixes(true) + .addCustomPrefix("dc", "http://purl.org/dc/elements/1.1/") + .build(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, config); + + + turtleSerializer.write(writer); + + + verify(mockModel, times(2)).stream(); + String expected = """ + @prefix data: . + @prefix dc: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + data:book2 dc:creator "Homer"^^xsd:string . + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of a blank node subject using the default anonymous style (`[]`). + * Verifies that the blank node is serialized inline with its properties. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testBlankNodeSerialization() throws SerializationException, IOException { + + Statement mainStatement = factory.createStatement( + factory.createIRI("http://example.org/ns/mainSubject"), + factory.createIRI("http://example.org/ns/refersTo"), + factory.createBlankNode("b1"), + null + ); + + Statement bNodePropertyStatement = factory.createStatement( + factory.createBlankNode("b1"), + factory.createIRI("http://example.org/ns/hasValue"), + factory.createLiteral("Value of BNode", null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Arrays.asList(mainStatement, bNodePropertyStatement).iterator()); + when(mockModel.stream()).thenAnswer(invocation -> Stream.of(mainStatement, bNodePropertyStatement)); + + + StringWriter writer = new StringWriter(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, defaultConfig); + + turtleSerializer.write(writer); + + + verify(mockModel, atLeastOnce()).stream(); + + String expected = """ + @prefix ns: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + ns:mainSubject ns:refersTo _:b1 . + """; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization with a base IRI defined. + * Verifies that the `@base` directive is included in the output. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testBaseIRI() throws SerializationException, IOException { + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/base/resource1"), + factory.createIRI("http://example.org/base/prop"), + factory.createLiteral("Test", null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + + TurtleConfig configWithBase = new TurtleConfig.Builder() + .baseIRI("http://example.org/base/") + .usePrefixes(true) + .autoDeclarePrefixes(true) + .build(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, configWithBase); + + turtleSerializer.write(writer); + + verify(mockModel, times(2)).stream(); + String expected = """ + @base . + @prefix base: . + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + base:resource1 base:prop "Test" . + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests serialization of an empty model. + * Verifies that only prefix declarations (if auto-declared) are written, with no statements. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testEmptyModel() throws SerializationException, IOException { + + Model emptyModel = mock(Model.class); + when(emptyModel.iterator()).thenAnswer(invocation -> Collections.emptyList().iterator()); + when(emptyModel.stream()) + .thenReturn(Stream.empty()) + .thenReturn(Stream.empty()); + + + StringWriter writer = new StringWriter(); + TurtleSerializer turtleSerializer = new TurtleSerializer(emptyModel, defaultConfig); + + + turtleSerializer.write(writer); + + + verify(emptyModel, times(2)).stream(); + + String expected = """ + @prefix owl: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . + + """; + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + + /** + * Tests strict mode validation for an invalid literal (rdf:langString without language tag). + * Verifies that a {@link SerializationException} is thrown. + * + * @throws SerializationException (expected) if a serialization error occurs due to strict mode. + */ + @Test + void testStrictModeInvalidLiteral() throws SerializationException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/s"), + factory.createIRI("http://example.org/p"), + factory.createLiteral("invalid", RDF.LANGSTRING.getIRI(), null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + TurtleConfig strictConfig = new TurtleConfig.Builder().strictMode(true).build(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, strictConfig); + + + SerializationException thrown = assertThrows(SerializationException.class, () -> { + turtleSerializer.write(writer); + }); + + assertEquals("Turtle", thrown.getFormatName()); + + assertEquals("Invalid data for format Turtle: An rdf:langString literal must have 
a language tag. [Format: Turtle]", thrown.getMessage()); + } + + /** + * Tests strict mode validation for an IRI containing invalid characters (e.g., space). + * Verifies that a {@link SerializationException} is thrown. + * + * @throws SerializationException (expected) if a serialization error occurs due to strict mode. + */ + @Test + void testStrictModeInvalidIRICharacters() throws SerializationException { + + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/s"), + factory.createIRI("http://example.org/p"), + factory.createIRI("http://example.org/invalid iri"), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + + StringWriter writer = new StringWriter(); + TurtleConfig strictConfig = new TurtleConfig.Builder().strictMode(true).validateURIs(true).build(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, strictConfig); + + + SerializationException thrown = assertThrows(SerializationException.class, () -> { + turtleSerializer.write(writer); + }); + + assertEquals("Turtle", thrown.getFormatName()); + + assertEquals("Invalid data for format Turtle: IRI contains illegal characters (space, quotes, angle brackets) for the unescaped form of Turtle: http://example.org/invalid iri [Format: Turtle]", thrown.getMessage()); + } + + /** + * Tests serialization of a literal containing multiple lines. + * Verifies that the literal is wrapped in triple quotes `"""` when `useMultilineLiterals` is true. + * + * @throws SerializationException if a serialization error occurs. + * @throws IOException if an I/O error occurs during writing. 
+ */ + @Test + void testMultilineLiteralSerialization() throws SerializationException, IOException { + String multilineText = "This is the first line.\nThis is the second line."; + Statement mockStatement = factory.createStatement( + factory.createIRI("http://example.org/book/1"), + factory.createIRI("http://example.org/properties/description"), + factory.createLiteral(multilineText, null, null), + null + ); + + when(mockModel.iterator()).thenAnswer(invocation -> Collections.singletonList(mockStatement).iterator()); + when(mockModel.stream()) + .thenReturn(Stream.of(mockStatement)) + .thenReturn(Stream.of(mockStatement)); + + StringWriter writer = new StringWriter(); + TurtleConfig config = new TurtleConfig.Builder() + .useMultilineLiterals(true) + .prettyPrint(true) + .autoDeclarePrefixes(true) + .build(); + TurtleSerializer turtleSerializer = new TurtleSerializer(mockModel, config); + + turtleSerializer.write(writer); + + verify(mockModel, times(2)).stream(); + + String expected = """ + @prefix book: . + @prefix owl: . + @prefix properties: . + @prefix rdf: . + @prefix rdfs: . + @prefix xsd: . 
+ + book:1 properties:description\s""" + "\"\"\"" + multilineText + "\"\"\"" + " .\n"; + + String actual = writer.toString().replace("\r\n", "\n"); + assertEquals(expected, actual); + } + +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/XmlSerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/XmlSerializerTest.java new file mode 100644 index 000000000..54d27e46c --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/XmlSerializerTest.java @@ -0,0 +1,515 @@ +package fr.inria.corese.core.next.impl.common.serialization; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.impl.common.serialization.config.LiteralDatatypePolicyEnum; +import fr.inria.corese.core.next.impl.common.serialization.config.PrefixOrderingEnum; +import fr.inria.corese.core.next.impl.common.serialization.config.XmlConfig; +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.StringWriter; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +/** + * Unit tests for the XmlSerializer class. 
+ */ +class XmlSerializerTest { + + @Mock + private Model mockModel; + XmlConfig mockConfig; + private TestStatementFactory factory; + + private StringWriter writer; + + @BeforeEach + void setUp() { + MockitoAnnotations.openMocks(this); + writer = new StringWriter(); + factory = new TestStatementFactory(); + + mockConfig = XmlConfig.defaultConfig(); + } + + + @Test + @DisplayName("Should serialize a simple IRI triple with auto-declared namespaces") + void shouldSerializeSimpleIriTriple() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/subject"), + factory.createIRI("http://xmlns.com/foaf/0.1/name"), + factory.createIRI("http://example.org/object") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .autoDeclarePrefixes(true) + .usePrefixes(true) + .addCustomPrefix("foaf", "http://xmlns.com/foaf/0.1/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + + String expected = """ + + + + + + + """; + + assertEquals(expected, writer.toString()); + } + + @Test + @DisplayName("Should handle blank node subject") + void shouldHandleBlankNodeSubject() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createBlankNode("b1"), + factory.createIRI("http://xmlns.com/foaf/0.1/name"), + factory.createIRI("http://example.org/Alice") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .stableBlankNodeIds(true) + .addCustomPrefix("foaf", "http://xmlns.com/foaf/0.1/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + + + + """; + + + assertEquals(expected, writer.toString()); + } + + @Test + 
@DisplayName("Should handle blank node object with correct namespace ordering") + void shouldHandleBlankNodeObject() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/book"), + factory.createIRI("http://purl.org/dc/elements/1.1/creator"), + factory.createBlankNode("b2") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .stableBlankNodeIds(true) + .addCustomPrefix("dc", "http://purl.org/dc/elements/1.1/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + + String expected = """ + + + + + + + """; + + assertEquals(expected, writer.toString()); + } + + @Test + @DisplayName("Should serialize literal with xsd:string datatype (minimal policy)") + void shouldSerializeLiteralWithStringDatatypeMinimalPolicy() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/person"), + factory.createIRI("http://xmlns.com/foaf/0.1/name"), + factory.createLiteral("John Doe", factory.createIRI(SerializationConstants.XSD_STRING), null) + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.MINIMAL) + .addCustomPrefix("foaf", "http://xmlns.com/foaf/0.1/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + John Doe + + + """; + assertEquals(expected, writer.toString()); + } + + @Test + @DisplayName("Should serialize literal with custom datatype using minimal policy") + void shouldSerializeLiteralWithCustomDatatypeMinimalPolicy() throws SerializationException { + Statement stmt = factory.createStatement( + 
factory.createIRI("http://example.org/data"), + factory.createIRI("http://example.org/vocabulary/value"), + factory.createLiteral("123", factory.createIRI(SerializationConstants.XSD_INTEGER), null) + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.MINIMAL) + .addCustomPrefix("ex", "http://example.org/vocabulary/") + .addCustomPrefix("xsd", "http://www.w3.org/2001/XMLSchema#") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + 123 + + + """; + assertEquals(expected, writer.toString()); + } + + @Test + @DisplayName("Should serialize literal with language tag") + void shouldSerializeLiteralWithLanguage() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/book"), + factory.createIRI("http://purl.org/dc/elements/1.1/title"), + factory.createLiteral("The Book", null, "en") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .addCustomPrefix("dc", "http://purl.org/dc/elements/1.1/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + The Book + + + """; + + assertEquals(expected, writer.toString()); + } + + + @Test + @DisplayName("Should respect default prefix ordering (non-deterministic for subjects)") + void shouldRespectPrefixOrderingDefault() throws SerializationException { + Statement stmt1 = factory.createStatement( + factory.createIRI("http://ex.org/s1"), + factory.createIRI("http://ex.org/p1"), + factory.createIRI("http://ex.org/o1") + ); + Statement stmt2 = factory.createStatement( + factory.createIRI("http://ex.com/s2"), + 
factory.createIRI("http://ex.com/p2"), + factory.createIRI("http://ex.com/o2") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt1, stmt2)); + + XmlConfig testConfig = new XmlConfig.Builder() + .addCustomPrefix("exorg", "http://ex.org/") + .addCustomPrefix("excom", "http://ex.com/") + .prefixOrdering(PrefixOrderingEnum.USAGE_ORDER) + .sortSubjects(false) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String actual = writer.toString(); + + assertTrue(actual.startsWith("\n\n")); + + assertTrue(actual.contains("xmlns:exorg=\"http://ex.org/\"")); + assertTrue(actual.contains("xmlns:excom=\"http://ex.com/\"")); + assertTrue(actual.contains("xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"")); + + String desc1 = " \n \n "; + String desc2 = " \n \n "; + + assertTrue(actual.contains(desc1)); + assertTrue(actual.contains(desc2)); + } + + @Test + @DisplayName("Should sort subjects alphabetically") + void shouldSortSubjectsAlphabetically() throws SerializationException { + Statement stmt1 = factory.createStatement( + factory.createIRI("http://ex.org/B"), + factory.createIRI("http://ex.org/p"), + factory.createIRI("http://ex.org/o") + ); + Statement stmt2 = factory.createStatement( + factory.createIRI("http://ex.org/A"), + factory.createIRI("http://ex.org/p"), + factory.createIRI("http://ex.org/o") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt1, stmt2)); + + XmlConfig testConfig = new XmlConfig.Builder() + .sortSubjects(true) + .addCustomPrefix("ex", "http://ex.org/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + + + + + + + """; + + assertEquals(expected, writer.toString()); + } + + + @Test + @DisplayName("Should escape XML attribute values") + void shouldEscapeXmlAttributeValues() throws SerializationException { + Statement 
stmt = factory.createStatement( + factory.createIRI("http://example.org/sub&ject<"), + factory.createIRI("http://example.org/pred"), + factory.createIRI("http://example.org/obj\"ect'") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + + + + """; + + assertEquals(expected, writer.toString()); + } + + @Test + @DisplayName("Should escape XML content values") + void shouldEscapeXmlContentValues() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/item"), + factory.createIRI("http://example.org/prop"), + factory.createLiteral("Value with & entities", null, null) + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.ALWAYS_TYPED) + .addCustomPrefix("ex", "http://example.org/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + Value with <tags> & entities + + + """; + + assertEquals(expected, writer.toString()); + } + + + @Test + @DisplayName("Should not auto-declare prefixes if disabled in configuration") + void shouldNotAutoDeclarePrefixesIfDisabled() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/subject"), + factory.createIRI("http://xmlns.com/foaf/0.1/name"), + factory.createIRI("http://example.org/object") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .autoDeclarePrefixes(false) + .usePrefixes(true) + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + 
.build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + + + + """; + + assertEquals(expected, writer.toString()); + } + + @Test + @DisplayName("Should not use prefixes if disabled in configuration") + void shouldNotUsePrefixesIfDisabled() throws SerializationException { + Statement stmt = factory.createStatement( + factory.createIRI("http://example.org/subject"), + factory.createIRI("http://xmlns.com/foaf/0.1/name"), + factory.createIRI("http://example.org/object") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt)); + + XmlConfig testConfig = new XmlConfig.Builder() + .usePrefixes(false) + .autoDeclarePrefixes(true) + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + + + + """; + + assertEquals(expected, writer.toString()); + } + + + @Test + @DisplayName("Should not generate stable blank node IDs and sort subjects alphabetically") + void shouldNotGenerateStableBlankNodeIds() throws SerializationException { + Statement stmt1 = factory.createStatement( + factory.createBlankNode("bnode-abc"), + factory.createIRI("http://example.org/p"), + factory.createIRI("http://example.org/o") + ); + Statement stmt2 = factory.createStatement( + factory.createIRI("http://example.org/s"), + factory.createIRI("http://example.org/p"), + factory.createBlankNode("bnode-xyz") + ); + + when(mockModel.stream()).thenReturn(Stream.of(stmt1, stmt2)); + + XmlConfig testConfig = new XmlConfig.Builder() + .stableBlankNodeIds(false) + .sortSubjects(true) + .addCustomPrefix("ex", "http://example.org/") + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + + String expected = """ + + + + + + + + + + """; + + assertEquals(expected, 
writer.toString()); + } + + @Test + @DisplayName("Should handle an empty model") + void shouldHandleEmptyModel() throws SerializationException { + when(mockModel.stream()).thenReturn(Stream.empty()); + + XmlConfig testConfig = new XmlConfig.Builder() + .prefixOrdering(PrefixOrderingEnum.ALPHABETICAL) + .build(); + + XmlSerializer serializer = new XmlSerializer(mockModel, testConfig); + serializer.write(writer); + + String expected = """ + + + + """; + + assertEquals(expected, writer.toString()); + } +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/NQuadsConfigTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/NQuadsConfigTest.java new file mode 100644 index 000000000..8fdb09247 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/NQuadsConfigTest.java @@ -0,0 +1,123 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for the {@link NQuadsConfig} class. + * These tests verify the default configuration settings and the functionality + * of the builder pattern for customizing N-Quads serialization options. 
+ */ +class NQuadsConfigTest { + + @Test + @DisplayName("defaultConfig() should return a config with expected N-Quads defaults") + void defaultConfig_shouldReturnExpectedDefaults() { + NQuadsConfig config = NQuadsConfig.defaultConfig(); + + assertNotNull(config, "Default config should not be null"); + + assertTrue(config.strictMode, "Default strictMode should be true for N-Quads"); + assertTrue(config.escapeUnicode(), "Default escapeUnicode should be true for N-Quads"); + assertEquals(LiteralDatatypePolicyEnum.ALWAYS_TYPED, config.getLiteralDatatypePolicy(), "Default literalDatatypePolicy should be ALWAYS_TYPED"); + assertNull(config.getBaseIRI(), "Default baseIRI should be null"); + assertFalse(config.stableBlankNodeIds(), "Default stableBlankNodeIds should be false"); + + assertTrue(config.includeContext(), "Default includeContext should be true for N-Quads"); + } + + @Test + @DisplayName("Builder should allow overriding includeContext") + void builder_shouldAllowOverridingIncludeContext() { + NQuadsConfig config = NQuadsConfig.builder() + .includeContext(false) + .build(); + + assertFalse(config.includeContext(), "includeContext should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding literalDatatypePolicy") + void builder_shouldAllowOverridingLiteralDatatypePolicy() { + NQuadsConfig config = NQuadsConfig.builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.MINIMAL) + .build(); + + assertEquals(LiteralDatatypePolicyEnum.MINIMAL, config.getLiteralDatatypePolicy(), "literalDatatypePolicy should be overridden to MINIMAL"); + } + + @Test + @DisplayName("Builder should allow overriding escapeUnicode") + void builder_shouldAllowOverridingEscapeUnicode() { + NQuadsConfig config = NQuadsConfig.builder() + .escapeUnicode(false) + .build(); + + assertFalse(config.escapeUnicode(), "escapeUnicode should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding strictMode") + void 
builder_shouldAllowOverridingStrictMode() { + NQuadsConfig config = NQuadsConfig.builder() + .strictMode(false) + .build(); + + assertFalse(config.strictMode, "strictMode should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow setting baseIRI") + void builder_shouldAllowSettingBaseIRI() { + String testBaseIRI = "http://example.org/base/"; + NQuadsConfig config = NQuadsConfig.builder() + .baseIRI(testBaseIRI) + .build(); + + assertEquals(testBaseIRI, config.getBaseIRI(), "baseIRI should be set correctly"); + } + + @Test + @DisplayName("Builder should allow setting lineEnding") + void builder_shouldAllowSettingLineEnding() { + String customLineEnding = "\r\n"; + NQuadsConfig config = NQuadsConfig.builder() + .lineEnding(customLineEnding) + .build(); + + assertEquals(customLineEnding, config.getLineEnding(), "lineEnding should be set correctly"); + } + + @Test + @DisplayName("Builder should allow overriding validateURIs") + void builder_shouldAllowOverridingValidateURIs() { + NQuadsConfig config = NQuadsConfig.builder() + .validateURIs(true) + .build(); + + assertTrue(config.validateURIs(), "validateURIs should be overridden to true"); + } + + @Test + @DisplayName("Builder should allow overriding stableBlankNodeIds") + void builder_shouldAllowOverridingStableBlankNodeIds() { + NQuadsConfig config = NQuadsConfig.builder() + .stableBlankNodeIds(true) + .build(); + + assertTrue(config.stableBlankNodeIds(), "stableBlankNodeIds should be overridden to true"); + } + + @Test + @DisplayName("Builder should handle null values for optional fields gracefully (e.g., baseIRI)") + void builder_shouldHandleNullForOptionalFields() { + NQuadsConfig config = NQuadsConfig.builder() + .baseIRI(null) + .build(); + + assertNull(config.getBaseIRI(), "baseIRI should be null when explicitly set to null"); + } + +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/NTriplesConfigTest.java 
b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/NTriplesConfigTest.java new file mode 100644 index 000000000..0f7ec0927 --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/NTriplesConfigTest.java @@ -0,0 +1,124 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for the {@link NTriplesConfig} class. + * These tests verify the default configuration settings and the functionality + * of the builder pattern for customizing N-Triples serialization options. + */ +class NTriplesConfigTest { + + @Test + @DisplayName("defaultConfig() should return a config with expected N-Triples defaults") + void defaultConfig_shouldReturnExpectedDefaults() { + NTriplesConfig config = NTriplesConfig.defaultConfig(); + + assertNotNull(config, "Default config should not be null"); + + assertTrue(config.strictMode, "Default strictMode should be true for N-Triples"); + assertTrue(config.escapeUnicode(), "Default escapeUnicode should be true for N-Triples"); + assertEquals(LiteralDatatypePolicyEnum.ALWAYS_TYPED, config.getLiteralDatatypePolicy(), "Default literalDatatypePolicy should be ALWAYS_TYPED"); + assertNull(config.getBaseIRI(), "Default baseIRI should be null"); + assertFalse(config.stableBlankNodeIds(), "Default stableBlankNodeIds should be false"); + + assertFalse(config.includeContext(), "Default includeContext should be false for N-Triples"); + } + + @Test + @DisplayName("Builder should allow overriding includeContext") + void builder_shouldAllowOverridingIncludeContext() { + NTriplesConfig config = NTriplesConfig.builder() + .includeContext(true) + .build(); + + assertTrue(config.includeContext(), "includeContext should be overridden to true"); + } + + @Test + @DisplayName("Builder should allow overriding literalDatatypePolicy") + void 
builder_shouldAllowOverridingLiteralDatatypePolicy() { + NTriplesConfig config = NTriplesConfig.builder() + .literalDatatypePolicy(LiteralDatatypePolicyEnum.MINIMAL) + .build(); + + assertEquals(LiteralDatatypePolicyEnum.MINIMAL, config.getLiteralDatatypePolicy(), "literalDatatypePolicy should be overridden to MINIMAL"); + } + + @Test + @DisplayName("Builder should allow overriding escapeUnicode") + void builder_shouldAllowOverridingEscapeUnicode() { + NTriplesConfig config = NTriplesConfig.builder() + .escapeUnicode(false) + .build(); + + assertFalse(config.escapeUnicode(), "escapeUnicode should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding strictMode") + void builder_shouldAllowOverridingStrictMode() { + NTriplesConfig config = NTriplesConfig.builder() + .strictMode(false) + .build(); + + assertFalse(config.strictMode, "strictMode should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow setting baseIRI") + void builder_shouldAllowSettingBaseIRI() { + String testBaseIRI = "http://example.org/base/"; + NTriplesConfig config = NTriplesConfig.builder() + .baseIRI(testBaseIRI) + .build(); + + assertEquals(testBaseIRI, config.getBaseIRI(), "baseIRI should be set correctly"); + } + + @Test + @DisplayName("Builder should allow setting lineEnding") + void builder_shouldAllowSettingLineEnding() { + String customLineEnding = "\r\n"; + NTriplesConfig config = NTriplesConfig.builder() + .lineEnding(customLineEnding) + .build(); + + assertEquals(customLineEnding, config.getLineEnding(), "lineEnding should be set correctly"); + } + + @Test + @DisplayName("Builder should allow overriding validateURIs") + void builder_shouldAllowOverridingValidateURIs() { + NTriplesConfig config = NTriplesConfig.builder() + .validateURIs(true) + .build(); + + assertTrue(config.validateURIs(), "validateURIs should be overridden to true"); + } + + @Test + @DisplayName("Builder should allow overriding stableBlankNodeIds") + void 
builder_shouldAllowOverridingStableBlankNodeIds() { + NTriplesConfig config = NTriplesConfig.builder() + .stableBlankNodeIds(true) + .build(); + + assertTrue(config.stableBlankNodeIds(), "stableBlankNodeIds should be overridden to true"); + } + + @Test + @DisplayName("Builder should handle null values for optional fields gracefully (e.g., baseIRI)") + void builder_shouldHandleNullForOptionalFields() { + NTriplesConfig config = NTriplesConfig.builder() + .baseIRI(null) + .build(); + + assertNull(config.getBaseIRI(), "baseIRI should be null when explicitly set to null"); + } + + +} diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/TriGConfigTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/TriGConfigTest.java new file mode 100644 index 000000000..ff16f055f --- /dev/null +++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/TriGConfigTest.java @@ -0,0 +1,268 @@ +package fr.inria.corese.core.next.impl.common.serialization.config; + +import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for the {@link TriGConfig} class. + * These tests verify the default configuration settings and the functionality + * of the builder pattern for customizing TriG serialization options. 
+ */ +class TriGConfigTest { + + @Test + @DisplayName("defaultConfig() should return a config with expected TriG defaults") + void defaultConfig_shouldReturnExpectedDefaults() { + TriGConfig config = TriGConfig.defaultConfig(); + + assertNotNull(config, "Default config should not be null"); + + assertTrue(config.includeContext(), "Default includeContext should be true for TriG"); + assertEquals(BlankNodeStyleEnum.NAMED, config.getBlankNodeStyle(), "Default blankNodeStyle should be NAMED for TriG"); + assertFalse(config.useCollections(), "Default useCollections should be false for TriG"); + + Map expectedPrefixes = new HashMap<>(); + expectedPrefixes.put("rdf", SerializationConstants.RDF_NS); + expectedPrefixes.put("rdfs", SerializationConstants.RDFS_NS); + expectedPrefixes.put("xsd", SerializationConstants.XSD_NS); + expectedPrefixes.put("owl", SerializationConstants.OWL_NS); + assertEquals(expectedPrefixes.size(), config.getCustomPrefixes().size(), "Default custom prefixes size mismatch"); + assertTrue(config.getCustomPrefixes().entrySet().containsAll(expectedPrefixes.entrySet()), "Default custom prefixes should contain common RDF prefixes"); + + assertTrue(config.usePrefixes(), "Default usePrefixes should be true"); + assertTrue(config.autoDeclarePrefixes(), "Default autoDeclarePrefixes should be true"); + assertEquals(PrefixOrderingEnum.ALPHABETICAL, config.getPrefixOrdering(), "Default prefixOrdering should be ALPHABETICAL"); + assertTrue(config.useCompactTriples(), "Default useCompactTriples should be true"); + assertTrue(config.useRdfTypeShortcut(), "Default useRdfTypeShortcut should be true"); + assertTrue(config.useMultilineLiterals(), "Default useMultilineLiterals should be true"); + assertTrue(config.prettyPrint(), "Default prettyPrint should be true"); + assertEquals(SerializationConstants.DEFAULT_INDENTATION, config.getIndent(), "Default indent should be " + SerializationConstants.DEFAULT_INDENTATION); + assertEquals(80, config.getMaxLineLength(), 
"Default maxLineLength should be 80"); + assertTrue(config.groupBySubject(), "Default groupBySubject should be true"); + assertFalse(config.sortSubjects(), "Default sortSubjects should be false"); + assertFalse(config.sortPredicates(), "Default sortPredicates should be false"); + + assertTrue(config.strictMode, "Default strictMode should be true"); + assertFalse(config.escapeUnicode(), "Default escapeUnicode should be false"); + assertEquals(LiteralDatatypePolicyEnum.MINIMAL, config.getLiteralDatatypePolicy(), "Default literalDatatypePolicy should be MINIMAL"); + assertNull(config.getBaseIRI(), "Default baseIRI should be null"); + } + + @Test + @DisplayName("Builder should allow overriding includeContext") + void builder_shouldAllowOverridingIncludeContext() { + TriGConfig config = TriGConfig.builder() + .includeContext(false) + .build(); + assertFalse(config.includeContext(), "includeContext should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding blankNodeStyle") + void builder_shouldAllowOverridingBlankNodeStyle() { + TriGConfig config = TriGConfig.builder() + .blankNodeStyle(BlankNodeStyleEnum.ANONYMOUS) + .build(); + assertEquals(BlankNodeStyleEnum.ANONYMOUS, config.getBlankNodeStyle(), "blankNodeStyle should be overridden to ANONYMOUS"); + } + + @Test + @DisplayName("Builder should allow overriding useCollections") + void builder_shouldAllowOverridingUseCollections() { + TriGConfig config = TriGConfig.builder() + .useCollections(true) + .build(); + assertTrue(config.useCollections(), "useCollections should be overridden to true"); + } + + @Test + @DisplayName("Builder should allow adding custom prefixes") + void builder_shouldAllowAddingCustomPrefixes() { + String customPrefix = "my"; + String customNamespace = "http://my.example.org/"; + TriGConfig config = TriGConfig.builder() + .addCustomPrefix(customPrefix, customNamespace) + .build(); + + assertTrue(config.getCustomPrefixes().containsKey(customPrefix), "Custom prefix 
should be added"); + assertEquals(customNamespace, config.getCustomPrefixes().get(customPrefix), "Custom prefix namespace should be correct"); + assertTrue(config.getCustomPrefixes().containsKey("rdf")); + assertTrue(config.getCustomPrefixes().containsKey("xsd")); + } + + @Test + @DisplayName("Builder should allow overriding usePrefixes") + void builder_shouldAllowOverridingUsePrefixes() { + TriGConfig config = TriGConfig.builder() + .usePrefixes(false) + .build(); + assertFalse(config.usePrefixes(), "usePrefixes should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding autoDeclarePrefixes") + void builder_shouldAllowOverridingAutoDeclarePrefixes() { + TriGConfig config = TriGConfig.builder() + .autoDeclarePrefixes(false) + .build(); + assertFalse(config.autoDeclarePrefixes(), "autoDeclarePrefixes should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding prefixOrdering") + void builder_shouldAllowOverridingPrefixOrdering() { + TriGConfig config = TriGConfig.builder() + .prefixOrdering(PrefixOrderingEnum.USAGE_ORDER) + .build(); + assertEquals(PrefixOrderingEnum.USAGE_ORDER, config.getPrefixOrdering(), "prefixOrdering should be overridden to USAGE_ORDER"); + } + + @Test + @DisplayName("Builder should allow overriding useCompactTriples") + void builder_shouldAllowOverridingUseCompactTriples() { + TriGConfig config = TriGConfig.builder() + .useCompactTriples(false) + .build(); + assertFalse(config.useCompactTriples(), "useCompactTriples should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding useRdfTypeShortcut") + void builder_shouldAllowOverridingUseRdfTypeShortcut() { + TriGConfig config = TriGConfig.builder() + .useRdfTypeShortcut(false) + .build(); + assertFalse(config.useRdfTypeShortcut(), "useRdfTypeShortcut should be overridden to false"); + } + + @Test + @DisplayName("Builder should allow overriding useMultilineLiterals") + void 
builder_shouldAllowOverridingUseMultilineLiterals() {
+        TriGConfig config = TriGConfig.builder()
+                .useMultilineLiterals(false)
+                .build();
+        assertFalse(config.useMultilineLiterals(), "useMultilineLiterals should be overridden to false");
+    }
+
+    /** Builds a config with {@code prettyPrint(false)} and expects the accessor to report false. */
+    @Test
+    @DisplayName("Builder should allow overriding prettyPrint")
+    void builder_shouldAllowOverridingPrettyPrint() {
+        TriGConfig built = TriGConfig.builder().prettyPrint(false).build();
+
+        assertFalse(built.prettyPrint(), "prettyPrint should be overridden to false");
+    }
+
+    /** Builds a config with a tab as indent and expects {@code getIndent()} to return that same string. */
+    @Test
+    @DisplayName("Builder should allow overriding indent")
+    void builder_shouldAllowOverridingIndent() {
+        String tab = "\t";
+        TriGConfig built = TriGConfig.builder().indent(tab).build();
+
+        assertEquals(tab, built.getIndent(), "indent should be overridden to custom value");
+    }
+
+    /** Builds a config with a line length of 120 and expects {@code getMaxLineLength()} to return it. */
+    @Test
+    @DisplayName("Builder should allow overriding maxLineLength")
+    void builder_shouldAllowOverridingMaxLineLength() {
+        int requestedLength = 120;
+        TriGConfig built = TriGConfig.builder().maxLineLength(requestedLength).build();
+
+        assertEquals(requestedLength, built.getMaxLineLength(), "maxLineLength should be overridden to custom value");
+    }
+
+    /** Builds a config with {@code groupBySubject(false)} and expects the accessor to report false. */
+    @Test
+    @DisplayName("Builder should allow overriding groupBySubject")
+    void builder_shouldAllowOverridingGroupBySubject() {
+        TriGConfig built = TriGConfig.builder().groupBySubject(false).build();
+
+        assertFalse(built.groupBySubject(), "groupBySubject should be overridden to false");
+    }
+
+    /** Builds a config with {@code sortSubjects(true)} and expects the accessor to report true. */
+    @Test
+    @DisplayName("Builder should allow overriding sortSubjects")
+    void builder_shouldAllowOverridingSortSubjects() {
+        TriGConfig built = TriGConfig.builder().sortSubjects(true).build();
+
+        assertTrue(built.sortSubjects(), "sortSubjects should be overridden to true");
+    }
+
+    /** Builds a config with {@code sortPredicates(true)} and expects the accessor to report true. */
+    @Test
+    @DisplayName("Builder should allow overriding sortPredicates")
+    void builder_shouldAllowOverridingSortPredicates() {
+        TriGConfig built = TriGConfig.builder().sortPredicates(true).build();
+
+        assertTrue(built.sortPredicates(), "sortPredicates should be overridden to true");
+    }
+
+    /** Builds a config with {@code strictMode(false)} and checks the {@code strictMode} field directly. */
+    @Test
+    @DisplayName("Builder should allow overriding strictMode")
+    void builder_shouldAllowOverridingStrictMode() {
+        TriGConfig built = TriGConfig.builder().strictMode(false).build();
+
+        assertFalse(built.strictMode, "strictMode should be overridden to false");
+    }
+
+
+    /** Builds a config with the ALWAYS_TYPED policy and expects {@code getLiteralDatatypePolicy()} to return it. */
+    @Test
+    @DisplayName("Builder should allow overriding literalDatatypePolicy")
+    void builder_shouldAllowOverridingLiteralDatatypePolicy() {
+        TriGConfig built = TriGConfig.builder().literalDatatypePolicy(LiteralDatatypePolicyEnum.ALWAYS_TYPED).build();
+
+        assertEquals(LiteralDatatypePolicyEnum.ALWAYS_TYPED, built.getLiteralDatatypePolicy(), "literalDatatypePolicy should be overridden to ALWAYS_TYPED");
+    }
+
+    /** Builds a config with an explicit base IRI and expects {@code getBaseIRI()} to return that same string. */
+    @Test
+    @DisplayName("Builder should allow setting baseIRI")
+    void builder_shouldAllowSettingBaseIRI() {
+        String base = "http://example.org/base/";
+        TriGConfig built = TriGConfig.builder().baseIRI(base).build();
+
+        assertEquals(base, built.getBaseIRI(), "baseIRI should be set correctly");
+    }
+
+    /** Builds a config with CRLF as line ending and expects {@code getLineEnding()} to return that same string. */
+    @Test
+    @DisplayName("Builder should allow overriding lineEnding")
+    void builder_shouldAllowOverridingLineEnding() {
+        String crlf = "\r\n";
+        TriGConfig built = TriGConfig.builder().lineEnding(crlf).build();
+
+        assertEquals(crlf, built.getLineEnding(), "lineEnding should be overridden to custom value");
+    }
+
+    /** Builds a config with {@code validateURIs(true)} and expects the accessor to report true. */
+    @Test
+    @DisplayName("Builder should allow overriding validateURIs")
+    void builder_shouldAllowOverridingValidateURIs() {
+        TriGConfig built = TriGConfig.builder().validateURIs(true).build();
+
+        assertTrue(built.validateURIs(), "validateURIs should be overridden to true");
+    }
+
+    /** Builds a config with {@code stableBlankNodeIds(true)} and expects the accessor to report true. */
+    @Test
+    @DisplayName("Builder should allow overriding stableBlankNodeIds")
+    void builder_shouldAllowOverridingStableBlankNodeIds() {
+        TriGConfig built = TriGConfig.builder().stableBlankNodeIds(true).build();
+
+        assertTrue(built.stableBlankNodeIds(), "stableBlankNodeIds should be overridden to true");
+    }
+
+
+}
diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/TurtleConfigTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/TurtleConfigTest.java
new file mode 100644
index 000000000..482d9718c
--- /dev/null
+++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/TurtleConfigTest.java
@@ -0,0 +1,268 @@
+package fr.inria.corese.core.next.impl.common.serialization.config;
+
+import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Unit tests for the {@link TurtleConfig} class.
+ * These tests verify the default configuration settings and the functionality
+ * of the builder pattern for customizing Turtle serialization options.
+ */
+class TurtleConfigTest {
+
+    @Test
+    @DisplayName("defaultConfig() should return a config with expected Turtle defaults")
+    void defaultConfig_shouldReturnExpectedDefaults() {
+        TurtleConfig config = TurtleConfig.defaultConfig();
+
+        assertNotNull(config, "Default config should not be null");
+
+        assertTrue(config.useCollections(), "Default useCollections should be true for Turtle");
+        assertEquals(BlankNodeStyleEnum.ANONYMOUS, config.getBlankNodeStyle(), "Default blankNodeStyle should be ANONYMOUS for Turtle");
+        // The size check plus containsAll below together require exactly these four prefix entries.
+        Map<String, String> expectedPrefixes = new HashMap<>();
+        expectedPrefixes.put("rdf", SerializationConstants.RDF_NS);
+        expectedPrefixes.put("rdfs", SerializationConstants.RDFS_NS);
+        expectedPrefixes.put("xsd", SerializationConstants.XSD_NS);
+        expectedPrefixes.put("owl", SerializationConstants.OWL_NS);
+        assertEquals(expectedPrefixes.size(), config.getCustomPrefixes().size(), "Default custom prefixes size mismatch");
+        assertTrue(config.getCustomPrefixes().entrySet().containsAll(expectedPrefixes.entrySet()), "Default custom prefixes should contain common RDF prefixes");
+
+        // Prefix handling and layout defaults.
+        assertTrue(config.usePrefixes(), "Default usePrefixes should be true");
+        assertTrue(config.autoDeclarePrefixes(), "Default autoDeclarePrefixes should be true");
+        assertEquals(PrefixOrderingEnum.ALPHABETICAL, config.getPrefixOrdering(), "Default prefixOrdering should be ALPHABETICAL");
+        assertTrue(config.useCompactTriples(), "Default useCompactTriples should be true");
+        assertTrue(config.useRdfTypeShortcut(), "Default useRdfTypeShortcut should be true");
+        assertTrue(config.useMultilineLiterals(), "Default useMultilineLiterals should be true");
+        assertTrue(config.prettyPrint(), "Default prettyPrint should be true");
+        assertEquals(SerializationConstants.DEFAULT_INDENTATION, config.getIndent(), "Default indent should be " + SerializationConstants.DEFAULT_INDENTATION);
+        assertEquals(80, config.getMaxLineLength(), "Default maxLineLength should be 80");
+        assertTrue(config.groupBySubject(), "Default groupBySubject should be true");
+
+        assertTrue(config.strictMode, "Default strictMode should be true");
+        assertFalse(config.escapeUnicode(), "Default escapeUnicode should be false");
+        assertEquals(LiteralDatatypePolicyEnum.MINIMAL, config.getLiteralDatatypePolicy(), "Default literalDatatypePolicy should be MINIMAL");
+        assertNull(config.getBaseIRI(), "Default baseIRI should be null");
+        assertEquals(System.lineSeparator(), config.getLineEnding(), "Default lineEnding should be system's line separator");
+        assertFalse(config.validateURIs(), "Default validateURIs should be false");
+        assertFalse(config.stableBlankNodeIds(), "Default stableBlankNodeIds should be false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding useCollections")
+    void builder_shouldAllowOverridingUseCollections() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .useCollections(false)
+                .build();
+        assertFalse(config.useCollections(), "useCollections should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding blankNodeStyle")
+    void builder_shouldAllowOverridingBlankNodeStyle() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .blankNodeStyle(BlankNodeStyleEnum.NAMED)
+                .build();
+        assertEquals(BlankNodeStyleEnum.NAMED, config.getBlankNodeStyle(), "blankNodeStyle should be overridden to NAMED");
+    }
+
+    /** Adds one custom prefix through the builder and checks it is stored next to the rdf and xsd prefixes. */
+    @Test
+    @DisplayName("Builder should allow adding custom prefixes")
+    void builder_shouldAllowAddingCustomPrefixes() {
+        String customPrefix = "my";
+        String customNamespace = "http://my.example.org/";
+        TurtleConfig config = new TurtleConfig.Builder().addCustomPrefix(customPrefix, customNamespace).build();
+
+        assertTrue(config.getCustomPrefixes().containsKey(customPrefix), "Custom prefix should be added");
+        assertEquals(customNamespace, config.getCustomPrefixes().get(customPrefix), "Custom prefix namespace should be correct");
+        // Adding a prefix is expected to leave the rdf and xsd entries in place.
+        assertTrue(config.getCustomPrefixes().containsKey("rdf"), "rdf prefix should still be present after adding a custom prefix");
+        assertTrue(config.getCustomPrefixes().containsKey("xsd"), "xsd prefix should still be present after adding a custom prefix");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding usePrefixes")
+    void builder_shouldAllowOverridingUsePrefixes() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .usePrefixes(false)
+                .build();
+        assertFalse(config.usePrefixes(), "usePrefixes should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding autoDeclarePrefixes")
+    void builder_shouldAllowOverridingAutoDeclarePrefixes() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .autoDeclarePrefixes(false)
+                .build();
+        assertFalse(config.autoDeclarePrefixes(), "autoDeclarePrefixes should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding prefixOrdering")
+    void builder_shouldAllowOverridingPrefixOrdering() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .prefixOrdering(PrefixOrderingEnum.USAGE_ORDER)
+                .build();
+        assertEquals(PrefixOrderingEnum.USAGE_ORDER, config.getPrefixOrdering(), "prefixOrdering should be overridden to USAGE_ORDER");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding useCompactTriples")
+    void builder_shouldAllowOverridingUseCompactTriples() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .useCompactTriples(false)
+                .build();
+        assertFalse(config.useCompactTriples(), "useCompactTriples should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding useRdfTypeShortcut")
+    void builder_shouldAllowOverridingUseRdfTypeShortcut() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .useRdfTypeShortcut(false)
+                .build();
+        assertFalse(config.useRdfTypeShortcut(), "useRdfTypeShortcut should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding useMultilineLiterals")
+    void builder_shouldAllowOverridingUseMultilineLiterals() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .useMultilineLiterals(false)
+                .build();
+        assertFalse(config.useMultilineLiterals(), "useMultilineLiterals should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding prettyPrint")
+    void builder_shouldAllowOverridingPrettyPrint() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .prettyPrint(false)
+                .build();
+        assertFalse(config.prettyPrint(), "prettyPrint should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding indent")
+    void builder_shouldAllowOverridingIndent() {
+        String customIndent = "\t";
+        TurtleConfig config = new TurtleConfig.Builder()
+                .indent(customIndent)
+                .build();
+        assertEquals(customIndent, config.getIndent(), "indent should be overridden to custom value");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding maxLineLength")
+    void builder_shouldAllowOverridingMaxLineLength() {
+        int customLength = 120;
+        TurtleConfig config = new TurtleConfig.Builder()
+                .maxLineLength(customLength)
+                .build();
+        assertEquals(customLength, config.getMaxLineLength(), "maxLineLength should be overridden to custom value");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding groupBySubject")
+    void builder_shouldAllowOverridingGroupBySubject() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .groupBySubject(false)
+                .build();
+        assertFalse(config.groupBySubject(), "groupBySubject should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding sortSubjects")
+    void builder_shouldAllowOverridingSortSubjects() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .sortSubjects(true)
+                .build();
+        assertTrue(config.sortSubjects(), "sortSubjects should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding sortPredicates")
+    void builder_shouldAllowOverridingSortPredicates() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .sortPredicates(true)
+                .build();
+        assertTrue(config.sortPredicates(), "sortPredicates should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding strictMode")
+    void builder_shouldAllowOverridingStrictMode() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .strictMode(false)
+                .build();
+        // NOTE(review): strictMode is read as a field here while the other options use accessor methods; confirm intended.
+        assertFalse(config.strictMode, "strictMode should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding literalDatatypePolicy")
+    void builder_shouldAllowOverridingLiteralDatatypePolicy() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .literalDatatypePolicy(LiteralDatatypePolicyEnum.ALWAYS_TYPED)
+                .build();
+        assertEquals(LiteralDatatypePolicyEnum.ALWAYS_TYPED, config.getLiteralDatatypePolicy(), "literalDatatypePolicy should be overridden to ALWAYS_TYPED");
+    }
+
+    @Test
+    @DisplayName("Builder should allow setting baseIRI")
+    void builder_shouldAllowSettingBaseIRI() {
+        String testBaseIRI = "http://example.org/base/";
+        TurtleConfig config = new TurtleConfig.Builder()
+                .baseIRI(testBaseIRI)
+                .build();
+        assertEquals(testBaseIRI, config.getBaseIRI(), "baseIRI should be set correctly");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding lineEnding")
+    void builder_shouldAllowOverridingLineEnding() {
+        String customLineEnding = "\r\n";
+        TurtleConfig config = new TurtleConfig.Builder()
+                .lineEnding(customLineEnding)
+                .build();
+        assertEquals(customLineEnding, config.getLineEnding(), "lineEnding should be overridden to custom value");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding validateURIs")
+    void builder_shouldAllowOverridingValidateURIs() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .validateURIs(true)
+                .build();
+        assertTrue(config.validateURIs(), "validateURIs should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding stableBlankNodeIds")
+    void builder_shouldAllowOverridingStableBlankNodeIds() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .stableBlankNodeIds(true)
+                .build();
+        assertTrue(config.stableBlankNodeIds(), "stableBlankNodeIds should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding includeContext")
+    void builder_shouldAllowOverridingIncludeContext() {
+        TurtleConfig config = new TurtleConfig.Builder()
+                .includeContext(true)
+                .build();
+        assertTrue(config.includeContext(), "includeContext should be overridden to true");
+    }
+
+
+}
diff --git a/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/XmlConfigTest.java b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/XmlConfigTest.java
new file mode 100644
index 000000000..7e55fd903
--- /dev/null
+++ b/src/test/java/fr/inria/corese/core/next/impl/common/serialization/config/XmlConfigTest.java
@@ -0,0 +1,225 @@
+package fr.inria.corese.core.next.impl.common.serialization.config;
+
+import fr.inria.corese.core.next.impl.common.serialization.util.SerializationConstants;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Unit tests for the {@link XmlConfig} class.
+ * These tests verify the default configuration settings and the functionality
+ * of the builder pattern for customizing RDF/XML serialization options.
+ */
+class XmlConfigTest {
+
+    @Test
+    @DisplayName("defaultConfig() should return a config with expected RDF/XML defaults")
+    void defaultConfig_shouldReturnExpectedDefaults() {
+        XmlConfig config = XmlConfig.defaultConfig();
+
+        assertNotNull(config, "Default config should not be null");
+
+        assertTrue(config.usePrefixes(), "Default usePrefixes should be true for XML");
+        assertTrue(config.autoDeclarePrefixes(), "Default autoDeclarePrefixes should be true for XML");
+        assertEquals(PrefixOrderingEnum.ALPHABETICAL, config.getPrefixOrdering(), "Default prefixOrdering should be ALPHABETICAL for XML");
+        // The size check plus containsAll below together require exactly these four prefix entries.
+        Map<String, String> expectedPrefixes = new HashMap<>();
+        expectedPrefixes.put("rdf", SerializationConstants.RDF_NS);
+        expectedPrefixes.put("rdfs", SerializationConstants.RDFS_NS);
+        expectedPrefixes.put("xsd", SerializationConstants.XSD_NS);
+        expectedPrefixes.put("owl", SerializationConstants.OWL_NS);
+        assertEquals(expectedPrefixes.size(), config.getCustomPrefixes().size(), "Default custom prefixes size mismatch");
+        assertTrue(config.getCustomPrefixes().entrySet().containsAll(expectedPrefixes.entrySet()), "Default custom prefixes should contain common RDF prefixes");
+
+        assertTrue(config.prettyPrint(), "Default prettyPrint should be true for XML");
+        assertEquals(SerializationConstants.DEFAULT_INDENTATION, config.getIndent(), "Default indent should be " + SerializationConstants.DEFAULT_INDENTATION);
+        assertEquals(0, config.getMaxLineLength(), "Default maxLineLength should be 0 (no line length constraint) for XML");
+        assertFalse(config.sortSubjects(), "Default sortSubjects should be false for XML");
+        assertFalse(config.sortPredicates(), "Default sortPredicates should be false for XML");
+        assertTrue(config.useMultilineLiterals(), "Default useMultilineLiterals should be true for XML");
+
+        assertTrue(config.strictMode, "Default strictMode should be true");
+        assertFalse(config.escapeUnicode(), "Default escapeUnicode should be false for XML");
+        assertEquals(LiteralDatatypePolicyEnum.ALWAYS_TYPED, config.getLiteralDatatypePolicy(), "Default literalDatatypePolicy should be ALWAYS_TYPED for XML");
+        assertNull(config.getBaseIRI(), "Default baseIRI should be null");
+        assertFalse(config.includeContext(), "Default includeContext should be false for XML (RDF/XML doesn't support named graphs)");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding usePrefixes")
+    void builder_shouldAllowOverridingUsePrefixes() {
+        XmlConfig config = new XmlConfig.Builder()
+                .usePrefixes(false)
+                .build();
+        assertFalse(config.usePrefixes(), "usePrefixes should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding autoDeclarePrefixes")
+    void builder_shouldAllowOverridingAutoDeclarePrefixes() {
+        XmlConfig config = new XmlConfig.Builder()
+                .autoDeclarePrefixes(false)
+                .build();
+        assertFalse(config.autoDeclarePrefixes(), "autoDeclarePrefixes should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding prefixOrdering")
+    void builder_shouldAllowOverridingPrefixOrdering() {
+        XmlConfig config = new XmlConfig.Builder()
+                .prefixOrdering(PrefixOrderingEnum.USAGE_ORDER)
+                .build();
+        assertEquals(PrefixOrderingEnum.USAGE_ORDER, config.getPrefixOrdering(), "prefixOrdering should be overridden to USAGE_ORDER");
+    }
+
+    /** Adds one custom prefix through the builder and checks it is stored next to the rdf and xsd prefixes. */
+    @Test
+    @DisplayName("Builder should allow adding custom prefixes")
+    void builder_shouldAllowAddingCustomPrefixes() {
+        String customPrefix = "my";
+        String customNamespace = "http://my.example.org/";
+        XmlConfig config = new XmlConfig.Builder().addCustomPrefix(customPrefix, customNamespace).build();
+
+        assertTrue(config.getCustomPrefixes().containsKey(customPrefix), "Custom prefix should be added");
+        assertEquals(customNamespace, config.getCustomPrefixes().get(customPrefix), "Custom prefix namespace should be correct");
+        // Adding a prefix is expected to leave the rdf and xsd entries in place.
+        assertTrue(config.getCustomPrefixes().containsKey("rdf"), "rdf prefix should still be present after adding a custom prefix");
+        assertTrue(config.getCustomPrefixes().containsKey("xsd"), "xsd prefix should still be present after adding a custom prefix");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding prettyPrint")
+    void builder_shouldAllowOverridingPrettyPrint() {
+        XmlConfig config = new XmlConfig.Builder()
+                .prettyPrint(false)
+                .build();
+        assertFalse(config.prettyPrint(), "prettyPrint should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding indent")
+    void builder_shouldAllowOverridingIndent() {
+        String customIndent = "\t";
+        XmlConfig config = new XmlConfig.Builder()
+                .indent(customIndent)
+                .build();
+        assertEquals(customIndent, config.getIndent(), "indent should be overridden to custom value");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding maxLineLength")
+    void builder_shouldAllowOverridingMaxLineLength() {
+        int customLength = 120;
+        XmlConfig config = new XmlConfig.Builder()
+                .maxLineLength(customLength)
+                .build();
+        assertEquals(customLength, config.getMaxLineLength(), "maxLineLength should be overridden to custom value");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding sortSubjects")
+    void builder_shouldAllowOverridingSortSubjects() {
+        XmlConfig config = new XmlConfig.Builder()
+                .sortSubjects(true)
+                .build();
+        assertTrue(config.sortSubjects(), "sortSubjects should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding sortPredicates")
+    void builder_shouldAllowOverridingSortPredicates() {
+        XmlConfig config = new XmlConfig.Builder()
+                .sortPredicates(true)
+                .build();
+        assertTrue(config.sortPredicates(), "sortPredicates should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding useMultilineLiterals")
+    void builder_shouldAllowOverridingUseMultilineLiterals() {
+        XmlConfig config = new XmlConfig.Builder()
+                .useMultilineLiterals(false)
+                .build();
+        assertFalse(config.useMultilineLiterals(), "useMultilineLiterals should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding strictMode")
+    void builder_shouldAllowOverridingStrictMode() {
+        XmlConfig config = new XmlConfig.Builder().strictMode(false).build();
+
+        // NOTE(review): strictMode is read as a field here while the other options use accessor methods; confirm intended.
+        assertFalse(config.strictMode, "strictMode should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding escapeUnicode")
+    void builder_shouldAllowOverridingEscapeUnicode() {
+        XmlConfig config = new XmlConfig.Builder()
+                .escapeUnicode(true)
+                .build();
+        assertTrue(config.escapeUnicode(), "escapeUnicode should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding literalDatatypePolicy")
+    void builder_shouldAllowOverridingLiteralDatatypePolicy() {
+        XmlConfig config = new XmlConfig.Builder()
+                .literalDatatypePolicy(LiteralDatatypePolicyEnum.MINIMAL)
+                .build();
+        assertEquals(LiteralDatatypePolicyEnum.MINIMAL, config.getLiteralDatatypePolicy(), "literalDatatypePolicy should be overridden to MINIMAL");
+    }
+
+    @Test
+    @DisplayName("Builder should allow setting baseIRI")
+    void builder_shouldAllowSettingBaseIRI() {
+        String testBaseIRI = "http://example.org/base/";
+        XmlConfig config = new XmlConfig.Builder()
+                .baseIRI(testBaseIRI)
+                .build();
+        assertEquals(testBaseIRI, config.getBaseIRI(), "baseIRI should be set correctly");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding lineEnding")
+    void builder_shouldAllowOverridingLineEnding() {
+        String customLineEnding = "\r\n";
+        XmlConfig config = new XmlConfig.Builder()
+                .lineEnding(customLineEnding)
+                .build();
+        assertEquals(customLineEnding, config.getLineEnding(), "lineEnding should be overridden to custom value");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding validateURIs")
+    void builder_shouldAllowOverridingValidateURIs() {
+        XmlConfig config = new XmlConfig.Builder()
+                .validateURIs(true)
+                .build();
+        assertTrue(config.validateURIs(), "validateURIs should be overridden to true");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding stableBlankNodeIds")
+    void builder_shouldAllowOverridingStableBlankNodeIds() {
+        // NOTE(review): the stableBlankNodeIds default is not asserted anywhere in this class; if it is already
+        // false, this test cannot detect a builder that ignores the call. Confirm the XML default.
+        XmlConfig config = new XmlConfig.Builder().stableBlankNodeIds(false).build();
+        assertFalse(config.stableBlankNodeIds(), "stableBlankNodeIds should be overridden to false");
+    }
+
+    @Test
+    @DisplayName("Builder should allow overriding includeContext")
+    void builder_shouldAllowOverridingIncludeContext() {
+        XmlConfig config = new XmlConfig.Builder()
+                .includeContext(true)
+                .build();
+        assertTrue(config.includeContext(), "includeContext should be overridden to true");
+    }
+
+
+}