diff --git a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java index bcd84f5a6..e16c3a92c 100644 --- a/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java +++ b/src/main/java/fr/inria/corese/core/next/api/base/io/RDFFormat.java @@ -59,6 +59,13 @@ public class RDFFormat extends FileFormat { true, true); + public static final RDFFormat RDFC_1_0 = new RDFFormat( + "RDFC-1.0", + List.of("nq"), + List.of("application/n-quads"), + false, + true); + /** * Constructs a new RDF format. * diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java index ed7e35d65..c7d918c03 100644 --- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactory.java @@ -1,10 +1,14 @@ package fr.inria.corese.core.next.impl.io.serialization; import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.ValueFactory; import fr.inria.corese.core.next.api.base.io.RDFFormat; import fr.inria.corese.core.next.api.io.serialization.RDFSerializer; import fr.inria.corese.core.next.api.io.serialization.SerializationOption; import fr.inria.corese.core.next.api.io.serialization.SerializerFactory; +import fr.inria.corese.core.next.impl.io.serialization.canonical.Rdfc10Canonicalizer; +import fr.inria.corese.core.next.impl.io.serialization.canonical.Rdfc10Options; +import fr.inria.corese.core.next.impl.io.serialization.canonical.Rdfc10Serializer; import fr.inria.corese.core.next.impl.io.serialization.nquads.NQuadsOption; import fr.inria.corese.core.next.impl.io.serialization.nquads.NQuadsSerializer; import fr.inria.corese.core.next.impl.io.serialization.ntriples.NTriplesOption; @@ -15,6 +19,7 @@ import 
fr.inria.corese.core.next.impl.io.serialization.trig.TriGSerializer; import fr.inria.corese.core.next.impl.io.serialization.turtle.TurtleOption; import fr.inria.corese.core.next.impl.io.serialization.turtle.TurtleSerializer; +import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +45,7 @@ public class DefaultSerializerFactory implements SerializerFactory { private static final Logger logger = LoggerFactory.getLogger(DefaultSerializerFactory.class); private final Map> registry; + private final ValueFactory coreseValueFactory; /** * Constructs a {@code DefaultSerializerFactory} and populates its registry @@ -49,6 +55,8 @@ public class DefaultSerializerFactory implements SerializerFactory { * it falls back to the format's default configuration. */ public DefaultSerializerFactory() { + this.coreseValueFactory = new CoreseAdaptedValueFactory(); + Map> tempRegistry = new HashMap<>(); tempRegistry.put(RDFFormat.TURTLE, (model, genericConfig) -> { @@ -101,6 +109,23 @@ public DefaultSerializerFactory() { } }); + tempRegistry.put(RDFFormat.RDFC_1_0, (model, genericConfig) -> { + // Resolve the options once so the canonicalizer and the serializer are built in a single place. + Rdfc10Options rdfcConfig; + if (genericConfig instanceof Rdfc10Options specificConfig) { + rdfcConfig = specificConfig; + } else { + logger.warn("Provided config for RDFC_1_0 is not Rdfc10Options (was {}). Using default Rdfc10Options.", + genericConfig != null ? 
genericConfig.getClass().getSimpleName() : "null"); + rdfcConfig = Rdfc10Options.defaultConfig(); + } + Rdfc10Canonicalizer canonicalizer = new Rdfc10Canonicalizer( + rdfcConfig.getHashAlgorithm(), + rdfcConfig.getPermutationLimit(), + coreseValueFactory); + return new Rdfc10Serializer(model, rdfcConfig, canonicalizer); + }); + this.registry = Collections.unmodifiableMap(tempRegistry); } @@ -129,4 +154,4 @@ public RDFSerializer createSerializer(RDFFormat format, Model model, Serializati return constructor.apply(model, config); } -} +} \ No newline at end of file diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Canonicalizer.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Canonicalizer.java new file mode 100644 index 000000000..f534d9b38 --- /dev/null +++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Canonicalizer.java @@ -0,0 +1,464 @@ +package fr.inria.corese.core.next.impl.io.serialization.canonical; + +import fr.inria.corese.core.next.api.Model; +import fr.inria.corese.core.next.api.Statement; +import fr.inria.corese.core.next.api.ValueFactory; +import fr.inria.corese.core.next.impl.exception.SerializationException; +import fr.inria.corese.core.next.impl.io.serialization.util.SerializationConstants; +import fr.inria.corese.core.next.impl.io.serialization.util.StatementUtils; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.*; +import java.util.stream.Stream; + +/** + * Implementation of the RDFC-1.0 canonicalization algorithm as specified by W3C. + * This class deterministically re-labels blank nodes and sorts all RDF statements + * to produce a canonical representation of a dataset. 
+ */
+public class Rdfc10Canonicalizer {
+
+    private final Rdfc10Options.HashAlgorithm hashAlgorithm;
+    private final int maxCallsHashNDegreeQuads;
+    private final StatementUtils statementUtils;
+    // Mutable per-run counter: it is reset at the start of every canonicalization, so one
+    // instance must not run two canonicalizations concurrently.
+    private int callsHashNDegreeQuads = 0;
+
+    /**
+     * Constructs a new Rdfc10Canonicalizer with specified configuration.
+     *
+     * @param hashAlgorithm The hashing algorithm to use for canonicalization (SHA-256 or SHA-384).
+     *                      Must not be null.
+     * @param maxCalls      The maximum number of calls to the Hash N-Degree Quads algorithm allowed
+     *                      during one canonicalization; exceeding it raises a SerializationException.
+     * @param valueFactory  The factory handed to StatementUtils, which uses it to build the
+     *                      statements carrying the replaced blank node identifiers.
+     * @throws NullPointerException if {@code hashAlgorithm} is null.
+     */
+    public Rdfc10Canonicalizer(Rdfc10Options.HashAlgorithm hashAlgorithm, int maxCalls, ValueFactory valueFactory) {
+        this.hashAlgorithm = Objects.requireNonNull(hashAlgorithm, "Hash algorithm cannot be null");
+        this.maxCallsHashNDegreeQuads = maxCalls;
+        this.statementUtils = new StatementUtils(valueFactory);
+    }
+
+    /**
+     * Canonicalizes all statements within a given Model.
+     * This is the main entry point for the canonicalization process.
+     * The process involves:
+     * 1. Identifying all blank nodes and their associated statements.
+     * 2. Creating canonical identifiers for blank nodes.
+     * 3. Replacing original blank node IDs with canonical ones.
+     * 4. Sorting the resulting statements lexicographically.
+     *
+     * @param model The input model to canonicalize. Must not be null.
+     * @return A list of canonicalized and sorted statements ready for serialization.
+     * @throws NullPointerException   if {@code model} is null.
+     * @throws SerializationException if the Hash N-Degree Quads call limit is exceeded
+     *                                or hashing fails.
+     */
+    public List<Statement> canonicalize(Model model) {
+        Objects.requireNonNull(model, "Model cannot be null");
+        return canonicalize(model.stream());
+    }
+
+    /**
+     * Internal canonicalization method that processes a stream of statements.
+     * This method runs the steps of the canonicalization in sequence.
+     *
+     * @param statements A stream of statements to canonicalize.
+     * @return A list of canonicalized and sorted statements.
+     */
+    private List<Statement> canonicalize(Stream<Statement> statements) {
+        List<Statement> stmtList = statements.toList();
+
+        // Each run starts with a fresh budget of Hash N-Degree Quads calls
+        callsHashNDegreeQuads = 0;
+
+        // Step 1: index every blank node by the statements that mention it
+        Map<String, Set<Statement>> blankNodeToQuads = createBNodeToQuadsMap(stmtList);
+
+        // Without blank nodes there is nothing to relabel: sorting is all that is left
+        if (blankNodeToQuads.isEmpty()) {
+            return stmtList.stream()
+                    .sorted(Comparator.comparing(StatementUtils::toNQuad))
+                    .toList();
+        }
+
+        // Step 2: compute the original-label -> canonical-label mapping
+        Map<String, String> canonicalReplacementMap = createCanonicalMap(blankNodeToQuads);
+
+        // Step 3: relabel and sort
+        return replaceBlankNodesAndSort(stmtList, canonicalReplacementMap);
+    }
+
+    /**
+     * Creates a map where each blank node identifier is associated with all statements
+     * (quads) in which it appears as a subject, object, or graph name.
+     * This is the foundation for the Hash First Degree Quads algorithm.
+     *
+     * @param statements The list of statements to process.
+     * @return A map linking blank node identifiers to their associated statements.
+     */
+    private Map<String, Set<Statement>> createBNodeToQuadsMap(List<Statement> statements) {
+        Map<String, Set<Statement>> blankNodeToQuads = new HashMap<>();
+
+        for (Statement stmt : statements) {
+            // Null entries are tolerated and simply ignored
+            if (stmt == null) continue;
+
+            // Subject position
+            if (StatementUtils.isBlankNode(stmt.getSubject())) {
+                String blankNodeId = StatementUtils.getBlankNodeId(stmt.getSubject());
+                blankNodeToQuads.computeIfAbsent(blankNodeId, k -> new HashSet<>()).add(stmt);
+            }
+
+            // Object position
+            if (StatementUtils.isBlankNode(stmt.getObject())) {
+                String blankNodeId = StatementUtils.getBlankNodeId(stmt.getObject());
+                blankNodeToQuads.computeIfAbsent(blankNodeId, k -> new HashSet<>()).add(stmt);
+            }
+
+            // Graph name position; the context is null-checked because it may be absent
+            if (stmt.getContext() != null && StatementUtils.isBlankNode(stmt.getContext())) {
+                String blankNodeId = StatementUtils.getBlankNodeId(stmt.getContext());
+                blankNodeToQuads.computeIfAbsent(blankNodeId, k -> new HashSet<>()).add(stmt);
+            }
+        }
+
+        return blankNodeToQuads;
+    }
+
+    /**
+     * Performs the core canonicalization logic to create a map of blank node replacements.
+     * This method implements the main flow of the RDFC-1.0 algorithm.
+     *
+     * @return A deterministic mapping from original blank node identifiers to canonical ones.
+     */
+    private Map<String, String> createCanonicalMap(Map<String, Set<Statement>> bnodeToQuads) {
+        Map<String, String> canonicalIssuer = new HashMap<>();
+        int counter = 0;
+
+        // Steps 1-2: hash every blank node and group the nodes sharing a first-degree hash
+        Map<String, List<String>> hashToNodes = new HashMap<>();
+        for (String bnode : bnodeToQuads.keySet()) {
+            String firstDegreeHash = hashFirstDegreeQuads(bnode, bnodeToQuads);
+            hashToNodes.computeIfAbsent(firstDegreeHash, k -> new ArrayList<>()).add(bnode);
+        }
+
+        // Step 3: separate hashes owned by a single node from hashes shared by several nodes
+        List<String> singleNodeHashes = new ArrayList<>();
+        List<String> multiNodeHashes = new ArrayList<>();
+        for (Map.Entry<String, List<String>> entry : hashToNodes.entrySet()) {
+            if (entry.getValue().size() == 1) {
+                singleNodeHashes.add(entry.getKey());
+            } else {
+                multiNodeHashes.add(entry.getKey());
+            }
+        }
+
+        // Hash order, not map iteration order, drives the numbering
+        Collections.sort(singleNodeHashes);
+        Collections.sort(multiNodeHashes);
+
+        // Step 4: uniquely hashed nodes get their canonical label first
+        for (String hash : singleNodeHashes) {
+            String node = hashToNodes.get(hash).get(0);
+            canonicalIssuer.put(node, SerializationConstants.C14N + counter++);
+        }
+
+        // Step 5: nodes sharing a first-degree hash are ordered by their N-degree hash
+        for (String hash : multiNodeHashes) {
+            List<String> nodes = hashToNodes.get(hash);
+
+            Map<String, String> nDegreeHashes = new HashMap<>();
+            for (String node : nodes) {
+                // Every node starts from its own empty temporary issuer
+                TemporaryIssuer tempIssuer = new TemporaryIssuer();
+                nDegreeHashes.put(node, hashNDegreeQuads(node, bnodeToQuads, canonicalIssuer, tempIssuer));
+            }
+
+            // NOTE(review): equal N-degree hashes are broken by the ORIGINAL blank node label,
+            // so the numbering of such nodes depends on the input labelling - confirm this is acceptable.
+            nodes.sort(Comparator.comparing((String node) -> nDegreeHashes.get(node))
+                    .thenComparing(Comparator.naturalOrder()));
+
+            for (String node : nodes) {
+                canonicalIssuer.put(node, SerializationConstants.C14N + counter++);
+            }
+        }
+
+        return canonicalIssuer;
+    }
+
+    /**
+     * Implements
the "Hash First Degree Quads" algorithm from the RDFC-1.0 specification.
+     * It computes a hash for a blank node based on canonical representations of all statements
+     * in which it appears. It replaces the blank node itself with a standardized placeholder.
+     *
+     * @param blankNode        The blank node identifier to hash.
+     * @param blankNodeToQuads The map of blank nodes to their associated statements.
+     * @return A cryptographic hash representing the blank node's first-degree context.
+     */
+    private String hashFirstDegreeQuads(String blankNode, Map<String, Set<Statement>> blankNodeToQuads) {
+        List<String> nquads = new ArrayList<>();
+        for (Statement quad : blankNodeToQuads.get(blankNode)) {
+            nquads.add(quadToNQuad(quad, blankNode, SerializationConstants.CANONICAL_BNODE_PLACEHOLDER));
+        }
+
+        // Sorting makes the hash independent of the iteration order of the statement set
+        Collections.sort(nquads);
+
+        return hash(String.join(SerializationConstants.EMPTY_STRING, nquads));
+    }
+
+    /**
+     * Implements the "Hash N-Degree Quads" algorithm for resolving blank node permutations.
+     * This recursive method handles cases where multiple blank nodes have identical
+     * first-degree hashes by considering their relationships to other blank nodes.
+     *
+     * @param identifier       The blank node identifier currently being processed.
+     * @param blankNodeToQuads The map of blank nodes to their associated statements.
+     * @param canonicalIssuer  Map of already-assigned canonical identifiers.
+     * @param issuer           Temporary identifier issuer for the current recursion path.
+     * @return A hash representing the N-degree context of the blank node.
+     * @throws SerializationException if the maximum recursion depth is exceeded.
+     */
+    private String hashNDegreeQuads(String identifier, Map<String, Set<Statement>> blankNodeToQuads,
+                                    Map<String, String> canonicalIssuer, TemporaryIssuer issuer) {
+
+        // Global budget shared by the whole canonicalization run
+        if (++callsHashNDegreeQuads > maxCallsHashNDegreeQuads) {
+            throw new SerializationException(
+                    "Maximum calls to Hash N-Degree Quads exceeded: " + maxCallsHashNDegreeQuads,
+                    "Rdfc10Canonicalizer"
+            );
+        }
+
+        // Register the node being processed on the current path. Without this, the issuer never
+        // contains anything, so two connected blank nodes (A related to B, B related to A) keep
+        // recursing into each other until the call limit or the stack is exhausted.
+        issuer.issue(identifier);
+
+        // Collect all related blank nodes from all quads containing this node
+        Set<String> relatedBlankNodes = new HashSet<>();
+        for (Statement quad : blankNodeToQuads.get(identifier)) {
+            relatedBlankNodes.addAll(getRelatedBlankNodes(quad, identifier));
+        }
+
+        // Calculate one contribution per related blank node
+        List<String> relatedHashes = new ArrayList<>();
+        for (String relatedNode : relatedBlankNodes) {
+            String relatedHash;
+
+            if (canonicalIssuer.containsKey(relatedNode)) {
+                // A canonical label is already final: use it directly
+                relatedHash = canonicalIssuer.get(relatedNode);
+            } else if (issuer.hasIssued(relatedNode)) {
+                // Already visited on this path: use its temporary label instead of recursing again
+                relatedHash = issuer.issue(relatedNode);
+            } else {
+                // Unvisited node: explore it with a copy so sibling branches stay independent
+                TemporaryIssuer newIssuer = issuer.copy();
+                relatedHash = hashNDegreeQuads(relatedNode, blankNodeToQuads, canonicalIssuer, newIssuer);
+            }
+
+            relatedHashes.add(relatedHash);
+        }
+
+        // Sorting removes the dependency on the HashSet iteration order
+        Collections.sort(relatedHashes);
+
+        // Hash input: this node's first-degree hash followed by the sorted contributions
+        StringBuilder hashInput = new StringBuilder();
+        hashInput.append(hashFirstDegreeQuads(identifier, blankNodeToQuads));
+        for (String relatedHash : relatedHashes) {
+            hashInput.append(relatedHash);
+        }
+
+        return hash(hashInput.toString());
+    }
+
+    /**
+     * Converts a statement to canonical N-Quad format for hashing, replacing
+     * a specific blank node with a placeholder string.
+     *
+     * @param quad               The statement to convert.
+     * @param blankNodeToReplace The blank node identifier to replace.
+     * @param replacement        The placeholder string to use for replacement.
+     * @return A canonical N-Quad string with placeholder substitution.
+     */
+    private String quadToNQuad(Statement quad, String blankNodeToReplace, String replacement) {
+        // NOTE(review): the W3C RDFC-1.0 text uses the labels "_:a" (reference node) and "_:z"
+        // (any other blank node) in Hash First Degree Quads; the markers used here come from
+        // SerializationConstants - confirm the difference is intended.
+        StringBuilder line = new StringBuilder();
+
+        // Subject: the reference node becomes the replacement, any other blank node a fixed marker
+        if (StatementUtils.isBlankNode(quad.getSubject())) {
+            String subjectId = StatementUtils.getBlankNodeId(quad.getSubject());
+            if (subjectId.equals(blankNodeToReplace)) {
+                line.append(replacement);
+            } else {
+                line.append(SerializationConstants.CANONICAL_BNODE_PREFIX);
+            }
+        } else {
+            line.append(StatementUtils.serializeForComparison(quad.getSubject()));
+        }
+        line.append(SerializationConstants.SPACE);
+
+        // Predicate: serialized as is
+        line.append(StatementUtils.serializeForComparison(quad.getPredicate()));
+        line.append(SerializationConstants.SPACE);
+
+        // Object: same substitution rule as the subject
+        if (StatementUtils.isBlankNode(quad.getObject())) {
+            String objectId = StatementUtils.getBlankNodeId(quad.getObject());
+            if (objectId.equals(blankNodeToReplace)) {
+                line.append(replacement);
+            } else {
+                line.append(SerializationConstants.CANONICAL_BNODE_PREFIX);
+            }
+        } else {
+            line.append(StatementUtils.serializeForComparison(quad.getObject()));
+        }
+
+        // Graph name: only written when the statement has a context
+        if (quad.getContext() != null) {
+            line.append(SerializationConstants.SPACE);
+            if (StatementUtils.isBlankNode(quad.getContext())) {
+                String graphId = StatementUtils.getBlankNodeId(quad.getContext());
+                if (graphId.equals(blankNodeToReplace)) {
+                    line.append(replacement);
+                } else {
+                    line.append(SerializationConstants.CANONICAL_BNODE_PREFIX);
+                }
+            } else {
+                line.append(StatementUtils.serializeForComparison(quad.getContext()));
+            }
+        }
+
+        line.append(SerializationConstants.SPACE);
+        line.append(SerializationConstants.POINT);
+        return line.toString();
+    }
+
+    /**
+     * Identifies all blank nodes in a statement that are related to but different from
+     * a specified blank node. This is used to explore the graph context during N-degree hashing.
+     *
+     * @param quad             The statement to examine.
+     * @param excludeBlankNode The blank node to exclude from the results.
+     * @return A set of blank node identifiers related to the excluded node.
+     */
+    private Set<String> getRelatedBlankNodes(Statement quad, String excludeBlankNode) {
+        Set<String> relatedBlankNodes = new HashSet<>();
+
+        // Check subject position
+        if (StatementUtils.isBlankNode(quad.getSubject())) {
+            String id = StatementUtils.getBlankNodeId(quad.getSubject());
+            if (!id.equals(excludeBlankNode)) {
+                relatedBlankNodes.add(id);
+            }
+        }
+
+        // Check object position
+        if (StatementUtils.isBlankNode(quad.getObject())) {
+            String id = StatementUtils.getBlankNodeId(quad.getObject());
+            if (!id.equals(excludeBlankNode)) {
+                relatedBlankNodes.add(id);
+            }
+        }
+
+        // Check context position (the context may be absent)
+        if (quad.getContext() != null && StatementUtils.isBlankNode(quad.getContext())) {
+            String id = StatementUtils.getBlankNodeId(quad.getContext());
+            if (!id.equals(excludeBlankNode)) {
+                relatedBlankNodes.add(id);
+            }
+        }
+
+        return relatedBlankNodes;
+    }
+
+    /**
+     * Replaces blank node identifiers in statements and sorts them lexicographically.
+     * This is the final step of the canonicalization process.
+     *
+     * @param statements   The original statements to process.
+     * @param canonicalMap The map of blank node replacements.
+     * @return A sorted list of statements with canonical blank node identifiers.
+     */
+    private List<Statement> replaceBlankNodesAndSort(List<Statement> statements, Map<String, String> canonicalMap) {
+        // Relabel and sort in a single pipeline; no intermediate list is needed
+        return statements.stream()
+                .map(stmt -> statementUtils.replaceBlankNodes(stmt, canonicalMap))
+                .sorted(Comparator.comparing(StatementUtils::toNQuad))
+                .toList();
+    }
+
+    /**
+     * Computes a cryptographic hash of the input data using the configured algorithm.
+     *
+     * @param data The string data to hash; it is encoded as UTF-8 before hashing.
+     * @return A hexadecimal string representation of the hash.
+     * @throws SerializationException if the hash algorithm is unavailable or hashing fails.
+     */
+    private String hash(String data) {
+        try {
+            String algorithm = hashAlgorithm == Rdfc10Options.HashAlgorithm.SHA_384 ?
+                    SerializationConstants.SHA_384 : SerializationConstants.SHA_256;
+            MessageDigest digest = MessageDigest.getInstance(algorithm);
+            byte[] hash = digest.digest(data.getBytes(StandardCharsets.UTF_8));
+            return bytesToHex(hash);
+        } catch (NoSuchAlgorithmException e) {
+            throw new SerializationException("Hash algorithm not available: " + e.getMessage(),
+                    "Rdfc10Canonicalizer", e);
+        } catch (Exception e) {
+            throw new SerializationException("Hash computation failed for data: " + data,
+                    "Rdfc10Canonicalizer", e);
+        }
+    }
+
+    /**
+     * Converts a byte array to its lowercase hexadecimal string representation,
+     * two digits per byte.
+     *
+     * @param bytes The byte array to convert.
+     * @return A hexadecimal string.
+     */
+    private String bytesToHex(byte[] bytes) {
+        // Emits the same two lowercase digits per byte as the "%02x" format, without
+        // running a format-string parse for every byte.
+        StringBuilder result = new StringBuilder(bytes.length * 2);
+        for (byte b : bytes) {
+            result.append(Character.forDigit((b >> 4) & 0xF, 16));
+            result.append(Character.forDigit(b & 0xF, 16));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Helper class for managing temporary identifiers during recursive hashing.
+     * It ensures that each exploration path maintains independent temporary labeling
+     * to avoid contamination between different permutation explorations.
+     */
+    private static class TemporaryIssuer {
+        // Not final: copy() assigns a fresh map to the new instance
+        private Map<String, String> issued = new HashMap<>();
+        private int counter = 0;
+
+        /**
+         * Issues a temporary identifier for a blank node.
+         * If the node already has a temporary ID, it returns the existing one.
+         *
+         * @param identifier The blank node identifier to issue an ID for.
+         * @return A temporary canonical identifier.
+         */
+        public String issue(String identifier) {
+            return issued.computeIfAbsent(identifier, k -> SerializationConstants.CANONICAL_BNODE_PREFIX + counter++);
+        }
+
+        /**
+         * Checks if a temporary identifier has been issued for a blank node.
+         *
+         * @param identifier The blank node identifier to check.
+         * @return true if a temporary ID exists, false otherwise.
+         */
+        public boolean hasIssued(String identifier) {
+            return issued.containsKey(identifier);
+        }
+
+        /**
+         * Creates an independent copy of this TemporaryIssuer.
+         * This is crucial for maintaining path isolation during recursive exploration.
+         *
+         * @return A new TemporaryIssuer instance with the same state.
+         */
+        public TemporaryIssuer copy() {
+            TemporaryIssuer copy = new TemporaryIssuer();
+            copy.issued = new HashMap<>(this.issued);
+            copy.counter = this.counter;
+            return copy;
+        }
+    }
+}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Options.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Options.java
new file mode 100644
index 000000000..6936fa988
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Options.java
@@ -0,0 +1,161 @@
+package fr.inria.corese.core.next.impl.io.serialization.canonical;
+
+import fr.inria.corese.core.next.impl.io.serialization.option.AbstractSerializerOption;
+
+import java.util.Objects;
+
+/**
+ * Configuration for the canonical RDF serialization format (RDFC-1.0).
+ * This class extends {@link AbstractSerializerOption} and adds the settings read by the
+ * RDFC-1.0 canonicalization code: the hashing algorithm, a depth factor and a
+ * permutation limit.
+ * Use the {@link Builder} class to create instances of {@code Rdfc10Options}.
+ * A predefined default configuration is available via {@link #defaultConfig()}.
+ */
+public class Rdfc10Options extends AbstractSerializerOption {
+
+    /**
+     * Enumeration for the supported hashing algorithms.
+     */
+    public enum HashAlgorithm {
+        SHA_256,
+        SHA_384
+    }
+
+    private final HashAlgorithm hashAlgorithm;
+    private final int depthFactor;
+    private final int permutationLimit;
+
+    /**
+     * Protected constructor to be used by the {@link Builder}.
+     * It initializes a new instance of {@code Rdfc10Options} with the values
+     * provided by the builder.
+     *
+     * @param builder The builder instance containing the desired configuration values.
+     */
+    protected Rdfc10Options(Builder builder) {
+        super(builder);
+        this.hashAlgorithm = builder.hashAlgorithm;
+        this.depthFactor = builder.depthFactor;
+        this.permutationLimit = builder.permutationLimit;
+    }
+
+    /**
+     * Gets the hashing algorithm used for blank node canonicalization.
+     *
+     * @return The {@link HashAlgorithm} used.
+     */
+    public HashAlgorithm getHashAlgorithm() {
+        return hashAlgorithm;
+    }
+
+    /**
+     * Gets the configured depth factor.
+     * NOTE(review): no code visible in this change reads this value; confirm where it is meant to apply.
+     *
+     * @return The depth factor.
+     */
+    public int getDepthFactor() {
+        return depthFactor;
+    }
+
+    /**
+     * Gets the permutation limit.
+     * {@code DefaultSerializerFactory} passes this value to {@code Rdfc10Canonicalizer} as the
+     * maximum number of Hash N-Degree Quads calls allowed per canonicalization.
+     *
+     * @return The permutation limit.
+     */
+    public int getPermutationLimit() {
+        return permutationLimit;
+    }
+
+    /**
+     * Public Builder for {@link Rdfc10Options}.
+     * Provides a fluent API for constructing {@code Rdfc10Options} instances, starting from
+     * SHA-256, a depth factor of 5 and a permutation limit of 50000.
+     */
+    public static class Builder extends AbstractSerializerOption.AbstractBuilder {
+        private HashAlgorithm hashAlgorithm = HashAlgorithm.SHA_256;
+        private int depthFactor = 5;
+        private int permutationLimit = 50000;
+
+        /**
+         * Creates a builder preloaded with the default values.
+         */
+        public Builder() {
+            // Defaults are set by the field initializers above.
+        }
+
+        /**
+         * Sets the hashing algorithm.
+         *
+         * @param hashAlgorithm The algorithm to use. Must not be null.
+         * @return This builder.
+         * @throws NullPointerException if {@code hashAlgorithm} is null.
+         */
+        public Builder hashAlgorithm(HashAlgorithm hashAlgorithm) {
+            this.hashAlgorithm = Objects.requireNonNull(hashAlgorithm, "Hash algorithm cannot be null");
+            return this;
+        }
+
+        /**
+         * Sets the depth factor.
+         *
+         * @param depthFactor The depth factor. Must be strictly positive.
+         * @return This builder.
+         * @throws IllegalArgumentException if {@code depthFactor} is not positive.
+         */
+        public Builder depthFactor(int depthFactor) {
+            if (depthFactor <= 0) {
+                throw new IllegalArgumentException("Depth factor must be positive: " + depthFactor);
+            }
+            this.depthFactor = depthFactor;
+            return this;
+        }
+
+        /**
+         * Sets the permutation limit.
+         *
+         * @param permutationLimit The limit. Must be strictly positive.
+         * @return This builder.
+         * @throws IllegalArgumentException if {@code permutationLimit} is not positive.
+         */
+        public Builder permutationLimit(int permutationLimit) {
+            if (permutationLimit <= 0) {
+                throw new IllegalArgumentException("Permutation limit must be positive: " + permutationLimit);
+            }
+            this.permutationLimit = permutationLimit;
+            return this;
+        }
+
+        /**
+         * Builds a new {@link Rdfc10Options} instance with the configured values.
+         *
+         * @return A new instance of {@code Rdfc10Options}.
+         */
+        @Override
+        public Rdfc10Options build() {
+            return new Rdfc10Options(this);
+        }
+    }
+
+    /**
+     * Creates and returns a new {@code Rdfc10Options} instance with the default configuration.
+     *
+     * @return A new {@code Rdfc10Options} with default settings.
+     */
+    public static Rdfc10Options defaultConfig() {
+        return new Builder().build();
+    }
+
+    /**
+     * Creates and returns a new {@link Builder} instance, which can be used to customize
+     * the {@code Rdfc10Options} before building.
+     *
+     * @return A new {@code Builder} instance.
+     */
+    public static Rdfc10Options.Builder builder() {
+        return new Rdfc10Options.Builder();
+    }
+}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Serializer.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Serializer.java
new file mode 100644
index 000000000..5487f890a
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10Serializer.java
@@ -0,0 +1,118 @@
+package fr.inria.corese.core.next.impl.io.serialization.canonical;
+
+import fr.inria.corese.core.next.api.Model;
+import fr.inria.corese.core.next.api.Resource;
+import fr.inria.corese.core.next.api.Statement;
+import fr.inria.corese.core.next.impl.exception.SerializationException;
+import fr.inria.corese.core.next.impl.io.serialization.base.AbstractLineBasedSerializer;
+import fr.inria.corese.core.next.impl.io.serialization.util.SerializationConstants;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Serializes a Corese {@link Model} into an RDFC-1.0 canonical RDF format.
+ * This serializer is designed to integrate with the W3C RDFC-1.0 algorithm
+ * to ensure a deterministic output by re-labeling blank nodes and sorting all statements
+ * according to the specification. The output format is canonicalized N-Quads.
+ * <p>
+ * This implementation acts as a wrapper: it hands the model to a dedicated
+ * RDFC-1.0 canonicalization component and then writes the resulting canonical statements.
+ */
+public class Rdfc10Serializer extends AbstractLineBasedSerializer {
+
+    private final Rdfc10Options config;
+    private final Rdfc10Canonicalizer canonicalizer;
+    private final Model model;
+
+    /**
+     * Constructs a new Rdfc10Serializer.
+     * This constructor is the one called by the DefaultSerializerFactory.
+     *
+     * @param model         The model to be serialized. Must not be null.
+     * @param config        The configuration options for the canonicalization process. Must not be null.
+     * @param canonicalizer The canonicalizer component to use. Must not be null.
+     */
+    public Rdfc10Serializer(Model model, Rdfc10Options config, Rdfc10Canonicalizer canonicalizer) {
+        super(model, config);
+        this.model = Objects.requireNonNull(model);
+        this.config = Objects.requireNonNull(config);
+        this.canonicalizer = Objects.requireNonNull(canonicalizer);
+    }
+
+    @Override
+    public String getFormatName() {
+        return "RDFC-1.0";
+    }
+
+    /**
+     * Serializes the model into the specified writer using the RDFC-1.0 canonical format.
+     * The model is first canonicalized by the internal canonicalizer component, and then
+     * the resulting statements are written line by line; the writer is closed on return.
+     *
+     * @param writer the {@link Writer} to which the serialized model will be written.
+     * @throws SerializationException if serialization fails due to an I/O error or invalid data.
+     */
+    @Override
+    public void write(Writer writer) {
+        try (BufferedWriter bufferedWriter = new BufferedWriter(writer)) {
+            List<Statement> canonicalStatements = canonicalizer.canonicalize(model);
+
+            for (Statement stmt : canonicalStatements) {
+                writeCanonicalStatement(bufferedWriter, stmt);
+            }
+
+        } catch (IOException e) {
+            throw new SerializationException(getFormatName() + " serialization failed", getFormatName(), e);
+        } catch (IllegalArgumentException e) {
+            throw new SerializationException("Invalid " + getFormatName() + " data: " + e.getMessage(), getFormatName(), e);
+        }
+    }
+
+    /**
+     * Writes the context (graph name) of a statement, preceded by a space; writes nothing when it is null.
+     *
+     * @param writer the {@link Writer} to which the context will be written.
+     * @param stmt   the statement whose context will be written.
+     * @throws IOException if an I/O error occurs.
+     */
+    @Override
+    protected void writeContext(Writer writer, Statement stmt) throws IOException {
+        Resource context = stmt.getContext();
+        if (context != null) {
+            writer.write(SerializationConstants.SPACE);
+            writeValue(writer, context);
+        }
+    }
+
+    /**
+     * Writes a single canonical {@link Statement} to the writer.
+     * The statement is expected to have been processed already
+     * by the canonicalizer; no relabelling or sorting happens here.
+     *
+     * @param writer the {@link Writer} to which the statement will be written.
+     * @param stmt   the {@link Statement} to write (already canonicalized).
+     * @throws IOException if an I/O error occurs.
+     */
+    private void writeCanonicalStatement(Writer writer, Statement stmt) throws IOException {
+        writeValue(writer, stmt.getSubject());
+        writer.write(SerializationConstants.SPACE);
+        writeValue(writer, stmt.getPredicate());
+        writer.write(SerializationConstants.SPACE);
+        writeValue(writer, stmt.getObject());
+
+        writeContext(writer, stmt);
+
+        if (config.trailingDot()) {
+            writer.write(SerializationConstants.SPACE);
+            writer.write(SerializationConstants.POINT);
+        }
+
+        writer.write(config.getLineEnding());
+    }
+
+
+}
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java
index dda97db5f..6e4ae0098 100644
--- a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/SerializationConstants.java
@@ -107,4 +107,15 @@ private SerializationConstants() {
 
     public static final String DEFAULT_GRAPH_IRI = "http://ns.inria.fr/corese/default-graph";
 
+    public static final String C14N = "c14n";
+
+    public static final String CANONICAL_BNODE_PLACEHOLDER = "<>";
+    public static final String HEX_FORMAT = "%02x";
+    public static final String CANONICAL_BNODE_PREFIX = "_:b";
+
+
+    // Hashing algorithms
+    public static final String SHA_256 = "SHA-256";
+    public static final String SHA_384 = "SHA-384";
+
 }
diff --git a/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/StatementUtils.java b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/StatementUtils.java
new file mode 100644
index 000000000..6d385f238
--- /dev/null
+++ b/src/main/java/fr/inria/corese/core/next/impl/io/serialization/util/StatementUtils.java
@@ -0,0 +1,228 @@
+package fr.inria.corese.core.next.impl.io.serialization.util;
+
+import fr.inria.corese.core.next.api.*;
+
+import java.util.Map;
+
+/**
+ * Utility class for
handling Statement manipulation during RDFC-1.0 canonicalization. + * This class provides methods to create new statements with replaced blank node identifiers + * and to serialize them for comparison and hashing according to the RDFC-1.0 specification. + *

+ * Key functionalities:
+ * - Replacement of blank node identifiers with canonical IDs
+ * - Serialization of RDF values for lexicographic comparison
+ * - Conversion of statements to N-Quads format
+ * - Blank node identification and manipulation
+ */
+public class StatementUtils {
+
+    private final ValueFactory valueFactory;
+
+    /**
+     * Constructs a new StatementUtils instance.
+     *
+     * @param valueFactory The factory for creating RDF values, used for creating new statements
+     *                     with replaced blank node identifiers.
+     */
+    public StatementUtils(ValueFactory valueFactory) {
+        this.valueFactory = valueFactory;
+    }
+
+    /**
+     * Creates a new statement with blank nodes replaced according to the canonical mapping.
+     * This method is used during the canonicalization process to replace original blank node
+     * identifiers with their canonical counterparts. Subject, object and context are rewritten;
+     * the predicate is copied unchanged because it is always an IRI.
+     *
+     * @param originalStatement The original statement containing blank nodes to be replaced.
+     * @param canonicalMapping  A map from original blank node IDs to canonical IDs
+     *                          (keys are the identifiers returned by {@code getBlankNodeId}).
+     * @return A new statement with blank node identifiers replaced according to the mapping.
+     */
+    public Statement replaceBlankNodes(Statement originalStatement, Map<String, String> canonicalMapping) {
+        Resource newSubject = replaceIfBlankNodeResource(originalStatement.getSubject(), canonicalMapping);
+        IRI newPredicate = originalStatement.getPredicate();
+        Value newObject = replaceIfBlankNodeValue(originalStatement.getObject(), canonicalMapping);
+        // The context may be null (default graph); the helper passes null through untouched.
+        Resource newContext = replaceIfBlankNodeResource(originalStatement.getContext(), canonicalMapping);
+
+        return valueFactory.createStatement(newSubject, newPredicate, newObject, newContext);
+    }
+
+    /**
+     * Replaces a blank node Resource with its canonical identifier if it exists in the mapping.
+     * If the resource is not a blank node or has no mapping, returns the original resource unchanged.
+     *
+     * @param original The original Resource to potentially replace.
+     * @param mapping  The canonical mapping from original to canonical blank node IDs.
+     * @return The replaced Resource or the original if no replacement is needed.
+     */
+    private Resource replaceIfBlankNodeResource(Resource original, Map<String, String> mapping) {
+        // isBlankNode is null-safe, so a null resource (e.g. an absent context) falls through.
+        if (isBlankNode(original)) {
+            String canonicalId = mapping.get(getBlankNodeId(original));
+            if (canonicalId != null) {
+                return valueFactory.createBNode(canonicalId);
+            }
+        }
+        return original;
+    }
+
+    /**
+     * Replaces a blank node Value with its canonical identifier if it exists in the mapping.
+     * If the value is not a blank node or has no mapping, returns the original value unchanged.
+     *
+     * @param original The original Value to potentially replace.
+     * @param mapping  The canonical mapping from original to canonical blank node IDs.
+     * @return The replaced Value or the original if no replacement is needed.
+     */
+    private Value replaceIfBlankNodeValue(Value original, Map<String, String> mapping) {
+        // isBlankNode is null-safe, so no separate null check is needed here.
+        if (isBlankNode(original)) {
+            String canonicalId = mapping.get(getBlankNodeId(original));
+            if (canonicalId != null) {
+                return valueFactory.createBNode(canonicalId);
+            }
+        }
+        return original;
+    }
+
+    /**
+     * Checks if a given Value is a blank node.
+     * Blank nodes are anonymous resources that don't have a URI identifier.
+     *
+     * @param value The Value to check; may be null.
+     * @return true if the value is non-null and a blank node, false otherwise.
+     */
+    public static boolean isBlankNode(Value value) {
+        return value != null && value.isBNode();
+    }
+
+    /**
+     * Extracts the identifier string from a blank node Value.
+     * For blank nodes, this returns the local identifier without the "_:" prefix.
+     *
+     * @param value The blank node Value from which to extract the identifier.
+     * @return The blank node identifier string, or null if the value is not a blank node.
+     */
+    public static String getBlankNodeId(Value value) {
+        // isBlankNode is null-safe, so this guard also covers a null value.
+        if (!isBlankNode(value)) {
+            return null;
+        }
+        String str = value.stringValue();
+        if (str.startsWith(SerializationConstants.BNODE_PREFIX)) {
+            // Strip exactly the prefix that was matched instead of a hard-coded length.
+            return str.substring(SerializationConstants.BNODE_PREFIX.length());
+        }
+        return str;
+    }
+
+    /**
+     * Serializes a Value for lexicographic comparison according to RDFC-1.0 specifications.
+     * This method produces a string representation suitable for deterministic sorting and hashing.
+     * IRIs are wrapped in angle brackets, blank nodes and literals are delegated to dedicated
+     * helpers, and any other value falls back to {@code toString()}.
+     *
+     * @param value The Value to serialize; may be null.
+     * @return A string representation of the value for comparison purposes,
+     *         or the empty string when the value is null.
+     */
+    public static String serializeForComparison(Value value) {
+        if (value == null) {
+            return SerializationConstants.EMPTY_STRING;
+        }
+
+        if (value instanceof IRI) {
+            return SerializationConstants.LT + value.stringValue() + SerializationConstants.GT;
+        }
+
+        if (value instanceof BNode) {
+            return serializeBNode((BNode) value);
+        }
+
+        if (value instanceof Literal) {
+            return serializeLiteral((Literal) value);
+        }
+
+        return value.toString();
+    }
+
+
+    /**
+     * Serializes a blank node for comparison.
+     * Blank nodes are serialized with the "_:" prefix followed by their identifier.
+     * NOTE(review): this assumes {@code getID()} returns the identifier without the prefix;
+     * confirm against the BNode implementation.
+     *
+     * @param bnode The blank node to serialize.
+     * @return The serialized blank node string.
+     */
+    private static String serializeBNode(BNode bnode) {
+        return SerializationConstants.BNODE_PREFIX + bnode.getID();
+    }
+
+    /**
+     * Serializes a literal for comparison according to RDFC-1.0 specifications.
+     * Handles string escaping, datatypes, and language tags appropriately.
+     *
+     * @param literal The literal to serialize.
+     * @return The serialized literal string.
+     */
+    private static String serializeLiteral(Literal literal) {
+        StringBuilder sb = new StringBuilder();
+
+        // Escape the label with N-Quads ECHAR sequences. The backslash is handled first so
+        // that backslashes introduced by the later replacements are not doubled again.
+        // BS, HT, LF, FF and CR are escaped too: a raw line break in the label would
+        // otherwise split the serialized literal across several lines.
+        String escapedLabel = literal.getLabel()
+                .replace(SerializationConstants.BACK_SLASH, "\\\\")
+                .replace(SerializationConstants.QUOTE, "\\\"")
+                .replace("\b", "\\b")
+                .replace("\t", "\\t")
+                .replace("\n", "\\n")
+                .replace("\f", "\\f")
+                .replace("\r", "\\r");
+
+        sb.append('"').append(escapedLabel).append('"');
+
+        // Handle datatype or language tag.
+        // NOTE(review): if language-tagged literals also report a non-null datatype
+        // (e.g. rdf:langString), the language branch below is never reached; confirm
+        // against the Literal implementation.
+        if (literal.getDatatype() != null) {
+            String datatypeUri = literal.getDatatype().stringValue();
+            // Omit xsd:string datatype for brevity (implied by default)
+            if (!"http://www.w3.org/2001/XMLSchema#string".equals(datatypeUri)) {
+                sb.append(SerializationConstants.DATATYPE_SEPARATOR).append(serializeForComparison(literal.getDatatype()));
+            }
+        } else if (literal.getLanguage() != null) {
+            sb.append(SerializationConstants.AT_SIGN).append(literal.getLanguage());
+        }
+
+        return sb.toString();
+    }
+
+    /**
+     * Converts a Statement to N-Quads format for lexicographic comparison.
+     * This produces a canonical string representation suitable for sorting and hashing
+     * according to the RDFC-1.0 specification.
+     *
+     * @param statement The statement to convert.
+     * @return The N-Quads representation of the statement.
+     */
+    public static String toNQuad(Statement statement) {
+        if (statement == null) {
+            return SerializationConstants.EMPTY_STRING;
+        }
+
+        // Subject, predicate and object, separated by single spaces
+        StringBuilder nquad = new StringBuilder()
+                .append(serializeForComparison(statement.getSubject()))
+                .append(SerializationConstants.SPACE)
+                .append(serializeForComparison(statement.getPredicate()))
+                .append(SerializationConstants.SPACE)
+                .append(serializeForComparison(statement.getObject()));
+
+        // The graph term is only emitted for statements that carry a context
+        Resource graph = statement.getContext();
+        if (graph != null) {
+            nquad.append(SerializationConstants.SPACE)
+                    .append(serializeForComparison(graph));
+        }
+
+        // Every quad ends with " ."
+        return nquad.append(SerializationConstants.SPACE)
+                .append(SerializationConstants.POINT)
+                .toString();
+    }
+
+
+}
\ No newline at end of file
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java
index cbc605952..598900ad6 100644
--- a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/DefaultSerializerFactoryTest.java
@@ -4,6 +4,7 @@
 import fr.inria.corese.core.next.api.base.io.RDFFormat;
 import fr.inria.corese.core.next.api.io.serialization.RDFSerializer;
 import fr.inria.corese.core.next.api.io.serialization.SerializationOption;
+import fr.inria.corese.core.next.impl.io.serialization.canonical.Rdfc10Serializer;
 import fr.inria.corese.core.next.impl.io.serialization.nquads.NQuadsSerializer;
 import fr.inria.corese.core.next.impl.io.serialization.ntriples.NTriplesSerializer;
 import fr.inria.corese.core.next.impl.io.serialization.rdfxml.XmlSerializer;
@@ -98,6 +99,18 @@ void createSerializer_shouldReturnXmlSerializer_forRdfXmlFormat() {
         }
     }
 
+    @Test
+    @DisplayName("createSerializer should return
CanonicalSerializer for CANONICAL_RDF format")
+    void createSerializer_shouldReturnCanonicalSerializer_forCanonicalRdfFormat() {
+        try (MockedConstruction mockedConstruction = mockConstruction(Rdfc10Serializer.class)) { // intercept Rdfc10Serializer constructor calls
+            RDFSerializer serializer = factory.createSerializer(RDFFormat.RDFC_1_0, mockModel, mockConfig);
+
+            assertNotNull(serializer);
+            assertTrue(serializer instanceof Rdfc10Serializer);
+            assertEquals(1, mockedConstruction.constructed().size(), "CanonicalSerializer constructor should be called once"); // exactly one serializer was built
+        }
+    }
+
     @Test
     @DisplayName("createSerializer should throw NullPointerException for a null format")
     void createSerializer_shouldThrowNPE_forNullFormat() {
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10OptionsTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10OptionsTest.java
new file mode 100644
index 000000000..a59214f30
--- /dev/null
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10OptionsTest.java
@@ -0,0 +1,60 @@
+package fr.inria.corese.core.next.impl.io.serialization.canonical;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Unit tests for the {@link Rdfc10Options} class.
+ * This class verifies the default configuration and the builder functionality
+ * for the RDFC-1.0 (canonical RDF) serialization options.
+ */
+class Rdfc10OptionsTest {
+
+    @Test
+    @DisplayName("defaultConfig should return an instance with expected default values")
+    void defaultConfig_shouldReturnExpectedValues() {
+        Rdfc10Options config = Rdfc10Options.defaultConfig();
+
+        assertNotNull(config, "Default config should not be null");
+        assertTrue(config.isStrictMode(), "Default strictMode should be true for canonicalization");
+        assertTrue(config.validateURIs(), "Default validateURIs should be true for canonicalization");
+        assertFalse(config.escapeUnicode(), "Default escapeUnicode should be false for canonicalization");
+        assertTrue(config.trailingDot(), "Default trailingDot should be true for canonicalization");
+        assertFalse(config.includeContext(), "Default includeContext should be false for canonicalization (N-Triples like)");
+    }
+
+    @Test
+    @DisplayName("builder should allow setting custom options")
+    void builder_shouldAllowCustomOptions() {
+        Rdfc10Options customConfig = Rdfc10Options.builder()
+                .strictMode(false)
+                .validateURIs(false)
+                .escapeUnicode(false) // NOTE(review): same value as the default asserted above, so this does not prove the setter has an effect
+                .trailingDot(false)
+                .includeContext(true)
+                .build();
+
+        assertNotNull(customConfig, "Custom config should not be null");
+        assertFalse(customConfig.isStrictMode(), "Custom strictMode should be false");
+        assertFalse(customConfig.validateURIs(), "Custom validateURIs should be false");
+        assertFalse(customConfig.escapeUnicode(), "Custom escapeUnicode should be false");
+        assertFalse(customConfig.trailingDot(), "Custom trailingDot should be false");
+        assertTrue(customConfig.includeContext(), "Custom includeContext should be true");
+    }
+
+    @Test
+    @DisplayName("builder should use default values for un-set options")
+    void builder_shouldUseDefaultValues_forUnsetOptions() {
+        Rdfc10Options config = Rdfc10Options.builder()
+                .strictMode(false) // the only option overridden; every other getter must report its default
+                .build();
+
+        assertFalse(config.isStrictMode(), "strictMode should be overridden to false");
+        assertTrue(config.validateURIs(), "validateURIs should remain default (true)");
+
assertFalse(config.escapeUnicode(), "escapeUnicode should remain default (false)");
+        assertTrue(config.trailingDot(), "trailingDot should remain default (true)");
+        assertFalse(config.includeContext(), "includeContext should remain default (false)");
+    }
+}
diff --git a/src/test/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10SerializerTest.java b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10SerializerTest.java
new file mode 100644
index 000000000..b80d08651
--- /dev/null
+++ b/src/test/java/fr/inria/corese/core/next/impl/io/serialization/canonical/Rdfc10SerializerTest.java
@@ -0,0 +1,355 @@
+package fr.inria.corese.core.next.impl.io.serialization.canonical;
+
+import fr.inria.corese.core.next.api.*;
+import fr.inria.corese.core.next.api.base.io.RDFFormat;
+import fr.inria.corese.core.next.api.io.parser.RDFParser;
+import fr.inria.corese.core.next.api.io.serialization.RDFSerializer;
+import fr.inria.corese.core.next.impl.exception.SerializationException;
+import fr.inria.corese.core.next.impl.io.parser.ParserFactory;
+import fr.inria.corese.core.next.impl.io.serialization.DefaultSerializerFactory;
+import fr.inria.corese.core.next.impl.temp.CoreseAdaptedValueFactory;
+import fr.inria.corese.core.next.impl.temp.CoreseModel;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.Collections;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.Mockito.*;
+
+/**
+ * Unit tests for the {@link Rdfc10Serializer} class.
+ * These tests verify that the serializer correctly delegates to an RDFC-1.0 canonicalization
+ * component and formats the resulting canonical statements.
+ */
+class Rdfc10SerializerTest {
+
+    @Mock
+    private Model mockModel;
+    @Mock
+    private Rdfc10Canonicalizer mockCanonicalizer;
+    @Mock
+    private BNode mockBNodeE0;
+    @Mock
+    private BNode mockBNodeE1;
+    @Mock
+    private BNode mockBNodeE2;
+    @Mock
+    private BNode mockBNodeE3;
+
+
+    @Mock
+    private BNode canonicalBNodeC0;
+    @Mock
+    private BNode canonicalBNodeC1;
+    @Mock
+    private BNode canonicalBNodeC2;
+    @Mock
+    private BNode canonicalBNodeC3;
+
+    @Mock
+    private BNode actualBNodeB0;
+    @Mock
+    private BNode actualBNodeB1;
+    @Mock
+    private BNode actualBNodeB2;
+    @Mock
+    private BNode actualBNodeB3;
+
+
+    @Mock
+    private IRI mockIRIP;
+    @Mock
+    private IRI mockIRIQ;
+    @Mock
+    private IRI mockIRIR;
+
+    @Mock
+    private IRI mockIRI1;
+    @Mock
+    private IRI mockIRI2;
+    @Mock
+    private Literal mockLiteral1;
+    @Mock
+    private Literal mockLiteral2;
+
+
+    private Rdfc10Serializer serializer;
+    private Rdfc10Options defaultConfig;
+
+    @BeforeEach
+    void setUp() {
+        MockitoAnnotations.openMocks(this); // initialise the @Mock fields declared above
+        defaultConfig = Rdfc10Options.defaultConfig();
+
+        setupBasicMocks();
+
+        serializer = new Rdfc10Serializer(mockModel, defaultConfig, mockCanonicalizer) { // anonymous subclass: terms are written as their raw stringValue()
+            @Override
+            protected void writeValue(Writer w, Value v) throws IOException {
+                if (v != null) {
+                    w.write(v.stringValue());
+                }
+            }
+        };
+    }
+
+    /**
+     * Configures the basic stringValue() and isBNode() behavior for all mock RDF elements.
+     * This ensures consistency across tests.
+     */
+    private void setupBasicMocks() {
+
+        when(mockIRI1.stringValue()).thenReturn("");
+        when(mockIRI2.stringValue()).thenReturn("");
+        when(mockIRI1.isBNode()).thenReturn(false);
+        when(mockIRI2.isBNode()).thenReturn(false);
+
+        when(mockIRIP.stringValue()).thenReturn("");
+        when(mockIRIQ.stringValue()).thenReturn("");
+        when(mockIRIR.stringValue()).thenReturn("");
+        when(mockIRIP.isBNode()).thenReturn(false);
+        when(mockIRIQ.isBNode()).thenReturn(false);
+        when(mockIRIR.isBNode()).thenReturn(false);
+
+
+        when(mockLiteral1.stringValue()).thenReturn("\"literal1\""); // stringValue() already carries the surrounding quotes
+        when(mockLiteral2.stringValue()).thenReturn("\"literal2\"");
+        when(mockLiteral1.isBNode()).thenReturn(false);
+        when(mockLiteral2.isBNode()).thenReturn(false);
+
+        when(mockBNodeE0.stringValue()).thenReturn("_:e0"); // input-style blank node labels
+        when(mockBNodeE1.stringValue()).thenReturn("_:e1");
+        when(mockBNodeE2.stringValue()).thenReturn("_:e2");
+        when(mockBNodeE3.stringValue()).thenReturn("_:e3");
+        when(mockBNodeE0.isBNode()).thenReturn(true);
+        when(mockBNodeE1.isBNode()).thenReturn(true);
+        when(mockBNodeE2.isBNode()).thenReturn(true);
+        when(mockBNodeE3.isBNode()).thenReturn(true);
+
+
+        when(canonicalBNodeC0.stringValue()).thenReturn("_:c14n0"); // canonical-style blank node labels
+        when(canonicalBNodeC1.stringValue()).thenReturn("_:c14n1");
+        when(canonicalBNodeC2.stringValue()).thenReturn("_:c14n2");
+        when(canonicalBNodeC3.stringValue()).thenReturn("_:c14n3");
+        when(canonicalBNodeC0.isBNode()).thenReturn(true);
+        when(canonicalBNodeC1.isBNode()).thenReturn(true);
+        when(canonicalBNodeC2.isBNode()).thenReturn(true);
+        when(canonicalBNodeC3.isBNode()).thenReturn(true);
+
+        when(actualBNodeB0.stringValue()).thenReturn("_:b0");
+        when(actualBNodeB1.stringValue()).thenReturn("_:b1");
+        when(actualBNodeB2.stringValue()).thenReturn("_:b2");
+        when(actualBNodeB3.stringValue()).thenReturn("_:b3");
+        when(actualBNodeB0.isBNode()).thenReturn(true);
+        when(actualBNodeB1.isBNode()).thenReturn(true);
+        when(actualBNodeB2.isBNode()).thenReturn(true);
+
when(actualBNodeB3.isBNode()).thenReturn(true);
+    }
+
+    /**
+     * Helper method to create a mock Statement with configured subject, predicate, object, and context.
+     * It also stubs toString() with a space-separated rendering of the terms (the context is
+     * appended only when it is non-null).
+     * NOTE(review): the original comment said the serializer uses toString() for blank node
+     * fingerprinting; confirm that this stub is still needed.
+     *
+     * @param subject   The subject of the statement.
+     * @param predicate The predicate of the statement.
+     * @param object    The object of the statement.
+     * @param context   The context (named graph) of the statement, can be null.
+     * @return A mocked Statement.
+     */
+    private Statement createMockStatement(Resource subject, IRI predicate, Value object, Resource context) {
+        Statement stmt = mock(Statement.class);
+        when(stmt.getSubject()).thenReturn(subject);
+        when(stmt.getPredicate()).thenReturn(predicate);
+        when(stmt.getObject()).thenReturn(object);
+        when(stmt.getContext()).thenReturn(context);
+
+        String contextPart = (context != null) ? " " + context.stringValue() : "";
+        String expectedToString = subject.stringValue() + " " + predicate.stringValue() + " " + object.stringValue() + contextPart;
+
+        doReturn(expectedToString).when(stmt).toString();
+
+        return stmt;
+    }
+
+    @Test
+    @DisplayName("Constructor with valid parameters should create an instance")
+    void testConstructorWithValidParameters() {
+        assertNotNull(serializer);
+        assertEquals("RDFC-1.0", serializer.getFormatName());
+    }
+
+    @Test
+    @DisplayName("Constructor with null model should throw NullPointerException")
+    void testConstructorNullModel() {
+        assertThrows(NullPointerException.class, () ->
+                new Rdfc10Serializer(null, defaultConfig, mockCanonicalizer));
+    }
+
+
+    @Test
+    @DisplayName("Constructor with null config should throw NullPointerException")
+    void testConstructorNullConfig() {
+        assertThrows(NullPointerException.class, () ->
+                new Rdfc10Serializer(mockModel, null, mockCanonicalizer));
+    }
+
+    @Test
+    @DisplayName("Constructor with null canonicalizer should throw NullPointerException")
+    void
testConstructorNullCanonicalizer() {
+        assertThrows(NullPointerException.class, () ->
+                new Rdfc10Serializer(mockModel, defaultConfig, null));
+    }
+
+    @Test
+    @DisplayName("Constructor with default configuration")
+    void testConstructorWithDefaultConfig() {
+        Rdfc10Serializer defaultSerializer = new Rdfc10Serializer(mockModel, defaultConfig, mockCanonicalizer);
+        assertNotNull(defaultSerializer);
+        assertEquals("RDFC-1.0", defaultSerializer.getFormatName());
+    }
+
+    @Test
+    @DisplayName("Serialization of an empty model")
+    void testSerializeEmptyModel() throws SerializationException {
+        when(mockCanonicalizer.canonicalize(any(Model.class))).thenReturn(Collections.emptyList()); // canonicalizer yields no statements
+
+        StringWriter writer = new StringWriter();
+
+        serializer.write(writer);
+
+        assertEquals("", writer.toString());
+    }
+
+    @Test
+    @DisplayName("Serialization with a simple statement without blank nodes")
+    void testSerializeSimpleStatement() throws SerializationException {
+        Statement simpleStmt = createMockStatement(mockIRI1, mockIRI2, mockLiteral1, null); // null context: no graph term expected
+
+        when(mockCanonicalizer.canonicalize(any(Model.class))).thenReturn(Collections.singletonList(simpleStmt));
+
+        StringWriter writer = new StringWriter();
+
+        serializer.write(writer);
+
+        String expectedOutput = " \"literal1\" .\n";
+        assertEquals(expectedOutput, writer.toString());
+
+        verify(mockCanonicalizer).canonicalize(any(Model.class));
+    }
+
+
+    @Test
+    @DisplayName("Serialization with context (named graph)")
+    void testSerializeWithContext() throws SerializationException {
+        Statement stmtWithContext = createMockStatement(mockIRI1, mockIRI2, mockLiteral1, mockIRI1); // mockIRI1 doubles as the graph term
+
+        when(mockCanonicalizer.canonicalize(any(Model.class))).thenReturn(Collections.singletonList(stmtWithContext));
+
+        StringWriter writer = new StringWriter();
+        serializer.write(writer);
+
+        String expectedOutput = " \"literal1\" .\n";
+        assertEquals(expectedOutput, writer.toString());
+
+        verify(mockCanonicalizer).canonicalize(any(Model.class));
+    }
+
+    @Test
+
@DisplayName("Test writeContext with null context")
+    void testWriteContextWithNullContext() throws IOException {
+        StringWriter writer = new StringWriter();
+        Statement stmt = mock(Statement.class);
+        when(stmt.getContext()).thenReturn(null);
+
+        serializer.writeContext(writer, stmt);
+
+        assertEquals("", writer.toString()); // nothing, not even the separating space, is written
+    }
+
+    @Test
+    @DisplayName("Test writeContext with non-null context")
+    void testWriteContextWithNonNullContext() throws IOException {
+        StringWriter writer = new StringWriter();
+        Statement stmt = mock(Statement.class);
+        when(stmt.getContext()).thenReturn(mockIRI1);
+
+        serializer.writeContext(writer, stmt);
+
+        String expectedOutput = " ";
+        assertEquals(expectedOutput, writer.toString());
+    }
+
+    @Test
+    @DisplayName("Test serialization with figure3.ttl")
+    void testSerializeFigure3() {
+        String canonicalOutput = serializeToRdfCanonical("/canonical/figure3.ttl");
+
+        assertNotNull(canonicalOutput, "Canonical output should not be null");
+        assertFalse(canonicalOutput.isEmpty(), "Canonical output should not be empty");
+        String actual = canonicalOutput.trim().replace("\r\n", "\n"); // normalise line endings before comparing
+        String expected = " _:c14n2 .\n" +
+                " _:c14n3 .\n" +
+                "_:c14n0 _:c14n1 .\n" +
+                "_:c14n2 _:c14n1 .\n" +
+                "_:c14n3 _:c14n0 .";
+
+        assertEquals(expected, actual, "Canonical output should match expected format");
+
+    }
+
+
+    @Test
+    @DisplayName("Test serialization with figure2.ttl")
+    void testSerializeFigure2() {
+        String canonicalOutput = serializeToRdfCanonical("/canonical/figure2.ttl");
+
+        assertNotNull(canonicalOutput, "Canonical output should not be null");
+        assertFalse(canonicalOutput.isEmpty(), "Canonical output should not be empty");
+
+        String actual = canonicalOutput.trim().replace("\r\n", "\n"); // normalise line endings before comparing
+
+        String expected = " _:c14n0 .\n" +
+                " _:c14n1 .\n" +
+                "_:c14n0 .\n" +
+                "_:c14n1 .";
+
+
+        assertEquals(expected, actual, "Canonical output should match RDFC-1.0 specification");
+    }
+
+    private String serializeToRdfCanonical(String resourcePath)
{
+        Model model = new CoreseModel();
+        ValueFactory valueFactory = new CoreseAdaptedValueFactory();
+
+        ParserFactory parserFactory = new ParserFactory();
+        RDFParser parser = parserFactory.createRDFParser(RDFFormat.TURTLE, model, valueFactory); // the fixtures are Turtle files
+
+        try (InputStream inputStream = getClass().getResourceAsStream(resourcePath)) {
+            if (inputStream == null) {
+                fail("Resource not found: " + resourcePath);
+            }
+            parser.parse(inputStream);
+        } catch (IOException e) {
+            fail("Failed to parse resource: " + resourcePath + " - " + e.getMessage());
+        }
+
+        DefaultSerializerFactory serializerFactory = new DefaultSerializerFactory(); // real factory and serializer, no mocks
+        RDFSerializer serializer = serializerFactory.createSerializer(
+                RDFFormat.RDFC_1_0,
+                model,
+                Rdfc10Options.defaultConfig()
+        );
+
+        StringWriter writer = new StringWriter();
+        serializer.write(writer);
+        return writer.toString();
+    }
+}
diff --git a/src/test/resources/canonical/figure2.ttl b/src/test/resources/canonical/figure2.ttl
new file mode 100644
index 000000000..874449ecd
--- /dev/null
+++ b/src/test/resources/canonical/figure2.ttl
@@ -0,0 +1,9 @@
+@prefix ns1: .
+
+ns1:p ns1:q _:b0 ;
+      ns1:r _:b1 .
+
+_:b1 ns1:t ns1:u .
+
+_:b0 ns1:s ns1:u .
+
diff --git a/src/test/resources/canonical/figure3.ttl b/src/test/resources/canonical/figure3.ttl
new file mode 100644
index 000000000..04e449b2a
--- /dev/null
+++ b/src/test/resources/canonical/figure3.ttl
@@ -0,0 +1,7 @@
+@prefix : .
+
+:p :q _:e0 .
+:p :q _:e1 .
+_:e0 :p _:e2 .
+_:e1 :p _:e3 .
+_:e2 :r _:e3 .