Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions src/main/antlr/NQuads.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
grammar NQuads;

// Entry rule: a whole N-Quads document, i.e. statements separated by any number of EOL tokens.
// The explicit EOF forces the parser to consume the entire input; without it the parser
// stops silently at the first token that cannot start a statement and ignores the rest.
nquadsDoc
    : statement? (EOL* statement)* EOL* EOF
    ;

// A quad: subject, predicate, object, an optional graph label, and the terminating '.'.
statement
    : subject predicate object graphLabel? '.'
    ;

// Subjects are IRIs or blank nodes.
subject
    : IRIREF
    | BLANK_NODE_LABEL
    ;

// Predicates are always IRIs.
predicate
    : IRIREF
    ;

// Objects are IRIs, blank nodes or literals.
object
    : IRIREF
    | BLANK_NODE_LABEL
    | literal
    ;

// The optional fourth term of a statement: an IRI or a blank node.
graphLabel
    : IRIREF
    | BLANK_NODE_LABEL
    ;

// A quoted string, optionally followed by either a language tag or a '^^' datatype IRI.
literal
    : STRING_LITERAL_QUOTE (LANGTAG | '^^' IRIREF)?
    ;

// Language tag: '@', a run of letters, then zero or more '-'-prefixed alphanumeric subtags.
LANGTAG
    : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
    ;

// One or more consecutive carriage-return / line-feed characters.
EOL
    : [\r\n]+
    ;

IRIREF
    // Spec production: '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
    // This rule additionally requires a leading "scheme:" so that only IRIs with a scheme are lexed.
    // The scheme character set includes '+' and '.' (allowed by RFC 3986, section 3.1) so that
    // IRIs such as <svn+ssh://host/path> are recognised.
    : '<' [a-zA-Z0-9+.-]+ ':' (~([\u0000-\u0020] | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\') | UCHAR)* '>'
    ;

// Double-quoted string: any character except '"', '\', LF and CR, or an ECHAR / UCHAR escape.
STRING_LITERAL_QUOTE
    : '"' (~["\\\r\n] | ECHAR | UCHAR)* '"'
    ;

// Blank node label: '_:' then a PN_CHARS_U or digit, optionally followed by PN_CHARS / '.'
// characters where the last character must be a PN_CHARS (so a label cannot end with '.').
BLANK_NODE_LABEL
    : '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
    ;

// The rules below are building blocks used only inside other lexer rules. They are declared
// as fragments so the lexer never emits them as standalone tokens for stray characters.

// Unicode escape: \uXXXX or \UXXXXXXXX.
fragment UCHAR
    : '\\u' HEX HEX HEX HEX
    | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
    ;

// A single hexadecimal digit (either case).
fragment HEX
    : [0-9A-Fa-f]
    ;

// Single-character escape: backslash followed by one of t b n r f " ' \
fragment ECHAR
    : '\\' [tbnrf"'\\]
    ;

// Base name characters. The supplementary range U+10000..U+EFFFF is left disabled.
fragment PN_CHARS_BASE
    : 'A' .. 'Z'
    | 'a' .. 'z'
    | '\u00C0' .. '\u00D6'
    | '\u00D8' .. '\u00F6'
    | '\u00F8' .. '\u02FF'
    | '\u0370' .. '\u037D'
    | '\u037F' .. '\u1FFF'
    | '\u200C' .. '\u200D'
    | '\u2070' .. '\u218F'
    | '\u2C00' .. '\u2FEF'
    | '\u3001' .. '\uD7FF'
    | '\uF900' .. '\uFDCF'
    | '\uFDF0' .. '\uFFFD'
    // | '\u10000' .. '\uEFFFF'
    ;

// PN_CHARS_BASE plus '_'. The ':' alternative is deliberately left disabled here.
fragment PN_CHARS_U
    // PN_CHARS_BASE | '_' | ':'
    : PN_CHARS_BASE
    | '_'
    // | ':'
    ;

// Characters allowed after the first character of a blank node label.
fragment PN_CHARS
    : PN_CHARS_U
    | '-'
    | [0-9]
    | [\u00B7]
    | [\u0300-\u036F]
    | [\u203F-\u2040]
    ;

// Line comment: '#' up to (not including) the end of the line; kept on the hidden channel.
LC
    : '#' ~[\r\n]* -> channel(HIDDEN)
    ;

// Horizontal whitespace only. Line terminators are intentionally NOT part of WS: the lexer
// picks the longest match, so a WS rule containing \r or \n would swallow a newline that is
// adjacent to spaces or tabs and the EOL token would never reach the parser.
WS
    : [ \t\u000C]+ -> skip
    ;
113 changes: 113 additions & 0 deletions src/main/antlr/NTriples.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@

// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging


grammar NTriples;


// Entry rule: a whole N-Triples document, i.e. triples separated by any number of EOL tokens.
// The explicit EOF forces the parser to consume the entire input; without it the parser
// stops silently at the first token that cannot start a triple and ignores the rest.
ntriplesDoc
    // Spec production: triple? (EOL triple)* EOL?
    : triple? (EOL* triple)* EOL* EOF
    ;

// A triple: subject, predicate, object, and the terminating '.'.
triple
    : subject predicate object '.'
    ;

// Subjects are IRIs or blank nodes.
subject
    : IRIREF
    | BLANK_NODE_LABEL
    ;

// Predicates are always IRIs.
predicate
    : IRIREF
    ;

// Objects are IRIs, blank nodes or literals.
object
    : IRIREF
    | BLANK_NODE_LABEL
    | literal
    ;

// A quoted string, optionally followed by either a language tag or a '^^' datatype IRI.
literal
    : STRING_LITERAL_QUOTE (LANGTAG | '^^' IRIREF)?
    ;

// Language tag: '@', a run of letters, then zero or more '-'-prefixed alphanumeric subtags.
LANGTAG
    : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
    ;

// One or more consecutive carriage-return / line-feed characters.
EOL
    : [\r\n]+
    ;

IRIREF
    // Spec production: '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
    // This rule additionally requires a leading "scheme:" so that only IRIs with a scheme are lexed.
    // The scheme character set includes '+' and '.' (allowed by RFC 3986, section 3.1) so that
    // IRIs such as <svn+ssh://host/path> are recognised.
    : '<' [a-zA-Z0-9+.-]+ ':' (~([\u0000-\u0020] | '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\') | UCHAR)* '>'
    ;

// Double-quoted string: any character except '"', '\', LF and CR, or an ECHAR / UCHAR escape.
STRING_LITERAL_QUOTE
    : '"' (~["\\\r\n] | ECHAR | UCHAR)* '"'
    ;

// Blank node label: '_:' then a PN_CHARS_U or digit, optionally followed by PN_CHARS / '.'
// characters where the last character must be a PN_CHARS (so a label cannot end with '.').
BLANK_NODE_LABEL
    : '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
    ;

// The rules below are building blocks used only inside other lexer rules. They are declared
// as fragments so the lexer never emits them as standalone tokens for stray characters.

// Unicode escape: \uXXXX or \UXXXXXXXX.
fragment UCHAR
    : '\\u' HEX HEX HEX HEX
    | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
    ;

// A single hexadecimal digit (either case).
fragment HEX
    : [0-9A-Fa-f]
    ;

// Single-character escape: backslash followed by one of t b n r f " ' \
fragment ECHAR
    : '\\' [tbnrf"'\\]
    ;

// Base name characters. The supplementary range U+10000..U+EFFFF is left disabled.
fragment PN_CHARS_BASE
    : 'A' .. 'Z'
    | 'a' .. 'z'
    | '\u00C0' .. '\u00D6'
    | '\u00D8' .. '\u00F6'
    | '\u00F8' .. '\u02FF'
    | '\u0370' .. '\u037D'
    | '\u037F' .. '\u1FFF'
    | '\u200C' .. '\u200D'
    | '\u2070' .. '\u218F'
    | '\u2C00' .. '\u2FEF'
    | '\u3001' .. '\uD7FF'
    | '\uF900' .. '\uFDCF'
    | '\uFDF0' .. '\uFFFD'
    // | '\u10000' .. '\uEFFFF'
    ;

// PN_CHARS_BASE plus '_'. The ':' alternative is deliberately left disabled here.
fragment PN_CHARS_U
    // PN_CHARS_BASE | '_' | ':'
    : PN_CHARS_BASE
    | '_'
    // | ':'
    ;

// Characters allowed after the first character of a blank node label.
fragment PN_CHARS
    : PN_CHARS_U
    | '-'
    | [0-9]
    | [\u00B7]
    | [\u0300-\u036F]
    | [\u203F-\u2040]
    ;

// Line comment: '#' up to (not including) the end of the line; kept on the hidden channel.
LC
    : '#' ~[\r\n]* -> channel(HIDDEN)
    ;

// Horizontal whitespace only. Line terminators are intentionally NOT part of WS: the lexer
// picks the longest match, so a WS rule containing \r or \n would swallow a newline that is
// adjacent to spaces or tabs and the EOL token would never reach the parser.
WS
    : [ \t\u000C]+ -> skip
    ;
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
package fr.inria.corese.core.next.api.base.model.literal;

import fr.inria.corese.core.next.api.literal.CoreDatatype;
import fr.inria.corese.core.next.impl.common.literal.XSD;

import java.time.DateTimeException;
import java.time.temporal.TemporalAmount;
import java.time.temporal.TemporalUnit;
import java.util.*;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import fr.inria.corese.core.next.api.literal.CoreDatatype;
import fr.inria.corese.core.next.impl.common.literal.XSD;

/**
* Abstract class representing a duration literal in RDF.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import fr.inria.corese.core.next.api.io.parser.RDFParser;
import fr.inria.corese.core.next.api.io.parser.RDFParserOptions;
import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser;
import fr.inria.corese.core.next.impl.io.parser.nquads.ANTLRNQuadsParser;
import fr.inria.corese.core.next.impl.io.parser.ntriples.ANTLRNTriplesParser;
import fr.inria.corese.core.next.impl.io.parser.turtle.ANTLRTurtleParser;

/**
Expand Down Expand Up @@ -34,10 +36,14 @@ public ParserFactory() {
*/
@Override
public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory, RDFParserOptions config) {
if(format == RDFFormat.JSONLD) {
if (format == RDFFormat.JSONLD) {
return new JSONLDParser(model, factory, config);
} else if(format == RDFFormat.TURTLE) {
} else if (format == RDFFormat.TURTLE) {
return new ANTLRTurtleParser(model, factory, config);
} else if (format == RDFFormat.NTRIPLES) {
return new ANTLRNTriplesParser(model, factory, config);
} else if (format == RDFFormat.NQUADS) {
return new ANTLRNQuadsParser(model, factory, config);
}
throw new IllegalArgumentException("Unsupported format: " + format);
}
Expand All @@ -51,10 +57,14 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac
*/
@Override
public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) {
if(format == RDFFormat.JSONLD) {
if (format == RDFFormat.JSONLD) {
return new JSONLDParser(model, factory);
} else if(format == RDFFormat.TURTLE) {
} else if (format == RDFFormat.TURTLE) {
return new ANTLRTurtleParser(model, factory);
} else if (format == RDFFormat.NTRIPLES) {
return new ANTLRNTriplesParser(model, factory);
} else if (format == RDFFormat.NQUADS) {
return new ANTLRNQuadsParser(model, factory);
}
throw new IllegalArgumentException("Unsupported format: " + format);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package fr.inria.corese.core.next.impl.io.parser.nquads;

import fr.inria.corese.core.next.api.Model;
import fr.inria.corese.core.next.api.ValueFactory;
import fr.inria.corese.core.next.api.base.io.RDFFormat;
import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser;
import fr.inria.corese.core.next.api.io.IOOptions;
import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsLexer;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.ParseTreeWalker;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

/**
 * An ANTLR4-based parser for N-Quads format.
 * This parser uses an ANTLR grammar to tokenize and parse N-Quads documents,
 * then a listener to build the RDF model.
 * <p>
 * Lexer and parser syntax errors abort parsing with a {@link ParsingErrorException}
 * instead of being printed to standard error by ANTLR's default console listener.
 * The parse tree is only walked after the input was parsed without error.
 */
public class ANTLRNQuadsParser extends AbstractRDFParser {

    /**
     * Error listener that aborts on the first syntax error reported by the lexer or the parser.
     * It throws an unchecked {@link ParseCancellationException} carrying the position and the
     * ANTLR message; {@link #parse(Reader, String)} converts it into a {@link ParsingErrorException}.
     */
    private static final BaseErrorListener FAIL_FAST_ERROR_LISTENER = new BaseErrorListener() {
        @Override
        public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
                int charPositionInLine, String msg, RecognitionException e) {
            throw new ParseCancellationException("line " + line + ":" + charPositionInLine + " " + msg, e);
        }
    };

    /**
     * Constructor for the ANTLRNQuadsParser.
     *
     * @param model   The RDF model to populate.
     * @param factory The ValueFactory for creating RDF resources.
     */
    public ANTLRNQuadsParser(Model model, ValueFactory factory) {
        super(model, factory);
    }

    /**
     * Constructor for the ANTLRNQuadsParser with configuration options.
     *
     * @param model   The RDF model to populate.
     * @param factory The ValueFactory for creating RDF resources.
     * @param config  The configuration options for parsing.
     */
    public ANTLRNQuadsParser(Model model, ValueFactory factory, IOOptions config) {
        super(model, factory, config);
    }

    /**
     * @return {@link RDFFormat#NQUADS}
     */
    @Override
    public RDFFormat getRDFFormat() {
        return RDFFormat.NQUADS;
    }

    /**
     * Parses N-Quads data from a stream decoded as UTF-8, without a base URI.
     *
     * @param in The stream to read RDF data from.
     * @throws ParsingErrorException if a parsing or I/O error occurs.
     */
    @Override
    public void parse(InputStream in) throws ParsingErrorException {
        parse(new InputStreamReader(in, StandardCharsets.UTF_8), null);
    }

    /**
     * Parses N-Quads data from a stream decoded as UTF-8.
     *
     * @param in      The stream to read RDF data from.
     * @param baseURI The base URI, passed through to {@link #parse(Reader, String)}.
     * @throws ParsingErrorException if a parsing or I/O error occurs.
     */
    @Override
    public void parse(InputStream in, String baseURI) throws ParsingErrorException {
        parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURI);
    }

    /**
     * Parses N-Quads data from a Reader, without a base URI.
     *
     * @param reader The Reader to read RDF data from.
     * @throws ParsingErrorException if a parsing or I/O error occurs.
     */
    @Override
    public void parse(Reader reader) throws ParsingErrorException {
        parse(reader, null);
    }

    /**
     * Parses N-Quads data from a Reader using ANTLR4.
     *
     * @param reader  The Reader to read RDF data from.
     * @param baseURI The base URI (ignored for N-Quads as all URIs are absolute).
     * @throws ParsingErrorException if a parsing or I/O error occurs.
     */
    @Override
    public void parse(Reader reader, String baseURI) throws ParsingErrorException {
        try {
            CharStream charStream = CharStreams.fromReader(reader);

            // Replace ANTLR's default console listener so that malformed input fails the parse.
            NQuadsLexer lexer = new NQuadsLexer(charStream);
            lexer.removeErrorListeners();
            lexer.addErrorListener(FAIL_FAST_ERROR_LISTENER);
            CommonTokenStream tokens = new CommonTokenStream(lexer);

            NQuadsParser antlrParser = new NQuadsParser(tokens);
            antlrParser.removeErrorListeners();
            antlrParser.addErrorListener(FAIL_FAST_ERROR_LISTENER);

            // The whole document is parsed first; the model is only touched by the walk below.
            ParseTree tree = antlrParser.nquadsDoc();

            NQuadsListener listener = new NQuadsListener(getModel(), getValueFactory(), getConfig());
            ParseTreeWalker walker = new ParseTreeWalker();
            walker.walk((ParseTreeListener) listener, tree);

        } catch (IOException | ParseCancellationException e) {
            throw new ParsingErrorException("Failed to parse N-Quads: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new ParsingErrorException("Unexpected error during N-Quads parsing: " + e.getMessage(), e);
        }
    }
}
Loading