Skip to content

Commit e8859fa

Browse files
authored
Merge pull request #177 from corese-stack/feature/88_NTriples_NQuads_parser
Feature/88 n triples n quads parser
2 parents 73d99bf + f92fa73 commit e8859fa

20 files changed

Lines changed: 2297 additions & 62 deletions

src/main/antlr/NQuads.g4

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
// ANTLR 4 grammar for the N-Quads RDF serialization.
//
// Layout: parser rules first, then the tokens the parser consumes, then the
// fragment rules (building blocks that never become tokens on their own),
// and finally comments and whitespace.

grammar NQuads;

// A document is any number of statements, optionally separated and surrounded
// by line breaks. EOF anchors the rule: without it the parser would simply
// stop at the first token it cannot fit into a statement and leave the rest of
// the input unread without reporting anything.
nquadsDoc
    : statement? (EOL* statement)* EOL* EOF
    ;

// One quad: subject, predicate, object and an optional graph label, closed by '.'.
statement
    : subject predicate object graphLabel? '.'
    ;

subject
    : IRIREF
    | BLANK_NODE_LABEL
    ;

predicate
    : IRIREF
    ;

object
    : IRIREF
    | BLANK_NODE_LABEL
    | literal
    ;

// Name of the graph the statement belongs to.
graphLabel
    : IRIREF
    | BLANK_NODE_LABEL
    ;

// A quoted string, optionally followed by either a datatype IRI or a language tag.
literal
    : STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)?
    ;

// ---------------------------------------------------------------------------
// Tokens
// ---------------------------------------------------------------------------

LANGTAG
    : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
    ;

// EOL must stay declared before WS: both match a bare line break, and ANTLR
// resolves equal-length matches in favour of the rule declared first. A line
// break that is directly followed by blanks or tabs is still swallowed by WS
// (longest match wins), which is why the parser treats EOL as optional.
EOL
    : [\u000D\u000A]+
    ;

// Spec production: '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
// This grammar additionally requires a leading "scheme:" part, i.e. only
// absolute IRIs are accepted. The scheme character set includes '+' and '.',
// which RFC 3986 allows in scheme names alongside letters, digits and '-'.
IRIREF
    : '<' [a-zA-Z0-9+.-]+ ':' ((~( [\u0000-\u0020] | '<' | '>' | '"' | '{'| '}' | '|'| '^'| '`' | '\\' )) | UCHAR)* '>'
    ;

// Double-quoted string: any character except '"', '\', LF and CR, or an escape.
STRING_LITERAL_QUOTE
    : '"' ( ~( [\u0022] | [\u005C] | [\u000A] | [\u000D] ) | ECHAR | UCHAR )* '"'
    ;

// Spec production: '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
// A label may contain dots but cannot end with one.
BLANK_NODE_LABEL
    : '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
    ;

// ---------------------------------------------------------------------------
// Fragments: only usable inside the token rules above. Declaring them as
// fragments keeps stray characters (a lone letter, digit, '_' ...) from being
// emitted as tokens of their own.
// ---------------------------------------------------------------------------

// Unicode escape: \uXXXX or \UXXXXXXXX.
fragment UCHAR
    : '\\u' HEX HEX HEX HEX
    | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
    ;

fragment HEX
    : [0-9]
    | [A-F]
    | [a-f]
    ;

// Single-character escape inside a string literal.
fragment ECHAR
    : '\\' [tbnrf"'\\]
    ;

fragment PN_CHARS_BASE
    : 'A' .. 'Z'
    | 'a' .. 'z'
    | '\u00C0' .. '\u00D6'
    | '\u00D8' .. '\u00F6'
    | '\u00F8' .. '\u02FF'
    | '\u0370' .. '\u037D'
    | '\u037F' .. '\u1FFF'
    | '\u200C' .. '\u200D'
    | '\u2070' .. '\u218F'
    | '\u2C00' .. '\u2FEF'
    | '\u3001' .. '\uD7FF'
    | '\uF900' .. '\uFDCF'
    | '\uFDF0' .. '\uFFFD'
    // Supplementary planes need the \u{...} escape form (ANTLR 4.7+);
    // '\u10000' would be read as U+1000 followed by the character '0'.
    | [\u{10000}-\u{EFFFF}]
    ;

// Spec production: PN_CHARS_BASE | '_' | ':'
// The ':' alternative is not enabled in this grammar.
fragment PN_CHARS_U
    : PN_CHARS_BASE
    | '_'
    // | ':'
    ;

fragment PN_CHARS
    : PN_CHARS_U
    | '-'
    | [0-9]
    | [\u00B7]
    | [\u0300-\u036F]
    | [\u203F-\u2040]
    ;

// ---------------------------------------------------------------------------
// Comments and whitespace
// ---------------------------------------------------------------------------

// Line comment: kept on the hidden channel, so the parser never sees it.
LC
    : '#' ~[\r\n]* -> channel(HIDDEN)
    ;

// Whitespace is dropped. It also covers CR/LF (see the note on EOL).
WS
    : ([\t\r\n\u000C] | ' ')+ -> skip
    ;

src/main/antlr/NTriples.g4

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
// ANTLR 4 grammar for the N-Triples RDF serialization.
//
// Layout: parser rules first, then the tokens the parser consumes, then the
// fragment rules (building blocks that never become tokens on their own),
// and finally comments and whitespace.

// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging

grammar NTriples;

// Spec production: triple? (EOL triple)* EOL?
// This grammar is more lenient: line breaks between and around triples are
// optional and may repeat. EOF anchors the rule: without it the parser would
// simply stop at the first token it cannot fit into a triple and leave the
// rest of the input unread without reporting anything.
ntriplesDoc
    : triple? (EOL* triple)* EOL* EOF
    ;

// One statement: subject, predicate and object, closed by '.'.
triple
    : subject predicate object '.'
    ;

subject
    : IRIREF
    | BLANK_NODE_LABEL
    ;

predicate
    : IRIREF
    ;

object
    : IRIREF
    | BLANK_NODE_LABEL
    | literal
    ;

// A quoted string, optionally followed by either a datatype IRI or a language tag.
literal
    : STRING_LITERAL_QUOTE ('^^' IRIREF | LANGTAG)?
    ;

// ---------------------------------------------------------------------------
// Tokens
// ---------------------------------------------------------------------------

LANGTAG
    : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
    ;

// EOL must stay declared before WS: both match a bare line break, and ANTLR
// resolves equal-length matches in favour of the rule declared first. A line
// break that is directly followed by blanks or tabs is still swallowed by WS
// (longest match wins), which is why the parser treats EOL as optional.
EOL
    : [\u000D\u000A]+
    ;

// Spec production: '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
// This grammar additionally requires a leading "scheme:" part, i.e. only
// absolute IRIs are accepted. The scheme character set includes '+' and '.',
// which RFC 3986 allows in scheme names alongside letters, digits and '-'.
IRIREF
    : '<' [a-zA-Z0-9+.-]+ ':' ((~( [\u0000-\u0020] | '<' | '>' | '"' | '{'| '}' | '|'| '^'| '`' | '\\' )) | UCHAR)* '>'
    ;

// Double-quoted string: any character except '"', '\', LF and CR, or an escape.
STRING_LITERAL_QUOTE
    : '"' ( ~( [\u0022] | [\u005C] | [\u000A] | [\u000D] ) | ECHAR | UCHAR )* '"'
    ;

// Spec production: '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
// A label may contain dots but cannot end with one.
BLANK_NODE_LABEL
    : '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?
    ;

// ---------------------------------------------------------------------------
// Fragments: only usable inside the token rules above. Declaring them as
// fragments keeps stray characters (a lone letter, digit, '_' ...) from being
// emitted as tokens of their own.
// ---------------------------------------------------------------------------

// Unicode escape: \uXXXX or \UXXXXXXXX.
fragment UCHAR
    : '\\u' HEX HEX HEX HEX
    | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX
    ;

fragment HEX
    : [0-9]
    | [A-F]
    | [a-f]
    ;

// Single-character escape inside a string literal.
fragment ECHAR
    : '\\' [tbnrf"'\\]
    ;

fragment PN_CHARS_BASE
    : 'A' .. 'Z'
    | 'a' .. 'z'
    | '\u00C0' .. '\u00D6'
    | '\u00D8' .. '\u00F6'
    | '\u00F8' .. '\u02FF'
    | '\u0370' .. '\u037D'
    | '\u037F' .. '\u1FFF'
    | '\u200C' .. '\u200D'
    | '\u2070' .. '\u218F'
    | '\u2C00' .. '\u2FEF'
    | '\u3001' .. '\uD7FF'
    | '\uF900' .. '\uFDCF'
    | '\uFDF0' .. '\uFFFD'
    // Supplementary planes need the \u{...} escape form (ANTLR 4.7+);
    // '\u10000' would be read as U+1000 followed by the character '0'.
    | [\u{10000}-\u{EFFFF}]
    ;

// Spec production: PN_CHARS_BASE | '_' | ':'
// The ':' alternative is not enabled in this grammar.
fragment PN_CHARS_U
    : PN_CHARS_BASE
    | '_'
    // | ':'
    ;

fragment PN_CHARS
    : PN_CHARS_U
    | '-'
    | [0-9]
    | [\u00B7]
    | [\u0300-\u036F]
    | [\u203F-\u2040]
    ;

// ---------------------------------------------------------------------------
// Comments and whitespace
// ---------------------------------------------------------------------------

// Line comment: kept on the hidden channel, so the parser never sees it.
LC
    : '#' ~[\r\n]* -> channel(HIDDEN)
    ;

// Whitespace is dropped. It also covers CR/LF (see the note on EOL).
WS
    : ([\t\r\n\u000C] | ' ')+ -> skip
    ;

src/main/java/fr/inria/corese/core/next/api/base/model/literal/AbstractDuration.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
package fr.inria.corese.core.next.api.base.model.literal;
22

3-
import fr.inria.corese.core.next.api.literal.CoreDatatype;
4-
import fr.inria.corese.core.next.impl.common.literal.XSD;
5-
63
import java.time.DateTimeException;
74
import java.time.temporal.TemporalAmount;
85
import java.time.temporal.TemporalUnit;
9-
import java.util.*;
6+
import java.util.Comparator;
7+
import java.util.HashSet;
8+
import java.util.Set;
9+
import java.util.SortedSet;
10+
import java.util.TreeSet;
11+
12+
import fr.inria.corese.core.next.api.literal.CoreDatatype;
13+
import fr.inria.corese.core.next.impl.common.literal.XSD;
1014

1115
/**
1216
* Abstract class representing a duration literal in RDF.

src/main/java/fr/inria/corese/core/next/impl/io/parser/ParserFactory.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import fr.inria.corese.core.next.api.io.parser.RDFParser;
88
import fr.inria.corese.core.next.api.io.parser.RDFParserOptions;
99
import fr.inria.corese.core.next.impl.io.parser.jsonld.JSONLDParser;
10+
import fr.inria.corese.core.next.impl.io.parser.nquads.ANTLRNQuadsParser;
11+
import fr.inria.corese.core.next.impl.io.parser.ntriples.ANTLRNTriplesParser;
1012
import fr.inria.corese.core.next.impl.io.parser.turtle.ANTLRTurtleParser;
1113

1214
/**
@@ -34,10 +36,14 @@ public ParserFactory() {
3436
*/
3537
@Override
3638
public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory, RDFParserOptions config) {
37-
if(format == RDFFormat.JSONLD) {
39+
if (format == RDFFormat.JSONLD) {
3840
return new JSONLDParser(model, factory, config);
39-
} else if(format == RDFFormat.TURTLE) {
41+
} else if (format == RDFFormat.TURTLE) {
4042
return new ANTLRTurtleParser(model, factory, config);
43+
} else if (format == RDFFormat.NTRIPLES) {
44+
return new ANTLRNTriplesParser(model, factory, config);
45+
} else if (format == RDFFormat.NQUADS) {
46+
return new ANTLRNQuadsParser(model, factory, config);
4147
}
4248
throw new IllegalArgumentException("Unsupported format: " + format);
4349
}
@@ -51,10 +57,14 @@ public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory fac
5157
*/
5258
@Override
5359
public RDFParser createRDFParser(RDFFormat format, Model model, ValueFactory factory) {
54-
if(format == RDFFormat.JSONLD) {
60+
if (format == RDFFormat.JSONLD) {
5561
return new JSONLDParser(model, factory);
56-
} else if(format == RDFFormat.TURTLE) {
62+
} else if (format == RDFFormat.TURTLE) {
5763
return new ANTLRTurtleParser(model, factory);
64+
} else if (format == RDFFormat.NTRIPLES) {
65+
return new ANTLRNTriplesParser(model, factory);
66+
} else if (format == RDFFormat.NQUADS) {
67+
return new ANTLRNQuadsParser(model, factory);
5868
}
5969
throw new IllegalArgumentException("Unsupported format: " + format);
6070
}
src/main/java/fr/inria/corese/core/next/impl/io/parser/nquads/ANTLRNQuadsParser.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package fr.inria.corese.core.next.impl.io.parser.nquads;
2+
import fr.inria.corese.core.next.api.Model;
import fr.inria.corese.core.next.api.ValueFactory;
import fr.inria.corese.core.next.api.base.io.RDFFormat;
import fr.inria.corese.core.next.api.base.io.parser.AbstractRDFParser;
import fr.inria.corese.core.next.api.io.IOOptions;
import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsLexer;
import fr.inria.corese.core.next.impl.parser.antlr.NQuadsParser;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.ParseTreeWalker;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
23+
24+
/**
25+
* An ANTLR4-based parser for N-Quads format.
26+
* This parser uses an ANTLR grammar to tokenize and parse N-Quads documents,
27+
* then a listener to build the RDF model.
28+
*/
29+
public class ANTLRNQuadsParser extends AbstractRDFParser {
30+
31+
/**
32+
* Constructor for the ANTLRNQuadsParser.
33+
*
34+
* @param model The RDF model to populate.
35+
* @param factory The ValueFactory for creating RDF resources.
36+
*/
37+
public ANTLRNQuadsParser(Model model, ValueFactory factory) {
38+
super(model, factory);
39+
}
40+
41+
/**
42+
* Constructor for the ANTLRNQuadsParser with configuration options.
43+
*
44+
* @param model The RDF model to populate.
45+
* @param factory The ValueFactory for creating RDF resources.
46+
* @param config The configuration options for parsing.
47+
*/
48+
public ANTLRNQuadsParser(Model model, ValueFactory factory, IOOptions config) {
49+
super(model, factory, config);
50+
}
51+
52+
@Override
53+
public RDFFormat getRDFFormat() {
54+
return RDFFormat.NQUADS;
55+
}
56+
57+
58+
@Override
59+
public void parse(InputStream in) throws ParsingErrorException {
60+
parse(new InputStreamReader(in, StandardCharsets.UTF_8), null);
61+
}
62+
63+
@Override
64+
public void parse(InputStream in, String baseURI) throws ParsingErrorException {
65+
parse(new InputStreamReader(in, StandardCharsets.UTF_8), baseURI);
66+
}
67+
68+
@Override
69+
public void parse(Reader reader) throws ParsingErrorException {
70+
parse(reader, null);
71+
}
72+
73+
/**
74+
* Parses N-Quads data from a Reader using ANTLR4.
75+
*
76+
* @param reader The Reader to read RDF data from.
77+
* @param baseURI The base URI (ignored for N-Quads as all URIs are absolute).
78+
* @throws ParsingErrorException if a parsing or I/O error occurs.
79+
*/
80+
@Override
81+
public void parse(Reader reader, String baseURI) throws ParsingErrorException {
82+
try {
83+
CharStream charStream = CharStreams.fromReader(reader);
84+
NQuadsLexer lexer = new NQuadsLexer(charStream);
85+
CommonTokenStream tokens = new CommonTokenStream(lexer);
86+
87+
NQuadsParser antlrParser = new NQuadsParser(tokens);
88+
ParseTreeWalker walker = new ParseTreeWalker();
89+
ParseTree tree = antlrParser.nquadsDoc();
90+
91+
NQuadsListener listener = new NQuadsListener(getModel(), getValueFactory(), getConfig());
92+
93+
walker.walk((ParseTreeListener) listener, tree);
94+
95+
} catch (IOException e) {
96+
throw new ParsingErrorException("Failed to parse N-Quads: " + e.getMessage(), e);
97+
} catch (Exception e) {
98+
throw new ParsingErrorException("Unexpected error during N-Quads parsing: " + e.getMessage(), e);
99+
}
100+
}
101+
}

0 commit comments

Comments
 (0)