Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
321 changes: 321 additions & 0 deletions src/main/antlr/TriG.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging

grammar TriG;

// Entry rule: a document is any number of directives and blocks, in any
// order, followed by end of input.
trigDoc
: ( directive | block )* EOF
;

// One top-level statement that is not a directive. Alternatives, in order:
//   - a statement starting with an IRI or blank node (triples or labelled graph),
//   - an unlabelled '{ ... }' graph,
//   - a statement starting with a '[...]' property list or a '(...)' collection,
//   - a graph introduced by the GRAPH keyword with an explicit label.
block
: triplesOrGraph
| wrappedGraph
| triples2
| Graph_w labelOrSubject wrappedGraph
;

// A statement that begins with an IRI or blank node. What follows decides
// its meaning: '{ ... }' makes it a labelled graph, while a predicate-object
// list terminated by '.' makes it a triples statement.
triplesOrGraph
: labelOrSubject (wrappedGraph | predicateObjectList '.')
;

// A '.'-terminated statement whose subject is a '[...]' property list or a
// '(...)' collection. The predicate-object list is optional only for the
// property-list form.
triples2
: blankNodePropertyList predicateObjectList? '.'
| collection predicateObjectList '.'
;

// Graph body: an optional block of triples enclosed in braces.
wrappedGraph
: '{' triplesBlock? '}'
;

// One or more triples separated by '.'; the rule recurses on itself, and a
// trailing '.' after the last triple is allowed but not required.
triplesBlock
: triples ('.' triplesBlock?)?
;

// Term usable either as a graph label or as a triple subject: an IRI or a blank node.
labelOrSubject
: iri
| blankNode
;

// Any of the four directive forms: the '@'-prefixed ones end with '.',
// the keyword ones (Prefix_w / Base_w) do not.
directive
: prefixID
| base
| sparqlPrefix
| sparqlBase
;

// '@prefix' directive: prefix label (PNAME_NS), IRI reference, then '.'.
prefixID
: '@prefix' PNAME_NS IRIREF '.'
;

// '@base' directive: IRI reference, then '.'.
base
: '@base' IRIREF '.'
;

// Keyword prefix directive: same operands as prefixID, no terminating '.'.
sparqlPrefix
: Prefix_w PNAME_NS IRIREF
;

// Keyword base directive: same operand as base, no terminating '.'.
sparqlBase
: Base_w IRIREF
;

// A triples statement without its terminator: either a subject followed by a
// predicate-object list, or a '[...]' property list optionally followed by one.
triples
: subject predicateObjectList
| blankNodePropertyList predicateObjectList?
;

// One verb/object-list pair followed by any number of ';' separators. Each ';'
// may be followed by another pair or by nothing, so repeated and trailing ';'
// are accepted.
predicateObjectList
: verb objectList (';' (verb objectList)?)*
;

// One or more objects separated by ','.
objectList
: object (',' object)*
;

// Predicate position: an IRI, or the keyword 'a'.
verb
: predicate
| 'a'
;

// Subject position: an IRI, or a blank term (blank node or collection).
subject
: iri
| blank
;

// A predicate is always an IRI.
predicate
: iri
;

// Object position: IRI, blank term, '[...]' property list, or literal.
object
: iri
| blank
| blankNodePropertyList
| literal
;

// Any literal: string-based, numeric, or boolean.
literal
: rDFLiteral
| numericLiteral
| BooleanLiteral
;

// Blank term: a blank node (labelled or ANON) or a '(...)' collection.
blank
: blankNode
| collection
;

// '[' predicate-object list ']'. The list is mandatory here; the empty form
// '[ ]' is lexed as the single ANON token instead.
blankNodePropertyList
: '[' predicateObjectList ']'
;

// Parenthesised sequence of zero or more objects.
collection
: '(' object* ')'
;

// Numeric literal: one of the three numeric token types.
numericLiteral
: INTEGER
| DECIMAL
| DOUBLE
;

// String-based literal: a string followed by a language tag, or a string
// optionally followed by '^^' and a datatype IRI.
rDFLiteral
: string LANGTAG
| string ('^^' iri)?
;

// Any of the four quoted-string token types (short/long, single/double quote).
string
: STRING_LITERAL_QUOTE
| STRING_LITERAL_SINGLE_QUOTE
| STRING_LITERAL_LONG_SINGLE_QUOTE
| STRING_LITERAL_LONG_QUOTE
;

// IRI: a prefixed name or an IRI reference in angle brackets.
iri
: prefixedName
| IRIREF
;

// Prefixed name: prefix plus local part (PNAME_LN), or the prefix alone (PNAME_NS).
prefixedName
: PNAME_LN
| PNAME_NS
;

// Blank node: a '_:'-labelled node or the anonymous '[ ]' token.
blankNode
: BLANK_NODE_LABEL
| ANON
;

// Runs of space, tab, line feed and carriage return separate tokens and are
// discarded by the lexer.
WS
    : [\u0020\u0009\u000A\u000D]+ -> skip
    ;

// Terminals

// Keyword GRAPH, matched in any mix of upper and lower case.
Graph_w
    : [gG] [rR] [aA] [pP] [hH]
    ;

// Keyword BASE, matched in any mix of upper and lower case.
Base_w
    : [bB] [aA] [sS] [eE]
    ;

// Keyword PREFIX, matched in any mix of upper and lower case.
Prefix_w
    : [pP] [rR] [eE] [fF] [iI] [xX]
    ;

// Boolean literals; these are case-sensitive.
BooleanLiteral
    : 'false'
    | 'true'
    ;

// IRI reference in angle brackets.
//
// Content is defined by exclusion, as in the TriG/Turtle grammar: every
// character is allowed except U+0000-U+0020, '<', '>', '"', '{', '}', '|',
// '^', '`' and '\'. A backslash may only appear as part of a UCHAR escape.
// The previous whitelist omitted legal characters such as '=' and ';', so
// IRIs carrying a query string (e.g. <http://example.org/?a=b>) did not lex.
IRIREF
    : '<' ( ~[\u0000-\u0020<>"{}|^`\\] | UCHAR )* '>'
    ;

// Namespace part of a prefixed name: an optional prefix followed by ':'.
PNAME_NS
    : PN_PREFIX? ':'
    ;

// Full prefixed name: namespace part immediately followed by a local name.
PNAME_LN
    : PNAME_NS PN_LOCAL
    ;

// Labelled blank node: '_:' then a name character or digit; further
// characters may include '.', but the label cannot end with '.'.
BLANK_NODE_LABEL
    : '_:' ( PN_CHARS_U | [0-9] ) ( ( PN_CHARS | '.' )* PN_CHARS )?
    ;

// Language tag: '@', one or more letters, then any number of '-'-separated
// subtags. Each subtag must contain at least one letter or digit; the
// previous '*' quantifier wrongly accepted empty subtags such as "@en-" or
// "@en--x".
LANGTAG
    : '@' [a-zA-Z]+ ( '-' [a-zA-Z0-9]+ )*
    ;

// Optionally signed run of digits.
INTEGER
    : [+\-]? [0-9]+
    ;

// Optionally signed decimal: digits before the '.' are optional, at least
// one digit after it is required.
DECIMAL
    : [+\-]? [0-9]* '.' [0-9]+
    ;

// Optionally signed number with a mandatory exponent. The mantissa is
// "digits '.' optional-digits", "'.' digits", or plain digits.
DOUBLE
    : [+\-]?
      ( [0-9]+ '.' [0-9]* EXPONENT
      | '.' [0-9]+ EXPONENT
      | [0-9]+ EXPONENT
      )
    ;

// Exponent suffix: 'e' or 'E', optional sign, one or more digits.
EXPONENT
    : [eE] [+\-]? [0-9]+
    ;

// Short double-quoted string. Raw content may be any character except '"'
// (U+0022), '\' (U+005C), line feed (U+000A) and carriage return (U+000D);
// those must be written as ECHAR/UCHAR escapes. The previous set excluded
// U+0010 and U+0013 (decimal 10/13 mistaken for hex), which let raw line
// breaks into short strings and rejected two harmless control characters.
STRING_LITERAL_QUOTE
    : '"' ( ~[\u0022\u005C\u000A\u000D] | ECHAR | UCHAR )* '"'
    ;

// Short single-quoted string. Raw content may be any character except '\''
// (U+0027), '\' (U+005C), line feed (U+000A) and carriage return (U+000D);
// those must be written as ECHAR/UCHAR escapes. The previous set excluded
// U+0010 and U+0013 (decimal 10/13 mistaken for hex), which let raw line
// breaks into short strings and rejected two harmless control characters.
STRING_LITERAL_SINGLE_QUOTE
    : '\'' ( ~[\u0027\u005C\u000A\u000D] | ECHAR | UCHAR )* '\''
    ;

// Long (triple) single-quoted string. Each content item is an optional run of
// one or two single quotes followed by either a character other than '\''
// and '\', or an ECHAR/UCHAR escape.
STRING_LITERAL_LONG_SINGLE_QUOTE
    : '\'\'\''
      ( ( '\'' | '\'\'' )? ( ~['\\] | ECHAR | UCHAR ) )*
      '\'\'\''
    ;

// Long (triple) double-quoted string. Each content item is an optional run of
// one or two double quotes followed by either a character other than '"' and
// '\', or an ECHAR/UCHAR escape. The previous set excluded '\'' instead of
// '\', which rejected apostrophes inside """...""" and let a raw backslash
// bypass escape validation. This now mirrors STRING_LITERAL_LONG_SINGLE_QUOTE.
STRING_LITERAL_LONG_QUOTE
    : '"""'
      ( ( '"' | '""' )? ( ~["\\] | ECHAR | UCHAR ) )*
      '"""'
    ;

// Unicode escape: backslash, then 'u' with exactly four hex digits or 'U'
// with exactly eight.
UCHAR
    : '\\'
      ( 'u' HEX HEX HEX HEX
      | 'U' HEX HEX HEX HEX HEX HEX HEX HEX
      )
    ;

// Single-character string escape: backslash followed by one of
// t b n r f " ' \ . The escape letters are case-sensitive; the former
// caseInsensitive option also accepted \T, \B, \N, \R and \F, which are not
// valid escapes.
ECHAR
    : '\\' [tbnrf"'\\]
    ;

// A single whitespace character (space, tab, line feed or carriage return);
// referenced by ANON.
WHITESPACE
    : [ \t\n\r]
    ;

// Anonymous blank node: '[' and ']' with nothing but optional whitespace between.
ANON
    : '[' WHITESPACE* ']'
    ;

// Base alphabet for prefixed names and blank node labels: ASCII letters plus
// the listed Unicode ranges.
//
// The last alternative covers the supplementary planes U+10000-U+EFFFF. It
// was previously commented out because '\u10000' is not a valid escape; the
// extended '\u{...}' form expresses it. Matching these code points requires a
// code-point based input stream (e.g. one created via CharStreams).
PN_CHARS_BASE
    : 'A' .. 'Z'
    | 'a' .. 'z'
    | '\u00C0' .. '\u00D6'
    | '\u00D8' .. '\u00F6'
    | '\u00F8' .. '\u02FF'
    | '\u0370' .. '\u037D'
    | '\u037F' .. '\u1FFF'
    | '\u200C' .. '\u200D'
    | '\u2070' .. '\u218F'
    | '\u2C00' .. '\u2FEF'
    | '\u3001' .. '\uD7FF'
    | '\uF900' .. '\uFDCF'
    | '\uFDF0' .. '\uFFFD'
    | [\u{10000}-\u{EFFFF}]
    ;

// Base alphabet plus underscore.
PN_CHARS_U
    : '_'
    | PN_CHARS_BASE
    ;

// Name-continuation characters: PN_CHARS_U plus '-', ASCII digits, U+00B7,
// U+0300-U+036F and U+203F-U+2040.
PN_CHARS
    : PN_CHARS_U
    | [\-0-9\u00B7\u0300-\u036F\u203F-\u2040]
    ;

// Prefix label: starts with a base character; later characters may include
// '.', but the label cannot end with '.'.
PN_PREFIX
    : PN_CHARS_BASE ( ( PN_CHARS | '.' )* PN_CHARS )?
    ;

// Local part of a prefixed name. The first character is a PN_CHARS_U, ':',
// a digit or a PLX escape; later characters may also be other PN_CHARS or
// '.', but the name cannot end with '.'.
PN_LOCAL
    : ( PN_CHARS_U | [:0-9] | PLX )
      ( ( PN_CHARS | [.:] | PLX )* ( PN_CHARS | ':' | PLX ) )?
    ;

// Escape usable inside a local name: percent-encoding or backslash escape.
PLX
    : PERCENT
    | PN_LOCAL_ESC
    ;

// Percent-encoded octet: '%' followed by exactly two hex digits.
PERCENT
    : '%' HEX HEX
    ;

// One hexadecimal digit, either case.
HEX
    : [0-9A-Fa-f]
    ;

// Backslash escape for a reserved character inside a local name. The escaped
// character must be one of:  _ ~ . - ! $ & ' ( ) * + , ; = / ? # @ %
PN_LOCAL_ESC
    : '\\' [_~.\-!$&'()*+,;=/?#@%]
    ;

// Line comment: '#' up to (not including) the end of the line, routed to the
// hidden channel. The body uses '*' so that an empty comment (a bare '#'
// directly followed by a line break or end of input) is accepted; the
// previous '+' required at least one character after '#'.
LC
    : '#' ~[\r\n]* -> channel(HIDDEN)
    ;
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
*/
public class IRIUtils {

private static final Pattern IRI_PATTERN = Pattern.compile("^(([\\w\\-]+:\\/\\/([\\w\\-_:]+\\.)*[\\w\\-_:]*)(\\/([\\w\\-\\._\\:]+\\/)*))([\\w\\-\\._\\:]+)?(\\?[\\w\\-_\\:\\?\\=]+)?((\\#)?([\\w\\-_]+))?$");
private static final Pattern IRI_PATTERN = Pattern.compile("^(?<namespace>(?<protocol>[\\w\\-]+):(?<dblSlashes>\\/\\/)?(?<domain>([\\w\\-_:@]+\\.)*[\\w\\-_:]*))((?<path>\\/([\\w\\-\\._\\:]+\\/)*)(?<finalPath>[\\w\\-\\._\\:]+)?(?<query>\\?[\\w\\-_\\:\\?\\=]+)?(\\#)?(?<fragment>([\\w\\-_]+))?)?$");
private static final Pattern STANDARD_IRI_PATTERN = Pattern.compile("^(([^:/?#\\s]+):)(\\/\\/([^/?#\\s]*))?([^?#\\s]*)(\\?([^#\\s]*))?(#(.*))?");


/**
* Prevent instantiation of the utility class.
*/
Expand All @@ -29,15 +30,24 @@ public static String guessNamespace(String iri) {
Matcher matcher = IRI_PATTERN.matcher(iri);

if(matcher.matches()) {
if((matcher.group(8) == null) || (matcher.group(6) == null && matcher.group(9) == null) ) { // If the IRI has no fragment or ends with a slash

return matcher.group(1);
} else {
// 1: Domain and path ending with a slash, 6: final path element without slash, 9: final # if there is a fragment
return matcher.group(1) + matcher.group(6) + matcher.group(9);
if(matcher.group("protocol") != null && matcher.group("protocol").equals("_")) {
return "";
}
StringBuilder namespace = new StringBuilder();
namespace.append(matcher.group("protocol")).append(":");
if(matcher.group("dblSlashes") != null) {
namespace.append(matcher.group("dblSlashes"));
}
namespace.append(matcher.group("domain"));
if(matcher.group("path") != null) {
namespace.append(matcher.group("path"));
}
if(matcher.group("fragment") != null && matcher.group("finalPath") != null) {
namespace.append(matcher.group("finalPath")).append("#");
}
return namespace.toString();
} else {
return "";
throw new IllegalStateException("No namespace found for the given IRI: " + iri + ".");
}
} catch (IllegalStateException e) {
return "";
Expand All @@ -54,10 +64,10 @@ public static String guessLocalName(String iri) {
Matcher matcher = IRI_PATTERN.matcher(iri);

if(matcher.matches()) {
if(matcher.group(10) != null){ // If the IRI has a fragment
return matcher.group(10);
} else if(matcher.group(6) != null ) { // If the IRI has no fragment but do not ends with a slash
return matcher.group(6);
if(matcher.group("fragment") != null){ // If the IRI has a fragment
return matcher.group("fragment");
} else if(matcher.group("finalPath") != null ) { // If the IRI has no fragment but do not ends with a slash
return matcher.group("finalPath");
} else { // If the URI ends with a slash
return "";
}
Expand Down
Loading