Thank you so much for leac, its API is very elegant and easy to use.
I'm creating the new assignments for the compilers class at the College of Engineering of UBA (University of Buenos Aires). The students might use the following Lua lexer, which I want to share with you in case you find it useful. If I find the time, I'll write some more tests and add an example in a PR.
import { createLexer, Rule, Rules } from "https://deno.land/x/leac/leac.ts";
/**
 * Punctuation tokens of Lua that are not operators.
 *
 * Every entry is turned into a lexer rule named after the literal itself
 * (see `simpleRules`). The relative order of the entries is significant to
 * that consumer and is therefore kept exactly as declared.
 */
const symbols = [
  // statement separator, assignment, list separator
  ";", "=", ",",
  // label delimiter and field access
  "::", ".",
  // indexing
  "[", "]",
  // varargs
  "...",
  // grouping and calls
  "(", ")",
  // method access
  ":",
  // table constructors
  "{", "}",
];
/**
 * Reserved words of Lua that are lexed as keywords.
 *
 * The word operators `and`, `or` and `not` are intentionally absent: they are
 * listed in `ops`. Each keyword appears exactly once (the original list
 * contained `do` twice, which produced a redundant duplicate rule).
 *
 * `elseif` is kept ahead of `else` so that a consumer trying the entries in
 * order sees the longer word first.
 */
const keywords = [
  // control flow
  "break",
  "goto",
  // blocks and loops
  "do",
  "end",
  "while",
  "repeat",
  "until",
  // conditionals
  "if",
  "then",
  "elseif",
  "else",
  // iteration
  "for",
  "in",
  // declarations
  "function",
  "local",
  "return",
  // literal constants
  "nil",
  "false",
  "true",
];
/**
 * Operator tokens of Lua, including the word operators `and`, `or`, `not`.
 *
 * Every entry is turned into a lexer rule named after the literal itself
 * (see `simpleRules`). The relative order of the entries is significant to
 * that consumer and is therefore kept exactly as declared.
 */
const ops = [
  // arithmetic
  "+", "-", "*", "/", "//", "^", "%",
  // bitwise
  "&", "~", "|", ">>", "<<",
  // string concatenation
  "..",
  // relational
  "<", "<=", ">", ">=", "==", "~=",
  // logical connectives
  "and", "or",
  // length
  "#",
  // logical negation
  "not",
];
/**
 * Sub-lexer used for the inside of a double-quoted string literal.
 *
 * It knows two tokens:
 *  - `stringContent`: a run of backslash escapes (one of the listed escape
 *    characters, or `\u` followed by four hex digits) and of any characters
 *    other than `"`, backslash and newline;
 *  - `LiteralStringEnd`: the closing `"`, which is discarded and carries the
 *    `pop` flag to leave this sub-lexer.
 *
 * NOTE(review): the accepted escapes do not cover everything Lua allows
 * (e.g. `\'`, `\xXX`, `\ddd`, `\u{XXX}`) — confirm whether that is intended.
 */
const doublequoteStringLexer = createLexer([
  { name: "stringContent", regex: /(?:\\["abfnrtv/\\nz]|\\u[a-fA-F0-9]{4}|[^"\\\n])*/ },
  { name: "LiteralStringEnd", str: '"', discard: true, pop: true },
]);
/**
 * Sub-lexer used for the inside of a single-quoted string literal.
 *
 * It knows two tokens:
 *  - `stringContent`: a run of backslash escapes (one of the listed escape
 *    characters, or `\u` followed by four hex digits) and of any characters
 *    other than `'`, backslash and newline;
 *  - `LiteralStringEnd`: the closing `'`, which is discarded and carries the
 *    `pop` flag to leave this sub-lexer.
 *
 * NOTE(review): the accepted escapes do not cover everything Lua allows
 * (e.g. `\"`, `\xXX`, `\ddd`, `\u{XXX}`) — confirm whether that is intended.
 */
const singlequoteStringLexer = createLexer([
  { name: "stringContent", regex: /(?:\\['abfnrtv/\\nz]|\\u[a-fA-F0-9]{4}|[^'\\\n])*/ },
  { name: "LiteralStringEnd", str: "'", discard: true, pop: true },
]);
/**
 * Builds the regex alternative that accepts, inside a long bracket of the
 * given level, a `]` followed by FEWER than `level` equal signs and then
 * another `]` — i.e. something that looks like a closing long bracket but is
 * too short to actually close this one.
 *
 * The second `]` is only looked at (lookahead), never consumed: it may itself
 * be the first character of the real closing bracket. The previous special
 * case for level 1 consumed both brackets of `]]`, so in `[=[a]]=]` the
 * terminator `]=]` was swallowed into the content.
 *
 * @param level Number of `=` signs in the enclosing long bracket.
 * @returns A regex source fragment starting with `|`, or `""` when there is
 *          no shorter closing bracket to worry about (level 0 or less).
 */
function smallerCaseRegexpPart(level: number): string {
  if (level <= 0) {
    return "";
  }
  // For level 1 this yields `\]={0,0}(?=\])`, i.e. a `]` directly followed by `]`.
  return `|\\]={0,${level - 1}}(?=\\])`;
}
/**
 * Builds the regular expression matching the body of a long bracket
 * (long string or long comment) of the given level, stopping in front of the
 * closing bracket of that exact level.
 *
 * The body is one or more repetitions of:
 *  - any character other than `]`;
 *  - the alternative produced by `smallerCaseRegexpPart(level)`;
 *  - a `]` followed by MORE than `level` equal signs and then (lookahead) `]`;
 *  - a `]`, any number of `=`, and one character that is neither `]` nor `=`.
 *
 * @param level Number of `=` signs in the enclosing long bracket.
 * @returns The assembled `RegExp`, created with the `m` flag.
 */
function regexNoLongBrackets(level: number) {
  const anythingButBracket = "[^\\]]";
  const tooFewEquals = smallerCaseRegexpPart(level);
  const tooManyEquals = "|\\]={" + (level + 1) + ",}(?=\\])";
  const bracketNotClosed = "|\\]=*[^\\]=]";

  const body = anythingButBracket + tooFewEquals + tooManyEquals + bracketNotClosed;
  return new RegExp("(" + body + ")+", "m");
}
/**
 * Creates the rule that opens a long literal string of the given level
 * (`[[`, `[=[`, `[==[`, ...) and pushes a sub-lexer for its body.
 *
 * The sub-lexer emits the body as a `stringContent` token, like the
 * short-string sub-lexers do; the body was previously marked `discard`, which
 * made long strings vanish from the token stream entirely. The opening and
 * closing brackets are discarded; the closing bracket carries the `pop` flag.
 *
 * @param level Number of `=` signs between the brackets.
 * @returns A rule object named `LiteralStringBegin`.
 */
function longLiteralStringRule(level: number) {
  const equalSigns = "=".repeat(level);
  const lexer = createLexer([
    // Tried first so that the body rule never has to reason about the real terminator.
    {
      name: "LiteralStringEnd",
      str: "]" + equalSigns + "]",
      discard: true,
      pop: true,
    },
    {
      name: "stringContent",
      regex: regexNoLongBrackets(level),
    },
  ]);
  return {
    name: "LiteralStringBegin",
    str: "[" + equalSigns + "[",
    push: lexer,
    discard: true,
  };
}
/**
 * Rules for every fixed literal (operators, keywords, symbols); each rule is
 * named after the literal it matches.
 *
 * Three things are done so the rules behave well when they are tried in the
 * listed order (first match wins — assumed leac behaviour, confirm against
 * its documentation):
 *  - duplicates are dropped, so no literal gets two rules;
 *  - literals are ordered longest first, so `...` is tried before `..` and
 *    `.`, `//` before `/`, `<=` before `<`, `~=` before `~`, and so on
 *    (the sort is stable, so equal-length literals keep their relative order);
 *  - word-like literals (`in`, `do`, `and`, ...) become regex rules with a
 *    negative lookahead, so they no longer match the beginning of an
 *    identifier such as `index` or `done`.
 */
const simpleRules = [...new Set([...ops, ...keywords, ...symbols])]
  .sort((a, b) => b.length - a.length)
  .map((literal) =>
    /^[a-zA-Z_][a-zA-Z_0-9]*$/.test(literal)
      // Identifier-shaped literals need no regex escaping.
      ? { name: literal, regex: new RegExp(`${literal}(?![a-zA-Z_0-9])`) }
      : { name: literal }
  );
/**
 * Creates the rule that opens a long comment of the given level
 * (`--[[`, `--[=[`, `--[==[`, ...) and pushes a sub-lexer for its body.
 *
 * Everything belonging to the comment — opener, body and closer — is
 * discarded. The closing bracket carries the `pop` flag.
 *
 * @param level Number of `=` signs between the brackets.
 * @returns A rule object named `longCommentBegin`.
 */
function longCommentRule(level: number) {
  const fill = "=".repeat(level);
  const opener = "--[" + fill + "[";
  const closer = "]" + fill + "]";

  const bodyLexer = createLexer([
    // The closer is listed ahead of the body rule.
    { name: "longCommentEnd", str: closer, discard: true, pop: true },
    { name: "commentContent", regex: regexNoLongBrackets(level), discard: true },
  ]);

  return { name: "longCommentBegin", str: opener, push: bodyLexer, discard: true };
}
/**
 * Top-level lexer for Lua source text.
 *
 * Rule order matters, assuming leac tries rules in the listed order (first
 * match wins — confirm against its documentation):
 *  1. whitespace (discarded);
 *  2. long comments of level 0..99, ahead of short comments so `--[[` is not
 *     taken for a one-line comment;
 *  3. short comments, ahead of the `-` operator;
 *  4. long strings of level 0..99 and quoted strings, ahead of the `[` symbol
 *     so `[[` can open a long string;
 *  5. fixed literals (`simpleRules`), ahead of `Name`;
 *  6. `Name` and `Numeral`.
 */
export const lex = createLexer(
  [
    {
      name: "ws",
      regex: /\s+/,
      discard: true,
    },
    ...Array.from({ length: 100 }, (_value, level) => longCommentRule(level)),
    {
      name: "shortComment",
      // No trailing "\n" is required, so a comment on the last line of the
      // input (or before "\r\n") still matches; the line break is left to "ws".
      regex: /--.*/,
      discard: true,
    },
    ...Array.from({ length: 100 }, (_value, level) => longLiteralStringRule(level)),
    {
      name: "LiteralStringBegin",
      str: '"',
      push: doublequoteStringLexer,
      discard: true,
    },
    {
      name: "LiteralStringBegin",
      // Was '"', which made this rule unreachable and left single-quoted strings unlexed.
      str: "'",
      push: singlequoteStringLexer,
      discard: true,
    },
    ...simpleRules,
    {
      name: "Name",
      regex: /[a-zA-Z_][a-zA-Z_0-9]*/,
    },
    {
      name: "Numeral",
      regex: /[0-9]*\.?[0-9]+/,
    },
  ],
);
Thank you so much for leac, its API is very elegant and easy to use.
I'm creating the new assignments for the compilers class at the College of Engineering of UBA (University of Buenos Aires). The students might use the following Lua lexer, which I want to share with you in case you find it useful. If I find the time, I'll write some more tests and add an example in a PR.