Skip to content

Lua lexer #3

@jisbruzzi

Description

@jisbruzzi

Thank you so much for leac; its API is very elegant and easy to use.

I'm creating the new assignments for the compilers class at the College of Engineering of UBA (University of Buenos Aires). The students might use the following Lua lexer, which I want to share with you in case you find it useful. If I find the time, I'll write some more tests and add an example in a PR.

import { createLexer, Rule, Rules } from "https://deno.land/x/leac/leac.ts";

/**
 * Punctuation literals of the Lua lexer that are not operators.
 * Each entry becomes one literal rule (see `simpleRules`); the array order
 * is the order in which those rules are handed to the lexer.
 */
const symbols = [
  // separators and assignment
  ";", "=", ",",
  // label delimiter and field access
  "::", ".",
  // square brackets
  "[", "]",
  // vararg
  "...",
  // parentheses
  "(", ")",
  // single colon
  ":",
  // braces
  "{", "}",
];

/**
 * Reserved words of Lua that are lexed as keyword tokens.
 * The word-like operators `and`, `or` and `not` are listed in `ops` instead.
 * Every word appears exactly once so that no duplicate rule is generated.
 */
const keywords = [
  // control flow
  "break", "goto", "do", "end",
  // loops
  "while", "repeat", "until",
  // conditionals
  "if", "then", "elseif", "else",
  // iteration
  "for", "in",
  // declarations
  "function", "local", "return",
  // literal constants
  "nil", "false", "true",
];

/**
 * Operator literals of the Lua lexer, including the word-like operators
 * `and`, `or` and `not`. Each entry becomes one literal rule (see
 * `simpleRules`); the array order is preserved when the rules are built.
 */
const ops = [
  // arithmetic
  "+", "-", "*", "/", "//", "^", "%",
  // bitwise
  "&", "~", "|", ">>", "<<",
  // concatenation
  "..",
  // comparison
  "<", "<=", ">", ">=", "==", "~=",
  // logical
  "and", "or",
  // length
  "#",
  // logical negation
  "not",
];
/**
 * Sub-lexer for the body of a double-quoted string. It is pushed by the
 * main lexer's `"` rule and has two rules:
 *
 * - `stringContent`: a run of escape sequences and ordinary characters.
 *   Ordinary characters are anything except `"`, a backslash or a raw
 *   newline. Accepted escapes are the single-character ones
 *   (`\a \b \f \n \r \t \v \\ \" \' \/`), `\z` with any following
 *   whitespace, backslash-newline, `\xXX`, decimal `\d`..`\ddd`,
 *   `\u{XXX}` and the four-digit `\uXXXX` form that was already accepted.
 * - `LiteralStringEnd`: the closing `"`, discarded, carrying `pop: true`.
 */
const doublequoteStringLexer = createLexer([
  {
    name: "stringContent",
    regex:
      /(?:\\["'abfnrtv/\\]|\\z\s*|\\\r?\n|\\x[a-fA-F0-9]{2}|\\[0-9]{1,3}|\\u\{[a-fA-F0-9]+\}|\\u[a-fA-F0-9]{4}|[^"\\\n])*/,
  },
  {
    name: "LiteralStringEnd",
    str: '"',
    pop: true,
    discard: true,
  },
]);
/**
 * Sub-lexer for the body of a single-quoted string. It has two rules:
 *
 * - `stringContent`: a run of escape sequences and ordinary characters.
 *   Ordinary characters are anything except `'`, a backslash or a raw
 *   newline. Accepted escapes are the single-character ones
 *   (`\a \b \f \n \r \t \v \\ \" \' \/`), `\z` with any following
 *   whitespace, backslash-newline, `\xXX`, decimal `\d`..`\ddd`,
 *   `\u{XXX}` and the four-digit `\uXXXX` form that was already accepted.
 * - `LiteralStringEnd`: the closing `'`, discarded, carrying `pop: true`.
 */
const singlequoteStringLexer = createLexer([
  {
    name: "stringContent",
    regex:
      /(?:\\["'abfnrtv/\\]|\\z\s*|\\\r?\n|\\x[a-fA-F0-9]{2}|\\[0-9]{1,3}|\\u\{[a-fA-F0-9]+\}|\\u[a-fA-F0-9]{4}|[^'\\\n])*/,
  },
  {
    name: "LiteralStringEnd",
    str: "'",
    pop: true,
    discard: true,
  },
]);
/**
 * Builds the regex alternative (with a leading `|`) that matches a `]`
 * followed by FEWER than `level` equal signs and then another `]` — i.e. the
 * start of a closing long bracket of a lower level, which is ordinary
 * content inside a level-`level` long bracket.
 *
 * The second `]` is only looked ahead at, never consumed: it may itself be
 * the first character of the real closing bracket (e.g. `]]=]` inside a
 * level-1 string is the content `]` followed by the closer `]=]`).
 *
 * @param level Number of `=` signs of the enclosing long bracket.
 * @returns The alternative as regex source, or `""` when there is no lower
 *          level (level 0 or below).
 */
function smallerCaseRegexpPart(level: number) {
  if (level <= 0) {
    return "";
  }
  // For level 1 this is `]={0,0}(?=])`, i.e. a `]` directly followed by `]`.
  return `|\\]={0,${level - 1}}(?=\\])`;
}
/**
 * Builds the regex that matches a non-empty run of content inside a long
 * bracket of the given level, stopping before anything that could be the
 * closing bracket `]` + `level` equal signs + `]`.
 *
 * One repetition of the group matches one of:
 * - a single character other than `]`;
 * - the alternative returned by `smallerCaseRegexpPart(level)`;
 * - `]` followed by more than `level` equal signs with a `]` right after
 *   (looked ahead, not consumed);
 * - `]`, any number of `=`, then a character that is neither `]` nor `=`.
 *
 * @param level Number of `=` signs of the enclosing long bracket.
 */
function regexNoLongBrackets(level: number) {
  const pattern =
    "([^\\]]" +
    smallerCaseRegexpPart(level) +
    "|\\]={" + (level + 1) + ",}(?=\\])" +
    "|\\]=*[^\\]=]" +
    ")+";
  return new RegExp(pattern, "m");
}
/**
 * Creates the main-lexer rule that opens a long literal string of the given
 * level (`[[`, `[=[`, `[==[`, ...).
 *
 * The returned rule is discarded itself and pushes a dedicated sub-lexer
 * with two rules:
 * - `LiteralStringEnd`: the matching closing bracket, discarded, `pop: true`;
 * - `stringContent`: the text in between. It is NOT discarded, so long
 *   strings produce a `stringContent` token just like quoted strings do.
 *
 * @param level Number of `=` signs between the two square brackets.
 */
function longLiteralStringRule(level: number) {
  const equalSigns = "=".repeat(level);
  const bodyLexer = createLexer([
    {
      name: "LiteralStringEnd",
      str: `]${equalSigns}]`,
      discard: true,
      pop: true,
    },
    {
      name: "stringContent",
      regex: regexNoLongBrackets(level),
    },
  ]);
  return {
    name: "LiteralStringBegin",
    str: `[${equalSigns}[`,
    push: bodyLexer,
    discard: true,
  };
}
/**
 * Literal rules for every operator, keyword and symbol; the token name is
 * the literal itself.
 *
 * - Duplicated literals are collapsed to a single rule.
 * - Longer literals come first (the sort is stable, so equal-length literals
 *   keep their original relative order). NOTE(review): this assumes leac
 *   tries rules in array order and takes the first match, in which case `/`
 *   would otherwise shadow `//`, `..` would shadow `...`, etc. — confirm
 *   against the leac docs. Under longest-match semantics the order is neutral.
 * - Word-like literals (`do`, `or`, `not`, ...) are emitted as regex rules
 *   with a trailing negative lookahead, so they do not match the beginning
 *   of a longer identifier such as `dog` or `order`.
 */
const simpleRules = [...new Set([...ops, ...keywords, ...symbols])]
  .sort((a, b) => b.length - a.length)
  .map((literal) =>
    /^[A-Za-z_]\w*$/.test(literal)
      ? { name: literal, regex: new RegExp(`${literal}(?![a-zA-Z_0-9])`) }
      : { name: literal }
  );
/**
 * Creates the main-lexer rule that opens a long comment of the given level
 * (`--[[`, `--[=[`, `--[==[`, ...).
 *
 * The returned rule is discarded and pushes a sub-lexer whose two rules —
 * the matching closing bracket (`pop: true`) and the comment text — are both
 * discarded as well, so a long comment contributes no tokens.
 *
 * @param level Number of `=` signs between the two square brackets.
 */
function longCommentRule(level: number) {
  const equalSigns = "=".repeat(level);
  const closingRule = {
    name: "longCommentEnd",
    str: `]${equalSigns}]`,
    discard: true,
    pop: true,
  };
  const contentRule = {
    name: "commentContent",
    regex: regexNoLongBrackets(level),
    discard: true,
  };
  const commentLexer = createLexer([closingRule, contentRule]);
  return {
    name: "longCommentBegin",
    str: `--[${equalSigns}[`,
    push: commentLexer,
    discard: true,
  };
}
/** Number of long-bracket levels (`[[`, `[=[`, ... ) that get a dedicated rule. */
const LONG_BRACKET_LEVELS = 100;

/**
 * The Lua lexer. Rules, in order:
 *
 * 1. `ws` — whitespace, discarded.
 * 2. Long comments `--[[ ... ]]` for levels 0..LONG_BRACKET_LEVELS-1, discarded.
 * 3. `shortComment` — `--` up to (not including) the end of the line,
 *    discarded. The newline is left to `ws`, so a comment on the last line
 *    of the input or before a CRLF line ending is accepted too.
 * 4. Long literal strings `[[ ... ]]` for the same levels.
 * 5. `Numeral` — decimal digits with an optional fractional part. Hex and
 *    exponent forms are not covered by this regex.
 * 6. Operators, keywords and symbols (`simpleRules`).
 * 7. `Name` — identifiers.
 * 8. Quoted strings: `"` pushes `doublequoteStringLexer`, `'` pushes
 *    `singlequoteStringLexer`.
 *
 * NOTE(review): long-string openers and `Numeral` are listed before
 * `simpleRules` so that `[[` and `.5` are not captured by the `[` and `.`
 * symbol rules. This matters if leac picks the first matching rule in array
 * order — confirm against the leac docs. Wherever they match, these rules
 * match strictly more text than the symbol rules, so the placement is
 * neutral under longest-match semantics.
 */
export const lex = createLexer(
  [
    {
      name: "ws",
      regex: /\s+/,
      discard: true,
    },
    ...Array.from(
      { length: LONG_BRACKET_LEVELS },
      (_unused, level) => longCommentRule(level),
    ),
    {
      name: "shortComment",
      regex: /--[^\n]*/,
      discard: true,
    },
    ...Array.from(
      { length: LONG_BRACKET_LEVELS },
      (_unused, level) => longLiteralStringRule(level),
    ),
    {
      name: "Numeral",
      regex: /[0-9]*\.?[0-9]+/,
    },
    ...simpleRules,
    {
      name: "Name",
      regex: /[a-zA-Z_][a-zA-Z_0-9]*/,
    },
    {
      name: "LiteralStringBegin",
      str: '"',
      push: doublequoteStringLexer,
      discard: true,
    },
    {
      name: "LiteralStringBegin",
      // Was '"', which made the single-quote sub-lexer unreachable.
      str: "'",
      push: singlequoteStringLexer,
      discard: true,
    },
  ],
);

Metadata

Metadata

Assignees

No one assigned

    Labels

    example — User-provided demo

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions