Add shortest match support via match%sedlex.shortest#180
Conversation
156b2e6 to
074be93
Compare
bdfa1ab to
356a9f3
Compare
|
I've reworked the implementation to be much more self-contained. let dfa = ... in
if shortest then (
(* Collect reachable states, stripping transitions from accepting ones *)
let n = Array.length dfa in
let remap = Array.make n (-1) in
let order = Array.make n 0 in
let next = ref 0 in
let rec mark i =
if remap.(i) = -1 then (
let j = !next in
remap.(i) <- j;
order.(j) <- i;
incr next;
let st = dfa.(i) in
if not (Array.exists Fun.id st.finals) then
Array.iter (fun (_, t) -> mark t) st.trans)
in
mark 0;
Array.init !next (fun j ->
let st = dfa.(order.(j)) in
if Array.exists Fun.id st.finals then { st with trans = [||] }
else { st with trans = Array.map (fun (c, t) -> (c, remap.(t))) st.trans }))
else dfa |
In shortest mode, the lexer returns as soon as any rule matches rather than continuing to find the longest match. This is purely a code generation change — the DFA is identical, but final states return immediately instead of calling mark/backtrack. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
|
@toots, the last implementation should be relatively easy to review, if you want to give it a try. |
toots
left a comment
There was a problem hiding this comment.
One request to factor out a function; the rest looks good based on the plumbing and tests. The function to factor out is really the critical part, so let's make it clear. Thanks!
| if shortest then ( | ||
| (* Collect reachable states, stripping transitions from accepting ones *) | ||
| let n = Array.length dfa in | ||
| let remap = Array.make n (-1) in | ||
| let order = Array.make n 0 in | ||
| let next = ref 0 in | ||
| let rec mark i = | ||
| if remap.(i) = -1 then ( | ||
| let j = !next in | ||
| remap.(i) <- j; | ||
| order.(j) <- i; | ||
| incr next; | ||
| let st = dfa.(i) in | ||
| if not (Array.exists Fun.id st.finals) then | ||
| Array.iter (fun (_, t) -> mark t) st.trans) | ||
| in | ||
| mark 0; | ||
| Array.init !next (fun j -> | ||
| let st = dfa.(order.(j)) in | ||
| if Array.exists Fun.id st.finals then { st with trans = [||] } | ||
| else | ||
| { st with trans = Array.map (fun (c, t) -> (c, remap.(t))) st.trans })) |
There was a problem hiding this comment.
Could you extract this as a separate function so we can reason about or change the implementation if it is ever needed?
|
I'll hold onto this PR until we decide what to do with https://github.com/ocaml-community/sedlex/pull/188/changes, because we would require a different implementation. |
|
@pmetzger I need admin rights to set up automerge: |

In shortest mode, the lexer returns as soon as any rule matches rather than continuing to find the longest match. This is purely a code generation change — the DFA is identical, but final states return immediately instead of calling mark/backtrack.