Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ocamlformat
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version=0.27.0
version=0.28.1
profile = conventional
break-separators = after
space-around-lists = false
Expand Down
94 changes: 50 additions & 44 deletions src/syntax/ppx_sedlex.ml
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,14 @@ let best_final final =
let state_fun state = Printf.sprintf "__sedlex_state_%i" state

let call_state lexbuf auto state =
let trans, final = auto.(state) in
let { Sedlex.trans; finals } = auto.(state) in
if Array.length trans = 0 then (
match best_final final with
match best_final finals with
| Some i -> eint ~loc:default_loc i
| None -> assert false)
else appfun (state_fun state) [lexbuf]

let gen_state (lexbuf_name, lexbuf) auto i (trans, final) =
let gen_state (lexbuf_name, lexbuf) auto i { Sedlex.trans; finals } =
let loc = default_loc in
let partition = Array.map fst trans in
let cases =
Expand Down Expand Up @@ -235,7 +235,7 @@ let gen_state (lexbuf_name, lexbuf) auto i (trans, final) =
~expr:(Exp.fun_ ~loc Nolabel None lhs body);
]
in
match best_final final with
match best_final finals with
| None -> ret (body ())
| Some _ when Array.length trans = 0 -> []
| Some i ->
Expand All @@ -249,25 +249,19 @@ let gen_recflag auto =
in states with no further transitions. *)
try
Array.iter
(fun (trans_i, _) ->
(fun { Sedlex.trans } ->
Array.iter
(fun (_, j) ->
let trans_j, _ = auto.(j) in
if Array.length trans_j > 0 then raise Exit)
trans_i)
if Array.length auto.(j).Sedlex.trans > 0 then raise Exit)
trans)
auto;
Nonrecursive
with Exit -> Recursive

let gen_definition ((_, lexbuf) as lexbuf_with_name) l error =
let gen_definition ((_, lexbuf) as lexbuf_with_name) auto l error =
let loc = default_loc in
let brs = Array.of_list l in
let auto = Sedlex.compile (Array.map fst brs) in
let cases =
Array.to_list
(Array.mapi
(fun i (_, e) -> case ~lhs:(pint ~loc i) ~guard:None ~rhs:e)
brs)
List.mapi (fun i (_, e) -> case ~lhs:(pint ~loc i) ~guard:None ~rhs:e) l
in
let states = Array.mapi (gen_state lexbuf_with_name auto) auto in
let states = List.flatten (Array.to_list states) in
Expand Down Expand Up @@ -467,6 +461,45 @@ let regexp_of_pattern env =
in
aux ~encoding:Ascii

let handle_sedlex_match ~env ~map_rhs match_expr =
let lexbuf =
match match_expr with
| { pexp_desc = Pexp_match (lexbuf, _) } -> (
match lexbuf with
| { pexp_desc = Pexp_ident { txt = Lident txt } } -> (txt, lexbuf)
| _ ->
err lexbuf.pexp_loc
"the matched expression must be a single identifier")
| _ ->
err match_expr.pexp_loc
"the %%sedlex extension is only recognized on match expressions"
in
let cases =
match match_expr with
| { pexp_desc = Pexp_match (_, cases) } -> cases
| _ -> assert false
in
let cases = List.rev cases in
let error =
match List.hd cases with
| { pc_lhs = [%pat? _]; pc_rhs = e; pc_guard = None } -> map_rhs e
| { pc_lhs = p } ->
err p.ppat_loc "the last branch must be a catch-all error case"
in
let cases = List.rev (List.tl cases) in
let cases =
List.map
(function
| { pc_lhs = p; pc_rhs = e; pc_guard = None } ->
(regexp_of_pattern env p, map_rhs e)
| { pc_guard = Some e } ->
err e.pexp_loc "'when' guards are not supported")
cases
in
let brs = Array.of_list cases in
let auto = Sedlex.compile (Array.map fst brs) in
(gen_definition lexbuf auto cases error, auto)

let previous = ref []
let regexps = ref []
let should_set_cookies = ref false
Expand All @@ -481,35 +514,8 @@ let mapper =

method! expression e =
match e with
| [%expr [%sedlex [%e? { pexp_desc = Pexp_match (lexbuf, cases) }]]] ->
let lexbuf =
match lexbuf with
| { pexp_desc = Pexp_ident { txt = Lident txt } } ->
(txt, lexbuf)
| _ ->
err lexbuf.pexp_loc
"the matched expression must be a single identifier"
in
let cases = List.rev cases in
let error =
match List.hd cases with
| { pc_lhs = [%pat? _]; pc_rhs = e; pc_guard = None } ->
this#expression e
| { pc_lhs = p } ->
err p.ppat_loc
"the last branch must be a catch-all error case"
in
let cases = List.rev (List.tl cases) in
let cases =
List.map
(function
| { pc_lhs = p; pc_rhs = e; pc_guard = None } ->
(regexp_of_pattern env p, this#expression e)
| { pc_guard = Some e } ->
err e.pexp_loc "'when' guards are not supported")
cases
in
gen_definition lexbuf cases error
| [%expr [%sedlex [%e? { pexp_desc = Pexp_match _ } as match_expr]]] ->
fst (handle_sedlex_match ~env ~map_rhs:this#expression match_expr)
| [%expr
let [%p? { ppat_desc = Ppat_var { txt = name } }] =
[%sedlex.regexp? [%p? p]]
Expand Down
58 changes: 57 additions & 1 deletion src/syntax/sedlex.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ let transition (state : state) =
Array.sort (fun (c1, _) (c2, _) -> compare c1 c2) t;
t

type dfa_state = { trans : (Cset.t * int) array; finals : bool array }
type dfa = dfa_state array

let compile rs =
let rs = Array.map compile_re rs in
let counter = ref 0 in
Expand All @@ -131,11 +134,64 @@ let compile rs =
let trans = transition state in
let trans = Array.map (fun (p, t) -> (p, aux t)) trans in
let finals = Array.map (fun (_, f) -> List.memq f state) rs in
Hashtbl.add states_def i (trans, finals);
Hashtbl.add states_def i { trans; finals };
i
in
let init = ref [] in
Array.iter (fun (i, _) -> init := add_node !init i) rs;
let i = aux !init in
assert (i = 0);
Array.init !counter (Hashtbl.find states_def)

let cset_to_label cset =
let escape_dot c =
match c with
| '"' -> "\\\""
| '\\' -> "\\\\"
| '<' -> "\\<"
| '>' -> "\\>"
| _ -> String.make 1 c
in
let format_interval (lo, hi) =
if lo = -1 && hi = -1 then "EOF"
else if lo = hi then
if lo >= 32 && lo <= 126 then "'" ^ escape_dot (Char.chr lo) ^ "'"
else Printf.sprintf "U+%04X" lo
else if lo >= 32 && lo <= 126 && hi >= 32 && hi <= 126 then
"'" ^ escape_dot (Char.chr lo) ^ "'-'" ^ escape_dot (Char.chr hi) ^ "'"
else Printf.sprintf "U+%04X-U+%04X" lo hi
in
String.concat ", "
(List.map format_interval (cset : Cset.t :> (int * int) list))

let dfa_to_dot dfa =
let buf = Buffer.create 1024 in
let bprintf = Printf.bprintf in
bprintf buf "digraph {\n";
bprintf buf " rankdir=LR;\n";
bprintf buf " node [shape=circle];\n\n";
bprintf buf " _start [shape=point];\n";
bprintf buf " _start -> state0;\n\n";
Array.iteri
(fun i { trans; finals } ->
let accepted =
let acc = ref [] in
for r = Array.length finals - 1 downto 0 do
if finals.(r) then acc := r :: !acc
done;
!acc
in
(match accepted with
| [] -> bprintf buf " state%d [label=\"%d\"];\n" i i
| rules ->
bprintf buf
" state%d [label=\"%d\\n[rule %s]\", shape=doublecircle];\n" i i
(String.concat "," (List.map string_of_int rules)));
Array.iter
(fun (cset, target) ->
let label = cset_to_label cset in
bprintf buf " state%d -> state%d [label=\"%s\"];\n" i target label)
trans)
dfa;
bprintf buf "}\n";
Buffer.contents buf
6 changes: 5 additions & 1 deletion src/syntax/sedlex.mli
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,8 @@ val intersection : regexp -> regexp -> regexp option
(* If each argument is a single [chars] regexp, returns a regexp
which matches the intersection set. Otherwise returns [None]. *)

val compile : regexp array -> ((Sedlex_cset.t * int) array * bool array) array
type dfa_state = { trans : (Sedlex_cset.t * int) array; finals : bool array }
type dfa = dfa_state array

val compile : regexp array -> dfa
val dfa_to_dot : dfa -> string
8 changes: 8 additions & 0 deletions test/codegen/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(library
(name sedlex_gen_test)
(libraries sedlex)
(inline_tests)
(enabled_if
(>= %{ocaml_version} 4.14))
(preprocess
(pps ppx_sedlex_test ppx_expect)))
120 changes: 120 additions & 0 deletions test/codegen/test_gen.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
let%expect_test "simple string match" =
(match%sedlex_test buf with "ab" | "de" -> () | _ -> ());
[%expect
{|
DOT:
digraph {
rankdir=LR;
node [shape=circle];

_start [shape=point];
_start -> state0;

state0 [label="0"];
state0 -> state1 [label="'a'"];
state0 -> state3 [label="'d'"];
state1 [label="1"];
state1 -> state2 [label="'b'"];
state2 [label="2\n[rule 0]", shape=doublecircle];
state3 [label="3"];
state3 -> state2 [label="'e'"];
}
CODE:
let rec __sedlex_state_0 buf =
match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with
| 0 -> __sedlex_state_1 buf
| 1 -> __sedlex_state_3 buf
| _ -> Sedlexing.backtrack buf
and __sedlex_state_1 buf =
match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with
| 0 -> 0
| _ -> Sedlexing.backtrack buf
and __sedlex_state_3 buf =
match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with
| 0 -> 0
| _ -> Sedlexing.backtrack buf in
Sedlexing.start buf; (match __sedlex_state_0 buf with | 0 -> () | _ -> ())
|}]

let%expect_test "character class" =
(match%sedlex_test buf with Plus 'a' .. 'z' -> () | _ -> ());
[%expect
{|
DOT:
digraph {
rankdir=LR;
node [shape=circle];

_start [shape=point];
_start -> state0;

state0 [label="0"];
state0 -> state1 [label="'a'-'z'"];
state1 [label="1\n[rule 0]", shape=doublecircle];
state1 -> state1 [label="'a'-'z'"];
}
CODE:
let rec __sedlex_state_0 buf =
match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with
| 0 -> __sedlex_state_1 buf
| _ -> Sedlexing.backtrack buf
and __sedlex_state_1 buf =
Sedlexing.mark buf 0;
(match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with
| 0 -> __sedlex_state_1 buf
| _ -> Sedlexing.backtrack buf) in
Sedlexing.start buf; (match __sedlex_state_0 buf with | 0 -> () | _ -> ())
|}]

let%expect_test "multi-rule" =
(match%sedlex_test buf with
| "ab" -> ()
| "de" -> ()
| Plus '0' .. '9' -> ()
| _ -> ());
[%expect
{|
DOT:
digraph {
rankdir=LR;
node [shape=circle];

_start [shape=point];
_start -> state0;

state0 [label="0"];
state0 -> state1 [label="'0'-'9'"];
state0 -> state2 [label="'a'"];
state0 -> state4 [label="'d'"];
state1 [label="1\n[rule 2]", shape=doublecircle];
state1 -> state1 [label="'0'-'9'"];
state2 [label="2"];
state2 -> state3 [label="'b'"];
state3 [label="3\n[rule 0]", shape=doublecircle];
state4 [label="4"];
state4 -> state5 [label="'e'"];
state5 [label="5\n[rule 1]", shape=doublecircle];
}
CODE:
let rec __sedlex_state_0 buf =
match __sedlex_partition_1 (Sedlexing.__private__next_int buf) with
| 0 -> __sedlex_state_1 buf
| 1 -> __sedlex_state_2 buf
| 2 -> __sedlex_state_4 buf
| _ -> Sedlexing.backtrack buf
and __sedlex_state_1 buf =
Sedlexing.mark buf 2;
(match __sedlex_partition_2 (Sedlexing.__private__next_int buf) with
| 0 -> __sedlex_state_1 buf
| _ -> Sedlexing.backtrack buf)
and __sedlex_state_2 buf =
match __sedlex_partition_3 (Sedlexing.__private__next_int buf) with
| 0 -> 0
| _ -> Sedlexing.backtrack buf
and __sedlex_state_4 buf =
match __sedlex_partition_4 (Sedlexing.__private__next_int buf) with
| 0 -> 1
| _ -> Sedlexing.backtrack buf in
Sedlexing.start buf;
(match __sedlex_state_0 buf with | 0 -> () | 1 -> () | 2 -> () | _ -> ())
|}]
6 changes: 6 additions & 0 deletions test/ppx_test/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
(library
(name ppx_sedlex_test)
(kind ppx_rewriter)
(libraries ppxlib sedlex_ppx)
(preprocess
(pps ppxlib.metaquot)))
Loading
Loading