diff --git a/src/compiler/ir.ml b/src/compiler/ir.ml index 1c04e80..03f6c7a 100644 --- a/src/compiler/ir.ml +++ b/src/compiler/ir.ml @@ -78,12 +78,21 @@ let rep t n m = | Ok t -> Ok (Rep (t, n, m))) let seq a b = - match (a, b) with - | Eps, x | x, Eps -> x - | Seq l1, Seq l2 -> Seq (l1 @ l2) - | Seq l1, x -> Seq (l1 @ [x]) - | x, Seq l2 -> Seq (x :: l2) - | _ -> Seq [a; b] + let an = capture_names a in + let bn = capture_names b in + let dups = SSet.inter an bn in + if not (SSet.is_empty dups) then + Error + (Printf.sprintf "'as' binding '%s' is bound in multiple positions" + (SSet.choose dups)) + else + Ok + (match (a, b) with + | Eps, x | x, Eps -> x + | Seq l1, Seq l2 -> Seq (l1 @ l2) + | Seq l1, x -> Seq (l1 @ [x]) + | x, Seq l2 -> Seq (x :: l2) + | _ -> Seq [a; b]) let alt a b = let an = capture_names a in @@ -110,7 +119,12 @@ let check_invariant t = SSet.add name names | Seq elems -> assert (List.length elems >= 2); - List.fold_left (fun acc x -> SSet.union acc (check x)) SSet.empty elems + List.fold_left + (fun acc x -> + let names = check x in + assert (SSet.is_empty (SSet.inter acc names)); + SSet.union acc names) + SSet.empty elems | Alt [] | Alt [_] -> assert false | Alt (first :: rest) -> let names = check first in diff --git a/src/compiler/ir.mli b/src/compiler/ir.mli index 4a498b4..2722874 100644 --- a/src/compiler/ir.mli +++ b/src/compiler/ir.mli @@ -35,10 +35,11 @@ type t = Constructors that enforce structural invariants return [result]. - {!alt} checks name consistency across branches. - {!capture} checks for shadowed inner bindings. + - {!seq} rejects duplicate capture names across elements. - {!star}, {!plus}, {!rep} reject inner captures. *) val chars : Cset.t -> t -val seq : t -> t -> t +val seq : t -> t -> (t, string) result val alt : t -> t -> (t, string) result val star : t -> (t, string) result val plus : t -> (t, string) result diff --git a/src/syntax/ppx_sedlex.ml b/src/syntax/ppx_sedlex.ml index 33df3bc..5e19306 100644 --- a/src/syntax/ppx_sedlex.ml +++ b/src/syntax/ppx_sedlex.ml @@ -562,7 +562,7 @@ let ir_of_pattern env = (* (p1, p2, ...) — sequence *) | Ppat_tuple (p :: pl) -> List.fold_left - (fun acc p -> Ir.seq acc (aux ~encoding p)) + (fun acc p -> unwrap p.ppat_loc (Ir.seq acc (aux ~encoding p))) (aux ~encoding p) pl (* Star p — zero-or-more repetition *) | Ppat_construct ({ txt = Lident "Star"; _ }, Some (_, p)) -> @@ -682,7 +682,9 @@ let ir_of_pattern env = | Pconst_string (s, _, _) -> let rev_l = rev_csets_of_string s ~loc:p.ppat_loc ~encoding in List.fold_left - (fun acc cset -> Ir.seq (Ir.chars cset) acc) + (fun acc cset -> + (* chars have no captures, so seq cannot fail *) + Result.get_ok (Ir.seq (Ir.chars cset) acc)) Ir.eps rev_l | Pconst_char c -> Ir.chars (char c) | Pconst_integer (i, _) -> diff --git a/test/codegen/test_errors.ml b/test/codegen/test_errors.ml index 1481898..f508a49 100644 --- a/test/codegen/test_errors.ml +++ b/test/codegen/test_errors.ml @@ -89,6 +89,17 @@ let%expect_test "error: as shadows inner binding" = Error: Sedlex: 'as' binding 'x' shadows an inner binding of the same name |}] +let%expect_test "error: duplicate as binding in sequence" = + [%compile_error + [%sedlex match buf with ('a' as x), 'b', ('c' as x) -> ignore x | _ -> ()]]; + [%expect + {| + File "test/codegen/test_errors.ml", characters 45-55: + | [%sedlex match buf with ('a' as x), 'b', ('c' as x) -> ignore x | _ -> ()]]; + ^^^^^^^^^^ + Error: Sedlex: 'as' binding 'x' is bound in multiple positions + |}] + let%expect_test "error: different names in or-pattern" = [%compile_error [%sedlex @@ -96,8 +107,8 @@ let%expect_test "error: different names in or-pattern" = [%expect {| File "test/codegen/test_errors.ml", characters 21-44: - | match buf with ('a' as x) | ('b' as y) -> ignore (x, y) | _ -> ()]]; - ^^^^^^^^^^^^^^^^^^^^^^^ + | match buf with ('a' as x) | ('b' as y) -> ignore (x, y) | _ -> ()]]; + ^^^^^^^^^^^^^^^^^^^^^^^ Error: Sedlex: all branches of '|' must bind the same names with 'as' |}]