From 86e46d053ee279719d122a01996b717435034c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Thu, 9 Apr 2026 12:35:01 +0200 Subject: [PATCH 01/11] Wasm: Extract loops from toplevel function into helper functions Wasm engines tier up (compile to optimised native code) functions that contain loops. For the toplevel/init function, which is large and called only once, this is wasteful. Extract contained loops into small helper functions so only the helpers get tiered up. The pass runs on the toplevel function after code generation. It extracts loops whose branches all stay within the loop body (no escaping Br, Return, or Rethrow). Variables are split into parameters (whose pre-loop values are needed) and locals (written before first read). Non-nullable ref locals are made nullable with RefCast on reads. Modified variables are returned. --- compiler/lib-wasm/generate.ml | 1 + compiler/lib-wasm/hoist_loops.ml | 315 ++++++++++++++++++++++++ compiler/lib-wasm/hoist_loops.mli | 25 ++ compiler/lib-wasm/initialize_locals.ml | 9 +- compiler/lib-wasm/initialize_locals.mli | 12 + 5 files changed, 359 insertions(+), 3 deletions(-) create mode 100644 compiler/lib-wasm/hoist_loops.ml create mode 100644 compiler/lib-wasm/hoist_loops.mli diff --git a/compiler/lib-wasm/generate.ml b/compiler/lib-wasm/generate.ml index 9a049ab9aa..2c0d701f0c 100644 --- a/compiler/lib-wasm/generate.ml +++ b/compiler/lib-wasm/generate.ml @@ -2446,6 +2446,7 @@ module Generate (Target : Target_sig.S) = struct functions in global_context.init_code <- []; + let functions = Hoist_loops.f ~toplevel:toplevel_name functions in global_context.other_fields <- List.rev_append functions global_context.other_fields; let js_code = StringMap.bindings global_context.fragments in global_context.fragments <- StringMap.empty; diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml new file mode 100644 index 0000000000..ceff79156b --- /dev/null +++ b/compiler/lib-wasm/hoist_loops.ml @@ -0,0 +1,315 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(* Extract loops from the toplevel function into separate helper + functions. Wasm engines tier up (compile to optimised native code) + functions containing loops. Since the toplevel function is large and + called only once, we want to avoid this: only the small helpers + should be tiered up. + + A loop is extractable when all its branches stay within the loop + body (no escaping [Br], [Return], or [Rethrow]). + + Variables used in the loop are split into parameters and locals. + Parameters are variables whose pre-loop value may be needed: + either read before being written in the loop body, or with a + defaultable type (scalars, nullable refs, [ref eq], …). + The remaining variables — non-defaultable non-nullable refs that + are always written before their first read — become locals of the + helper. They have no meaningful value before the loop, so passing + them as parameters would introduce a read that did not exist in + the original code. Since [Initialize_locals] has already run on + the original function, these locals have a set/get pattern that + the Wasm validator accepts without initialisation. + + Modified parameters are returned to the caller via a struct (or + directly when there are zero or one). Locals are not returned: + they are purely internal to the loop. *) + +open! Stdlib +module W = Wasm_ast + +(* Check that all branches in a loop body target labels within the loop. + [depth] counts the number of enclosing control flow constructs + including the loop itself, so it starts at 1 when called on the loop + body. A [Br n] escapes the loop when [n >= depth]. *) + +let rec is_contained_expr ~depth (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> true + | LocalGet _ -> true + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> is_contained_expr ~depth e' + | LocalTee (_, e') -> is_contained_expr ~depth e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> is_contained_expr ~depth e1 && is_contained_expr ~depth e2 + | Br_on_cast (n, _, _, e') | Br_on_cast_fail (n, _, _, e') -> + n < depth && is_contained_expr ~depth e' + | Br_on_null (n, e') -> n < depth && is_contained_expr ~depth e' + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> + List.for_all ~f:(is_contained_expr ~depth) l + | Call_ref (_, e', l) -> + is_contained_expr ~depth e' && List.for_all ~f:(is_contained_expr ~depth) l + | BlockExpr (_, body) -> is_contained_instrs ~depth:(depth + 1) body + | Seq (instrs, e') -> is_contained_instrs ~depth instrs && is_contained_expr ~depth e' + | IfExpr (_, cond, e1, e2) -> + is_contained_expr ~depth cond + && is_contained_expr ~depth:(depth + 1) e1 + && is_contained_expr ~depth:(depth + 1) e2 + | Try (_, body, catches) -> + is_contained_instrs ~depth:(depth + 1) body + && List.for_all ~f:(fun (_, l, _) -> l < depth) catches + +and is_contained_instr ~depth (i : W.instruction) = + match i with + | Drop e | GlobalSet (_, e) | Push e | Throw (_, e) -> is_contained_expr ~depth e + | LocalSet (_, e) -> is_contained_expr ~depth e + | Br (n, e_opt) -> ( + n < depth + && + match e_opt with + | None -> true + | Some e -> is_contained_expr ~depth e) + | Br_if (n, e) -> n < depth && is_contained_expr ~depth e + | Br_table (e, targets, default) -> + List.for_all ~f:(fun n -> n < depth) targets + && default < depth + && is_contained_expr ~depth e + | Return _ | Return_call _ | Return_call_ref _ -> false + | Loop (_, body) | Block (_, body) -> is_contained_instrs ~depth:(depth + 1) body + | If (_, e, l1, l2) -> + is_contained_expr ~depth e + && is_contained_instrs ~depth:(depth + 1) l1 + && is_contained_instrs ~depth:(depth + 1) l2 + | CallInstr (_, l) -> List.for_all ~f:(is_contained_expr ~depth) l + | Rethrow n -> n < depth + | Nop | Unreachable | Event _ -> true + | ArraySet (_, e1, e2, e3) -> + is_contained_expr ~depth e1 + && is_contained_expr ~depth e2 + && is_contained_expr ~depth e3 + | StructSet (_, _, e1, e2) -> is_contained_expr ~depth e1 && is_contained_expr ~depth e2 + +and is_contained_instrs ~depth l = List.for_all ~f:(is_contained_instr ~depth) l + +(* Collect local variables referenced in an instruction list. + [reads]: variables appearing in [LocalGet]. + [writes]: variables appearing in [LocalSet] or [LocalTee]. *) + +type var_sets = + { reads : Code.Var.Set.t + ; writes : Code.Var.Set.t + } + +let rec collect_expr acc (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> acc + | LocalGet v -> { acc with reads = Code.Var.Set.add v acc.reads } + | LocalTee (v, e') -> + collect_expr { acc with writes = Code.Var.Set.add v acc.writes } e' + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> collect_expr acc e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> collect_expr (collect_expr acc e1) e2 + | Br_on_cast (_, _, _, e') | Br_on_cast_fail (_, _, _, e') | Br_on_null (_, e') -> + collect_expr acc e' + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> collect_exprs acc l + | Call_ref (_, e', l) -> collect_expr (collect_exprs acc l) e' + | BlockExpr (_, body) -> collect_instrs acc body + | Seq (instrs, e') -> collect_expr (collect_instrs acc instrs) e' + | IfExpr (_, cond, e1, e2) -> collect_expr (collect_expr (collect_expr acc cond) e1) e2 + | Try (_, body, _) -> collect_instrs acc body + +and collect_exprs acc l = List.fold_left ~f:collect_expr ~init:acc l + +and collect_instr acc (i : W.instruction) = + match i with + | Drop e | GlobalSet (_, e) | Push e | Throw (_, e) -> collect_expr acc e + | LocalSet (v, e) -> collect_expr { acc with writes = Code.Var.Set.add v acc.writes } e + | Br (_, Some e) | Br_if (_, e) | Br_table (e, _, _) -> collect_expr acc e + | Br (_, None) | Return None | Nop | Unreachable | Event _ | Rethrow _ -> acc + | Return (Some e) -> collect_expr acc e + | Loop (_, body) | Block (_, body) -> collect_instrs acc body + | If (_, e, l1, l2) -> collect_instrs (collect_instrs (collect_expr acc e) l1) l2 + | CallInstr (_, l) | Return_call (_, l) -> collect_exprs acc l + | Return_call_ref (_, e', l) -> collect_expr (collect_exprs acc l) e' + | ArraySet (_, e1, e2, e3) -> collect_expr (collect_expr (collect_expr acc e1) e2) e3 + | StructSet (_, _, e1, e2) -> collect_expr (collect_expr acc e1) e2 + +and collect_instrs acc l = List.fold_left ~f:collect_instr ~init:acc l + +(* Transformation context *) + +type ctx = + { var_types : W.value_type Code.Var.Hashtbl.t + ; mutable new_fields : W.module_field list + } + +let lookup_types ctx vars = + Code.Var.Set.fold + (fun v acc -> + match Code.Var.Hashtbl.find_opt ctx.var_types v with + | Some t -> (v, t) :: acc + | None -> acc) + vars + [] + +let extract_loop ctx ~is_initialized body = + let { reads; writes } = + collect_instrs { reads = Code.Var.Set.empty; writes = Code.Var.Set.empty } body + in + let all_vars = Code.Var.Set.union reads writes in + (* Non-nullable ref variables that are not yet initialised when + reaching the loop become locals (passing them as parameters would + introduce a read that did not exist in the original code). + Scalars and nullable refs are safe as parameters since they have + Wasm default values. *) + let local_vars = + Code.Var.Set.filter + (fun v -> + (not (is_initialized v)) + && + match Code.Var.Hashtbl.find_opt ctx.var_types v with + | Some (Ref { nullable = false; _ }) -> true + | _ -> false) + all_vars + in + let param_vars = Code.Var.Set.diff all_vars local_vars in + let param_with_types = lookup_types ctx param_vars in + let local_with_types = lookup_types ctx local_vars in + (* Only return modified parameters — locals are loop-internal. *) + let modified_with_types = lookup_types ctx (Code.Var.Set.inter param_vars writes) in + let helper_name = Code.Var.fresh_n "loop_helper" in + let args = List.map ~f:(fun (v, _) -> W.LocalGet v) param_with_types in + let param_types = List.map ~f:snd param_with_types in + let param_names = List.map ~f:fst param_with_types in + let loop_instr = W.Loop ({ W.params = []; result = [] }, body) in + let make_helper ~signature ~extra_body = + W.Function + { name = helper_name + ; exported_name = None + ; typ = None + ; signature + ; param_names + ; locals = local_with_types + ; body = loop_instr :: extra_body + } + in + let result_types = List.map ~f:snd modified_with_types in + let extra_body = List.map ~f:(fun (v, _) -> W.Push (LocalGet v)) modified_with_types in + let signature = { W.params = param_types; result = result_types } in + ctx.new_fields <- make_helper ~signature ~extra_body :: ctx.new_fields; + match modified_with_types with + | [] -> [ W.CallInstr (helper_name, args) ] + | [ (v, _) ] -> [ W.LocalSet (v, Call (helper_name, args)) ] + | _ -> + (* Multi-value: call leaves results on the stack, pop in + reverse order (last pushed = top of stack = first popped). *) + W.CallInstr (helper_name, args) + :: List.rev_map ~f:(fun (v, t) -> W.LocalSet (v, Pop t)) modified_with_types + +let fork_il_ctx = Initialize_locals.fork_context + +let rec transform_instrs ctx il_ctx instrs = + List.concat_map ~f:(transform_instr ctx il_ctx) instrs + +and transform_instr ctx il_ctx (i : W.instruction) = + match i with + | Loop (ty, body) when List.is_empty ty.result && is_contained_instrs ~depth:1 body -> + (* Use the current initialized set — then scan the original + instruction to update the outer context for what follows. *) + let result = + extract_loop ctx ~is_initialized:(Initialize_locals.is_initialized il_ctx) body + in + Initialize_locals.scan_instruction il_ctx i; + result + | Loop (ty, body) -> + let inner = fork_il_ctx il_ctx in + let body' = transform_instrs ctx inner body in + Initialize_locals.scan_instruction il_ctx i; + [ W.Loop (ty, body') ] + | Block (ty, body) -> + let inner = fork_il_ctx il_ctx in + let body' = transform_instrs ctx inner body in + Initialize_locals.scan_instruction il_ctx i; + [ W.Block (ty, body') ] + | If (ty, e, l1, l2) -> + let inner1 = fork_il_ctx il_ctx in + let inner2 = fork_il_ctx il_ctx in + let l1' = transform_instrs ctx inner1 l1 in + let l2' = transform_instrs ctx inner2 l2 in + Initialize_locals.scan_instruction il_ctx i; + [ W.If (ty, e, l1', l2') ] + | _ -> + Initialize_locals.scan_instruction il_ctx i; + [ i ] + +let f ~toplevel fields = + List.concat_map + ~f:(fun field -> + match field with + | W.Function ({ name; _ } as func) when Code.Var.equal name toplevel -> + let var_types = Code.Var.Hashtbl.create 16 in + List.iter2 + ~f:(fun v t -> Code.Var.Hashtbl.add var_types v t) + func.param_names + func.signature.params; + List.iter ~f:(fun (v, t) -> Code.Var.Hashtbl.add var_types v t) func.locals; + let ctx = { var_types; new_fields = [] } in + let il_ctx = Initialize_locals.create_context () in + List.iter ~f:(Initialize_locals.mark_initialized il_ctx) func.param_names; + List.iter + ~f:(fun (var, typ) -> + match (typ : W.value_type) with + | I32 | I64 | F32 | F64 | Ref { nullable = true; _ } -> + Initialize_locals.mark_initialized il_ctx var + | Ref { nullable = false; _ } -> ()) + func.locals; + let body = transform_instrs ctx il_ctx func.body in + let func' = W.Function { func with body } in + List.rev ctx.new_fields @ [ func' ] + | _ -> [ field ]) + fields diff --git a/compiler/lib-wasm/hoist_loops.mli b/compiler/lib-wasm/hoist_loops.mli new file mode 100644 index 0000000000..1d8ef97ad9 --- /dev/null +++ b/compiler/lib-wasm/hoist_loops.mli @@ -0,0 +1,25 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(** Extract loops from a function into separate helper functions. + + This avoids Wasm engines unnecessarily tiering up (compiling to optimised + native code) a large function that is called only once but contains a loop. + Only the small helper functions containing the loops will be tiered up. *) + +val f : toplevel:Code.Var.t -> Wasm_ast.module_field list -> Wasm_ast.module_field list diff --git a/compiler/lib-wasm/initialize_locals.ml b/compiler/lib-wasm/initialize_locals.ml index bb9733286a..4f37c34386 100644 --- a/compiler/lib-wasm/initialize_locals.ml +++ b/compiler/lib-wasm/initialize_locals.ml @@ -23,8 +23,13 @@ type ctx = ; uninitialized : Code.Var.Set.t ref } +let create_context () = + { initialized = Code.Var.Set.empty; uninitialized = ref Code.Var.Set.empty } + let mark_initialized ctx i = ctx.initialized <- Code.Var.Set.add i ctx.initialized +let is_initialized ctx i = Code.Var.Set.mem i ctx.initialized + let fork_context { initialized; uninitialized } = { initialized; uninitialized } let check_initialized ctx i = @@ -217,9 +222,7 @@ let has_default (ty : Wasm_ast.heap_type) = | Func | Extern | Array | Struct | None_ | Type _ -> false let f ~param_names ~locals instrs = - let ctx = - { initialized = Code.Var.Set.empty; uninitialized = ref Code.Var.Set.empty } - in + let ctx = create_context () in List.iter ~f:(fun x -> mark_initialized ctx x) param_names; List.iter ~f:(fun (var, typ) -> diff --git a/compiler/lib-wasm/initialize_locals.mli b/compiler/lib-wasm/initialize_locals.mli index c356aa396b..ac9a898f51 100644 --- a/compiler/lib-wasm/initialize_locals.mli +++ b/compiler/lib-wasm/initialize_locals.mli @@ -16,6 +16,18 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *) +type ctx + +val create_context : unit -> ctx + +val mark_initialized : ctx -> Code.Var.t -> unit + +val is_initialized : ctx -> Code.Var.t -> bool + +val fork_context : ctx -> ctx + +val scan_instruction : ctx -> Wasm_ast.instruction -> unit + val f : param_names:Wasm_ast.var list -> locals:(Wasm_ast.var * Wasm_ast.value_type) list From 0e1c1c7d86603ca935d83ff775222e4d03ac635b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Fri, 10 Apr 2026 17:29:51 +0200 Subject: [PATCH 02/11] Wrap toplevel loop bodies with error handlers Array bounds checks and division-by-zero checks use Br to jump to handler blocks set up by wrap_with_handlers. When those handler blocks are outside the loop, the Br escapes the loop and is_contained returns false. Wrapping the loop body with its own handlers keeps those Br targets inside the loop. Only done for the toplevel function (where loop hoisting applies). --- compiler/lib-wasm/generate.ml | 36 ++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/compiler/lib-wasm/generate.ml b/compiler/lib-wasm/generate.ml index 2c0d701f0c..8f14e24c79 100644 --- a/compiler/lib-wasm/generate.ml +++ b/compiler/lib-wasm/generate.ml @@ -2132,7 +2132,7 @@ module Generate (Target : Target_sig.S) = struct | Cond (_, (pc1, _), (pc2, _)) when pc' = pc1 && pc' = pc2 -> true | _ -> Structure.is_merge_node g pc' in - let code ~context = + let code ~result_typ ~fall_through ~context = let block = Addr.Map.find pc ctx.blocks in let* () = translate_instrs ctx context block.body in translate_node_within @@ -2149,8 +2149,38 @@ module Generate (Target : Target_sig.S) = struct in if Structure.is_loop_header g pc then - loop { params = []; result = result_typ } (code ~context:(`Block pc :: context)) - else code ~context + let outermost_toplevel_loop = + Option.is_none name_opt + && not + (List.exists + ~f:(function + | `Block pc' when pc' >= 0 -> Structure.is_loop_header g pc' + | _ -> false) + context) + in + loop + { params = []; result = result_typ } + (if outermost_toplevel_loop + then + (* The outermost loops of the toplevel function are later + hoisted into helper functions by [Hoist_loops], which + requires them to be self-contained (no [Br] escaping + the loop body). Bounds and zero-divide checks normally + branch to handler blocks installed at the top of the + function; we install handlers around the loop body + itself so those branches stay inside the loop. Nested + loops do not need their own wrap: only the outermost + one is extracted, and its body is copied verbatim into + the helper along with the surrounding handler blocks. *) + wrap_with_handlers + p + pc + ~result_typ + ~fall_through + ~context:(`Block pc :: context) + code + else code ~result_typ ~fall_through ~context:(`Block pc :: context)) + else code ~result_typ ~fall_through ~context and translate_node_within ~result_typ ~fall_through ~pc ~l ~context = match l with | pc' :: rem -> From 7b7f05f2c9b692e5af3e8e159f3545d9ebc54ac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Tue, 14 Apr 2026 12:49:56 +0200 Subject: [PATCH 03/11] Wasm / toplevel loop hoisting: return a struct rather than multiple values. Have the helper functions return a struct rather than multiple values, since this is better optimized by Binaryen. --- compiler/lib-wasm/hoist_loops.ml | 50 ++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index ceff79156b..bec081905f 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -186,6 +186,7 @@ and collect_instrs acc l = List.fold_left ~f:collect_instr ~init:acc l type ctx = { var_types : W.value_type Code.Var.Hashtbl.t ; mutable new_fields : W.module_field list + ; mutable extra_locals : (Code.Var.t * W.value_type) list } let lookup_types ctx vars = @@ -238,18 +239,41 @@ let extract_loop ctx ~is_initialized body = ; body = loop_instr :: extra_body } in - let result_types = List.map ~f:snd modified_with_types in - let extra_body = List.map ~f:(fun (v, _) -> W.Push (LocalGet v)) modified_with_types in - let signature = { W.params = param_types; result = result_types } in - ctx.new_fields <- make_helper ~signature ~extra_body :: ctx.new_fields; match modified_with_types with - | [] -> [ W.CallInstr (helper_name, args) ] - | [ (v, _) ] -> [ W.LocalSet (v, Call (helper_name, args)) ] + | [] -> + let signature = { W.params = param_types; result = [] } in + ctx.new_fields <- make_helper ~signature ~extra_body:[] :: ctx.new_fields; + [ W.CallInstr (helper_name, args) ] + | [ (v, vt) ] -> + let signature = { W.params = param_types; result = [ vt ] } in + ctx.new_fields <- + make_helper ~signature ~extra_body:[ Push (LocalGet v) ] :: ctx.new_fields; + [ W.LocalSet (v, Call (helper_name, args)) ] | _ -> - (* Multi-value: call leaves results on the stack, pop in - reverse order (last pushed = top of stack = first popped). *) - W.CallInstr (helper_name, args) - :: List.rev_map ~f:(fun (v, t) -> W.LocalSet (v, Pop t)) modified_with_types + let ret_type_name = Code.Var.fresh_n "loop_ret" in + let fields = + List.map ~f:(fun (_, t) -> { W.mut = false; typ = W.Value t }) modified_with_types + in + ctx.new_fields <- + W.Type + [ { name = ret_type_name; typ = Struct fields; supertype = None; final = true } + ] + :: ctx.new_fields; + let ret_ref_type = W.Ref { nullable = false; typ = Type ret_type_name } in + let signature = { W.params = param_types; result = [ ret_ref_type ] } in + let struct_new = + W.StructNew + (ret_type_name, List.map ~f:(fun (v, _) -> W.LocalGet v) modified_with_types) + in + ctx.new_fields <- + make_helper ~signature ~extra_body:[ Push struct_new ] :: ctx.new_fields; + let tmp = Code.Var.fresh_n "loop_ret" in + ctx.extra_locals <- (tmp, ret_ref_type) :: ctx.extra_locals; + W.LocalSet (tmp, Call (helper_name, args)) + :: List.mapi + ~f:(fun i (v, _) -> + W.LocalSet (v, StructGet (None, ret_type_name, i, LocalGet tmp))) + modified_with_types let fork_il_ctx = Initialize_locals.fork_context @@ -298,7 +322,7 @@ let f ~toplevel fields = func.param_names func.signature.params; List.iter ~f:(fun (v, t) -> Code.Var.Hashtbl.add var_types v t) func.locals; - let ctx = { var_types; new_fields = [] } in + let ctx = { var_types; new_fields = []; extra_locals = [] } in let il_ctx = Initialize_locals.create_context () in List.iter ~f:(Initialize_locals.mark_initialized il_ctx) func.param_names; List.iter @@ -309,7 +333,9 @@ let f ~toplevel fields = | Ref { nullable = false; _ } -> ()) func.locals; let body = transform_instrs ctx il_ctx func.body in - let func' = W.Function { func with body } in + let func' = + W.Function { func with body; locals = func.locals @ ctx.extra_locals } + in List.rev ctx.new_fields @ [ func' ] | _ -> [ field ]) fields From 5dcd93597d8472a1dafe3dccfc84cdc1f3a91e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 13 May 2026 16:56:41 +0200 Subject: [PATCH 04/11] Wasm: bound needed_handlers traversal to the dominator subtree needed_handlers used CFG-reachability from the start pc, which spills past the wrap's structural region: starting at a try body entry, the traversal followed Poptrap edges into post-try blocks; starting at a loop header, it could reach blocks not dominated by the loop header. Those checks live outside the wrap, so counting them caused dead handler blocks to be emitted inside the wrap. Walk the dominator subtree of the start pc instead, still skipping the try body when a Pushtrap is encountered (it has its own wrap). The function-level wrap is unaffected since the function entry dominates every block in the function. --- compiler/lib-wasm/generate.ml | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/compiler/lib-wasm/generate.ml b/compiler/lib-wasm/generate.ml index 8f14e24c79..c707b1589a 100644 --- a/compiler/lib-wasm/generate.ml +++ b/compiler/lib-wasm/generate.ml @@ -2012,9 +2012,28 @@ module Generate (Target : Target_sig.S) = struct | (`Block _ | `Catch | `Skip) as b -> b :: context | `Return -> `Skip :: context - let needed_handlers (p : program) pc = + (* Walk the dominator subtree of [pc] (the structural region of the + loop body, try body, or function body that the wrap covers), + skipping any nested try body since it carries its own wrap. *) + let needed_handlers (p : program) ~dom pc = + let fold : 'c. _ -> _ -> (Addr.t -> 'c -> 'c) -> 'c -> 'c = + fun _blocks pc' f accu -> + let block = Addr.Map.find pc' p.blocks in + let try_body = + match block.branch with + | Pushtrap ((pc'', _), _, _) -> Some pc'' + | _ -> None + in + Addr.Set.fold + (fun child acc -> + match try_body with + | Some pc'' when pc'' = child -> acc + | _ -> f child acc) + (Structure.get_edges dom pc') + accu + in Code.traverse - { fold = fold_children_skip_try_body } + { fold } (fun pc n -> let block = Addr.Map.find pc p.blocks in List.fold_left @@ -2084,8 +2103,8 @@ module Generate (Target : Target_sig.S) = struct instr W.Unreachable else body ~result_typ ~fall_through ~context - let wrap_with_handlers p pc ~result_typ ~fall_through ~context body = - let need_zero_divide_handler, need_bound_error_handler = needed_handlers p pc in + let wrap_with_handlers p ~dom pc ~result_typ ~fall_through ~context body = + let need_zero_divide_handler, need_bound_error_handler = needed_handlers p ~dom pc in wrap_with_handler need_bound_error_handler bound_error_pc @@ -2174,6 +2193,7 @@ module Generate (Target : Target_sig.S) = struct the helper along with the surrounding handler blocks. *) wrap_with_handlers p + ~dom pc ~result_typ ~fall_through @@ -2259,6 +2279,7 @@ module Generate (Target : Target_sig.S) = struct ~context:(extend_context fall_through context) (wrap_with_handlers p + ~dom (fst cont) (fun ~result_typ ~fall_through ~context -> translate_branch result_typ fall_through pc cont context)) @@ -2330,6 +2351,7 @@ module Generate (Target : Target_sig.S) = struct let* () = wrap_with_handlers p + ~dom pc ~result_typ:[ Option.value ~default:Type.value (unboxed_type return_type) ] ~fall_through:`Return From d5b47170603996ba1c20a039f3ca2d52f26e8700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 17:17:25 +0200 Subject: [PATCH 05/11] WIP --- compiler/lib-wasm/hoist_loops.ml | 224 ++++++++++++++++++++++++------- 1 file changed, 173 insertions(+), 51 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index bec081905f..ead74b8d27 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -26,20 +26,21 @@ body (no escaping [Br], [Return], or [Rethrow]). Variables used in the loop are split into parameters and locals. - Parameters are variables whose pre-loop value may be needed: - either read before being written in the loop body, or with a - defaultable type (scalars, nullable refs, [ref eq], …). - The remaining variables — non-defaultable non-nullable refs that - are always written before their first read — become locals of the - helper. They have no meaningful value before the loop, so passing - them as parameters would introduce a read that did not exist in - the original code. Since [Initialize_locals] has already run on - the original function, these locals have a set/get pattern that - the Wasm validator accepts without initialisation. + A variable becomes a parameter when its pre-loop value is needed: + either it is a non-nullable [ref] (no default value available), the + body's first use of it may be a read, or the body writes it and the + caller reads it after the loop (a conditional in-loop write must not + discard the caller's value, since the helper writes back what it + returns). The remaining variables become locals of the helper — + non-nullable refs that the body sets before getting (the Wasm + validator accepts this since [Initialize_locals] has already run on + the original function), or defaultable types whose helper default + matches the caller's. - Modified parameters are returned to the caller via a struct (or - directly when there are zero or one). Locals are not returned: - they are purely internal to the loop. *) + Modified parameters read by the caller are returned via a struct + (or directly when there are zero or one). Locals are never returned: + by the parameter rules above, any modified variable read by the + caller is already a parameter. *) open! Stdlib module W = Wasm_ast @@ -181,6 +182,124 @@ and collect_instr acc (i : W.instruction) = and collect_instrs acc l = List.fold_left ~f:collect_instr ~init:acc l +let read_before_written_instrs body = + let rec expr (reads, writes) (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> (reads, writes) + | LocalGet v -> + if Code.Var.Set.mem v writes + then (reads, writes) + else (Code.Var.Set.add v reads, writes) + | LocalTee (v, e') -> + let reads, writes = expr (reads, writes) e' in + (reads, Code.Var.Set.add v writes) + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> expr (reads, writes) e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> expr (expr (reads, writes) e1) e2 + | Br_on_cast (_, _, _, e') | Br_on_cast_fail (_, _, _, e') | Br_on_null (_, e') -> + expr (reads, writes) e' + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> exprs (reads, writes) l + | Call_ref (_, e', l) -> expr (exprs (reads, writes) l) e' + | BlockExpr (_, body) -> instrs (reads, writes) body + | Seq (instrs', e') -> expr (instrs (reads, writes) instrs') e' + | IfExpr (_, cond, e1, e2) -> + let reads, writes = expr (reads, writes) cond in + let reads1, writes1 = expr (reads, writes) e1 in + let reads2, writes2 = expr (reads, writes) e2 in + (Code.Var.Set.union reads1 reads2, Code.Var.Set.inter writes1 writes2) + | Try (_, body, _) -> instrs (reads, writes) body + + and exprs acc l = List.fold_left ~f:expr ~init:acc l + + and instr (reads, writes) (i : W.instruction) = + match i with + | Drop e | GlobalSet (_, e) | Push e | Throw (_, e) -> expr (reads, writes) e + | LocalSet (v, e) -> + let reads, writes = expr (reads, writes) e in + (reads, Code.Var.Set.add v writes) + | Br (_, Some e) | Br_if (_, e) | Br_table (e, _, _) -> expr (reads, writes) e + | Br (_, None) | Return None | Nop | Unreachable | Event _ | Rethrow _ -> (reads, writes) + | Return (Some e) -> expr (reads, writes) e + | Loop (_, body) | Block (_, body) -> instrs (reads, writes) body + | If (_, e, l1, l2) -> + let reads, writes = expr (reads, writes) e in + let reads1, writes1 = instrs (reads, writes) l1 in + let reads2, writes2 = instrs (reads, writes) l2 in + (Code.Var.Set.union reads1 reads2, Code.Var.Set.inter writes1 writes2) + | CallInstr (_, l) | Return_call (_, l) -> exprs (reads, writes) l + | Return_call_ref (_, e', l) -> expr (exprs (reads, writes) l) e' + | ArraySet (_, e1, e2, e3) -> expr (expr (expr (reads, writes) e1) e2) e3 + | StructSet (_, _, e1, e2) -> expr (expr (reads, writes) e1) e2 + + and instrs acc l = List.fold_left ~f:instr ~init:acc l + in + let reads, _ = instrs (Code.Var.Set.empty, Code.Var.Set.empty) body in + reads + +let empty_var_sets = { reads = Code.Var.Set.empty; writes = Code.Var.Set.empty } + +let reads_in_expr e = (collect_expr empty_var_sets e).reads + +let reads_in_instr i = (collect_instr empty_var_sets i).reads + +(* Backward scan over the function body, producing one entry per [Loop] + encountered in source order: [Some s] for extractable loops, where + [s] is the set of variables read after the loop on any path through + the rest of the function; [None] for non-extractable loops. The + forward pass in [transform_instrs] consumes the list in the same + order. *) +let scan_right_to_left body = + let rec instr (loops, acc_reads) i = + match i with + | W.Loop (ty, body) when List.is_empty ty.result && is_contained_instrs ~depth:1 body -> + let loops' = Some acc_reads :: loops in + let acc_reads' = + Code.Var.Set.union acc_reads (read_before_written_instrs body) + in + (loops', acc_reads') + | W.Loop (_, body) -> + let acc_reads' = + Code.Var.Set.union acc_reads (read_before_written_instrs body) + in + let loops', acc_reads'' = instrs (loops, acc_reads') body in + (None :: loops', acc_reads'') + | W.Block (_, body) -> instrs (loops, acc_reads) body + | W.If (_, cond, l1, l2) -> + let loops', l2_reads = instrs (loops, acc_reads) l2 in + let loops'', l1_reads = instrs (loops', acc_reads) l1 in + let acc_reads' = + Code.Var.Set.union + (reads_in_expr cond) + (Code.Var.Set.union l1_reads l2_reads) + in + (loops'', acc_reads') + | W.LocalSet (v, e) -> + let acc_reads' = + Code.Var.Set.union (Code.Var.Set.remove v acc_reads) (reads_in_expr e) + in + (loops, acc_reads') + | _ -> (loops, Code.Var.Set.union acc_reads (reads_in_instr i)) + and instrs (loops, acc_reads) l = + List.fold_right l ~init:(loops, acc_reads) ~f:(fun i acc -> instr acc i) + in + let loops, _ = instrs ([], Code.Var.Set.empty) body in + loops + (* Transformation context *) type ctx = @@ -198,31 +317,26 @@ let lookup_types ctx vars = vars [] -let extract_loop ctx ~is_initialized body = - let { reads; writes } = - collect_instrs { reads = Code.Var.Set.empty; writes = Code.Var.Set.empty } body - in +let extract_loop ctx ~is_initialized ~after_reads body = + let { reads; writes } = collect_instrs empty_var_sets body in let all_vars = Code.Var.Set.union reads writes in - (* Non-nullable ref variables that are not yet initialised when - reaching the loop become locals (passing them as parameters would - introduce a read that did not exist in the original code). - Scalars and nullable refs are safe as parameters since they have - Wasm default values. *) - let local_vars = + let read_before_written = read_before_written_instrs body in + let param_vars = Code.Var.Set.filter (fun v -> - (not (is_initialized v)) - && - match Code.Var.Hashtbl.find_opt ctx.var_types v with - | Some (Ref { nullable = false; _ }) -> true - | _ -> false) + is_initialized v + && ((match Code.Var.Hashtbl.find_opt ctx.var_types v with + | Some (Ref { nullable = false; _ }) -> true + | _ -> false) + || Code.Var.Set.mem v read_before_written + || (Code.Var.Set.mem v writes && Code.Var.Set.mem v after_reads))) all_vars in - let param_vars = Code.Var.Set.diff all_vars local_vars in + let local_vars = Code.Var.Set.diff all_vars param_vars in let param_with_types = lookup_types ctx param_vars in let local_with_types = lookup_types ctx local_vars in - (* Only return modified parameters — locals are loop-internal. *) - let modified_with_types = lookup_types ctx (Code.Var.Set.inter param_vars writes) in + let returned_vars = Code.Var.Set.inter writes after_reads in + let modified_with_types = lookup_types ctx returned_vars in let helper_name = Code.Var.fresh_n "loop_helper" in let args = List.map ~f:(fun (v, _) -> W.LocalGet v) param_with_types in let param_types = List.map ~f:snd param_with_types in @@ -277,34 +391,41 @@ let extract_loop ctx ~is_initialized body = let fork_il_ctx = Initialize_locals.fork_context -let rec transform_instrs ctx il_ctx instrs = - List.concat_map ~f:(transform_instr ctx il_ctx) instrs +let rec transform_instrs ctx il_ctx pending_loops instrs = + List.concat_map ~f:(transform_instr ctx il_ctx pending_loops) instrs -and transform_instr ctx il_ctx (i : W.instruction) = +and transform_instr ctx il_ctx pending_loops (i : W.instruction) = match i with - | Loop (ty, body) when List.is_empty ty.result && is_contained_instrs ~depth:1 body -> - (* Use the current initialized set — then scan the original - instruction to update the outer context for what follows. *) - let result = - extract_loop ctx ~is_initialized:(Initialize_locals.is_initialized il_ctx) body - in - Initialize_locals.scan_instruction il_ctx i; - result - | Loop (ty, body) -> - let inner = fork_il_ctx il_ctx in - let body' = transform_instrs ctx inner body in - Initialize_locals.scan_instruction il_ctx i; - [ W.Loop (ty, body') ] + | Loop (ty, body) -> ( + match !pending_loops with + | Some after_reads :: tl -> + pending_loops := tl; + let result = + extract_loop + ctx + ~is_initialized:(Initialize_locals.is_initialized il_ctx) + ~after_reads + body + in + Initialize_locals.scan_instruction il_ctx i; + result + | None :: tl -> + pending_loops := tl; + let inner = fork_il_ctx il_ctx in + let body' = transform_instrs ctx inner pending_loops body in + Initialize_locals.scan_instruction il_ctx i; + [ W.Loop (ty, body') ] + | [] -> assert false) | Block (ty, body) -> let inner = fork_il_ctx il_ctx in - let body' = transform_instrs ctx inner body in + let body' = transform_instrs ctx inner pending_loops body in Initialize_locals.scan_instruction il_ctx i; [ W.Block (ty, body') ] | If (ty, e, l1, l2) -> let inner1 = fork_il_ctx il_ctx in let inner2 = fork_il_ctx il_ctx in - let l1' = transform_instrs ctx inner1 l1 in - let l2' = transform_instrs ctx inner2 l2 in + let l1' = transform_instrs ctx inner1 pending_loops l1 in + let l2' = transform_instrs ctx inner2 pending_loops l2 in Initialize_locals.scan_instruction il_ctx i; [ W.If (ty, e, l1', l2') ] | _ -> @@ -332,7 +453,8 @@ let f ~toplevel fields = Initialize_locals.mark_initialized il_ctx var | Ref { nullable = false; _ } -> ()) func.locals; - let body = transform_instrs ctx il_ctx func.body in + let pending_loops = ref (scan_right_to_left func.body) in + let body = transform_instrs ctx il_ctx pending_loops func.body in let func' = W.Function { func with body; locals = func.locals @ ctx.extra_locals } in From f451066ba76f3d9c0ea392f7853e7f5a5d11f92a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 18:18:45 +0200 Subject: [PATCH 06/11] WIP --- compiler/lib-wasm/hoist_loops.ml | 273 +++++++++++++++++-------------- 1 file changed, 146 insertions(+), 127 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index ead74b8d27..55783e2c28 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -26,21 +26,17 @@ body (no escaping [Br], [Return], or [Rethrow]). Variables used in the loop are split into parameters and locals. - A variable becomes a parameter when its pre-loop value is needed: - either it is a non-nullable [ref] (no default value available), the - body's first use of it may be a read, or the body writes it and the - caller reads it after the loop (a conditional in-loop write must not - discard the caller's value, since the helper writes back what it - returns). The remaining variables become locals of the helper — - non-nullable refs that the body sets before getting (the Wasm - validator accepts this since [Initialize_locals] has already run on - the original function), or defaultable types whose helper default - matches the caller's. - - Modified parameters read by the caller are returned via a struct - (or directly when there are zero or one). Locals are never returned: - by the parameter rules above, any modified variable read by the - caller is already a parameter. *) + A variable becomes a parameter when its current value is live at the + loop head: either the body may read it before rewriting it on some + reachable path, or the loop may exit to a caller read without first + rewriting it. The remaining variables become locals of the helper. + This includes non-nullable refs whose set/get discipline is already + known to be valid because [Initialize_locals] has run on the original + function. + + Modified variables that are live after the loop are returned via a + struct (or directly when there are zero or one), whether they are + parameters or helper locals. *) open! Stdlib module W = Wasm_ast @@ -182,80 +178,142 @@ and collect_instr acc (i : W.instruction) = and collect_instrs acc l = List.fold_left ~f:collect_instr ~init:acc l -let read_before_written_instrs body = - let rec expr (reads, writes) (e : W.expression) = - match e with - | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> (reads, writes) - | LocalGet v -> - if Code.Var.Set.mem v writes - then (reads, writes) - else (Code.Var.Set.add v reads, writes) - | LocalTee (v, e') -> - let reads, writes = expr (reads, writes) e' in - (reads, Code.Var.Set.add v writes) - | UnOp (_, e') - | I32WrapI64 e' - | I64ExtendI32 (_, e') - | F32DemoteF64 e' - | F64PromoteF32 e' - | RefI31 e' - | I31Get (_, e') - | ArrayLen e' - | StructGet (_, _, _, e') - | RefCast (_, e') - | RefTest (_, e') - | ExternConvertAny e' - | AnyConvertExtern e' -> expr (reads, writes) e' - | BinOp (_, e1, e2) - | ArrayNew (_, e1, e2) - | ArrayNewData (_, _, e1, e2) - | ArrayGet (_, _, e1, e2) - | RefEq (e1, e2) -> expr (expr (reads, writes) e1) e2 - | Br_on_cast (_, _, _, e') | Br_on_cast_fail (_, _, _, e') | Br_on_null (_, e') -> - expr (reads, writes) e' - | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> exprs (reads, writes) l - | Call_ref (_, e', l) -> expr (exprs (reads, writes) l) e' - | BlockExpr (_, body) -> instrs (reads, writes) body - | Seq (instrs', e') -> expr (instrs (reads, writes) instrs') e' - | IfExpr (_, cond, e1, e2) -> - let reads, writes = expr (reads, writes) cond in - let reads1, writes1 = expr (reads, writes) e1 in - let reads2, writes2 = expr (reads, writes) e2 in - (Code.Var.Set.union reads1 reads2, Code.Var.Set.inter writes1 writes2) - | Try (_, body, _) -> instrs (reads, writes) body - - and exprs acc l = List.fold_left ~f:expr ~init:acc l - - and instr (reads, writes) (i : W.instruction) = - match i with - | Drop e | GlobalSet (_, e) | Push e | Throw (_, e) -> expr (reads, writes) e - | LocalSet (v, e) -> - let reads, writes = expr (reads, writes) e in - (reads, Code.Var.Set.add v writes) - | Br (_, Some e) | Br_if (_, e) | Br_table (e, _, _) -> expr (reads, writes) e - | Br (_, None) | Return None | Nop | Unreachable | Event _ | Rethrow _ -> (reads, writes) - | Return (Some e) -> expr (reads, writes) e - | Loop (_, body) | Block (_, body) -> instrs (reads, writes) body - | If (_, e, l1, l2) -> - let reads, writes = expr (reads, writes) e in - let reads1, writes1 = instrs (reads, writes) l1 in - let reads2, writes2 = instrs (reads, writes) l2 in - (Code.Var.Set.union reads1 reads2, Code.Var.Set.inter writes1 writes2) - | CallInstr (_, l) | Return_call (_, l) -> exprs (reads, writes) l - | Return_call_ref (_, e', l) -> expr (exprs (reads, writes) l) e' - | ArraySet (_, e1, e2, e3) -> expr (expr (expr (reads, writes) e1) e2) e3 - | StructSet (_, _, e1, e2) -> expr (expr (reads, writes) e1) e2 - - and instrs acc l = List.fold_left ~f:instr ~init:acc l +let empty_var_sets = { reads = Code.Var.Set.empty; writes = Code.Var.Set.empty } + +let empty_vars = Code.Var.Set.empty + +let label_reads labels depth = + let rec find labels depth = + match labels, depth with + | live :: _, 0 -> live + | _ :: tl, n -> find tl (n - 1) + | [], _ -> assert false in - let reads, _ = instrs (Code.Var.Set.empty, Code.Var.Set.empty) body in - reads + find labels depth -let empty_var_sets = { reads = Code.Var.Set.empty; writes = Code.Var.Set.empty } +let rec live_before_expr ~labels ~live_out (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> live_out + | LocalGet v -> Code.Var.Set.add v live_out + | LocalTee (v, e') -> live_before_expr ~labels ~live_out:(Code.Var.Set.remove v live_out) e' + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> live_before_expr ~labels ~live_out e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> + let live_out = live_before_expr ~labels ~live_out e2 in + live_before_expr ~labels ~live_out e1 + | Br_on_cast (n, _, _, e') | Br_on_cast_fail (n, _, _, e') | Br_on_null (n, e') -> + let live_out = Code.Var.Set.union live_out (label_reads labels n) in + live_before_expr ~labels ~live_out e' + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> live_before_exprs ~labels ~live_out l + | Call_ref (_, e', l) -> live_before_exprs ~labels ~live_out (l @ [ e' ]) + | BlockExpr (_, body) -> + let _, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in + live_in + | Seq (instrs, e') -> + let live_out = live_before_expr ~labels ~live_out e' in + let _, live_in = live_before_instrs ~labels ~live_out instrs in + live_in + | IfExpr (_, cond, e1, e2) -> + let branch_labels = live_out :: labels in + let live1 = live_before_expr ~labels:branch_labels ~live_out e1 in + let live2 = live_before_expr ~labels:branch_labels ~live_out e2 in + let live_out = Code.Var.Set.union live1 live2 in + live_before_expr ~labels ~live_out cond + | Try (_, body, _) -> + let _, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in + live_in + +and live_before_exprs ~labels ~live_out l = + List.fold_right l ~init:live_out ~f:(fun e live_out -> live_before_expr ~labels ~live_out e) + +and live_before_loop_body ~labels ~live_out body = + let rec fix live_head = + let _, live_head' = live_before_instrs ~labels:(live_head :: labels) ~live_out body in + if Code.Var.Set.equal live_head live_head' then live_head else fix live_head' + in + let live_head = fix empty_vars in + live_before_instrs ~labels:(live_head :: labels) ~live_out body -let reads_in_expr e = (collect_expr empty_var_sets e).reads +and live_before_instr ~labels ~rest_loops ~live_out (i : W.instruction) = + match i with + | Drop e | Push e -> + rest_loops, live_before_expr ~labels ~live_out e + | GlobalSet (_, e) -> + rest_loops, live_before_expr ~labels ~live_out e + | Throw (_, e) -> + rest_loops, live_before_expr ~labels ~live_out:empty_vars e + | LocalSet (v, e) -> + let live_out = Code.Var.Set.remove v live_out in + rest_loops, live_before_expr ~labels ~live_out e + | Br (n, None) -> rest_loops, label_reads labels n + | Br (n, Some e) -> + let live_out = label_reads labels n in + rest_loops, live_before_expr ~labels ~live_out e + | Br_if (n, e) -> + let live_out = Code.Var.Set.union live_out (label_reads labels n) in + rest_loops, live_before_expr ~labels ~live_out e + | Br_table (e, targets, default) -> + let live_out = + List.fold_left + ~init:(label_reads labels default) + ~f:(fun acc n -> Code.Var.Set.union acc (label_reads labels n)) + targets + in + rest_loops, live_before_expr ~labels ~live_out e + | Return None -> rest_loops, empty_vars + | Return (Some e) -> rest_loops, live_before_expr ~labels ~live_out:empty_vars e + | Loop (ty, body) -> + let body_loops, live_in = live_before_loop_body ~labels ~live_out body in + let loops = + if List.is_empty ty.result && is_contained_instrs ~depth:1 body + then Some live_out :: rest_loops + else (None :: body_loops) @ rest_loops + in + loops, live_in + | Block (_, body) -> + let body_loops, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in + body_loops @ rest_loops, live_in + | If (_, e, l1, l2) -> + let branch_labels = live_out :: labels in + let loops1, live1 = live_before_instrs ~labels:branch_labels ~live_out l1 in + let loops2, live2 = live_before_instrs ~labels:branch_labels ~live_out l2 in + let live_out = Code.Var.Set.union live1 live2 in + let live_in = live_before_expr ~labels ~live_out e in + loops1 @ loops2 @ rest_loops, live_in + | CallInstr (_, l) -> rest_loops, live_before_exprs ~labels ~live_out l + | Nop | Event _ -> rest_loops, live_out + | ArraySet (_, e1, e2, e3) -> + let live_out = live_before_expr ~labels ~live_out e3 in + let live_out = live_before_expr ~labels ~live_out e2 in + rest_loops, live_before_expr ~labels ~live_out e1 + | StructSet (_, _, e1, e2) -> + let live_out = live_before_expr ~labels ~live_out e2 in + rest_loops, live_before_expr ~labels ~live_out e1 + | Return_call (_, l) -> rest_loops, live_before_exprs ~labels ~live_out:empty_vars l + | Return_call_ref (_, e', l) -> + rest_loops, live_before_exprs ~labels ~live_out:empty_vars (l @ [ e' ]) + | Rethrow _ | Unreachable -> rest_loops, empty_vars -let reads_in_instr i = (collect_instr empty_var_sets i).reads +and live_before_instrs ~labels ~live_out l = + List.fold_right + l + ~init:([], live_out) + ~f:(fun i (rest_loops, live_out) -> live_before_instr ~labels ~rest_loops ~live_out i) (* Backward scan over the function body, producing one entry per [Loop] encountered in source order: [Some s] for extractable loops, where @@ -264,40 +322,7 @@ let reads_in_instr i = (collect_instr empty_var_sets i).reads forward pass in [transform_instrs] consumes the list in the same order. *) let scan_right_to_left body = - let rec instr (loops, acc_reads) i = - match i with - | W.Loop (ty, body) when List.is_empty ty.result && is_contained_instrs ~depth:1 body -> - let loops' = Some acc_reads :: loops in - let acc_reads' = - Code.Var.Set.union acc_reads (read_before_written_instrs body) - in - (loops', acc_reads') - | W.Loop (_, body) -> - let acc_reads' = - Code.Var.Set.union acc_reads (read_before_written_instrs body) - in - let loops', acc_reads'' = instrs (loops, acc_reads') body in - (None :: loops', acc_reads'') - | W.Block (_, body) -> instrs (loops, acc_reads) body - | W.If (_, cond, l1, l2) -> - let loops', l2_reads = instrs (loops, acc_reads) l2 in - let loops'', l1_reads = instrs (loops', acc_reads) l1 in - let acc_reads' = - Code.Var.Set.union - (reads_in_expr cond) - (Code.Var.Set.union l1_reads l2_reads) - in - (loops'', acc_reads') - | W.LocalSet (v, e) -> - let acc_reads' = - Code.Var.Set.union (Code.Var.Set.remove v acc_reads) (reads_in_expr e) - in - (loops, acc_reads') - | _ -> (loops, Code.Var.Set.union acc_reads (reads_in_instr i)) - and instrs (loops, acc_reads) l = - List.fold_right l ~init:(loops, acc_reads) ~f:(fun i acc -> instr acc i) - in - let loops, _ = instrs ([], Code.Var.Set.empty) body in + let loops, _ = live_before_instrs ~labels:[] ~live_out:empty_vars body in loops (* Transformation context *) @@ -320,16 +345,10 @@ let lookup_types ctx vars = let extract_loop ctx ~is_initialized ~after_reads body = let { reads; writes } = collect_instrs empty_var_sets body in let all_vars = Code.Var.Set.union reads writes in - let read_before_written = read_before_written_instrs body in + let _, live_in = live_before_loop_body ~labels:[] ~live_out:after_reads body in let param_vars = Code.Var.Set.filter - (fun v -> - is_initialized v - && ((match Code.Var.Hashtbl.find_opt ctx.var_types v with - | Some (Ref { nullable = false; _ }) -> true - | _ -> false) - || Code.Var.Set.mem v read_before_written - || (Code.Var.Set.mem v writes && Code.Var.Set.mem v after_reads))) + (fun v -> is_initialized v && Code.Var.Set.mem v live_in) all_vars in let local_vars = Code.Var.Set.diff all_vars param_vars in From dae95c2f20f9adb5a19160dae3c5c4a14517e9ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 18:42:14 +0200 Subject: [PATCH 07/11] WIP --- compiler/lib-wasm/hoist_loops.ml | 44 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index 55783e2c28..c5b708e627 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -241,12 +241,15 @@ let rec live_before_expr ~labels ~live_out (e : W.expression) = and live_before_exprs ~labels ~live_out l = List.fold_right l ~init:live_out ~f:(fun e live_out -> live_before_expr ~labels ~live_out e) -and live_before_loop_body ~labels ~live_out body = +and loop_live_in ~labels ~live_out body = let rec fix live_head = let _, live_head' = live_before_instrs ~labels:(live_head :: labels) ~live_out body in if Code.Var.Set.equal live_head live_head' then live_head else fix live_head' in - let live_head = fix empty_vars in + fix empty_vars + +and live_before_loop_body ~labels ~live_out body = + let live_head = loop_live_in ~labels ~live_out body in live_before_instrs ~labels:(live_head :: labels) ~live_out body and live_before_instr ~labels ~rest_loops ~live_out (i : W.instruction) = @@ -278,13 +281,13 @@ and live_before_instr ~labels ~rest_loops ~live_out (i : W.instruction) = | Return None -> rest_loops, empty_vars | Return (Some e) -> rest_loops, live_before_expr ~labels ~live_out:empty_vars e | Loop (ty, body) -> - let body_loops, live_in = live_before_loop_body ~labels ~live_out body in - let loops = - if List.is_empty ty.result && is_contained_instrs ~depth:1 body - then Some live_out :: rest_loops - else (None :: body_loops) @ rest_loops - in - loops, live_in + if List.is_empty ty.result && is_contained_instrs ~depth:1 body + then + let live_in = loop_live_in ~labels ~live_out body in + Some (live_out, live_in) :: rest_loops, live_in + else + let body_loops, live_in = live_before_loop_body ~labels ~live_out body in + (None :: body_loops) @ rest_loops, live_in | Block (_, body) -> let body_loops, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in body_loops @ rest_loops, live_in @@ -315,13 +318,14 @@ and live_before_instrs ~labels ~live_out l = ~init:([], live_out) ~f:(fun i (rest_loops, live_out) -> live_before_instr ~labels ~rest_loops ~live_out i) -(* Backward scan over the function body, producing one entry per [Loop] - encountered in source order: [Some s] for extractable loops, where - [s] is the set of variables read after the loop on any path through - the rest of the function; [None] for non-extractable loops. The - forward pass in [transform_instrs] consumes the list in the same - order. *) -let scan_right_to_left body = +(* Backward dataflow over the function body, producing one entry per + [Loop] encountered in source order: [Some (live_out, live_in)] for + extractable loops, where [live_out] is the set of variables read + after the loop on any path through the rest of the function and + [live_in] is the fixpoint set of variables whose pre-loop value the + body may need; [None] for non-extractable loops. The forward pass + in [transform_instrs] consumes the list in the same order. *) +let loops_after_reads body = let loops, _ = live_before_instrs ~labels:[] ~live_out:empty_vars body in loops @@ -342,10 +346,9 @@ let lookup_types ctx vars = vars [] -let extract_loop ctx ~is_initialized ~after_reads body = +let extract_loop ctx ~is_initialized ~after_reads ~live_in body = let { reads; writes } = collect_instrs empty_var_sets body in let all_vars = Code.Var.Set.union reads writes in - let _, live_in = live_before_loop_body ~labels:[] ~live_out:after_reads body in let param_vars = Code.Var.Set.filter (fun v -> is_initialized v && Code.Var.Set.mem v live_in) @@ -417,13 +420,14 @@ and transform_instr ctx il_ctx pending_loops (i : W.instruction) = match i with | Loop (ty, body) -> ( match !pending_loops with - | Some after_reads :: tl -> + | Some (after_reads, live_in) :: tl -> pending_loops := tl; let result = extract_loop ctx ~is_initialized:(Initialize_locals.is_initialized il_ctx) ~after_reads + ~live_in body in Initialize_locals.scan_instruction il_ctx i; @@ -472,7 +476,7 @@ let f ~toplevel fields = Initialize_locals.mark_initialized il_ctx var | Ref { nullable = false; _ } -> ()) func.locals; - let pending_loops = ref (scan_right_to_left func.body) in + let pending_loops = ref (loops_after_reads func.body) in let body = transform_instrs ctx il_ctx pending_loops func.body in let func' = W.Function { func with body; locals = func.locals @ ctx.extra_locals } From 3fa0d213b8a644c0f93b54c9867ea8f098268269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 19:23:08 +0200 Subject: [PATCH 08/11] WIP --- compiler/lib-wasm/hoist_loops.ml | 182 ++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 62 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index c5b708e627..0a85a50901 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -23,7 +23,11 @@ should be tiered up. A loop is extractable when all its branches stay within the loop - body (no escaping [Br], [Return], or [Rethrow]). + body (no escaping [Br], [Return], or [Rethrow]). Whether it is + actually hoisted is then decided by a backward liveness analysis with + both normal and exceptional continuations: if a local written in the + loop may be observed after an exceptional exit, the loop is left in + place because the helper call has no write-back path on exceptions. Variables used in the loop are split into parameters and locals. A variable becomes a parameter when its current value is live at the @@ -118,6 +122,8 @@ and is_contained_instr ~depth (i : W.instruction) = and is_contained_instrs ~depth l = List.for_all ~f:(is_contained_instr ~depth) l +let is_extractable_loop_body body = is_contained_instrs ~depth:1 body + (* Collect local variables referenced in an instruction list. [reads]: variables appearing in [LocalGet]. [writes]: variables appearing in [LocalSet] or [LocalTee]. *) @@ -191,11 +197,22 @@ let label_reads labels depth = in find labels depth -let rec live_before_expr ~labels ~live_out (e : W.expression) = +let catches_live_out labels ~exn_live_out catches = + List.fold_left + catches + ~init:exn_live_out + ~f:(fun acc (_, label, _) -> Code.Var.Set.union acc (label_reads labels label)) + +let rec live_before_expr ~labels ~live_out ~exn_live_out (e : W.expression) = match e with | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> live_out | LocalGet v -> Code.Var.Set.add v live_out - | LocalTee (v, e') -> live_before_expr ~labels ~live_out:(Code.Var.Set.remove v live_out) e' + | LocalTee (v, e') -> + live_before_expr + ~labels + ~live_out:(Code.Var.Set.remove v live_out) + ~exn_live_out + e' | UnOp (_, e') | I32WrapI64 e' | I64ExtendI32 (_, e') @@ -208,68 +225,84 @@ let rec live_before_expr ~labels ~live_out (e : W.expression) = | RefCast (_, e') | RefTest (_, e') | ExternConvertAny e' - | AnyConvertExtern e' -> live_before_expr ~labels ~live_out e' + | AnyConvertExtern e' -> live_before_expr ~labels ~live_out ~exn_live_out e' | BinOp (_, e1, e2) | ArrayNew (_, e1, e2) | ArrayNewData (_, _, e1, e2) | ArrayGet (_, _, e1, e2) | RefEq (e1, e2) -> - let live_out = live_before_expr ~labels ~live_out e2 in - live_before_expr ~labels ~live_out e1 + let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in + live_before_expr ~labels ~live_out ~exn_live_out e1 | Br_on_cast (n, _, _, e') | Br_on_cast_fail (n, _, _, e') | Br_on_null (n, e') -> let live_out = Code.Var.Set.union live_out (label_reads labels n) in - live_before_expr ~labels ~live_out e' - | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> live_before_exprs ~labels ~live_out l - | Call_ref (_, e', l) -> live_before_exprs ~labels ~live_out (l @ [ e' ]) + live_before_expr ~labels ~live_out ~exn_live_out e' + | Call (_, l) -> + let live_out = Code.Var.Set.union live_out exn_live_out in + live_before_exprs ~labels ~live_out ~exn_live_out l + | ArrayNewFixed (_, l) | StructNew (_, l) -> + live_before_exprs ~labels ~live_out ~exn_live_out l + | Call_ref (_, e', l) -> + let live_out = Code.Var.Set.union live_out exn_live_out in + live_before_exprs ~labels ~live_out ~exn_live_out (l @ [ e' ]) | BlockExpr (_, body) -> - let _, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in + let _, live_in = + live_before_instrs ~labels:(live_out :: labels) ~live_out ~exn_live_out body + in live_in | Seq (instrs, e') -> - let live_out = live_before_expr ~labels ~live_out e' in - let _, live_in = live_before_instrs ~labels ~live_out instrs in + let live_out = live_before_expr ~labels ~live_out ~exn_live_out e' in + let _, live_in = live_before_instrs ~labels ~live_out ~exn_live_out instrs in live_in | IfExpr (_, cond, e1, e2) -> let branch_labels = live_out :: labels in - let live1 = live_before_expr ~labels:branch_labels ~live_out e1 in - let live2 = live_before_expr ~labels:branch_labels ~live_out e2 in + let live1 = live_before_expr ~labels:branch_labels ~live_out ~exn_live_out e1 in + let live2 = live_before_expr ~labels:branch_labels ~live_out ~exn_live_out e2 in let live_out = Code.Var.Set.union live1 live2 in - live_before_expr ~labels ~live_out cond - | Try (_, body, _) -> - let _, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in + live_before_expr ~labels ~live_out ~exn_live_out cond + | Try (_, body, catches) -> + let exn_live_out = catches_live_out labels ~exn_live_out catches in + let _, live_in = + live_before_instrs ~labels:(live_out :: labels) ~live_out ~exn_live_out body + in live_in -and live_before_exprs ~labels ~live_out l = - List.fold_right l ~init:live_out ~f:(fun e live_out -> live_before_expr ~labels ~live_out e) +and live_before_exprs ~labels ~live_out ~exn_live_out l = + List.fold_right + l + ~init:live_out + ~f:(fun e live_out -> live_before_expr ~labels ~live_out ~exn_live_out e) -and loop_live_in ~labels ~live_out body = +and loop_live_in ~labels ~live_out ~exn_live_out body = let rec fix live_head = - let _, live_head' = live_before_instrs ~labels:(live_head :: labels) ~live_out body in + let _, live_head' = + live_before_instrs ~labels:(live_head :: labels) ~live_out ~exn_live_out body + in if Code.Var.Set.equal live_head live_head' then live_head else fix live_head' in fix empty_vars -and live_before_loop_body ~labels ~live_out body = - let live_head = loop_live_in ~labels ~live_out body in - live_before_instrs ~labels:(live_head :: labels) ~live_out body +and live_before_loop_body ~labels ~live_out ~exn_live_out body = + let live_head = loop_live_in ~labels ~live_out ~exn_live_out body in + live_before_instrs ~labels:(live_head :: labels) ~live_out ~exn_live_out body -and live_before_instr ~labels ~rest_loops ~live_out (i : W.instruction) = +and live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out (i : W.instruction) = match i with | Drop e | Push e -> - rest_loops, live_before_expr ~labels ~live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e | GlobalSet (_, e) -> - rest_loops, live_before_expr ~labels ~live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e | Throw (_, e) -> - rest_loops, live_before_expr ~labels ~live_out:empty_vars e + rest_loops, live_before_expr ~labels ~live_out:exn_live_out ~exn_live_out e | LocalSet (v, e) -> let live_out = Code.Var.Set.remove v live_out in - rest_loops, live_before_expr ~labels ~live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e | Br (n, None) -> rest_loops, label_reads labels n | Br (n, Some e) -> let live_out = label_reads labels n in - rest_loops, live_before_expr ~labels ~live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e | Br_if (n, e) -> let live_out = Code.Var.Set.union live_out (label_reads labels n) in - rest_loops, live_before_expr ~labels ~live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e | Br_table (e, targets, default) -> let live_out = List.fold_left @@ -277,56 +310,74 @@ and live_before_instr ~labels ~rest_loops ~live_out (i : W.instruction) = ~f:(fun acc n -> Code.Var.Set.union acc (label_reads labels n)) targets in - rest_loops, live_before_expr ~labels ~live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e | Return None -> rest_loops, empty_vars - | Return (Some e) -> rest_loops, live_before_expr ~labels ~live_out:empty_vars e + | Return (Some e) -> + rest_loops, live_before_expr ~labels ~live_out:empty_vars ~exn_live_out e | Loop (ty, body) -> - if List.is_empty ty.result && is_contained_instrs ~depth:1 body + if List.is_empty ty.result && is_extractable_loop_body body then - let live_in = loop_live_in ~labels ~live_out body in - Some (live_out, live_in) :: rest_loops, live_in + let live_in = loop_live_in ~labels ~live_out ~exn_live_out body in + Some (live_out, exn_live_out, live_in) :: rest_loops, live_in else - let body_loops, live_in = live_before_loop_body ~labels ~live_out body in + let body_loops, live_in = + live_before_loop_body ~labels ~live_out ~exn_live_out body + in (None :: body_loops) @ rest_loops, live_in | Block (_, body) -> - let body_loops, live_in = live_before_instrs ~labels:(live_out :: labels) ~live_out body in + let body_loops, live_in = + live_before_instrs ~labels:(live_out :: labels) ~live_out ~exn_live_out body + in body_loops @ rest_loops, live_in | If (_, e, l1, l2) -> let branch_labels = live_out :: labels in - let loops1, live1 = live_before_instrs ~labels:branch_labels ~live_out l1 in - let loops2, live2 = live_before_instrs ~labels:branch_labels ~live_out l2 in + let loops1, live1 = + live_before_instrs ~labels:branch_labels ~live_out ~exn_live_out l1 + in + let loops2, live2 = + live_before_instrs ~labels:branch_labels ~live_out ~exn_live_out l2 + in let live_out = Code.Var.Set.union live1 live2 in - let live_in = live_before_expr ~labels ~live_out e in + let live_in = live_before_expr ~labels ~live_out ~exn_live_out e in loops1 @ loops2 @ rest_loops, live_in - | CallInstr (_, l) -> rest_loops, live_before_exprs ~labels ~live_out l + | CallInstr (_, l) -> + let live_out = Code.Var.Set.union live_out exn_live_out in + rest_loops, live_before_exprs ~labels ~live_out ~exn_live_out l | Nop | Event _ -> rest_loops, live_out | ArraySet (_, e1, e2, e3) -> - let live_out = live_before_expr ~labels ~live_out e3 in - let live_out = live_before_expr ~labels ~live_out e2 in - rest_loops, live_before_expr ~labels ~live_out e1 + let live_out = live_before_expr ~labels ~live_out ~exn_live_out e3 in + let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e1 | StructSet (_, _, e1, e2) -> - let live_out = live_before_expr ~labels ~live_out e2 in - rest_loops, live_before_expr ~labels ~live_out e1 - | Return_call (_, l) -> rest_loops, live_before_exprs ~labels ~live_out:empty_vars l + let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e1 + | Return_call (_, l) -> + rest_loops, live_before_exprs ~labels ~live_out:exn_live_out ~exn_live_out l | Return_call_ref (_, e', l) -> - rest_loops, live_before_exprs ~labels ~live_out:empty_vars (l @ [ e' ]) - | Rethrow _ | Unreachable -> rest_loops, empty_vars + rest_loops, live_before_exprs ~labels ~live_out:exn_live_out ~exn_live_out (l @ [ e' ]) + | Rethrow _ -> rest_loops, exn_live_out + | Unreachable -> rest_loops, empty_vars -and live_before_instrs ~labels ~live_out l = +and live_before_instrs ~labels ~live_out ~exn_live_out l = List.fold_right l ~init:([], live_out) - ~f:(fun i (rest_loops, live_out) -> live_before_instr ~labels ~rest_loops ~live_out i) + ~f:(fun i (rest_loops, live_out) -> + live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out i) (* Backward dataflow over the function body, producing one entry per - [Loop] encountered in source order: [Some (live_out, live_in)] for + [Loop] encountered in source order: [Some (live_out, exn_live_out, live_in)] for extractable loops, where [live_out] is the set of variables read - after the loop on any path through the rest of the function and + after the loop on any normal path through the rest of the function, + [exn_live_out] is the set of variables read after an exceptional exit + from the loop, and [live_in] is the fixpoint set of variables whose pre-loop value the body may need; [None] for non-extractable loops. The forward pass in [transform_instrs] consumes the list in the same order. *) let loops_after_reads body = - let loops, _ = live_before_instrs ~labels:[] ~live_out:empty_vars body in + let loops, _ = + live_before_instrs ~labels:[] ~live_out:empty_vars ~exn_live_out:empty_vars body + in loops (* Transformation context *) @@ -420,15 +471,22 @@ and transform_instr ctx il_ctx pending_loops (i : W.instruction) = match i with | Loop (ty, body) -> ( match !pending_loops with - | Some (after_reads, live_in) :: tl -> + | Some (after_reads, exn_after_reads, live_in) :: tl -> pending_loops := tl; + let { writes; _ } = collect_instrs empty_var_sets body in let result = - extract_loop - ctx - ~is_initialized:(Initialize_locals.is_initialized il_ctx) - ~after_reads - ~live_in - body + if Code.Var.Set.is_empty (Code.Var.Set.inter writes exn_after_reads) + then + extract_loop + ctx + ~is_initialized:(Initialize_locals.is_initialized il_ctx) + ~after_reads + ~live_in + body + else + let inner = fork_il_ctx il_ctx in + let body' = transform_instrs ctx inner pending_loops body in + [ W.Loop (ty, body') ] in Initialize_locals.scan_instruction il_ctx i; result From 217b8b9c5d6b4531e5eb017e0769b678254d6321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 19:38:34 +0200 Subject: [PATCH 09/11] WIP --- compiler/lib-wasm/hoist_loops.ml | 51 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index 0a85a50901..587e4dbf28 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -315,10 +315,17 @@ and live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out (i : W.instruc | Return (Some e) -> rest_loops, live_before_expr ~labels ~live_out:empty_vars ~exn_live_out e | Loop (ty, body) -> - if List.is_empty ty.result && is_extractable_loop_body body + let extractable = + List.is_empty ty.result + && is_extractable_loop_body body + && + let { writes; _ } = collect_instrs empty_var_sets body in + Code.Var.Set.is_empty (Code.Var.Set.inter writes exn_live_out) + in + if extractable then let live_in = loop_live_in ~labels ~live_out ~exn_live_out body in - Some (live_out, exn_live_out, live_in) :: rest_loops, live_in + Some (live_out, live_in) :: rest_loops, live_in else let body_loops, live_in = live_before_loop_body ~labels ~live_out ~exn_live_out body @@ -352,9 +359,9 @@ and live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out (i : W.instruc let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e1 | Return_call (_, l) -> - rest_loops, live_before_exprs ~labels ~live_out:exn_live_out ~exn_live_out l + rest_loops, live_before_exprs ~labels ~live_out:empty_vars ~exn_live_out l | Return_call_ref (_, e', l) -> - rest_loops, live_before_exprs ~labels ~live_out:exn_live_out ~exn_live_out (l @ [ e' ]) + rest_loops, live_before_exprs ~labels ~live_out:empty_vars ~exn_live_out (l @ [ e' ]) | Rethrow _ -> rest_loops, exn_live_out | Unreachable -> rest_loops, empty_vars @@ -366,14 +373,15 @@ and live_before_instrs ~labels ~live_out ~exn_live_out l = live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out i) (* Backward dataflow over the function body, producing one entry per - [Loop] encountered in source order: [Some (live_out, exn_live_out, live_in)] for - extractable loops, where [live_out] is the set of variables read + [Loop] encountered in source order: [Some (live_out, live_in)] for + loops that will be hoisted — [live_out] is the set of variables read after the loop on any normal path through the rest of the function, - [exn_live_out] is the set of variables read after an exceptional exit - from the loop, and - [live_in] is the fixpoint set of variables whose pre-loop value the - body may need; [None] for non-extractable loops. The forward pass - in [transform_instrs] consumes the list in the same order. *) + and [live_in] is the fixpoint set of variables whose pre-loop value + the body may need; [None] for loops that are left in place (either + not contained, or contained but writing a variable that is read on + an exceptional exit, which the helper has no way to write back). + The forward pass in [transform_instrs] consumes the list in the same + order. *) let loops_after_reads body = let loops, _ = live_before_instrs ~labels:[] ~live_out:empty_vars ~exn_live_out:empty_vars body @@ -471,22 +479,15 @@ and transform_instr ctx il_ctx pending_loops (i : W.instruction) = match i with | Loop (ty, body) -> ( match !pending_loops with - | Some (after_reads, exn_after_reads, live_in) :: tl -> + | Some (after_reads, live_in) :: tl -> pending_loops := tl; - let { writes; _ } = collect_instrs empty_var_sets body in let result = - if Code.Var.Set.is_empty (Code.Var.Set.inter writes exn_after_reads) - then - extract_loop - ctx - ~is_initialized:(Initialize_locals.is_initialized il_ctx) - ~after_reads - ~live_in - body - else - let inner = fork_il_ctx il_ctx in - let body' = transform_instrs ctx inner pending_loops body in - [ W.Loop (ty, body') ] + extract_loop + ctx + ~is_initialized:(Initialize_locals.is_initialized il_ctx) + ~after_reads + ~live_in + body in Initialize_locals.scan_instruction il_ctx i; result From 8c87d3d177a8687e52ac67845a9bf63de86a4d12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 20:28:51 +0200 Subject: [PATCH 10/11] WIP --- compiler/lib-wasm/hoist_loops.ml | 287 +++++++++++++++++++++++++------ 1 file changed, 230 insertions(+), 57 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index 587e4dbf28..a4a9a34fc6 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -188,6 +188,91 @@ let empty_var_sets = { reads = Code.Var.Set.empty; writes = Code.Var.Set.empty } let empty_vars = Code.Var.Set.empty +(* The liveness analysis below only needs to track variables that are + written in some [Loop] body — those are the only ones whose value + the helper would write back. Other variables are either never + written (read-only in the loop, always need the pre-loop value) or + only written outside loops (never appear in any [returned_vars] or + [live_in] decision). Restricting the dataflow to this set keeps the + live sets small and speeds up the unions inside the fixpoint. + + We use a hashtable since the only operations needed are insertion + (during this pre-pass) and O(1) membership lookup at every LocalGet + in the liveness analysis. *) +let writes_in_loops body = + let acc = Code.Var.Hashtbl.create 16 in + let add_writes body = + let { writes; _ } = collect_instrs empty_var_sets body in + Code.Var.Set.iter (fun v -> Code.Var.Hashtbl.replace acc v ()) writes + in + let rec expr (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ | LocalGet _ -> () + | LocalTee (_, e') + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | Br_on_cast (_, _, _, e') + | Br_on_cast_fail (_, _, _, e') + | Br_on_null (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> expr e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> + expr e1; + expr e2 + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> List.iter ~f:expr l + | Call_ref (_, e', l) -> + List.iter ~f:expr l; + expr e' + | BlockExpr (_, body) | Try (_, body, _) -> instrs body + | Seq (l, e') -> + instrs l; + expr e' + | IfExpr (_, cond, e1, e2) -> + expr cond; + expr e1; + expr e2 + and instr (i : W.instruction) = + match i with + | Loop (_, body) -> + (* [collect_instrs] descends into nested structures, so this + captures writes from any nested loops as well. *) + add_writes body + | Block (_, body) -> instrs body + | If (_, e, l1, l2) -> + expr e; + instrs l1; + instrs l2 + | Drop e | GlobalSet (_, e) | Push e | Throw (_, e) | LocalSet (_, e) -> expr e + | Br (_, Some e) | Br_if (_, e) | Br_table (e, _, _) | Return (Some e) -> expr e + | Br (_, None) | Return None | Nop | Unreachable | Event _ | Rethrow _ -> () + | CallInstr (_, l) | Return_call (_, l) -> List.iter ~f:expr l + | Return_call_ref (_, e', l) -> + List.iter ~f:expr l; + expr e' + | ArraySet (_, e1, e2, e3) -> + expr e1; + expr e2; + expr e3 + | StructSet (_, _, e1, e2) -> + expr e1; + expr e2 + and instrs l = List.iter ~f:instr l in + instrs body; + acc + let label_reads labels depth = let rec find labels depth = match labels, depth with @@ -203,15 +288,19 @@ let catches_live_out labels ~exn_live_out catches = ~init:exn_live_out ~f:(fun acc (_, label, _) -> Code.Var.Set.union acc (label_reads labels label)) -let rec live_before_expr ~labels ~live_out ~exn_live_out (e : W.expression) = +let rec live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars (e : W.expression) = match e with | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> live_out - | LocalGet v -> Code.Var.Set.add v live_out + | LocalGet v -> + if Code.Var.Hashtbl.mem tracked_vars v + then Code.Var.Set.add v live_out + else live_out | LocalTee (v, e') -> live_before_expr ~labels ~live_out:(Code.Var.Set.remove v live_out) ~exn_live_out + ~tracked_vars e' | UnOp (_, e') | I32WrapI64 e' @@ -225,84 +314,126 @@ let rec live_before_expr ~labels ~live_out ~exn_live_out (e : W.expression) = | RefCast (_, e') | RefTest (_, e') | ExternConvertAny e' - | AnyConvertExtern e' -> live_before_expr ~labels ~live_out ~exn_live_out e' + | AnyConvertExtern e' -> + live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e' | BinOp (_, e1, e2) | ArrayNew (_, e1, e2) | ArrayNewData (_, _, e1, e2) | ArrayGet (_, _, e1, e2) | RefEq (e1, e2) -> - let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in - live_before_expr ~labels ~live_out ~exn_live_out e1 + let live_out = live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e2 in + live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e1 | Br_on_cast (n, _, _, e') | Br_on_cast_fail (n, _, _, e') | Br_on_null (n, e') -> let live_out = Code.Var.Set.union live_out (label_reads labels n) in - live_before_expr ~labels ~live_out ~exn_live_out e' + live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e' | Call (_, l) -> - let live_out = Code.Var.Set.union live_out exn_live_out in - live_before_exprs ~labels ~live_out ~exn_live_out l + let live_out = + if Code.Var.Set.is_empty exn_live_out + then live_out + else Code.Var.Set.union live_out exn_live_out + in + live_before_exprs ~labels ~live_out ~exn_live_out ~tracked_vars l | ArrayNewFixed (_, l) | StructNew (_, l) -> - live_before_exprs ~labels ~live_out ~exn_live_out l + live_before_exprs ~labels ~live_out ~exn_live_out ~tracked_vars l | Call_ref (_, e', l) -> - let live_out = Code.Var.Set.union live_out exn_live_out in - live_before_exprs ~labels ~live_out ~exn_live_out (l @ [ e' ]) + let live_out = + if Code.Var.Set.is_empty exn_live_out + then live_out + else Code.Var.Set.union live_out exn_live_out + in + live_before_exprs ~labels ~live_out ~exn_live_out ~tracked_vars (l @ [ e' ]) | BlockExpr (_, body) -> let _, live_in = - live_before_instrs ~labels:(live_out :: labels) ~live_out ~exn_live_out body + live_before_instrs + ~labels:(live_out :: labels) + ~live_out + ~exn_live_out + ~tracked_vars + body in live_in | Seq (instrs, e') -> - let live_out = live_before_expr ~labels ~live_out ~exn_live_out e' in - let _, live_in = live_before_instrs ~labels ~live_out ~exn_live_out instrs in + let live_out = live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e' in + let _, live_in = + live_before_instrs ~labels ~live_out ~exn_live_out ~tracked_vars instrs + in live_in | IfExpr (_, cond, e1, e2) -> let branch_labels = live_out :: labels in - let live1 = live_before_expr ~labels:branch_labels ~live_out ~exn_live_out e1 in - let live2 = live_before_expr ~labels:branch_labels ~live_out ~exn_live_out e2 in + let live1 = + live_before_expr ~labels:branch_labels ~live_out ~exn_live_out ~tracked_vars e1 + in + let live2 = + live_before_expr ~labels:branch_labels ~live_out ~exn_live_out ~tracked_vars e2 + in let live_out = Code.Var.Set.union live1 live2 in - live_before_expr ~labels ~live_out ~exn_live_out cond + live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars cond | Try (_, body, catches) -> let exn_live_out = catches_live_out labels ~exn_live_out catches in let _, live_in = - live_before_instrs ~labels:(live_out :: labels) ~live_out ~exn_live_out body + live_before_instrs + ~labels:(live_out :: labels) + ~live_out + ~exn_live_out + ~tracked_vars + body in live_in -and live_before_exprs ~labels ~live_out ~exn_live_out l = +and live_before_exprs ~labels ~live_out ~exn_live_out ~tracked_vars l = List.fold_right l ~init:live_out - ~f:(fun e live_out -> live_before_expr ~labels ~live_out ~exn_live_out e) + ~f:(fun e live_out -> live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e) -and loop_live_in ~labels ~live_out ~exn_live_out body = +and loop_live_in ~labels ~live_out ~exn_live_out ~tracked_vars body = let rec fix live_head = let _, live_head' = - live_before_instrs ~labels:(live_head :: labels) ~live_out ~exn_live_out body + live_before_instrs + ~labels:(live_head :: labels) + ~live_out + ~exn_live_out + ~tracked_vars + body in if Code.Var.Set.equal live_head live_head' then live_head else fix live_head' in fix empty_vars -and live_before_loop_body ~labels ~live_out ~exn_live_out body = - let live_head = loop_live_in ~labels ~live_out ~exn_live_out body in - live_before_instrs ~labels:(live_head :: labels) ~live_out ~exn_live_out body - -and live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out (i : W.instruction) = +and live_before_loop_body ~labels ~live_out ~exn_live_out ~tracked_vars body = + let live_head = loop_live_in ~labels ~live_out ~exn_live_out ~tracked_vars body in + live_before_instrs + ~labels:(live_head :: labels) + ~live_out + ~exn_live_out + ~tracked_vars + body + +and live_before_instr + ~labels + ~rest_loops + ~live_out + ~exn_live_out + ~tracked_vars + (i : W.instruction) = match i with | Drop e | Push e -> - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e | GlobalSet (_, e) -> - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e | Throw (_, e) -> - rest_loops, live_before_expr ~labels ~live_out:exn_live_out ~exn_live_out e + ( rest_loops + , live_before_expr ~labels ~live_out:exn_live_out ~exn_live_out ~tracked_vars e ) | LocalSet (v, e) -> let live_out = Code.Var.Set.remove v live_out in - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e | Br (n, None) -> rest_loops, label_reads labels n | Br (n, Some e) -> let live_out = label_reads labels n in - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e | Br_if (n, e) -> let live_out = Code.Var.Set.union live_out (label_reads labels n) in - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e | Br_table (e, targets, default) -> let live_out = List.fold_left @@ -310,10 +441,11 @@ and live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out (i : W.instruc ~f:(fun acc n -> Code.Var.Set.union acc (label_reads labels n)) targets in - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e | Return None -> rest_loops, empty_vars | Return (Some e) -> - rest_loops, live_before_expr ~labels ~live_out:empty_vars ~exn_live_out e + ( rest_loops + , live_before_expr ~labels ~live_out:empty_vars ~exn_live_out ~tracked_vars e ) | Loop (ty, body) -> let extractable = List.is_empty ty.result @@ -324,53 +456,81 @@ and live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out (i : W.instruc in if extractable then - let live_in = loop_live_in ~labels ~live_out ~exn_live_out body in + let live_in = + loop_live_in ~labels ~live_out ~exn_live_out ~tracked_vars body + in Some (live_out, live_in) :: rest_loops, live_in else let body_loops, live_in = - live_before_loop_body ~labels ~live_out ~exn_live_out body + live_before_loop_body ~labels ~live_out ~exn_live_out ~tracked_vars body in (None :: body_loops) @ rest_loops, live_in | Block (_, body) -> let body_loops, live_in = - live_before_instrs ~labels:(live_out :: labels) ~live_out ~exn_live_out body + live_before_instrs + ~labels:(live_out :: labels) + ~live_out + ~exn_live_out + ~tracked_vars + body in body_loops @ rest_loops, live_in | If (_, e, l1, l2) -> let branch_labels = live_out :: labels in let loops1, live1 = - live_before_instrs ~labels:branch_labels ~live_out ~exn_live_out l1 + live_before_instrs + ~labels:branch_labels + ~live_out + ~exn_live_out + ~tracked_vars + l1 in let loops2, live2 = - live_before_instrs ~labels:branch_labels ~live_out ~exn_live_out l2 + live_before_instrs + ~labels:branch_labels + ~live_out + ~exn_live_out + ~tracked_vars + l2 in let live_out = Code.Var.Set.union live1 live2 in - let live_in = live_before_expr ~labels ~live_out ~exn_live_out e in + let live_in = live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e in loops1 @ loops2 @ rest_loops, live_in | CallInstr (_, l) -> - let live_out = Code.Var.Set.union live_out exn_live_out in - rest_loops, live_before_exprs ~labels ~live_out ~exn_live_out l + let live_out = + if Code.Var.Set.is_empty exn_live_out + then live_out + else Code.Var.Set.union live_out exn_live_out + in + rest_loops, live_before_exprs ~labels ~live_out ~exn_live_out ~tracked_vars l | Nop | Event _ -> rest_loops, live_out | ArraySet (_, e1, e2, e3) -> - let live_out = live_before_expr ~labels ~live_out ~exn_live_out e3 in - let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e1 + let live_out = live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e3 in + let live_out = live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e2 in + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e1 | StructSet (_, _, e1, e2) -> - let live_out = live_before_expr ~labels ~live_out ~exn_live_out e2 in - rest_loops, live_before_expr ~labels ~live_out ~exn_live_out e1 + let live_out = live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e2 in + rest_loops, live_before_expr ~labels ~live_out ~exn_live_out ~tracked_vars e1 | Return_call (_, l) -> - rest_loops, live_before_exprs ~labels ~live_out:empty_vars ~exn_live_out l + ( rest_loops + , live_before_exprs ~labels ~live_out:empty_vars ~exn_live_out ~tracked_vars l ) | Return_call_ref (_, e', l) -> - rest_loops, live_before_exprs ~labels ~live_out:empty_vars ~exn_live_out (l @ [ e' ]) + ( rest_loops + , live_before_exprs + ~labels + ~live_out:empty_vars + ~exn_live_out + ~tracked_vars + (l @ [ e' ]) ) | Rethrow _ -> rest_loops, exn_live_out | Unreachable -> rest_loops, empty_vars -and live_before_instrs ~labels ~live_out ~exn_live_out l = +and live_before_instrs ~labels ~live_out ~exn_live_out ~tracked_vars l = List.fold_right l ~init:([], live_out) ~f:(fun i (rest_loops, live_out) -> - live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out i) + live_before_instr ~labels ~rest_loops ~live_out ~exn_live_out ~tracked_vars i) (* Backward dataflow over the function body, producing one entry per [Loop] encountered in source order: [Some (live_out, live_in)] for @@ -382,9 +542,14 @@ and live_before_instrs ~labels ~live_out ~exn_live_out l = an exceptional exit, which the helper has no way to write back). The forward pass in [transform_instrs] consumes the list in the same order. *) -let loops_after_reads body = +let loops_after_reads ~tracked_vars body = let loops, _ = - live_before_instrs ~labels:[] ~live_out:empty_vars ~exn_live_out:empty_vars body + live_before_instrs + ~labels:[] + ~live_out:empty_vars + ~exn_live_out:empty_vars + ~tracked_vars + body in loops @@ -392,6 +557,7 @@ let loops_after_reads body = type ctx = { var_types : W.value_type Code.Var.Hashtbl.t + ; tracked_vars : unit Code.Var.Hashtbl.t ; mutable new_fields : W.module_field list ; mutable extra_locals : (Code.Var.t * W.value_type) list } @@ -408,9 +574,15 @@ let lookup_types ctx vars = let extract_loop ctx ~is_initialized ~after_reads ~live_in body = let { reads; writes } = collect_instrs empty_var_sets body in let all_vars = Code.Var.Set.union reads writes in + (* Variables in [all_vars] that are not tracked are read in the body + and written nowhere in any loop — they trivially need their + pre-loop value, so treat them as live-in. *) let param_vars = Code.Var.Set.filter - (fun v -> is_initialized v && Code.Var.Set.mem v live_in) + (fun v -> + is_initialized v + && (Code.Var.Set.mem v live_in + || not (Code.Var.Hashtbl.mem ctx.tracked_vars v))) all_vars in let local_vars = Code.Var.Set.diff all_vars param_vars in @@ -525,7 +697,8 @@ let f ~toplevel fields = func.param_names func.signature.params; List.iter ~f:(fun (v, t) -> Code.Var.Hashtbl.add var_types v t) func.locals; - let ctx = { var_types; new_fields = []; extra_locals = [] } in + let tracked_vars = writes_in_loops func.body in + let ctx = { var_types; tracked_vars; new_fields = []; extra_locals = [] } in let il_ctx = Initialize_locals.create_context () in List.iter ~f:(Initialize_locals.mark_initialized il_ctx) func.param_names; List.iter @@ -535,7 +708,7 @@ let f ~toplevel fields = Initialize_locals.mark_initialized il_ctx var | Ref { nullable = false; _ } -> ()) func.locals; - let pending_loops = ref (loops_after_reads func.body) in + let pending_loops = ref (loops_after_reads ~tracked_vars func.body) in let body = transform_instrs ctx il_ctx pending_loops func.body in let func' = W.Function { func with body; locals = func.locals @ ctx.extra_locals } From 89c18b6eb9e8678d3aa7d8571cbe57e73684014a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 20 May 2026 20:53:58 +0200 Subject: [PATCH 11/11] Stats --- compiler/lib-wasm/hoist_loops.ml | 79 ++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/compiler/lib-wasm/hoist_loops.ml b/compiler/lib-wasm/hoist_loops.ml index a4a9a34fc6..cd081862f9 100644 --- a/compiler/lib-wasm/hoist_loops.ml +++ b/compiler/lib-wasm/hoist_loops.ml @@ -45,6 +45,10 @@ open! Stdlib module W = Wasm_ast +let times = Debug.find "times" + +let stats = Debug.find "stats" + (* Check that all branches in a loop body target labels within the loop. [depth] counts the number of enclosing control flow constructs including the loop itself, so it starts at 1 when called on the loop @@ -687,32 +691,49 @@ and transform_instr ctx il_ctx pending_loops (i : W.instruction) = [ i ] let f ~toplevel fields = - List.concat_map - ~f:(fun field -> - match field with - | W.Function ({ name; _ } as func) when Code.Var.equal name toplevel -> - let var_types = Code.Var.Hashtbl.create 16 in - List.iter2 - ~f:(fun v t -> Code.Var.Hashtbl.add var_types v t) - func.param_names - func.signature.params; - List.iter ~f:(fun (v, t) -> Code.Var.Hashtbl.add var_types v t) func.locals; - let tracked_vars = writes_in_loops func.body in - let ctx = { var_types; tracked_vars; new_fields = []; extra_locals = [] } in - let il_ctx = Initialize_locals.create_context () in - List.iter ~f:(Initialize_locals.mark_initialized il_ctx) func.param_names; - List.iter - ~f:(fun (var, typ) -> - match (typ : W.value_type) with - | I32 | I64 | F32 | F64 | Ref { nullable = true; _ } -> - Initialize_locals.mark_initialized il_ctx var - | Ref { nullable = false; _ } -> ()) - func.locals; - let pending_loops = ref (loops_after_reads ~tracked_vars func.body) in - let body = transform_instrs ctx il_ctx pending_loops func.body in - let func' = - W.Function { func with body; locals = func.locals @ ctx.extra_locals } - in - List.rev ctx.new_fields @ [ func' ] - | _ -> [ field ]) - fields + let t = Timer.make () in + let hoisted = ref 0 in + let left_in_place = ref 0 in + let result = + List.concat_map + ~f:(fun field -> + match field with + | W.Function ({ name; _ } as func) when Code.Var.equal name toplevel -> + let var_types = Code.Var.Hashtbl.create 16 in + List.iter2 + ~f:(fun v t -> Code.Var.Hashtbl.add var_types v t) + func.param_names + func.signature.params; + List.iter ~f:(fun (v, t) -> Code.Var.Hashtbl.add var_types v t) func.locals; + let tracked_vars = writes_in_loops func.body in + let ctx = { var_types; tracked_vars; new_fields = []; extra_locals = [] } in + let il_ctx = Initialize_locals.create_context () in + List.iter ~f:(Initialize_locals.mark_initialized il_ctx) func.param_names; + List.iter + ~f:(fun (var, typ) -> + match (typ : W.value_type) with + | I32 | I64 | F32 | F64 | Ref { nullable = true; _ } -> + Initialize_locals.mark_initialized il_ctx var + | Ref { nullable = false; _ } -> ()) + func.locals; + let loops = loops_after_reads ~tracked_vars func.body in + List.iter loops ~f:(function + | Some _ -> incr hoisted + | None -> incr left_in_place); + let pending_loops = ref loops in + let body = transform_instrs ctx il_ctx pending_loops func.body in + let func' = + W.Function { func with body; locals = func.locals @ ctx.extra_locals } + in + List.rev ctx.new_fields @ [ func' ] + | _ -> [ field ]) + fields + in + if times () then Format.eprintf " loop hoisting: %a@." Timer.print t; + if stats () + then + Format.eprintf + "Stats - loop hoisting: hoisted %d, left in place %d@." + !hoisted + !left_in_place; + result