diff --git a/compiler/bin-wasm_of_ocaml/compile.ml b/compiler/bin-wasm_of_ocaml/compile.ml index 73d6df565b..3305d00e65 100644 --- a/compiler/bin-wasm_of_ocaml/compile.ml +++ b/compiler/bin-wasm_of_ocaml/compile.ml @@ -150,77 +150,104 @@ let link_and_optimize | Some _ | None -> opt_sourcemap in let enable_source_maps = Option.is_some opt_sourcemap_file in + let run_dce = not dynlink in + let run_opt = + match (profile : Profile.t) with + | O1 -> false + | O2 | O3 -> true + in + (* [with_step_output ~is_last base] invokes its continuation with the + output file and sourcemap to pass to a Binaryen pass. If [is_last] the + pass writes directly to the final location; otherwise a pair of temp + files scoped to the continuation is used. *) + let with_step_output ~is_last base k = + if is_last + then k ~file:output_file ~opt_sm:opt_sourcemap_file + else + Fs.with_intermediate_file (Filename.temp_file base ".wasm") + @@ fun file -> + opt_with + Fs.with_intermediate_file + (if enable_source_maps then Some (Filename.temp_file base ".wasm.map") else None) + @@ fun opt_sm -> k ~file ~opt_sm + in Fs.with_intermediate_file (Filename.temp_file "runtime" ".wasm") @@ fun runtime_file -> build_runtime ~runtime_file; - Fs.with_intermediate_file (Filename.temp_file "wasm-merged" ".wasm") - @@ fun temp_file -> - opt_with - Fs.with_intermediate_file - (if enable_source_maps - then Some (Filename.temp_file "wasm-merged" ".wasm.map") - else None) - @@ fun opt_temp_sourcemap -> - (with_runtime_files ~runtime_wasm_files - @@ fun runtime_inputs -> - let t = Timer.make ~get_time:Unix.time () in - Binaryen.link - ~inputs: - ({ Binaryen.module_name = "env"; file = runtime_file; source_map_file = None } - :: runtime_inputs - @ List.map - ~f:(fun (file, source_map_file) -> - { Binaryen.module_name = "OCaml"; file; source_map_file }) - wat_files) - ~opt_output_sourcemap:opt_temp_sourcemap - ~output_file:temp_file - (); - if binaryen_times () then Format.eprintf " binaryen link: %a@." Timer.print t); - - let optimize_and_finish ~opt_input_sourcemap ~input_file primitives = + let link ~output_file ~opt_output_sourcemap = + with_runtime_files ~runtime_wasm_files + @@ fun runtime_inputs -> + let t = Timer.make ~get_time:Unix.time () in + Binaryen.link + ~inputs: + ({ Binaryen.module_name = "env"; file = runtime_file; source_map_file = None } + :: runtime_inputs + @ List.map + ~f:(fun (file, source_map_file) -> + { Binaryen.module_name = "OCaml"; file; source_map_file }) + wat_files) + ~opt_output_sourcemap + ~output_file + (); + if binaryen_times () then Format.eprintf " binaryen link: %a@." Timer.print t + in + let dce ~input_file ~opt_input_sourcemap ~output_file ~opt_output_sourcemap = + let t = Timer.make ~get_time:Unix.time () in + let primitives = + Binaryen.dead_code_elimination + ~dependencies:Runtime_files.dependencies + ~opt_input_sourcemap + ~opt_output_sourcemap + ~input_file + ~output_file + in + if binaryen_times () then Format.eprintf " binaryen dce: %a@." Timer.print t; + primitives + in + let optimize ~input_file ~opt_input_sourcemap = let t = Timer.make ~get_time:Unix.time () in Binaryen.optimize ~profile ~opt_input_sourcemap - ~opt_output_sourcemap:opt_sourcemap + ~opt_output_sourcemap:opt_sourcemap_file ~input_file ~output_file (); - if binaryen_times () then Format.eprintf " binaryen opt: %a@." Timer.print t; + if binaryen_times () then Format.eprintf " binaryen opt: %a@." Timer.print t + in + let finish primitives = Option.iter ~f:(update_sourcemap ~sourcemap_root ~sourcemap_don't_inline_content) opt_sourcemap_file; primitives in - if dynlink + let link_is_last = (not run_dce) && not run_opt in + with_step_output ~is_last:link_is_last "wasm-merged" + @@ fun ~file:linked ~opt_sm:opt_linked_sm -> + link ~output_file:linked ~opt_output_sourcemap:opt_linked_sm; + if link_is_last + then finish (Linker.list_all ()) + else if run_dce then - optimize_and_finish - ~opt_input_sourcemap:opt_temp_sourcemap - ~input_file:temp_file - (Linker.list_all ()) - else - Fs.with_intermediate_file (Filename.temp_file "wasm-dce" ".wasm") - @@ fun temp_file' -> - opt_with - Fs.with_intermediate_file - (if enable_source_maps - then Some (Filename.temp_file "wasm-dce" ".wasm.map") - else None) - @@ fun opt_temp_sourcemap' -> - let t = Timer.make ~get_time:Unix.time () in + let dce_is_last = not run_opt in + with_step_output ~is_last:dce_is_last "wasm-dce" + @@ fun ~file:dced ~opt_sm:opt_dced_sm -> let primitives = - Binaryen.dead_code_elimination - ~dependencies:Runtime_files.dependencies - ~opt_input_sourcemap:opt_temp_sourcemap - ~opt_output_sourcemap:opt_temp_sourcemap' - ~input_file:temp_file - ~output_file:temp_file' + dce + ~input_file:linked + ~opt_input_sourcemap:opt_linked_sm + ~output_file:dced + ~opt_output_sourcemap:opt_dced_sm in - if binaryen_times () then Format.eprintf " binaryen dce: %a@." Timer.print t; - optimize_and_finish - ~opt_input_sourcemap:opt_temp_sourcemap' - ~input_file:temp_file' - primitives + if dce_is_last + then finish primitives + else ( + optimize ~input_file:dced ~opt_input_sourcemap:opt_dced_sm; + finish primitives) + else ( + (* [dynlink] + O2/O3: skip DCE, go straight to [wasm-opt]. *) + optimize ~input_file:linked ~opt_input_sourcemap:opt_linked_sm; + finish (Linker.list_all ())) let link_runtime ~profile runtime_wasm_files output_file = if List.is_empty runtime_wasm_files @@ -275,6 +302,7 @@ let generate_prelude ~out_file = let context = Generate.start () in let _ = Generate.f + ~profile ~context ~unit_name:(Some "prelude") ~live_vars:variable_uses @@ -488,6 +516,7 @@ let run let context = Generate.start () in let toplevel_name, generated_js = Generate.f + ~profile ~context ~unit_name ~live_vars:variable_uses @@ -579,31 +608,45 @@ let run else None) @@ fun opt_tmp_map_file -> let unit_data, shapes = - Fs.with_intermediate_file (Filename.temp_file unit_name ".wasm") - @@ fun input_file -> - opt_with - Fs.with_intermediate_file - (if enable_source_maps - then Some (Filename.temp_file unit_name ".wasm.map") - else None) - @@ fun opt_input_sourcemap -> - let fragments, shapes = - output - code - ~wat_file: - (Filename.concat (Filename.dirname output_file) (unit_name ^ ".wat")) - ~unit_name:(Some unit_name) - ~file:input_file - ~opt_source_map_file:opt_input_sourcemap - in - Binaryen.optimize - ~profile - ~opt_input_sourcemap - ~opt_output_sourcemap:opt_tmp_map_file - ~input_file - ~output_file:tmp_wasm_file - (); - { Link.unit_name; unit_info; fragments }, shapes + match profile with + | Profile.O1 -> + (* At O1, skip Binaryen.optimize — write directly *) + let fragments, shapes = + output + code + ~wat_file: + (Filename.concat (Filename.dirname output_file) (unit_name ^ ".wat")) + ~unit_name:(Some unit_name) + ~file:tmp_wasm_file + ~opt_source_map_file:opt_tmp_map_file + in + { Link.unit_name; unit_info; fragments }, shapes + | O2 | O3 -> + Fs.with_intermediate_file (Filename.temp_file unit_name ".wasm") + @@ fun input_file -> + opt_with + Fs.with_intermediate_file + (if enable_source_maps + then Some (Filename.temp_file unit_name ".wasm.map") + else None) + @@ fun opt_input_sourcemap -> + let fragments, shapes = + output + code + ~wat_file: + (Filename.concat (Filename.dirname output_file) (unit_name ^ ".wat")) + ~unit_name:(Some unit_name) + ~file:input_file + ~opt_source_map_file:opt_input_sourcemap + in + Binaryen.optimize + ~profile + ~opt_input_sourcemap + ~opt_output_sourcemap:opt_tmp_map_file + ~input_file + ~output_file:tmp_wasm_file + (); + { Link.unit_name; unit_info; fragments }, shapes in cont unit_data unit_name tmp_wasm_file opt_tmp_map_file shapes cmi_files in diff --git a/compiler/lib-wasm/gc_target.ml b/compiler/lib-wasm/gc_target.ml index b7261ab74a..2d83d81ade 100644 --- a/compiler/lib-wasm/gc_target.ml +++ b/compiler/lib-wasm/gc_target.ml @@ -2041,7 +2041,33 @@ let handle_exceptions ~result_typ ~fall_through ~context body x exn_handler = let* () = no_event in exn_handler ~result_typ ~fall_through ~context) -let post_process_function_body = Initialize_locals.f +let post_process_function_body ~profile ~param_names ~param_types ~locals body = + (* At [--opt 1] we skip [wasm-opt] entirely (both for .cmo/.cma and + for executables), so our own passes are the only ones tightening + the body. [Local_sink] runs first: shortening live ranges and + dropping [local.set]s simplifies the input to [Var_coalescing]. *) + let body = + match (profile : Profile.t) with + | O1 when Config.Flag.wasm_local_sink () -> Local_sink.f body + | O1 | O2 | O3 -> body + in + let locals, body = + match (profile : Profile.t) with + | O1 when Config.Flag.wasm_var_coalescing () -> + Var_coalescing.f ~param_names ~param_types ~locals body + | O1 | O2 | O3 -> locals, body + in + let locals, body = Initialize_locals.f ~param_names ~locals body in + (* Reorder locals so frequently-used ones get low Wasm indices + (one-byte LEB128 encoding for indices < 128). Gated on [O1] to + match the rest of this pipeline; at [O2]/[O3] [wasm-opt] handles + local reordering. *) + let locals = + match (profile : Profile.t) with + | O1 when Config.Flag.wasm_reorder_locals () -> Reorder_locals.f ~locals body + | O1 | O2 | O3 -> locals + in + locals, body let entry_point ~toplevel_fun = let code = diff --git a/compiler/lib-wasm/generate.ml b/compiler/lib-wasm/generate.ml index 9a049ab9aa..c9efb95753 100644 --- a/compiler/lib-wasm/generate.ml +++ b/compiler/lib-wasm/generate.ml @@ -2108,6 +2108,7 @@ module Generate (Target : Target_sig.S) = struct ~context let translate_function + ~profile p ctx name_opt @@ -2311,7 +2312,32 @@ module Generate (Target : Target_sig.S) = struct | Some loc -> event loc | None -> return ()) in - let locals, body = post_process_function_body ~param_names ~locals body in + let signature = + match name_opt with + | None -> Type.primitive_type param_count + | Some f -> + if Typing.can_unbox_parameters ctx.fun_info f + then + { W.params = + List.map + ~f:(fun x -> + Option.value + ~default:Type.value + (unboxed_type (Typing.var_type ctx.types x))) + params + @ [ Type.value ] + ; result = [ Option.value ~default:Type.value (unboxed_type return_type) ] + } + else Type.func_type (param_count - 1) + in + let locals, body = + post_process_function_body + ~profile + ~param_names + ~param_types:signature.params + ~locals + body + in W.Function { name = (match name_opt with @@ -2322,23 +2348,7 @@ module Generate (Target : Target_sig.S) = struct | None -> Option.map ~f:(fun name -> name ^ ".init") unit_name | Some _ -> None) ; typ = None - ; signature = - (match name_opt with - | None -> Type.primitive_type param_count - | Some f -> - if Typing.can_unbox_parameters ctx.fun_info f - then - { W.params = - List.map - ~f:(fun x -> - Option.value - ~default:Type.value - (unboxed_type (Typing.var_type ctx.types x))) - params - @ [ Type.value ] - ; result = [ Option.value ~default:Type.value (unboxed_type return_type) ] - } - else Type.func_type (param_count - 1)) + ; signature ; param_names ; locals ; body @@ -2402,6 +2412,7 @@ module Generate (Target : Target_sig.S) = struct add_start_function ~context (init_function ~context ~to_link) let f + ~profile ~context:global_context ~unit_name (p : Code.program) @@ -2433,7 +2444,16 @@ module Generate (Target : Target_sig.S) = struct Code.fold_closures_outermost_first p (fun name_opt params cont cloc -> - translate_function p ctx name_opt ~toplevel_name ~unit_name params cont cloc) + translate_function + ~profile + p + ctx + name_opt + ~toplevel_name + ~unit_name + params + cont + cloc) [] in let functions = @@ -2549,7 +2569,15 @@ let init = G.init let start () = make_context ~value_type:Gc_target.Type.value -let f ~context ~unit_name p ~live_vars ~in_cps ~deadcode_sentinel ~global_flow_data = +let f + ~profile + ~context + ~unit_name + p + ~live_vars + ~in_cps + ~deadcode_sentinel + ~global_flow_data = let global_flow_state, global_flow_info = global_flow_data in let fun_info = Call_graph_analysis.f p global_flow_info in let types = @@ -2559,8 +2587,19 @@ let f ~context ~unit_name p ~live_vars ~in_cps ~deadcode_sentinel ~global_flow_d let p = Structure.norm p in let p = fix_switch_branches p in let res = - G.f ~context ~unit_name ~live_vars ~in_cps ~global_flow_info ~fun_info ~types p + G.f + ~profile + ~context + ~unit_name + ~live_vars + ~in_cps + ~global_flow_info + ~fun_info + ~types + p in + Local_sink.report_stats (); + Var_coalescing.report_stats (); if times () then Format.eprintf " code gen.: %a@." Timer.print t; res @@ -2583,12 +2622,14 @@ let wasm_output ch ~opt_source_map_file ~context = if times () then Format.eprintf " output: %a@." Timer.print t let compile ~unit_name code = + let profile = Profile.O1 in let Driver.{ program; variable_uses; in_cps; deadcode_sentinel; _ }, global_flow_data = - Driver.optimize_for_wasm ~shapes:false ~profile:O1 code + Driver.optimize_for_wasm ~shapes:false ~profile code in let context = start () in let toplevel_name, fragments = f + ~profile ~context ~unit_name ~live_vars:variable_uses diff --git a/compiler/lib-wasm/generate.mli b/compiler/lib-wasm/generate.mli index 47cfb17095..1ac8ce29b3 100644 --- a/compiler/lib-wasm/generate.mli +++ b/compiler/lib-wasm/generate.mli @@ -21,7 +21,8 @@ val init : unit -> unit val start : unit -> Code_generation.context val f : - context:Code_generation.context + profile:Profile.t + -> context:Code_generation.context -> unit_name:string option -> Code.program -> live_vars:int array diff --git a/compiler/lib-wasm/local_sink.ml b/compiler/lib-wasm/local_sink.ml new file mode 100644 index 0000000000..afe8f130da --- /dev/null +++ b/compiler/lib-wasm/local_sink.ml @@ -0,0 +1,670 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * Copyright (C) 2026 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(* + Sink [local.set x e] to the first reachable [local.get x], turning the + pair into a single [local.tee x e]. We search forward in evaluation + order, stop at any control-flow boundary or compound construct, and + bail if we meet another write to [x] (a [local.set x] or [local.tee + x]) before a read. + + Effect ordering: moving [e] past other code is sound iff + + - [e] itself has no observable side effects (no writes, no calls, + no traps beyond the ones [effect_free] already tolerates); + - every sub-expression we cross is [effect_free]; and + - no instruction we cross writes any local that [e] reads — if it + did, [e]'s reads would see a different value at the new position. + + The last condition is what makes an "[effect_free] implies safe" + rule unsound: [effect_free] expressions can still read mutable + locals, and if the intermediate code writes those locals the move + would change [e]'s result. +*) + +open! Stdlib +module W = Wasm_ast +module Var = Code.Var + +(* Hard upper bound on how far a single sink attempt may search forward. + The walker is O(N) per candidate with O(N) candidates per function, so + without a cap the pass is O(N²). Most profitable sinks have the target + within a few steps; this bound just clips the long tail. *) +let max_walk_distance = 64 + +let times = Debug.find "times" + +let stats = Debug.find "stats" + +(* Aggregated statistics across all calls to [f]. The pass runs once per + Wasm function; per-function logs are noisy, so we accumulate here and + emit a single summary via [report_stats]. *) +let total_time = ref 0. + +let total_calls = ref 0 + +let total_candidates = ref 0 + +let total_sunk = ref 0 + +let report_stats () = + if !total_calls > 0 + then ( + if times () then Format.eprintf " wasm local sink: %.2f@." !total_time; + if stats () + then + Format.eprintf + "Stats - wasm local sink: %d functions, %d candidates, %d sunk@." + !total_calls + !total_candidates + !total_sunk; + total_time := 0.; + total_calls := 0; + total_candidates := 0; + total_sunk := 0) + +(* Same as [Gc_target.effect_free]. Copied here (it is a small and + self-contained helper) to avoid exposing it as a shared interface. *) +let rec effect_free (e : W.expression) = + match e with + | Const _ | LocalGet _ | GlobalGet _ | RefFunc _ | RefNull _ -> true + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> effect_free e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> effect_free e1 && effect_free e2 + | LocalTee _ + | BlockExpr _ + | Call _ + | Seq _ + | Pop _ + | Call_ref _ + | Br_on_cast _ + | Br_on_cast_fail _ + | Br_on_null _ + | Try _ -> false + | IfExpr (_, e1, e2, e3) -> effect_free e1 && effect_free e2 && effect_free e3 + | ArrayNewFixed (_, l) | StructNew (_, l) -> List.for_all ~f:effect_free l + +(* Locals read and written by an expression. A [LocalTee y e'] both reads + [e']'s locals and writes [y]. *) +type rw = + { mutable reads : Var.Set.t + ; mutable writes : Var.Set.t + } + +let rec collect_rw rw (e : W.expression) = + match e with + | Const _ | GlobalGet _ | RefFunc _ | RefNull _ | Pop _ -> () + | LocalGet x -> rw.reads <- Var.Set.add x rw.reads + | LocalTee (y, e') -> + rw.writes <- Var.Set.add y rw.writes; + collect_rw rw e' + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' + | Br_on_cast (_, _, _, e') + | Br_on_cast_fail (_, _, _, e') + | Br_on_null (_, e') -> collect_rw rw e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> + collect_rw rw e1; + collect_rw rw e2 + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> + List.iter l ~f:(collect_rw rw) + | Call_ref (_, f, l) -> + collect_rw rw f; + List.iter l ~f:(collect_rw rw) + | IfExpr (_, c, t, el) -> + collect_rw rw c; + collect_rw rw t; + collect_rw rw el + | BlockExpr (_, l) -> List.iter l ~f:(collect_rw_instr rw) + | Seq (l, e') -> + List.iter l ~f:(collect_rw_instr rw); + collect_rw rw e' + | Try (_, body, _) -> List.iter body ~f:(collect_rw_instr rw) + +and collect_rw_instr rw (i : W.instruction) = + match i with + | Nop | Event _ | Br (_, None) | Return None | Rethrow _ | Unreachable -> () + | Drop e + | Push e + | GlobalSet (_, e) + | Br (_, Some e) + | Br_if (_, e) + | Br_table (e, _, _) + | Throw (_, e) + | Return (Some e) -> collect_rw rw e + | LocalSet (y, e) -> + rw.writes <- Var.Set.add y rw.writes; + collect_rw rw e + | StructSet (_, _, e1, e2) -> + collect_rw rw e1; + collect_rw rw e2 + | ArraySet (_, e1, e2, e3) -> + collect_rw rw e1; + collect_rw rw e2; + collect_rw rw e3 + | CallInstr (_, l) | Return_call (_, l) -> List.iter l ~f:(collect_rw rw) + | Return_call_ref (_, f, l) -> + collect_rw rw f; + List.iter l ~f:(collect_rw rw) + | Loop (_, l) | Block (_, l) -> List.iter l ~f:(collect_rw_instr rw) + | If (_, c, t, el) -> + collect_rw rw c; + List.iter t ~f:(collect_rw_instr rw); + List.iter el ~f:(collect_rw_instr rw) + +let rw_of_expr e = + let rw = { reads = Var.Set.empty; writes = Var.Set.empty } in + collect_rw rw e; + rw.reads, rw.writes + +(* Reads of a sub-expression, used at [may_cross_sibling] check points. *) +let reads_of_expr e = + let rw = { reads = Var.Set.empty; writes = Var.Set.empty } in + collect_rw rw e; + rw.reads + +(* Walker result for a single expression. [Clean] means "no occurrence + of x in this expression; the caller may continue past, using + [effect_free] to gate the path-clean state". *) +type walk_result = + | Found of W.expression + | Bail + | Clean + +(* [ctx] bundles the parameters that don't change during a sink attempt: + the target variable, the expression to sink, the locals it reads and + writes, and whether the expression itself is [effect_free]. + - [reads]: crossing a write to any of these would change [e]'s result. + - [writes]: crossing a read of any of these would make the reader see + the pre-sink value instead of the one [e] would have stored. + When [e_effect_free] is [false] we may not cross any *evaluated* + sub-expression or instruction even if it is itself [effect_free] — + the path could read heap/global state that [e]'s side effects would + change. *) +type ctx = + { x : Var.t + ; e_to_sink : W.expression + ; reads : Var.Set.t + ; writes : Var.Set.t + ; e_effect_free : bool + ; mutable budget : int + } + +(* Spend one unit of walk budget. Returns [false] when the budget is + exhausted, in which case callers must Bail rather than continue. We + only tick at the points where the walker steps forward in evaluation + order (instruction-to-instruction or sibling-to-sibling); descending + into a unary sub-expression is free. *) +let tick ctx = + if ctx.budget <= 0 + then false + else ( + ctx.budget <- ctx.budget - 1; + true) + +(* True iff [vs] (a set of *reads* by intermediate code) does not + intersect [ctx.writes] — i.e. no variable that [e] writes is + observed by the intermediate code. *) +let reads_disjoint_from_e_writes ctx vs = Var.Set.is_empty (Var.Set.inter vs ctx.writes) + +(* Purely-local expression — no heap/global reads, no calls, no traps. + Stricter than [effect_free]: a [GlobalGet] or [ArrayGet] is + [effect_free] but not [trivially_pure], because crossing it with an + effectful [e] would reorder a read against [e]'s writes. *) +let rec trivially_pure (e : W.expression) = + match e with + | W.Const _ | RefFunc _ | RefNull _ | LocalGet _ | Pop _ -> true + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> trivially_pure e' + | BinOp (_, e1, e2) | RefEq (e1, e2) -> trivially_pure e1 && trivially_pure e2 + | LocalTee _ + | GlobalGet _ + | ArrayLen _ + | StructGet _ + | ArrayGet _ + | ArrayNew _ + | ArrayNewData _ + | ArrayNewFixed _ + | StructNew _ + | RefCast _ + | RefTest _ + | Br_on_cast _ + | Br_on_cast_fail _ + | Br_on_null _ + | Call _ + | Call_ref _ + | IfExpr _ + | BlockExpr _ + | Seq _ + | Try _ -> false + +(* A sibling sub-expression was walked [Clean] (no x) and we're about + to continue to the next sibling. This is the reorder point: [e] will + evaluate *after* [sibling] in the sunk version, whereas originally + [e] ran first. Three conditions must all hold: + - [sibling] has no observable side effects; + - either [e] is effect-free or [sibling] is trivially pure (no + heap/global reads, no calls); + - [sibling]'s reads are disjoint from [e]'s writes — otherwise + [sibling] would read the pre-sink value of a local that [e] + overwrites. (The converse — [sibling] writing something [e] + reads — is caught by the walker returning [Bail] at intermediate + [LocalSet]/[LocalTee] and by [can_cross_instr] for instructions.) *) +let may_cross_sibling ctx sibling = + effect_free sibling + && (ctx.e_effect_free || trivially_pure sibling) + && reads_disjoint_from_e_writes ctx (reads_of_expr sibling) + +let rec walk_expr ctx (e : W.expression) = + match e with + | W.Const _ | GlobalGet _ | RefFunc _ | RefNull _ | Pop _ -> Clean + | LocalGet y -> + if Var.equal y ctx.x then Found (W.LocalTee (ctx.x, ctx.e_to_sink)) else Clean + | LocalTee (y, _) when Var.equal y ctx.x -> + (* Another write to [x] — bail. *) + Bail + | LocalTee (y, e') -> ( + (* Reading [e'] first, then this tee writes [y]. Crossing this + point means [e] would run after the tee. Bail if: + - [y] is read by [e] (we'd read the tee's value instead of the + pre-tee one), or + - [y] is written by [e] (the final value of [y] would change). *) + match walk_expr ctx e' with + | Found e'' -> Found (W.LocalTee (y, e'')) + | Bail -> Bail + | Clean -> + if Var.Set.mem y ctx.reads || Var.Set.mem y ctx.writes then Bail else Clean) + | UnOp (op, e') -> wrap_unary (fun e -> W.UnOp (op, e)) ctx e' + | I32WrapI64 e' -> wrap_unary (fun e -> W.I32WrapI64 e) ctx e' + | I64ExtendI32 (s, e') -> wrap_unary (fun e -> W.I64ExtendI32 (s, e)) ctx e' + | F32DemoteF64 e' -> wrap_unary (fun e -> W.F32DemoteF64 e) ctx e' + | F64PromoteF32 e' -> wrap_unary (fun e -> W.F64PromoteF32 e) ctx e' + | RefI31 e' -> wrap_unary (fun e -> W.RefI31 e) ctx e' + | I31Get (s, e') -> wrap_unary (fun e -> W.I31Get (s, e)) ctx e' + | ArrayLen e' -> wrap_unary (fun e -> W.ArrayLen e) ctx e' + | StructGet (s, ty, i, e') -> wrap_unary (fun e -> W.StructGet (s, ty, i, e)) ctx e' + | RefCast (ty, e') -> wrap_unary (fun e -> W.RefCast (ty, e)) ctx e' + | RefTest (ty, e') -> wrap_unary (fun e -> W.RefTest (ty, e)) ctx e' + | ExternConvertAny e' -> wrap_unary (fun e -> W.ExternConvertAny e) ctx e' + | AnyConvertExtern e' -> wrap_unary (fun e -> W.AnyConvertExtern e) ctx e' + | BinOp (op, e1, e2) -> wrap_binary (fun a b -> W.BinOp (op, a, b)) ctx e1 e2 + | ArrayNew (ty, e1, e2) -> wrap_binary (fun a b -> W.ArrayNew (ty, a, b)) ctx e1 e2 + | ArrayNewData (ty, d, e1, e2) -> + wrap_binary (fun a b -> W.ArrayNewData (ty, d, a, b)) ctx e1 e2 + | ArrayGet (s, ty, e1, e2) -> + wrap_binary (fun a b -> W.ArrayGet (s, ty, a, b)) ctx e1 e2 + | RefEq (e1, e2) -> wrap_binary (fun a b -> W.RefEq (a, b)) ctx e1 e2 + | Call (f, args) -> wrap_list (fun args' -> W.Call (f, args')) ctx args + | ArrayNewFixed (ty, args) -> + wrap_list (fun args' -> W.ArrayNewFixed (ty, args')) ctx args + | StructNew (ty, args) -> wrap_list (fun args' -> W.StructNew (ty, args')) ctx args + | Call_ref (ty, f, args) -> ( + (* Wasm evaluates args before the funcref. *) + match wrap_list_intermediate ctx args with + | `Found args' -> Found (W.Call_ref (ty, f, args')) + | `Bail -> Bail + | `Clean -> ( + (* Between args and f, we've crossed every arg — allowed + only if each is [may_cross_sibling]-safe. That check was + already made inside [wrap_list_intermediate]. *) + match walk_expr ctx f with + | Found f' -> Found (W.Call_ref (ty, f', args)) + | Bail -> Bail + | Clean -> Clean)) + | IfExpr _ + | BlockExpr _ + | Seq _ + | Try _ + | Br_on_cast _ + | Br_on_cast_fail _ + | Br_on_null _ -> + (* Compound / control-flow-like expressions; we don't descend past + these for sinking purposes. *) + Bail + +and wrap_unary make ctx e' = + match walk_expr ctx e' with + | Found e'' -> Found (make e'') + | Bail -> Bail + | Clean -> Clean + +and wrap_binary make ctx e1 e2 = + match walk_expr ctx e1 with + | Found e1' -> Found (make e1' e2) + | Bail -> Bail + | Clean -> ( + if not (may_cross_sibling ctx e1 && tick ctx) + then Bail + else + match walk_expr ctx e2 with + | Found e2' -> Found (make e1 e2') + | Bail -> Bail + | Clean -> Clean) + +and wrap_list make ctx args = + match wrap_list_intermediate ctx args with + | `Found args' -> Found (make args') + | `Bail -> Bail + | `Clean -> Clean + +and wrap_list_intermediate ctx args = + let rec loop acc = function + | [] -> `Clean + | a :: rest -> ( + match walk_expr ctx a with + | Found a' -> `Found (List.rev_append acc (a' :: rest)) + | Bail -> `Bail + | Clean -> + if not (may_cross_sibling ctx a && tick ctx) + then `Bail + else loop (a :: acc) rest) + in + loop [] args + +(* Walk a single instruction looking for a sink target for [ctx.x]. *) +type instr_result = + | IFound of W.instruction + | IBail + | IClean + +let try_sink_in_instr ctx instr : instr_result = + let wrap_one make e = + match walk_expr ctx e with + | Found e' -> IFound (make e') + | Bail -> IBail + | Clean -> IClean + in + match (instr : W.instruction) with + | Nop | Event _ -> IClean + | Drop e -> wrap_one (fun e -> W.Drop e) e + | Push e -> wrap_one (fun e -> W.Push e) e + | LocalSet (y, e) when Var.equal y ctx.x -> ( + (* x may still appear inside [e] (evaluated before the set). If we + find and rewrite it there, we stop (the [local.set x] after + would be a shadowing write). If [e] is [Clean], this is a + shadowing write without a sink target → bail. *) + match walk_expr ctx e with + | Found e' -> IFound (W.LocalSet (y, e')) + | Bail -> IBail + | Clean -> IBail) + | LocalSet (y, e) -> wrap_one (fun e -> W.LocalSet (y, e)) e + | GlobalSet (g, e) -> wrap_one (fun e -> W.GlobalSet (g, e)) e + | StructSet (ty, i, e1, e2) -> ( + match walk_expr ctx e1 with + | Found e1' -> IFound (W.StructSet (ty, i, e1', e2)) + | Bail -> IBail + | Clean -> ( + if not (may_cross_sibling ctx e1 && tick ctx) + then IBail + else + match walk_expr ctx e2 with + | Found e2' -> IFound (W.StructSet (ty, i, e1, e2')) + | Bail -> IBail + | Clean -> IClean)) + | ArraySet (ty, e1, e2, e3) -> ( + match walk_expr ctx e1 with + | Found e1' -> IFound (W.ArraySet (ty, e1', e2, e3)) + | Bail -> IBail + | Clean -> ( + if not (may_cross_sibling ctx e1 && tick ctx) + then IBail + else + match walk_expr ctx e2 with + | Found e2' -> IFound (W.ArraySet (ty, e1, e2', e3)) + | Bail -> IBail + | Clean -> ( + if not (may_cross_sibling ctx e2 && tick ctx) + then IBail + else + match walk_expr ctx e3 with + | Found e3' -> IFound (W.ArraySet (ty, e1, e2, e3')) + | Bail -> IBail + | Clean -> IClean))) + | CallInstr (f, args) -> ( + match wrap_list_intermediate ctx args with + | `Found args' -> IFound (W.CallInstr (f, args')) + | `Bail -> IBail + | `Clean -> IClean) + (* Control-flow-terminal instructions with sub-expressions: we can still + rewrite within the expression, but cannot continue past on a Clean. *) + | Return (Some e) -> ( + match walk_expr ctx e with + | Found e' -> IFound (W.Return (Some e')) + | Bail -> IBail + | Clean -> IBail) + | Throw (t, e) -> ( + match walk_expr ctx e with + | Found e' -> IFound (W.Throw (t, e')) + | Bail -> IBail + | Clean -> IBail) + | Br (n, Some e) -> ( + match walk_expr ctx e with + | Found e' -> IFound (W.Br (n, Some e')) + | Bail -> IBail + | Clean -> IBail) + | Br_if (n, e) -> ( + match walk_expr ctx e with + | Found e' -> IFound (W.Br_if (n, e')) + | Bail -> IBail + | Clean -> IBail) + | Br_table (e, tl, d) -> ( + match walk_expr ctx e with + | Found e' -> IFound (W.Br_table (e', tl, d)) + | Bail -> IBail + | Clean -> IBail) + | Return_call (f, args) -> ( + match wrap_list_intermediate ctx args with + | `Found args' -> IFound (W.Return_call (f, args')) + | `Bail -> IBail + | `Clean -> IBail) + | Return_call_ref (ty, f, args) -> ( + match wrap_list_intermediate ctx args with + | `Found args' -> IFound (W.Return_call_ref (ty, f, args')) + | `Bail -> IBail + | `Clean -> ( + match walk_expr ctx f with + | Found f' -> IFound (W.Return_call_ref (ty, f', args)) + | Bail -> IBail + | Clean -> IBail)) + | Return None | Br (_, None) | Rethrow _ | Unreachable -> IBail + | If (ty, cond, l1, l2) -> ( + (* The condition is evaluated unconditionally before branching — + we can still sink into it. If no [x] is found there, we bail + because we can't continue past the branch. *) + match walk_expr ctx cond with + | Found cond' -> IFound (W.If (ty, cond', l1, l2)) + | Bail -> IBail + | Clean -> IBail) + | Loop _ | Block _ -> + (* No expression to walk at this level; do not sink into the body. *) + IBail + +(* Can we cross this instruction without [x] being found? Must be safe + with respect to [ctx.e_to_sink]'s potential side effects. An [Event] + carries a source-map location for everything that follows it: moving + an effectful [e_to_sink] past an event would re-attribute its call / + trap to the wrong source position, so we only cross events when [e] + is itself effect-free. *) +let can_cross_instr ctx (instr : W.instruction) = + match instr with + | Nop -> true + | Event _ -> ctx.e_effect_free + | Drop e | Push e -> may_cross_sibling ctx e + | LocalSet (y, e) -> + may_cross_sibling ctx e + && (not (Var.Set.mem y ctx.reads)) + && not (Var.Set.mem y ctx.writes) + | _ -> false + +let try_sink_in_list ctx instrs = + let rec loop acc = function + | [] -> None + | instr :: rest -> ( + match try_sink_in_instr ctx instr with + | IFound instr' -> Some (List.rev_append acc (instr' :: rest)) + | IBail -> None + | IClean -> + if can_cross_instr ctx instr && tick ctx + then loop (instr :: acc) rest + else None) + in + loop [] instrs + +(* Bottom-up transformation: recurse first, then try to sink each + [local.set] into the (already-transformed) tail. *) +let rec transform_instrs instrs = + match instrs with + | [] -> [] + | W.LocalSet (x, e) :: rest -> ( + let e = transform_expr e in + let rest = transform_instrs rest in + let reads, writes = rw_of_expr e in + let ctx = + { x + ; e_to_sink = e + ; reads + ; writes + ; e_effect_free = effect_free e + ; budget = max_walk_distance + } + in + incr total_candidates; + match try_sink_in_list ctx rest with + | Some new_rest -> + incr total_sunk; + new_rest + | None -> W.LocalSet (x, e) :: rest) + | (W.Event _ as ev) :: rest -> ( + (* Sinking a [local.set] that previously separated two events + leaves them adjacent. Drop the earlier one so the closer + (later) event wins, matching the policy used in + [parse_bytecode], [code_generation], [deadcode], etc. *) + let rest = transform_instrs rest in + match rest with + | W.Event _ :: _ -> rest + | _ -> ev :: rest) + | instr :: rest -> + let instr = transform_instr instr in + let rest = transform_instrs rest in + instr :: rest + +and transform_instr (instr : W.instruction) : W.instruction = + match instr with + | Nop | Event _ | Br (_, None) | Return None | Rethrow _ | Unreachable -> instr + | Drop e -> Drop (transform_expr e) + | Push e -> Push (transform_expr e) + | LocalSet (x, e) -> LocalSet (x, transform_expr e) + | GlobalSet (g, e) -> GlobalSet (g, transform_expr e) + | StructSet (ty, i, e1, e2) -> StructSet (ty, i, transform_expr e1, transform_expr e2) + | ArraySet (ty, e1, e2, e3) -> + ArraySet (ty, transform_expr e1, transform_expr e2, transform_expr e3) + | CallInstr (f, args) -> CallInstr (f, List.map args ~f:transform_expr) + | Return (Some e) -> Return (Some (transform_expr e)) + | Throw (t, e) -> Throw (t, transform_expr e) + | Br (n, Some e) -> Br (n, Some (transform_expr e)) + | Br_if (n, e) -> Br_if (n, transform_expr e) + | Br_table (e, tl, d) -> Br_table (transform_expr e, tl, d) + | Return_call (f, args) -> Return_call (f, List.map args ~f:transform_expr) + | Return_call_ref (ty, f, args) -> + Return_call_ref (ty, transform_expr f, List.map args ~f:transform_expr) + | Loop (ty, l) -> Loop (ty, transform_instrs l) + | Block (ty, l) -> Block (ty, transform_instrs l) + | If (ty, cond, l1, l2) -> + If (ty, transform_expr cond, transform_instrs l1, transform_instrs l2) + +and transform_expr (e : W.expression) : W.expression = + match e with + | Const _ | LocalGet _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> e + | UnOp (op, e') -> UnOp (op, transform_expr e') + | I32WrapI64 e' -> I32WrapI64 (transform_expr e') + | I64ExtendI32 (s, e') -> I64ExtendI32 (s, transform_expr e') + | F32DemoteF64 e' -> F32DemoteF64 (transform_expr e') + | F64PromoteF32 e' -> F64PromoteF32 (transform_expr e') + | RefI31 e' -> RefI31 (transform_expr e') + | I31Get (s, e') -> I31Get (s, transform_expr e') + | ArrayLen e' -> ArrayLen (transform_expr e') + | StructGet (s, ty, i, e') -> StructGet (s, ty, i, transform_expr e') + | RefCast (ty, e') -> RefCast (ty, transform_expr e') + | RefTest (ty, e') -> RefTest (ty, transform_expr e') + | ExternConvertAny e' -> ExternConvertAny (transform_expr e') + | AnyConvertExtern e' -> AnyConvertExtern (transform_expr e') + | Br_on_cast (i, ty, ty', e') -> Br_on_cast (i, ty, ty', transform_expr e') + | Br_on_cast_fail (i, ty, ty', e') -> Br_on_cast_fail (i, ty, ty', transform_expr e') + | Br_on_null (i, e') -> Br_on_null (i, transform_expr e') + | LocalTee (x, e') -> LocalTee (x, transform_expr e') + | BinOp (op, e1, e2) -> BinOp (op, transform_expr e1, transform_expr e2) + | ArrayNew (ty, e1, e2) -> ArrayNew (ty, transform_expr e1, transform_expr e2) + | ArrayNewData (ty, d, e1, e2) -> + ArrayNewData (ty, d, transform_expr e1, transform_expr e2) + | ArrayGet (s, ty, e1, e2) -> ArrayGet (s, ty, transform_expr e1, transform_expr e2) + | RefEq (e1, e2) -> RefEq (transform_expr e1, transform_expr e2) + | Call (f, args) -> Call (f, List.map args ~f:transform_expr) + | ArrayNewFixed (ty, args) -> ArrayNewFixed (ty, List.map args ~f:transform_expr) + | StructNew (ty, args) -> StructNew (ty, List.map args ~f:transform_expr) + | Call_ref (ty, f, args) -> + Call_ref (ty, transform_expr f, List.map args ~f:transform_expr) + | IfExpr (ty, cond, t, el) -> + IfExpr (ty, transform_expr cond, transform_expr t, transform_expr el) + | BlockExpr (ty, l) -> BlockExpr (ty, transform_instrs l) + | Seq (l, e') -> Seq (transform_instrs l, transform_expr e') + | Try (ty, body, catches) -> Try (ty, transform_instrs body, catches) + +let f instrs = + let t = Timer.make () in + incr total_calls; + let instrs = transform_instrs instrs in + total_time := !total_time +. Timer.get t; + instrs diff --git a/compiler/lib-wasm/local_sink.mli b/compiler/lib-wasm/local_sink.mli new file mode 100644 index 0000000000..a8d08fe719 --- /dev/null +++ b/compiler/lib-wasm/local_sink.mli @@ -0,0 +1,39 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * Copyright (C) 2026 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(** Local.set sinking for the Wasm backend. + + For each [local.set x e] in a function, the pass tries to push the + write forward to the first subsequent [local.get x], turning the + [set]/[get] pair into a single [local.tee]. The sink is applied only + when it does not cross another write to [x] (no intervening + [local.set x] / [local.tee x]), does not cross a control-flow + boundary, and does not reorder effects unsafely (conservatively, we + require [e] or the intervening code to be effect-free). + + The pass does not touch the [locals] list — it only deletes some + [local.set]s and turns some [local.get x]s into [local.tee x ...]. + Variables that become dead as a result are cleaned up later by + [Var_coalescing]. *) + +val f : Wasm_ast.instruction list -> Wasm_ast.instruction list + +val report_stats : unit -> unit +(** Emit aggregated time/stats logs accumulated across all [f] calls and + reset the counters. Honours the [times] and [stats] debug flags. *) diff --git a/compiler/lib-wasm/reorder_locals.ml b/compiler/lib-wasm/reorder_locals.ml new file mode 100644 index 0000000000..1121773032 --- /dev/null +++ b/compiler/lib-wasm/reorder_locals.ml @@ -0,0 +1,194 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +open! Stdlib +module W = Wasm_ast +module Var = Code.Var + +(* + Reorder a function's non-parameter locals. The resulting layout is + + [ numeric block | reference block ] + + Each block is a sequence of same-type runs (i32, i64, f32, f64 in + the numeric block; the various ref types in the reference block). + Within a block, runs are ordered by descending total use count, and + within each run the individual locals are ordered by descending use + count. Parameters are not part of this list and keep their + signature-fixed indices. + + Why low indices matter. [local.get]/[local.set]/[local.tee] encode + their local index as LEB128, so indices 0..127 take one byte and + 128..16383 take two. Placing high-use locals first shaves a byte + off each of their accesses in the emitted Wasm. + + Why numerics come before references. V8's Liftoff baseline expects + two contiguous regions: numerics first (zero-initialised via a fast + block-zeroing path) and references after (null-initialised by a + single follow-up pass). OCaml code is reference-heavy, so the + leading numeric block tends to be small and the hot ref locals + still get low indices. + + Why same-type runs. The binary format encodes consecutive locals of + the same type as a single [(count, type)] pair, and V8 Liftoff + initialises each type-run as one block, so contiguity makes the + function section smaller and warm-up faster. +*) + +(* --------------------------------------------------------------------- *) +(* Use counting *) +(* --------------------------------------------------------------------- *) + +let bump counts x = + match Var.Hashtbl.find_opt counts x with + | Some n -> Var.Hashtbl.replace counts x (n + 1) + | None -> () +(* Parameters are absent from [counts], so their reads go uncounted. *) + +let rec count_expr counts (e : W.expression) = + match e with + | Const _ | GlobalGet _ | RefFunc _ | RefNull _ | Pop _ -> () + | LocalGet x -> bump counts x + | LocalTee (x, e') -> + bump counts x; + count_expr counts e' + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | Br_on_cast (_, _, _, e') + | Br_on_cast_fail (_, _, _, e') + | Br_on_null (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> count_expr counts e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> + count_expr counts e1; + count_expr counts e2 + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> + List.iter l ~f:(count_expr counts) + | Call_ref (_, f, l) -> + count_expr counts f; + List.iter l ~f:(count_expr counts) + | IfExpr (_, c, t, el) -> + count_expr counts c; + count_expr counts t; + count_expr counts el + | BlockExpr (_, l) -> count_instrs counts l + | Seq (l, e') -> + count_instrs counts l; + count_expr counts e' + | Try (_, body, _) -> count_instrs counts body + +and count_instr counts (i : W.instruction) = + match i with + | Nop | Event _ | Br (_, None) | Return None | Rethrow _ | Unreachable -> () + | Drop e + | Push e + | GlobalSet (_, e) + | Br (_, Some e) + | Br_if (_, e) + | Br_table (e, _, _) + | Throw (_, e) + | Return (Some e) -> count_expr counts e + | LocalSet (x, e) -> + bump counts x; + count_expr counts e + | StructSet (_, _, e1, e2) -> + count_expr counts e1; + count_expr counts e2 + | ArraySet (_, e1, e2, e3) -> + count_expr counts e1; + count_expr counts e2; + count_expr counts e3 + | CallInstr (_, l) | Return_call (_, l) -> List.iter l ~f:(count_expr counts) + | Return_call_ref (_, f, l) -> + count_expr counts f; + List.iter l ~f:(count_expr counts) + | Loop (_, l) | Block (_, l) -> count_instrs counts l + | If (_, c, t, el) -> + count_expr counts c; + count_instrs counts t; + count_instrs counts el + +and count_instrs counts l = List.iter l ~f:(count_instr counts) + +(* --------------------------------------------------------------------- *) +(* Reordering *) +(* --------------------------------------------------------------------- *) + +let is_ref_type (t : W.value_type) = + match t with + | Ref _ -> true + | I32 | I64 | F32 | F64 -> false + +(* Split the input list into consecutive same-type runs, preserving + the first-occurrence order of the types. + Returns a list of (type, locals-in-that-run). *) +let group_by_type locals = + match locals with + | [] -> [] + | (_, t0) :: _ -> + let rec loop acc curr_t curr_rev = function + | [] -> List.rev ((curr_t, List.rev curr_rev) :: acc) + | ((_, t) as loc) :: rest -> + if Poly.equal t curr_t + then loop acc curr_t (loc :: curr_rev) rest + else loop ((curr_t, List.rev curr_rev) :: acc) t [ loc ] rest + in + loop [] t0 [] locals + +(* Stable sort [l] by descending key [f x]. *) +let sort_desc ~key l = List.stable_sort l ~cmp:(fun a b -> compare (key b) (key a)) + +let f ~locals body = + let counts = Var.Hashtbl.create (List.length locals) in + List.iter locals ~f:(fun (x, _) -> Var.Hashtbl.add counts x 0); + count_instrs counts body; + let count_of x = Var.Hashtbl.find counts x in + (* Split into reference-typed and numeric-typed locals, preserving the + original relative order within each group so that [group_by_type] + sees the same runs the input would have produced. *) + let refs, nums = List.partition locals ~f:(fun (_, t) -> is_ref_type t) in + let reorder_group group = + let runs = group_by_type group in + let runs = + List.map runs ~f:(fun (t, members) -> + let sorted = sort_desc members ~key:(fun (x, _) -> count_of x) in + let total = List.fold_left sorted ~init:0 ~f:(fun s (x, _) -> s + count_of x) in + t, sorted, total) + in + let runs = sort_desc runs ~key:(fun (_, _, total) -> total) in + List.concat_map runs ~f:(fun (_, sorted, _) -> sorted) + in + (* Numeric-typed locals are typically much rarer in OCaml code than + reference-typed ones, so placing them first keeps the high-use + reference locals in a contiguous run starting right after — none + of them pushed across the 128-index boundary by intervening + numerics. *) + reorder_group nums @ reorder_group refs diff --git a/compiler/lib-wasm/reorder_locals.mli b/compiler/lib-wasm/reorder_locals.mli new file mode 100644 index 0000000000..3e242abaf7 --- /dev/null +++ b/compiler/lib-wasm/reorder_locals.mli @@ -0,0 +1,34 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(** Reorder a function's [locals] list so frequently-used locals get + lower Wasm indices (one-byte LEB128 encoding for indices < 128). + + Parameters are untouched; only the non-parameter [locals] list is + reordered. The instruction body is unchanged — the Wasm local + index of a variable is determined by its position in + [param_names @ locals], so rearranging the list is enough. + + Reference-type locals and numeric-type locals are kept in separate + blocks so that the run-length encoding performed by + [Wasm_output.coalesce_locals] doesn't get fragmented. *) + +val f : + locals:(Wasm_ast.var * Wasm_ast.value_type) list + -> Wasm_ast.instruction list + -> (Wasm_ast.var * Wasm_ast.value_type) list diff --git a/compiler/lib-wasm/target_sig.ml b/compiler/lib-wasm/target_sig.ml index f13e4aac5c..360dd4645e 100644 --- a/compiler/lib-wasm/target_sig.ml +++ b/compiler/lib-wasm/target_sig.ml @@ -297,7 +297,9 @@ module type S = sig -> unit Code_generation.t val post_process_function_body : - param_names:Wasm_ast.var list + profile:Profile.t + -> param_names:Wasm_ast.var list + -> param_types:Wasm_ast.value_type list -> locals:(Wasm_ast.var * Wasm_ast.value_type) list -> Wasm_ast.instruction list -> (Wasm_ast.var * Wasm_ast.value_type) list * Wasm_ast.instruction list diff --git a/compiler/lib-wasm/var_coalescing.ml b/compiler/lib-wasm/var_coalescing.ml new file mode 100644 index 0000000000..0b7d4b4264 --- /dev/null +++ b/compiler/lib-wasm/var_coalescing.ml @@ -0,0 +1,1184 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * Copyright (C) 2026 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(* + Linear-scan variable coalescing for the Wasm backend. + + The algorithm mirrors [Js_variable_coalescing]: + 1. Build a CFG of the function body. + 2. Compute backward liveness to a fixed point via [Dgraph.Solver]. + 3. Derive per-variable live ranges (intervals with possible holes). + 4. Run linear-scan allocation, bucketed by Wasm [value_type], with + copy hints from [local.set x (local.get y)] patterns. + 5. Rewrite [LocalGet]/[LocalSet]/[LocalTee] through the substitution + and drop [LocalSet(x, LocalGet x)] copies introduced by merging. + 6. Rebuild the [locals] list with only representatives. + + Parameters participate as candidates — they are all live at entry + (enforced by a synthetic Def-of-all-params entry node) so two + parameters never merge, but a non-parameter whose range does not + interfere with a parameter can be rewritten to reuse the parameter's + slot. + + [Try] expressions are handled by creating a synthetic [catch_entry] + node whose successors are the resolved catch targets (branch labels + in the enclosing block stack). Because an exception can fire from + anywhere inside the body, every variable live at [catch_entry] is + considered live across the entire try body — enforced by extending + active live ranges back to the body entry when [catch_entry] is + reached during live-range construction. +*) + +open! Stdlib +module Var = Code.Var +module W = Wasm_ast + +let times = Debug.find "times" + +let stats = Debug.find "stats" + +(* Aggregated statistics across all calls to [f]. The pass runs once per + Wasm function; per-function logs are noisy, so we accumulate here and + emit a single summary via [report_stats]. *) +let total_time = ref 0. + +let total_calls = ref 0 + +let total_candidates = ref 0 + +let total_hint_count = ref 0 + +let total_opportunistic_count = ref 0 + +let total_dead_tee_count = ref 0 + +let report_stats () = + if !total_calls > 0 + then ( + if times () then Format.eprintf " wasm var coalescing: %.2f@." !total_time; + if stats () + then + Format.eprintf + "Stats - wasm var coalescing: %d functions, %d candidates, %d coalesced (%d \ + hint, %d opportunistic), %d dead tees@." + !total_calls + !total_candidates + (!total_hint_count + !total_opportunistic_count) + !total_hint_count + !total_opportunistic_count + !total_dead_tee_count; + total_time := 0.; + total_calls := 0; + total_candidates := 0; + total_hint_count := 0; + total_opportunistic_count := 0; + total_dead_tee_count := 0) + +(* --------------------------------------------------------------------- *) +(* CFG construction *) +(* --------------------------------------------------------------------- *) + +module Node = struct + type t = int +end + +module NodeSet = struct + type t = BitSet.t + + type elt = int + + let iter f t = BitSet.iter ~f t + + let mem = BitSet.mem + + let add = BitSet.set + + let remove = BitSet.unset + + let copy = BitSet.copy +end + +module NodeTbl = struct + type 'a t = 'a array + + type key = int + + type size = int + + let get t k = t.(k) + + let set t k v = t.(k) <- v + + let make = Array.make +end + +module G = Dgraph.Make_Imperative (Node) (NodeSet) (NodeTbl) + +module Domain = struct + type t = Var.Set.t + + let equal = Var.Set.equal + + let bot = Var.Set.empty +end + +module Solver = G.Solver (Domain) + +type action = + | Use of Var.Set.t + | Def of Var.Set.t + | Nop + +let defs_of_action = function + | Def d -> d + | Use _ | Nop -> Var.Set.empty + +let uses_of_action = function + | Use u -> u + | Def _ | Nop -> Var.Set.empty + +type node_id = int + +type stmt_graph = + { entry : node_id + ; size : int + ; actions : action array + ; succs : node_id list array + ; coalescing_hints : Var.t Var.Hashtbl.t + ; try_blocks : node_id Int.Hashtbl.t + (** [catch_entry -> try_body_entry]. When the live-range pass reaches + [catch_entry], every variable currently live is extended back to + the start of the try body, modeling the fact that an exception + can fire anywhere inside the body. *) + ; tee_nodes : node_id list + (** IDs of the [Def] nodes created for every [LocalTee] in AST + traversal order. Used by the dead-tee sweep after liveness + to decide which tees store a value no subsequent instruction + will read. *) + } + +type graph_builder = + { mutable nodes : (node_id * action * node_id list) list + ; hints : Var.t Var.Hashtbl.t + ; tries : node_id Int.Hashtbl.t + ; mutable tees : node_id list (* accumulated in reverse AST order *) + } + +(* Block stack frame: the target node id reached by [br n] with n equal + to this frame's depth. For a [Block], that target is the node + following the block (the block's exit). For a [Loop], that target is + the loop entry (back-edge). *) + +let nth_opt l n = + let rec loop l n = + match l, n with + | [], _ -> None + | x :: _, 0 -> Some x + | _ :: r, n -> loop r (n - 1) + in + loop l n + +(* The CFG is built with each sub-expression producing its own node + chain, laid out in Wasm evaluation order (left-to-right for + operands, args-before-funcref for [call_ref]). Each [local.get], + [local.tee], and [local.set] becomes a single node with a [Use] or + [Def] action; structural operators contribute no node by themselves. + + [build_expr block_stack exit_node e] appends the CFG for [e] to the + graph; control flows in forward execution order from the returned + entry node through to [exit_node]. *) + +let build_cfg ~candidates ~param_vars instrs = + let builder = + { nodes = []; hints = Var.Hashtbl.create 16; tries = Int.Hashtbl.create 8; tees = [] } + in + let next_id = ref 0 in + let reserve_id () = + let id = !next_id in + incr next_id; + id + in + let set_node id action succs = builder.nodes <- (id, action, succs) :: builder.nodes in + let add_node action succs = + let id = reserve_id () in + set_node id action succs; + id + in + let add_hint x y = + if Var.Set.mem x candidates && Var.Set.mem y candidates + then Var.Hashtbl.replace builder.hints x y + in + let target_of_br block_stack n = + match nth_opt block_stack n with + | Some t -> t + | None -> + (* [br] out of the function. Conservatively model as a dead end. *) + add_node Nop [] + in + let use_node x exit_node = + if Var.Set.mem x candidates + then add_node (Use (Var.Set.singleton x)) [ exit_node ] + else exit_node + in + let def_node x exit_node = + if Var.Set.mem x candidates + then add_node (Def (Var.Set.singleton x)) [ exit_node ] + else exit_node + in + (* For [LocalTee] we additionally remember the Def node we just + created so that the dead-tee sweep can later consult it. When [x] + is not a candidate, no Def node is created (we return [exit_node] + unchanged) and no tee-sweep is needed for that position either, + so we don't record anything. *) + let tee_node x exit_node = + if Var.Set.mem x candidates + then ( + let id = add_node (Def (Var.Set.singleton x)) [ exit_node ] in + builder.tees <- id :: builder.tees; + id) + else ( + (* Record [-1] as a placeholder so the rewriter can still pop one + entry per [LocalTee] it encounters in the AST, keeping the + traversal orders synchronised. *) + builder.tees <- -1 :: builder.tees; + exit_node) + in + let rec build_expr block_stack exit_node (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> exit_node + | LocalGet x -> use_node x exit_node + | LocalTee (x, e') -> + let tee = tee_node x exit_node in + build_expr block_stack tee e' + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> build_expr block_stack exit_node e' + | Br_on_cast (depth, _, _, e') + | Br_on_cast_fail (depth, _, _, e') + | Br_on_null (depth, e') -> + (* Conditional branch: on a successful type test (or null test), + branch to the label [depth] with the value on the stack; + otherwise continue with the value on the stack. *) + let target = target_of_br block_stack depth in + let branch = add_node Nop [ target; exit_node ] in + build_expr block_stack branch e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> + (* [e1] evaluated first, then [e2]. *) + let after_e1 = build_expr block_stack exit_node e2 in + build_expr block_stack after_e1 e1 + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> + build_exprs_left_to_right block_stack exit_node l + | Call_ref (_, f, l) -> + (* Wasm [call_ref] evaluates args first, then the funcref. *) + let after_f = build_expr block_stack exit_node f in + build_exprs_left_to_right block_stack after_f l + | IfExpr (_, cond, e1, e2) -> + (* Evaluate [cond], then branch to [e1] or [e2]. [br 0] inside + either branch targets the continuation after the IfExpr. *) + let block_stack' = exit_node :: block_stack in + let then_entry = build_expr block_stack' exit_node e1 in + let else_entry = build_expr block_stack' exit_node e2 in + let branch = add_node Nop [ then_entry; else_entry ] in + build_expr block_stack branch cond + | BlockExpr (_, l) -> + (* [BlockExpr] is a block targetable by [br] at its own depth. *) + let block_stack' = exit_node :: block_stack in + build_instrs block_stack' exit_node l + | Seq (l, e') -> + let after_l = build_expr block_stack exit_node e' in + build_instrs block_stack after_l l + | Try (_, body, catches) -> + (* [Try] is a block: [br 0] inside the body targets the normal + continuation after the [Try]. Catch labels refer to the + enclosing block stack (not counting the [Try] itself), so they + are resolved against [block_stack]. *) + let block_stack' = exit_node :: block_stack in + let inner_body_entry = build_instrs block_stack' exit_node body in + (* Wrap the body entry in a fresh [Nop] node whose only + predecessor is the [Try] wrapper we are about to create. This + ensures that in the DFS used to compute the RPO layout, + [body_entry] is numbered strictly after [catch_entry], which + is visited first from the wrapper. Without this shim the body + entry can be reached via the catch targets (through + convergent paths in the enclosing code), putting it inside + [catch_entry]'s DFS subtree and breaking the invariant that + [body_order < catch_order]. *) + let body_entry = add_node Nop [ inner_body_entry ] in + (* Synthetic catch entry whose [live_in] is the union of the + live_in sets of the catch targets. Making it a successor of + the Try wrapper ensures it is reachable and thus has its + liveness computed by the dataflow solver. *) + let catch_targets = + List.map catches ~f:(fun (_, depth, _) -> target_of_br block_stack depth) + in + let catch_entry = add_node Nop catch_targets in + Int.Hashtbl.replace builder.tries catch_entry body_entry; + add_node Nop [ body_entry; catch_entry ] + and build_exprs_left_to_right block_stack exit_node l = + List.fold_right l ~init:exit_node ~f:(fun e next -> build_expr block_stack next e) + and build_instrs block_stack exit_node instrs = + List.fold_right instrs ~init:exit_node ~f:(fun instr next -> + build_instr block_stack next instr) + and build_instr block_stack exit_node instr = + match (instr : W.instruction) with + | Nop | Event _ -> exit_node + | Unreachable | Rethrow _ -> add_node Nop [] + | Drop e | Push e -> build_expr block_stack exit_node e + | LocalSet (x, e) -> + (match e with + | LocalGet y -> add_hint x y + | _ -> ()); + let set = def_node x exit_node in + build_expr block_stack set e + | GlobalSet (_, e) -> build_expr block_stack exit_node e + | StructSet (_, _, e1, e2) -> + let after_e1 = build_expr block_stack exit_node e2 in + build_expr block_stack after_e1 e1 + | ArraySet (_, e1, e2, e3) -> + let after_e2 = build_expr block_stack exit_node e3 in + let after_e1 = build_expr block_stack after_e2 e2 in + build_expr block_stack after_e1 e1 + | CallInstr (_, l) -> build_exprs_left_to_right block_stack exit_node l + | Return None -> add_node Nop [] + | Return (Some e) -> + let dead = add_node Nop [] in + build_expr block_stack dead e + | Throw (_, e) -> + let dead = add_node Nop [] in + build_expr block_stack dead e + | Return_call (_, l) -> + let dead = add_node Nop [] in + build_exprs_left_to_right block_stack dead l + | Return_call_ref (_, f, l) -> + let dead = add_node Nop [] in + let after_f = build_expr block_stack dead f in + build_exprs_left_to_right block_stack after_f l + | Br (n, None) -> target_of_br block_stack n + | Br (n, Some e) -> + let target = target_of_br block_stack n in + build_expr block_stack target e + | Br_if (n, cond) -> + let target = target_of_br block_stack n in + let branch = add_node Nop [ target; exit_node ] in + build_expr block_stack branch cond + | Br_table (e, ns, default) -> + let all_targets = + target_of_br block_stack default :: List.map ~f:(target_of_br block_stack) ns + in + let branch = add_node Nop all_targets in + build_expr block_stack branch e + | If (_, cond, l1, l2) -> + (* [If] is a Wasm block: [br 0] inside a branch targets the + continuation after the if. Push [exit_node] onto [block_stack]. *) + let block_stack' = exit_node :: block_stack in + let then_entry = build_instrs block_stack' exit_node l1 in + let else_entry = build_instrs block_stack' exit_node l2 in + let branch = add_node Nop [ then_entry; else_entry ] in + build_expr block_stack branch cond + | Block (_, l) -> + let block_stack' = exit_node :: block_stack in + build_instrs block_stack' exit_node l + | Loop (_, l) -> + let loop_entry = reserve_id () in + let block_stack' = loop_entry :: block_stack in + let body_entry = build_instrs block_stack' exit_node l in + set_node loop_entry Nop [ body_entry ]; + loop_entry + in + let end_node = add_node Nop [] in + let body_entry = build_instrs [] end_node instrs in + let entry = + if Var.Set.is_empty param_vars + then body_entry + else add_node (Def param_vars) [ body_entry ] + in + let size = !next_id in + let actions = Array.make size Nop in + let succs = Array.make size [] in + List.iter builder.nodes ~f:(fun (id, action, s) -> + actions.(id) <- action; + succs.(id) <- s); + { entry + ; size + ; actions + ; succs + ; coalescing_hints = builder.hints + ; try_blocks = builder.tries + ; tee_nodes = List.rev builder.tees + } + +(* --------------------------------------------------------------------- *) +(* Liveness (backward dataflow) and live range extraction *) +(* --------------------------------------------------------------------- *) + +let compute_liveness g = + let domain_set = BitSet.create' g.size in + for i = 0 to g.size - 1 do + BitSet.set domain_set i + done; + let preds = Array.make (Array.length g.succs) [] in + Array.iteri g.succs ~f:(fun i l -> + List.iter l ~f:(fun j -> preds.(j) <- i :: preds.(j))); + let inv_graph = + { G.domain = domain_set; iter_children = (fun f i -> List.iter ~f preds.(i)) } + in + let transfer state node_id = + let live_out = + List.fold_left g.succs.(node_id) ~init:Var.Set.empty ~f:(fun acc id -> + Var.Set.union acc state.(id)) + in + let def = defs_of_action g.actions.(node_id) in + let use = uses_of_action g.actions.(node_id) in + Var.Set.union use (Var.Set.diff live_out def) + in + Solver.f g.size inv_graph transfer + +module Live_range = struct + type interval = + { start_pos : int + ; end_pos : int + } + + type t = + { id : Var.t + ; mutable ranges : interval list (* sorted by start_pos *) + ; mutable assigned : bool + } + + let create v = { id = v; ranges = []; assigned = false } + + let add_range t start_pos end_pos = + let rec loop s e acc = function + | [] -> List.rev ({ start_pos = s; end_pos = e } :: acc) + | r :: rest -> + if e < r.start_pos - 1 + then List.rev_append acc ({ start_pos = s; end_pos = e } :: r :: rest) + else if s > r.end_pos + 1 + then loop s e (r :: acc) rest + else + let new_start = min s r.start_pos in + let new_end = max e r.end_pos in + loop new_start new_end acc rest + in + t.ranges <- + (match t.ranges with + | [] -> [ { start_pos; end_pos } ] + | r :: _ when r.start_pos > end_pos + 1 -> { start_pos; end_pos } :: t.ranges + | _ -> loop start_pos end_pos [] t.ranges) + + let add_ranges t other_ranges = + match t.ranges, other_ranges with + | [], l | l, [] -> t.ranges <- l + | l1, l2 -> + let rec loop acc l1 l2 = + match l1, l2 with + | [], l | l, [] -> List.rev_append acc l + | h1 :: t1, h2 :: t2 -> + if h1.start_pos < h2.start_pos then step acc h1 t1 l2 else step acc h2 t2 l1 + and step acc current rest other = + match acc with + | prev :: acc_rest when prev.end_pos + 1 >= current.start_pos -> + let merged = { prev with end_pos = max prev.end_pos current.end_pos } in + loop (merged :: acc_rest) rest other + | _ -> loop (current :: acc) rest other + in + t.ranges <- loop [] l1 l2 + + let get_start_pos t = + match t.ranges with + | [] -> max_int + | r :: _ -> r.start_pos + + let get_first_hole t = + match t.ranges with + | [] -> 0 + | r :: _ -> r.end_pos + 1 + + let rec advance t position = + match t.ranges with + | [] -> `Dead + | r :: rem -> + if r.end_pos < position + then ( + t.ranges <- rem; + advance t position) + else if r.start_pos > position + then `Inactive + else `Active + + let intersects t1 t2 = + let rec loop l1 l2 = + match l1, l2 with + | [], _ | _, [] -> false + | r1 :: rest1, r2 :: rest2 -> + if r1.end_pos < r2.start_pos + then loop rest1 l2 + else if r2.end_pos < r1.start_pos + then loop l1 rest2 + else true + in + loop t1.ranges t2.ranges +end + +(* Compute live ranges. Uses the same 2x position granularity as + [Js_variable_coalescing]: each CFG node at linear position i maps to + positions 2*i (pre, for uses) and 2*i+1 (post, for defs). *) +let compute_live_ranges g live_in_map candidates param_vars = + let visited = Array.make g.size false in + let layout = Array.make g.size 0 in + let i = ref g.size in + let rec list_rev_iter ~f l = + match l with + | [] -> () + | x :: r -> + list_rev_iter ~f r; + f x + in + let rec dfs n = + if not visited.(n) + then ( + visited.(n) <- true; + list_rev_iter g.succs.(n) ~f:dfs; + decr i; + layout.(!i) <- n) + in + dfs g.entry; + let num_reachable = g.size - !i in + let layout = if !i = 0 then layout else Array.sub layout ~pos:!i ~len:num_reachable in + let node_order = Array.make g.size (-1) in + Array.iteri layout ~f:(fun i n -> node_order.(n) <- i); + let ranges = Var.Hashtbl.create (Var.Set.cardinal candidates) in + Var.Set.iter (fun v -> Var.Hashtbl.add ranges v (Live_range.create v)) candidates; + let active_ranges = Var.Hashtbl.create 64 in + let commit_range v start_pos end_pos = + let r = Var.Hashtbl.find ranges v in + Live_range.add_range r start_pos end_pos + in + for order = num_reachable - 1 downto 0 do + let node_id = layout.(order) in + let start_idx = 2 * order in + let end_idx = (2 * order) + 1 in + let succs = g.succs.(node_id) in + let is_fallthrough = + match succs with + | [ s ] -> order < num_reachable - 1 && layout.(order + 1) = s + | _ -> false + in + if not is_fallthrough + then ( + let live_out = + List.fold_left succs ~init:Var.Set.empty ~f:(fun acc sid -> + Var.Set.union acc live_in_map.(sid)) + in + let to_remove = ref [] in + Var.Hashtbl.iter + (fun v high -> + if not (Var.Set.mem v live_out) + then ( + commit_range v (end_idx + 1) high; + to_remove := v :: !to_remove)) + active_ranges; + List.iter !to_remove ~f:(Var.Hashtbl.remove active_ranges); + Var.Set.iter + (fun v -> + if not (Var.Hashtbl.mem active_ranges v) + then Var.Hashtbl.add active_ranges v end_idx) + live_out); + let defs = defs_of_action g.actions.(node_id) in + Var.Set.iter + (fun v -> + match Var.Hashtbl.find_opt active_ranges v with + | Some high -> + commit_range v end_idx high; + Var.Hashtbl.remove active_ranges v + | None -> commit_range v end_idx end_idx) + defs; + let uses = uses_of_action g.actions.(node_id) in + Var.Set.iter + (fun v -> + if not (Var.Hashtbl.mem active_ranges v) + then Var.Hashtbl.add active_ranges v start_idx) + uses; + (* Try-catch range extension: reaching a [catch_entry] in RPO means we + have processed the catch targets. Whatever is live here must be + considered live throughout the try body, because an exception can + fire anywhere in the body and jump to a catch target. Extend every + currently-active range back to the start of the body, then clear + the active set so the body's own liveness is computed without + double-counting the exception path. + + [body_entry] is a dummy [Nop] shim whose only predecessor is the + Try wrapper, so its RPO order is guaranteed to be strictly less + than [catch_entry]'s. *) + match Int.Hashtbl.find_opt g.try_blocks node_id with + | None -> () + | Some body_entry_id -> + let body_order = node_order.(body_entry_id) in + assert (body_order < order); + let body_start_idx = 2 * body_order in + Var.Hashtbl.iter (fun v high -> commit_range v body_start_idx high) active_ranges; + Var.Hashtbl.clear active_ranges + done; + Var.Hashtbl.iter (fun v high -> commit_range v 0 high) active_ranges; + Var.Set.iter (fun v -> commit_range v 0 0) param_vars; + ranges + +(* --------------------------------------------------------------------- *) +(* Type compatibility *) +(* --------------------------------------------------------------------- *) + +let heap_type_equal (a : W.heap_type) (b : W.heap_type) = + match a, b with + | W.Func, W.Func + | Extern, Extern + | Any, Any + | Eq, Eq + | Struct, Struct + | Array, Array + | I31, I31 + | None_, None_ -> true + | Type x, Type y -> Var.equal x y + | ( (Func | Extern | Any | Eq | Struct | Array | I31 | None_ | Type _) + , (Func | Extern | Any | Eq | Struct | Array | I31 | None_ | Type _) ) -> false + +let ref_type_equal (a : W.ref_type) (b : W.ref_type) = + Bool.equal a.nullable b.nullable && heap_type_equal a.typ b.typ + +let value_type_equal (a : W.value_type) (b : W.value_type) = + match a, b with + | W.I32, W.I32 | I64, I64 | F32, F32 | F64, F64 -> true + | Ref ra, Ref rb -> ref_type_equal ra rb + | (I32 | I64 | F32 | F64 | Ref _), (I32 | I64 | F32 | F64 | Ref _) -> false + +(* --------------------------------------------------------------------- *) +(* Linear scan register allocation *) +(* --------------------------------------------------------------------- *) + +module Active_pqueue = Pqueue.Make (struct + type t = Live_range.t + + let compare r r' = compare (Live_range.get_first_hole r) (Live_range.get_first_hole r') +end) + +module Inactive_pqueue = Pqueue.Make (struct + type t = int * Var.t + + let compare (p, _) (p', _) = compare (p : int) p' +end) + +let allocate_registers subst types ranges hints = + let hint_count = ref 0 in + let opportunistic_count = ref 0 in + let sorted_intervals = + let intervals = Var.Hashtbl.fold (fun _ r acc -> r :: acc) ranges [] in + List.sort + ~cmp:(fun a b -> + Int.compare (Live_range.get_start_pos a) (Live_range.get_start_pos b)) + intervals + in + let active = ref Active_pqueue.empty in + let inactive_queue = ref Inactive_pqueue.empty in + let inactive = Var.Hashtbl.create 128 in + let free_pool = ref [] in + let rec update_active_queue position = + match Active_pqueue.find_min !active with + | exception Not_found -> () + | r -> ( + if Live_range.get_first_hole r <= position + then + let active' = Active_pqueue.remove_min !active in + match Live_range.advance r position with + | `Dead -> + free_pool := r :: !free_pool; + active := active'; + update_active_queue position + | `Inactive -> + inactive_queue := + Inactive_pqueue.add (Live_range.get_start_pos r, r.id) !inactive_queue; + Var.Hashtbl.replace inactive r.id r; + active := active'; + update_active_queue position + | `Active -> + active := Active_pqueue.add r active'; + update_active_queue position) + in + let rec update_inactive_queue position = + match Inactive_pqueue.find_min !inactive_queue with + | exception Not_found -> () + | p, v -> ( + if p <= position + then + let inactive' = Inactive_pqueue.remove_min !inactive_queue in + match Var.Hashtbl.find_opt inactive v with + | None -> + inactive_queue := inactive'; + update_inactive_queue position + | Some r -> ( + match Live_range.advance r position with + | `Dead -> + free_pool := r :: !free_pool; + inactive_queue := inactive'; + Var.Hashtbl.remove inactive v; + update_inactive_queue position + | `Inactive -> + inactive_queue := + Inactive_pqueue.add (Live_range.get_start_pos r, r.id) inactive'; + update_inactive_queue position + | `Active -> + active := Active_pqueue.add r !active; + inactive_queue := inactive'; + Var.Hashtbl.remove inactive v; + update_inactive_queue position)) + in + let type_of v = Var.Hashtbl.find types v in + let compatible a b = value_type_equal (type_of a) (type_of b) in + let get_free_matching wanted = + let rec loop kept pool = + match pool with + | [] -> + free_pool := List.rev kept; + None + | r :: rs -> + if r.Live_range.assigned + then loop kept rs + else if compatible r.Live_range.id wanted + then ( + free_pool := List.rev_append kept rs; + Some r) + else loop (r :: kept) rs + in + loop [] !free_pool + in + List.iter sorted_intervals ~f:(fun current -> + let position = Live_range.get_start_pos current in + update_active_queue position; + update_inactive_queue position; + let hint_repr = + match Var.Hashtbl.find_opt hints current.Live_range.id with + | None -> None + | Some src -> ( + match Var.Hashtbl.find_opt subst src with + | None -> None + | Some var -> ( + if not (compatible var current.id) + then None + else + let r = Var.Hashtbl.find ranges var in + match Live_range.advance r position with + | `Dead -> + r.assigned <- true; + Some r + | `Inactive -> + if Live_range.intersects r current + then None + else ( + Var.Hashtbl.remove inactive r.id; + Some r) + | `Active -> None)) + in + let repr = + match hint_repr with + | Some r -> + incr hint_count; + r + | None -> ( + let candidate = + let rec loop q count = + if count >= 50 || Inactive_pqueue.is_empty q + then None + else + let _, v = Inactive_pqueue.find_min q in + let q' = Inactive_pqueue.remove_min q in + match Var.Hashtbl.find_opt inactive v with + | None -> loop q' count + | Some iv -> + if + compatible iv.id current.id + && not (Live_range.intersects iv current) + then Some iv + else loop q' (count + 1) + in + loop !inactive_queue 0 + in + match candidate with + | Some r -> + incr opportunistic_count; + Var.Hashtbl.remove inactive r.id; + r + | None -> ( + match get_free_matching current.id with + | Some r -> + incr opportunistic_count; + r + | None -> current)) + in + if not (Var.equal current.id repr.id) + then ( + Var.forget_generated_name current.id; + Var.forget_generated_name repr.id; + Live_range.add_ranges repr current.Live_range.ranges; + Var.Hashtbl.replace subst current.id repr.id); + active := Active_pqueue.add repr !active); + !hint_count, !opportunistic_count + +(* --------------------------------------------------------------------- *) +(* Body rewriting *) +(* --------------------------------------------------------------------- *) + +let subst_var subst x = + match Var.Hashtbl.find_opt subst x with + | None -> x + | Some y -> y + +(* State threaded through the rewrite: the substitution, a mutable queue + of [tee_nodes] popped as we encounter each [LocalTee] in the AST, the + set of dead-tee node ids (as classified before rename), and an + accumulator of variables that had their only write eliminated by a + dead-tee replacement. *) +type rewriter = + { subst : Var.t Var.Hashtbl.t + ; mutable tee_queue : node_id list + ; dead_tees : unit Int.Hashtbl.t + ; erased : unit Var.Hashtbl.t + } + +let pop_tee r = + match r.tee_queue with + | [] -> + (* Every [LocalTee] encountered by the rewriter corresponds to one + entry recorded by [build_cfg]. If the queue is empty here, the + two traversals have drifted out of sync. *) + assert false + | id :: rest -> + r.tee_queue <- rest; + id + +(* A [rev_map]-style helper that applies [f] to list elements in + right-to-left order, matching [List.fold_right]'s visit order used + by [build_cfg] — the tee queue must be consumed in the same order it + was produced. *) +let map_right_to_left f l = List.fold_right l ~init:[] ~f:(fun x acc -> f x :: acc) + +let rec rewrite_expr r (e : W.expression) : W.expression = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> e + | LocalGet x -> LocalGet (subst_var r.subst x) + | LocalTee (x, e') -> + (* [build_cfg] records the tee's Def-node id BEFORE recursing into + [e']; mirror that here. *) + let id = pop_tee r in + let dead = id <> -1 && Int.Hashtbl.mem r.dead_tees id in + let e'' = rewrite_expr r e' in + if dead + then ( + Var.Hashtbl.replace r.erased x (); + e'') + else LocalTee (subst_var r.subst x, e'') + | UnOp (op, e') -> UnOp (op, rewrite_expr r e') + | I32WrapI64 e' -> I32WrapI64 (rewrite_expr r e') + | I64ExtendI32 (s, e') -> I64ExtendI32 (s, rewrite_expr r e') + | F32DemoteF64 e' -> F32DemoteF64 (rewrite_expr r e') + | F64PromoteF32 e' -> F64PromoteF32 (rewrite_expr r e') + | RefI31 e' -> RefI31 (rewrite_expr r e') + | I31Get (s, e') -> I31Get (s, rewrite_expr r e') + | ArrayLen e' -> ArrayLen (rewrite_expr r e') + | StructGet (s, ty, i, e') -> StructGet (s, ty, i, rewrite_expr r e') + | RefCast (ty, e') -> RefCast (ty, rewrite_expr r e') + | RefTest (ty, e') -> RefTest (ty, rewrite_expr r e') + | Br_on_cast (i, a, b, e') -> Br_on_cast (i, a, b, rewrite_expr r e') + | Br_on_cast_fail (i, a, b, e') -> Br_on_cast_fail (i, a, b, rewrite_expr r e') + | Br_on_null (i, e') -> Br_on_null (i, rewrite_expr r e') + | ExternConvertAny e' -> ExternConvertAny (rewrite_expr r e') + | AnyConvertExtern e' -> AnyConvertExtern (rewrite_expr r e') + (* Binary cases: [build_cfg] processes [e2] first (fold-right style). *) + | BinOp (op, e1, e2) -> + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + BinOp (op, e1', e2') + | ArrayNew (ty, e1, e2) -> + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + ArrayNew (ty, e1', e2') + | ArrayNewData (ty, d, e1, e2) -> + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + ArrayNewData (ty, d, e1', e2') + | ArrayGet (s, ty, e1, e2) -> + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + ArrayGet (s, ty, e1', e2') + | RefEq (e1, e2) -> + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + RefEq (e1', e2') + (* List cases: args processed right-to-left. *) + | Call (f, l) -> Call (f, map_right_to_left (rewrite_expr r) l) + | ArrayNewFixed (ty, l) -> ArrayNewFixed (ty, map_right_to_left (rewrite_expr r) l) + | StructNew (ty, l) -> StructNew (ty, map_right_to_left (rewrite_expr r) l) + (* [Call_ref]: funcref first, then args right-to-left. *) + | Call_ref (ty, f, l) -> + let f' = rewrite_expr r f in + let l' = map_right_to_left (rewrite_expr r) l in + Call_ref (ty, f', l') + (* [IfExpr]: [e1], then [e2], then [cond] — [build_cfg] order. *) + | IfExpr (ty, cond, e1, e2) -> + let e1' = rewrite_expr r e1 in + let e2' = rewrite_expr r e2 in + let cond' = rewrite_expr r cond in + IfExpr (ty, cond', e1', e2') + | BlockExpr (ty, l) -> BlockExpr (ty, rewrite_instrs r l) + | Seq (l, e') -> + let e'' = rewrite_expr r e' in + let l' = rewrite_instrs r l in + Seq (l', e'') + | Try (ty, body, catches) -> Try (ty, rewrite_instrs r body, catches) + +and rewrite_instr r (i : W.instruction) : W.instruction option = + match i with + | Nop | Event _ | Unreachable | Rethrow _ | Br (_, None) | Return None -> Some i + | Drop e -> Some (Drop (rewrite_expr r e)) + | Push e -> Some (Push (rewrite_expr r e)) + | LocalSet (x, e) -> ( + let e' = rewrite_expr r e in + let x' = subst_var r.subst x in + match e' with + | LocalGet y when Var.equal x' y -> None + | _ -> Some (LocalSet (x', e'))) + | GlobalSet (x, e) -> Some (GlobalSet (x, rewrite_expr r e)) + | StructSet (ty, i, e1, e2) -> + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + Some (StructSet (ty, i, e1', e2')) + | ArraySet (ty, e1, e2, e3) -> + let e3' = rewrite_expr r e3 in + let e2' = rewrite_expr r e2 in + let e1' = rewrite_expr r e1 in + Some (ArraySet (ty, e1', e2', e3')) + | CallInstr (f, l) -> Some (CallInstr (f, map_right_to_left (rewrite_expr r) l)) + | Return_call (f, l) -> Some (Return_call (f, map_right_to_left (rewrite_expr r) l)) + | Return_call_ref (ty, f, l) -> + let f' = rewrite_expr r f in + let l' = map_right_to_left (rewrite_expr r) l in + Some (Return_call_ref (ty, f', l')) + | Return (Some e) -> Some (Return (Some (rewrite_expr r e))) + | Throw (t, e) -> Some (Throw (t, rewrite_expr r e)) + | Br (i, Some e) -> Some (Br (i, Some (rewrite_expr r e))) + | Br_if (i, e) -> Some (Br_if (i, rewrite_expr r e)) + | Br_table (e, l, d) -> Some (Br_table (rewrite_expr r e, l, d)) + | Loop (ty, l) -> Some (Loop (ty, rewrite_instrs r l)) + | Block (ty, l) -> Some (Block (ty, rewrite_instrs r l)) + | If (ty, cond, l1, l2) -> + let l1' = rewrite_instrs r l1 in + let l2' = rewrite_instrs r l2 in + let cond' = rewrite_expr r cond in + Some (If (ty, cond', l1', l2')) + +(* [build_cfg] processes instruction lists right-to-left via [fold_right]. + Use the same order here so the tee queue is consumed consistently. *) +and rewrite_instrs r l = + List.fold_right l ~init:[] ~f:(fun i acc -> + match rewrite_instr r i with + | Some i' -> i' :: acc + | None -> acc) + +(* --------------------------------------------------------------------- *) +(* Entry point *) +(* --------------------------------------------------------------------- *) + +let f ~param_names ~param_types ~locals instrs = + let t = Timer.make () in + let num_params = List.length param_names in + assert (List.length param_types = num_params); + (* In pretty mode, only coalesce compiler-generated variables so + user-given OCaml names survive in the Wasm name section (and are + visible in browser devtools). *) + let eligible v = (not (Config.Flag.pretty ())) || Var.generated_name v in + let types = Var.Hashtbl.create (num_params + List.length locals) in + let param_vars = + List.fold_left2 param_names param_types ~init:Var.Set.empty ~f:(fun acc v t -> + Var.Hashtbl.replace types v t; + if eligible v then Var.Set.add v acc else acc) + in + let candidates = + List.fold_left locals ~init:param_vars ~f:(fun acc (v, t) -> + Var.Hashtbl.replace types v t; + if eligible v then Var.Set.add v acc else acc) + in + let num_candidates = Var.Set.cardinal candidates in + if num_candidates <= 1 + then locals, instrs + else ( + incr total_calls; + total_candidates := !total_candidates + num_candidates; + let g = build_cfg ~candidates ~param_vars instrs in + let live_in_map = compute_liveness g in + (* Dead-tee detection using the liveness already computed. A + [LocalTee x e] Def node is "dead" iff [x] is not in the node's + [live_out] — no subsequent instruction will read what the tee + stored. This must be done on the ORIGINAL variables, before + rename: coalescing only guarantees disjoint live ranges for + the vars it merges, not that the tee's write is unobserved by + the slot's other users. *) + let dead_tees = Int.Hashtbl.create 16 in + List.iter g.tee_nodes ~f:(fun id -> + if id >= 0 + then + let live_out = + List.fold_left g.succs.(id) ~init:Var.Set.empty ~f:(fun acc s -> + Var.Set.union acc live_in_map.(s)) + in + match (g.actions.(id) : action) with + | Def d -> + let x = Var.Set.choose d in + if not (Var.Set.mem x live_out) then Int.Hashtbl.replace dead_tees id () + | Use _ | Nop -> ()); + let ranges = compute_live_ranges g live_in_map candidates param_vars in + let subst = Var.Hashtbl.create num_candidates in + let hint_count, opportunistic_count = + allocate_registers subst types ranges g.coalescing_hints + in + let dead_tee_count = Int.Hashtbl.length dead_tees in + total_hint_count := !total_hint_count + hint_count; + total_opportunistic_count := !total_opportunistic_count + opportunistic_count; + total_dead_tee_count := !total_dead_tee_count + dead_tee_count; + (* Short-circuit only when BOTH rewriting drivers would be no-ops. *) + if Var.Hashtbl.length subst = 0 && dead_tee_count = 0 + then ( + total_time := !total_time +. Timer.get t; + locals, instrs) + else + let r = + { subst; tee_queue = g.tee_nodes; dead_tees; erased = Var.Hashtbl.create 16 } + in + let instrs = rewrite_instrs r instrs in + (* The tee queue must be fully consumed if [build_cfg] and the + rewriter saw the same AST. *) + assert (List.is_empty r.tee_queue); + (* Dropping dead tees may have left some locals with no + references at all. Scan the rewritten body once to find such + orphans among the vars we erased, and drop them from the + [locals] declaration. *) + let still_referenced = + if Var.Hashtbl.length r.erased = 0 + then r.erased (* unused; avoid a scan *) + else + let seen = Var.Hashtbl.create (Var.Hashtbl.length r.erased) in + let note v = if Var.Hashtbl.mem r.erased v then Var.Hashtbl.replace seen v () in + let rec scan_expr (e : W.expression) = + match e with + | Const _ | GlobalGet _ | Pop _ | RefFunc _ | RefNull _ -> () + | LocalGet x -> note x + | LocalTee (x, e') -> + note x; + scan_expr e' + | UnOp (_, e') + | I32WrapI64 e' + | I64ExtendI32 (_, e') + | F32DemoteF64 e' + | F64PromoteF32 e' + | RefI31 e' + | I31Get (_, e') + | ArrayLen e' + | StructGet (_, _, _, e') + | RefCast (_, e') + | RefTest (_, e') + | Br_on_cast (_, _, _, e') + | Br_on_cast_fail (_, _, _, e') + | Br_on_null (_, e') + | ExternConvertAny e' + | AnyConvertExtern e' -> scan_expr e' + | BinOp (_, e1, e2) + | ArrayNew (_, e1, e2) + | ArrayNewData (_, _, e1, e2) + | ArrayGet (_, _, e1, e2) + | RefEq (e1, e2) -> + scan_expr e1; + scan_expr e2 + | Call (_, l) | ArrayNewFixed (_, l) | StructNew (_, l) -> + List.iter l ~f:scan_expr + | Call_ref (_, f, l) -> + scan_expr f; + List.iter l ~f:scan_expr + | IfExpr (_, c, t, el) -> + scan_expr c; + scan_expr t; + scan_expr el + | BlockExpr (_, l) -> scan_instrs l + | Seq (l, e') -> + scan_instrs l; + scan_expr e' + | Try (_, body, _) -> scan_instrs body + and scan_instr (i : W.instruction) = + match i with + | Nop | Event _ | Br (_, None) | Return None | Rethrow _ | Unreachable -> () + | Drop e + | Push e + | GlobalSet (_, e) + | Br (_, Some e) + | Br_if (_, e) + | Br_table (e, _, _) + | Throw (_, e) + | Return (Some e) -> scan_expr e + | LocalSet (x, e) -> + note x; + scan_expr e + | StructSet (_, _, e1, e2) -> + scan_expr e1; + scan_expr e2 + | ArraySet (_, e1, e2, e3) -> + scan_expr e1; + scan_expr e2; + scan_expr e3 + | CallInstr (_, l) | Return_call (_, l) -> List.iter l ~f:scan_expr + | Return_call_ref (_, f, l) -> + scan_expr f; + List.iter l ~f:scan_expr + | Loop (_, l) | Block (_, l) -> scan_instrs l + | If (_, c, t, el) -> + scan_expr c; + scan_instrs t; + scan_instrs el + and scan_instrs l = List.iter l ~f:scan_instr in + scan_instrs instrs; + seen + in + (* Rebuild locals: drop vars merged away, or vars whose only + reference was an eliminated dead tee and who have no + surviving references in the rewritten body. *) + let kept_locals = + List.filter locals ~f:(fun (v, _) -> + (not (Var.Hashtbl.mem subst v)) + && not (Var.Hashtbl.mem r.erased v && not (Var.Hashtbl.mem still_referenced v))) + in + total_time := !total_time +. Timer.get t; + kept_locals, instrs) diff --git a/compiler/lib-wasm/var_coalescing.mli b/compiler/lib-wasm/var_coalescing.mli new file mode 100644 index 0000000000..3a0a2c769a --- /dev/null +++ b/compiler/lib-wasm/var_coalescing.mli @@ -0,0 +1,40 @@ +(* Wasm_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * Copyright (C) 2026 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(** Liveness analysis and variable coalescing for the Wasm backend. + + Merges Wasm locals (including parameters) with disjoint live ranges so + that a single local is reused. This shrinks the function's [locals] + declaration and eliminates redundant [local.set]/[local.get] copies. + + Parameters are valid coalescing targets: a local whose live range does + not overlap a parameter's can be rewritten to use the parameter's + index. Two parameters never merge because they are all live at + function entry. *) + +val f : + param_names:Code.Var.t list + -> param_types:Wasm_ast.value_type list + -> locals:(Code.Var.t * Wasm_ast.value_type) list + -> Wasm_ast.instruction list + -> (Code.Var.t * Wasm_ast.value_type) list * Wasm_ast.instruction list + +val report_stats : unit -> unit +(** Emit aggregated time/stats logs accumulated across all [f] calls and + reset the counters. Honours the [times] and [stats] debug flags. *) diff --git a/compiler/lib/config.ml b/compiler/lib/config.ml index 69db92887d..29187b4385 100644 --- a/compiler/lib/config.ml +++ b/compiler/lib/config.ml @@ -103,6 +103,12 @@ module Flag = struct let var_coalescing = o ~name:"var-coalescing" ~default:true + let wasm_var_coalescing = o ~name:"wasm-var-coalescing" ~default:true + + let wasm_local_sink = o ~name:"wasm-local-sink" ~default:true + + let wasm_reorder_locals = o ~name:"wasm-reorder-locals" ~default:true + let header = o ~name:"header" ~default:true let auto_link = o ~name:"auto-link" ~default:true diff --git a/compiler/lib/config.mli b/compiler/lib/config.mli index 2a540f68bc..dc9a957a45 100644 --- a/compiler/lib/config.mli +++ b/compiler/lib/config.mli @@ -52,6 +52,12 @@ module Flag : sig val var_coalescing : unit -> bool + val wasm_var_coalescing : unit -> bool + + val wasm_local_sink : unit -> bool + + val wasm_reorder_locals : unit -> bool + val debugger : unit -> bool val pretty : unit -> bool diff --git a/compiler/lib/source_map.ml b/compiler/lib/source_map.ml index 61598b8786..98c910c4ff 100644 --- a/compiler/lib/source_map.ml +++ b/compiler/lib/source_map.ml @@ -140,7 +140,7 @@ module Mappings = struct done; gen_col_r := 0; gen_line_r := gen_line c) - else if i > 0 + else if prev >= 0 then Buffer.add_char buf ','; let l = match c with