From 0611f3bfe5b1c7b05768d418d8f66f19d1a81191 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Mon, 2 Feb 2026 22:16:28 +0100 Subject: [PATCH 1/5] Compiler: fix js_assign for module with toplevel var decl --- compiler/lib/js_assign.ml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compiler/lib/js_assign.ml b/compiler/lib/js_assign.ml index 5fb53d14b2..a040a2312a 100644 --- a/compiler/lib/js_assign.ml +++ b/compiler/lib/js_assign.ml @@ -435,7 +435,12 @@ let program' (module Strategy : Strategy) p = let o = new traverse_idents_and_labels ~idents:count ~labels in o#program p in - mapper#record_block Normal; + (* Use Params for top-level to include both def_var and def_local. + - For ESM: module-level var/let/const all need names allocated + - For scripts: also safe because top-level var declarations are in def_var, + and using Params ensures they're included (Normal would only include def_local). + The empty params list means no parameter name preferences are recorded. *) + mapper#record_block (Params { list = []; rest = None }); let freevar = IdentSet.fold (fun ident acc -> From 50592b3977ae51354e03f6e5b293efa6bb88a3ee Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Mon, 2 Feb 2026 22:17:31 +0100 Subject: [PATCH 2/5] Compiler: initial support for esm --- compiler/lib/esm.ml | 420 ++++++++++++++ compiler/lib/esm.mli | 127 ++++ compiler/lib/esm_bundle.ml | 313 ++++++++++ compiler/lib/esm_bundle.mli | 53 ++ compiler/lib/esm_tree_shake.ml | 502 ++++++++++++++++ compiler/lib/esm_tree_shake.mli | 48 ++ compiler/tests-esm/dune | 7 + compiler/tests-esm/esm_bundle.ml | 958 +++++++++++++++++++++++++++++++ 8 files changed, 2428 insertions(+) create mode 100644 compiler/lib/esm.ml create mode 100644 compiler/lib/esm.mli create mode 100644 compiler/lib/esm_bundle.ml create mode 100644 compiler/lib/esm_bundle.mli create mode 100644 compiler/lib/esm_tree_shake.ml create mode 100644 compiler/lib/esm_tree_shake.mli create mode 100644 compiler/tests-esm/dune create mode 100644 compiler/tests-esm/esm_bundle.ml diff --git a/compiler/lib/esm.ml b/compiler/lib/esm.ml new file mode 100644 index 0000000000..0fd61e884e --- /dev/null +++ b/compiler/lib/esm.ml @@ -0,0 +1,420 @@ +(* Js_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +open! Stdlib +open Javascript + +(* ========== Module Identifiers ========== *) + +module ModuleId = struct + type t = Id of string [@@unboxed] + + let of_path path = Id path + + let to_path (Id t) = t + + let compare (Id a) (Id b) = String.compare a b + + let equal (Id a) (Id b) = String.equal a b + + module Set = Set.Make (struct + type nonrec t = t + + let compare = compare + end) + + module Map = Map.Make (struct + type nonrec t = t + + let compare = compare + end) +end + +module ModuleSCC = Strongly_connected_components.Make (struct + type t = ModuleId.t + + module Map = ModuleId.Map + module Set = ModuleId.Set +end) + +(* ========== Module Representation ========== *) + +type export_kind = + | Export_var + | Export_fun + | Export_class + | Export_reexport of ModuleId.t * string + +type export_entry = + { exported_name : string + ; local_ident : ident + ; kind : export_kind + } + +type import_binding = + | ImportNamed of string * ident + | ImportDefault of ident + | ImportNamespace of ident + | ImportSideEffect + +type import_entry = + { source : ModuleId.t + ; bindings : import_binding list + } + +type esm_module = + { id : ModuleId.t + ; imports : import_entry list + ; exports : export_entry StringMap.t + ; star_exports : ModuleId.t list (* export * from sources *) + ; body : statement_list + ; has_default_export : bool + } + +type module_graph = + { modules : esm_module ModuleId.Map.t + ; deps : ModuleId.Set.t ModuleId.Map.t + } + +(* ========== Identifier Helpers ========== *) + +let ident_to_utf8 id = + match id with + | S { name; _ } -> name + | V v -> + let name = + match Code.Var.get_name v with + | Some n -> n + | None -> Printf.sprintf "v%d" (Code.Var.idx v) + in + Utf8_string.of_string_exn name + +let fresh_ident name = + let v = Code.Var.fresh_n name in + V v + +(* Normalize export declarations: convert ExportVar/ExportFun/ExportClass + and ExportDefault* to declaration + ExportNames. This allows declared_names + to find all names and simplifies extract_exports and extract_body. *) +let normalize_exports (stmts : statement_list) : statement_list = + let default_utf8 = Utf8_string.of_string_exn "default" in + List.concat_map stmts ~f:(fun (stmt, loc) -> + match stmt with + | Export (k, pi) -> begin + match k with + | ExportVar (k, decls) -> + let ids = + List.concat_map decls ~f:(fun decl -> + bound_idents_of_variable_declaration decl) + in + let export_names = List.map ids ~f:(fun id -> id, ident_to_utf8 id) in + [ Variable_statement (k, decls), loc + ; Export (ExportNames export_names, pi), N + ] + | ExportFun (id, decl) -> + [ Function_declaration (id, decl), loc + ; Export (ExportNames [ id, ident_to_utf8 id ], pi), N + ] + | ExportClass (id, decl) -> + [ Class_declaration (id, decl), loc + ; Export (ExportNames [ id, ident_to_utf8 id ], pi), N + ] + | ExportDefaultFun (id_opt, decl) -> + let id = Option.value id_opt ~default:(fresh_ident "default") in + [ Function_declaration (id, decl), loc + ; Export (ExportNames [ id, default_utf8 ], pi), N + ] + | ExportDefaultClass (id_opt, decl) -> + let id = Option.value id_opt ~default:(fresh_ident "default") in + [ Class_declaration (id, decl), loc + ; Export (ExportNames [ id, default_utf8 ], pi), N + ] + | ExportDefaultExpression e -> + let id = fresh_ident "default" in + [ Variable_statement (Const, [ DeclIdent (id, Some (e, N)) ]), loc + ; Export (ExportNames [ id, default_utf8 ], pi), N + ] + | CoverExportFrom _ -> assert false + (* These export forms pass through unchanged *) + | ExportNames _ | ExportFrom _ -> [ stmt, loc ] + end + | _ -> [ stmt, loc ]) + +(* Rename all declarations to fresh V identifiers using Js_traverse.rename_variable. + This includes all variables, functions, classes, imports, and exports at all scopes. *) +let rename_module_declarations (stmts : statement_list) : statement_list = + let stmts = normalize_exports stmts in + let renamer = new Js_traverse.rename_variable ~esm:true in + renamer#program stmts + +(* ========== Module Analysis ========== *) + +(* Extract imports from a module *) +let extract_imports ~resolve (stmts : statement_list) : import_entry list = + List.filter_map stmts ~f:(fun (stmt, _loc) -> + match stmt with + | Import ({ from = Utf8_string.Utf8 from_path; kind; _ }, _) -> + let source = resolve from_path in + let bindings = + match kind with + | SideEffect -> [ ImportSideEffect ] + | Default id -> [ ImportDefault id ] + | DeferNamespace id | Namespace (None, id) -> [ ImportNamespace id ] + | Namespace (Some default_id, ns_id) -> + [ ImportDefault default_id; ImportNamespace ns_id ] + | Named (default_opt, named) -> ( + let bindings = + List.map named ~f:(fun (Utf8_string.Utf8 orig, local) -> + ImportNamed (orig, local)) + in + match default_opt with + | None -> bindings + | Some id -> ImportDefault id :: bindings) + in + Some { source; bindings } + | _ -> None) + +(* Extract exports from a module (called after normalize_exports and renaming) *) +let extract_exports ~resolve (stmts : statement_list) : + export_entry StringMap.t * ModuleId.t list * bool = + let exports = ref StringMap.empty in + let star_exports = ref [] in + let has_default = ref false in + let add_export name entry = exports := StringMap.add name entry !exports in + List.iter stmts ~f:(fun (stmt, _loc) -> + match stmt with + | Export (export, _) -> ( + match export with + (* After normalize_exports, all direct exports become ExportNames *) + | ExportNames named_exports -> + List.iter named_exports ~f:(fun (id, Utf8_string.Utf8 exported_name) -> + if String.equal exported_name "default" then has_default := true; + add_export + exported_name + { exported_name; local_ident = id; kind = Export_var }) + | ExportFrom { kind = Export_all None; from = Utf8_string.Utf8 from_path; _ } -> + (* export * from 'module' - track source for later resolution *) + let source = resolve from_path in + star_exports := source :: !star_exports + | ExportFrom + { kind = Export_all (Some (Utf8_string.Utf8 exported_name)) + ; from = Utf8_string.Utf8 from_path + ; _ + } -> + (* export * as name from 'module' *) + let source = resolve from_path in + let local_ident = fresh_ident exported_name in + add_export + exported_name + { exported_name; local_ident; kind = Export_reexport (source, "*") } + | ExportFrom { kind = Export_names named; from = Utf8_string.Utf8 from_path; _ } + -> + let source = resolve from_path in + List.iter + named + ~f:(fun (Utf8_string.Utf8 orig_name, Utf8_string.Utf8 exported_name) -> + let local_ident = fresh_ident exported_name in + add_export + exported_name + { exported_name + ; local_ident + ; kind = Export_reexport (source, orig_name) + }) + | ExportVar _ + | ExportFun _ + | ExportClass _ + | ExportDefaultFun _ + | ExportDefaultClass _ + | ExportDefaultExpression _ -> + (* These should have been normalized to ExportNames *) + assert false + | CoverExportFrom _ -> assert false) + | _ -> ()); + !exports, List.rev !star_exports, !has_default + +let analyze_module ~resolve id (program : program) : esm_module = + (* Rename first - this also normalizes exports *) + let renamed_program = rename_module_declarations program in + (* Extract from renamed program - identifiers are already V variants *) + let exports, star_exports, has_default_export = + extract_exports ~resolve renamed_program + in + let imports = extract_imports ~resolve renamed_program in + (* Extract body: filter out Import and Export statements *) + let body = + List.filter renamed_program ~f:(fun (stmt, _loc) -> + match stmt with + | Import _ | Export _ -> false + | _ -> true) + in + { id; imports; exports; star_exports; body; has_default_export } + +(* ========== Graph Construction ========== *) + +let rec build_graph ~parse ~resolve ~entry_points : module_graph = + let modules = ref ModuleId.Map.empty in + let deps = ref ModuleId.Map.empty in + let worklist = Queue.create () in + (* Initialize worklist with entry points *) + List.iter entry_points ~f:(fun path -> + let id = ModuleId.of_path path in + Queue.push id worklist); + (* Process modules until worklist is empty *) + while not (Queue.is_empty worklist) do + let id = Queue.pop worklist in + if not (ModuleId.Map.mem id !modules) + then begin + let path = ModuleId.to_path id in + let program = parse path in + let resolve_for_module specifier = + match resolve ~from:path specifier with + | Some resolved_path -> ModuleId.of_path resolved_path + | None -> failwith (Printf.sprintf "Cannot resolve '%s' from '%s'" specifier path) + in + let esm = analyze_module ~resolve:resolve_for_module id program in + modules := ModuleId.Map.add id esm !modules; + (* Collect dependencies *) + let module_deps = + List.fold_left esm.imports ~init:ModuleId.Set.empty ~f:(fun acc import -> + ModuleId.Set.add import.source acc) + in + (* Add re-export dependencies *) + let module_deps = + StringMap.fold + (fun _ export acc -> + match export.kind with + | Export_reexport (source, _) -> ModuleId.Set.add source acc + | Export_var | Export_fun | Export_class -> acc) + esm.exports + module_deps + in + (* Add star export dependencies *) + let module_deps = + List.fold_left esm.star_exports ~init:module_deps ~f:(fun acc source -> + ModuleId.Set.add source acc) + in + deps := ModuleId.Map.add id module_deps !deps; + (* Add dependencies to worklist *) + ModuleId.Set.iter (fun dep -> Queue.push dep worklist) module_deps + end + done; + let graph = { modules = !modules; deps = !deps } in + (* Resolve star exports *) + resolve_star_exports graph + +(* Resolve export * from by copying exports from source modules. + + Per the ES spec: + - The 'default' export is never re-exported by 'export *' + - A module's own explicit exports take precedence over 'export *' + - If the same name comes from multiple 'export *' sources, it should be + an ambiguous export error at link time + + We use "first wins" semantics for conflicting star exports instead of + tracking ambiguous exports, which is a pragmatic simplification. *) +and resolve_star_exports (graph : module_graph) : module_graph = + (* Get modules in topological order (dependencies first) *) + let sorted = topological_sort_for_star_exports graph in + (* Process modules and accumulate resolved exports *) + let resolved_modules = + List.fold_left sorted ~init:graph.modules ~f:(fun modules id -> + match ModuleId.Map.find_opt id modules with + | None -> modules + | Some m -> + if List.is_empty m.star_exports + then modules + else + (* Collect exports from all star export sources *) + let additional_exports = + List.fold_left + m.star_exports + ~init:StringMap.empty + ~f:(fun acc source_id -> + match ModuleId.Map.find_opt source_id modules with + | None -> acc + | Some source_module -> + (* Add all exports from source, except default *) + StringMap.fold + (fun name _export acc -> + if String.equal name "default" + then acc + else if StringMap.mem name acc || StringMap.mem name m.exports + then acc + else + (* Create a re-export entry *) + let local_ident = fresh_ident name in + StringMap.add + name + { exported_name = name + ; local_ident + ; kind = Export_reexport (source_id, name) + } + acc) + source_module.exports + acc) + in + (* Merge additional exports into module *) + let exports = + StringMap.union (fun _ a _ -> Some a) m.exports additional_exports + in + ModuleId.Map.add id { m with exports } modules) + in + { graph with modules = resolved_modules } + +(* Simple topological sort for star export resolution *) +and topological_sort_for_star_exports (graph : module_graph) : ModuleId.t list = + let components = ModuleSCC.connected_components_sorted_from_roots_to_leaf graph.deps in + Array.fold_right components ~init:[] ~f:(fun component acc -> + match component with + | ModuleSCC.No_loop id -> id :: acc + | ModuleSCC.Has_loop ids -> List.rev_append ids acc) + |> List.rev + +(* Topological sort of modules *) +let topological_sort (graph : module_graph) : ModuleId.t list = + let components = ModuleSCC.connected_components_sorted_from_roots_to_leaf graph.deps in + (* Components are sorted from roots to leaves, we want leaves first *) + let sorted = + Array.fold_right components ~init:[] ~f:(fun component acc -> + match component with + | ModuleSCC.No_loop id -> id :: acc + | ModuleSCC.Has_loop ids -> + (* For cycles, add all modules in cycle order *) + List.rev_append ids acc) + in + (* Reverse to get dependency order (dependencies before dependents) *) + List.rev sorted + +(* Resolve re-exports to find the actual source identifier *) +let rec resolve_reexport all_modules exp = + match exp.kind with + | Export_reexport (reexport_source, reexport_name) -> + let reexport_module = + match ModuleId.Map.find_opt reexport_source all_modules with + | Some m -> m + | None -> failwith ("Module not found: " ^ ModuleId.to_path reexport_source) + in + let reexport_exp = + match StringMap.find_opt reexport_name reexport_module.exports with + | Some e -> e + | None -> exp (* fallback - use original export *) + in + resolve_reexport all_modules reexport_exp + | Export_var | Export_fun | Export_class -> + (* Found the actual export - local_ident is already a V identifier *) + exp.local_ident diff --git a/compiler/lib/esm.mli b/compiler/lib/esm.mli new file mode 100644 index 0000000000..6b51dfab7b --- /dev/null +++ b/compiler/lib/esm.mli @@ -0,0 +1,127 @@ +(* Js_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +open Stdlib + +(** ES Module types and graph construction. + + This module provides: + - Type definitions for ES module representation + - Module graph construction from entry points + - Resolution of import/export dependencies + + For tree-shaking, see {!Esm_tree_shake}. + For bundle generation, see {!Esm_bundle}. +*) + +(** {1 Module Identifiers} *) + +module ModuleId : sig + type t + + val of_path : string -> t + (** Create a module identifier from a file path. *) + + val to_path : t -> string + (** Get the file path from a module identifier. *) + + val compare : t -> t -> int + + val equal : t -> t -> bool + + module Set : Set.S with type elt = t + + module Map : Map.S with type key = t +end + +(** {1 Module Representation} *) + +type export_kind = + | Export_var + | Export_fun + | Export_class + | Export_reexport of ModuleId.t * string (** Re-export from another module *) + +type export_entry = + { exported_name : string (** Name visible to importers *) + ; local_ident : Javascript.ident (** Local binding in this module *) + ; kind : export_kind + } + +type import_binding = + | ImportNamed of string * Javascript.ident (** original name, local binding *) + | ImportDefault of Javascript.ident + | ImportNamespace of Javascript.ident + | ImportSideEffect + +type import_entry = + { source : ModuleId.t + ; bindings : import_binding list + } + +type esm_module = + { id : ModuleId.t + ; imports : import_entry list + ; exports : export_entry StringMap.t (** Map from exported name to export entry *) + ; star_exports : ModuleId.t list (** Sources of [export * from] statements *) + ; body : Javascript.statement_list (** Non-import/export statements *) + ; has_default_export : bool + } + +type module_graph = + { modules : esm_module ModuleId.Map.t + ; deps : ModuleId.Set.t ModuleId.Map.t + (** Dependencies: module -> set of modules it imports from *) + } + +(** {1 Module Analysis} *) + +val analyze_module : + resolve:(string -> ModuleId.t) -> ModuleId.t -> Javascript.program -> esm_module +(** [analyze_module ~resolve id program] analyzes a parsed JavaScript module + and extracts its imports, exports, and body statements. + + @param resolve Function to resolve import specifiers to module IDs + @param id The module's identifier + @param program The parsed JavaScript program +*) + +(** {1 Graph Construction} *) + +val build_graph : + parse:(string -> Javascript.program) + -> resolve:(from:string -> string -> string option) + -> entry_points:string list + -> module_graph +(** [build_graph ~parse ~resolve ~entry_points] builds a complete module + dependency graph starting from the given entry points. + + @param parse Function to parse a file path into a JavaScript program + @param resolve Function to resolve an import specifier relative to a module + @param entry_points List of entry point file paths +*) + +(** {1 Utilities} *) + +val topological_sort : module_graph -> ModuleId.t list +(** [topological_sort graph] returns modules in dependency order + (dependencies before dependents). *) + +val resolve_reexport : esm_module ModuleId.Map.t -> export_entry -> Javascript.ident +(** [resolve_reexport modules export] follows re-export chains to find the + actual source identifier for an export. *) diff --git a/compiler/lib/esm_bundle.ml b/compiler/lib/esm_bundle.ml new file mode 100644 index 0000000000..d19ba13e7a --- /dev/null +++ b/compiler/lib/esm_bundle.ml @@ -0,0 +1,313 @@ +(* Js_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +open! Stdlib +open Javascript + +(* Build a substitution map from import identifiers to their resolved source identifiers. + This allows us to directly replace uses of import bindings with the actual source + variables, avoiding intermediate const bindings. *) +let build_import_substitutions + (m : Esm.esm_module) + (all_modules : Esm.esm_module Esm.ModuleId.Map.t) : ident Code.Var.Map.t = + List.fold_left m.imports ~init:Code.Var.Map.empty ~f:(fun acc import -> + let source_module = + match Esm.ModuleId.Map.find_opt import.Esm.source all_modules with + | Some m -> m + | None -> failwith ("Module not found: " ^ Esm.ModuleId.to_path import.Esm.source) + in + List.fold_left import.Esm.bindings ~init:acc ~f:(fun acc binding -> + match binding with + | Esm.ImportSideEffect -> acc + | Esm.ImportDefault local_id -> ( + let export = + match StringMap.find_opt "default" source_module.exports with + | Some e -> e + | None -> + failwith ("No default export in " ^ Esm.ModuleId.to_path import.Esm.source) + in + let source_ident = Esm.resolve_reexport all_modules export in + match local_id with + | V v -> Code.Var.Map.add v source_ident acc + | S _ -> acc) + | Esm.ImportNamed (orig_name, local_id) -> ( + let export = + match StringMap.find_opt orig_name source_module.exports with + | Some e -> e + | None -> + failwith + (Printf.sprintf + "No export '%s' in %s" + orig_name + (Esm.ModuleId.to_path import.Esm.source)) + in + let source_ident = Esm.resolve_reexport all_modules export in + match local_id with + | V v -> Code.Var.Map.add v source_ident acc + | S _ -> acc) + | Esm.ImportNamespace _ -> + (* Namespace imports still need object creation, handled separately *) + acc)) + +(* ========== Namespace Import Optimization ========== *) + +(* Analyze namespace usage to find which fields are accessed. + Returns None if the namespace is used in a way that prevents optimization + (e.g., passed to a function, used as a value itself). + Returns Some field_set if only static field accesses are found. *) + +type namespace_usage = + | Only_fields of StringSet.t (* Only static field accesses *) + | Cannot_optimize (* Namespace used in non-field-access way *) + +class collect_namespace_accesses (namespace_vars : Code.Var.Set.t) = + object (self) + inherit Js_traverse.iter as super + + val mutable usage : namespace_usage Code.Var.Map.t = Code.Var.Map.empty + + method get_usage = usage + + method private mark_cannot_optimize v = + usage <- Code.Var.Map.add v Cannot_optimize usage + + method private add_field_access v field = + let current = + match Code.Var.Map.find_opt v usage with + | None -> Only_fields StringSet.empty + | Some x -> x + in + match current with + | Cannot_optimize -> () + | Only_fields fields -> + usage <- Code.Var.Map.add v (Only_fields (StringSet.add field fields)) usage + + method! expression e = + match e with + (* ns.field - static field access via dot notation *) + | EDot (EVar (V v), _, Utf8 field) when Code.Var.Set.mem v namespace_vars -> + self#add_field_access v field + (* ns["field"] - static field access via bracket notation with string literal *) + | EAccess (EVar (V v), _, EStr (Utf8 field)) when Code.Var.Set.mem v namespace_vars + -> self#add_field_access v field + (* ns[expr] - dynamic access, cannot optimize *) + | EAccess (EVar (V v), _, _) when Code.Var.Set.mem v namespace_vars -> + self#mark_cannot_optimize v + (* EVar ns - namespace used as a value (not a field access) *) + | EVar (V v) when Code.Var.Set.mem v namespace_vars -> self#mark_cannot_optimize v + | _ -> super#expression e + end + +(* Transform namespace imports into named imports based on field usage analysis. + Returns updated module with transformed imports and body. *) +let optimize_namespace_imports (m : Esm.esm_module) : Esm.esm_module = + (* Collect all namespace import identifiers *) + let namespace_vars = + List.fold_left m.imports ~init:Code.Var.Map.empty ~f:(fun acc import -> + List.fold_left import.Esm.bindings ~init:acc ~f:(fun acc binding -> + match binding with + | Esm.ImportNamespace (V v) -> Code.Var.Map.add v import.Esm.source acc + | Esm.ImportNamespace (S _) + | Esm.ImportNamed _ + | Esm.ImportDefault _ + | Esm.ImportSideEffect -> acc)) + in + if Code.Var.Map.is_empty namespace_vars + then m + else + let namespace_var_set = + Code.Var.Map.fold + (fun v _ acc -> Code.Var.Set.add v acc) + namespace_vars + Code.Var.Set.empty + in + (* Analyze field accesses *) + let collector = new collect_namespace_accesses namespace_var_set in + collector#program m.body; + let usage = collector#get_usage in + (* Build a map from (namespace_var, field) -> fresh_ident for optimizable namespaces *) + let field_idents : ident StringMap.t Code.Var.Map.t = + Code.Var.Map.fold + (fun v _source acc -> + match Code.Var.Map.find_opt v usage with + | Some (Only_fields fields) -> + let field_map = + StringSet.fold + (fun field fmap -> + let fresh_id = V (Code.Var.fresh_n field) in + StringMap.add field fresh_id fmap) + fields + StringMap.empty + in + Code.Var.Map.add v field_map acc + | Some Cannot_optimize | None -> acc) + namespace_vars + Code.Var.Map.empty + in + (* Transform imports: replace optimizable ImportNamespace with ImportNamed *) + let imports = + List.map m.imports ~f:(fun import -> + let bindings = + List.concat_map import.Esm.bindings ~f:(fun binding -> + match binding with + | Esm.ImportNamespace (V v) -> ( + match Code.Var.Map.find_opt v field_idents with + | Some field_map -> + (* Replace with named imports *) + StringMap.fold + (fun field local_id acc -> + Esm.ImportNamed (field, local_id) :: acc) + field_map + [] + | None -> + (* Keep as namespace import *) + [ binding ]) + | Esm.ImportNamespace (S _) + | Esm.ImportNamed _ + | Esm.ImportDefault _ + | Esm.ImportSideEffect -> [ binding ]) + in + { import with bindings }) + in + (* Transform body: replace ns.field with the fresh identifier *) + let body = + let replacer = + object + inherit Js_traverse.map as super + + method! expression e = + match e with + | EDot (EVar (V v), _, Utf8 field) -> ( + match Code.Var.Map.find_opt v field_idents with + | Some field_map -> ( + match StringMap.find_opt field field_map with + | Some local_id -> EVar local_id + | None -> super#expression e) + | None -> super#expression e) + | EAccess (EVar (V v), _, EStr (Utf8 field)) -> ( + match Code.Var.Map.find_opt v field_idents with + | Some field_map -> ( + match StringMap.find_opt field field_map with + | Some local_id -> EVar local_id + | None -> super#expression e) + | None -> super#expression e) + | _ -> super#expression e + end + in + replacer#program m.body + in + { m with imports; body } + +(* Generate bindings only for namespace imports, which require object creation *) +let generate_namespace_bindings + (m : Esm.esm_module) + (all_modules : Esm.esm_module Esm.ModuleId.Map.t) : statement_list = + List.concat_map m.imports ~f:(fun import -> + let source_module = + match Esm.ModuleId.Map.find_opt import.Esm.source all_modules with + | Some m -> m + | None -> failwith ("Module not found: " ^ Esm.ModuleId.to_path import.Esm.source) + in + List.filter_map import.Esm.bindings ~f:(fun binding -> + match binding with + | Esm.ImportNamespace local_id -> + (* Create object with all exports *) + let props = + StringMap.fold + (fun name export acc -> + let source_ident = Esm.resolve_reexport all_modules export in + let name_utf8 = Utf8_string.of_string_exn name in + let pn = if is_ident name then PNI name_utf8 else PNS name_utf8 in + Property (pn, EVar source_ident) :: acc) + source_module.exports + [] + in + Some + ( Variable_statement (Const, [ DeclIdent (local_id, Some (EObj props, N)) ]) + , N ) + | Esm.ImportNamed _ | Esm.ImportDefault _ | Esm.ImportSideEffect -> None)) + +(* Substitution traversal: replace import identifiers with their source identifiers *) +class substitute_imports (subst : ident Code.Var.Map.t) = + object + inherit Js_traverse.map + + method! ident i = + match i with + | V v -> ( + match Code.Var.Map.find_opt v subst with + | Some target -> target + | None -> i) + | S _ -> i + end + +let apply_import_substitutions subst stmts = + if Code.Var.Map.is_empty subst + then stmts + else (new substitute_imports subst)#program stmts + +let bundle (graph : Esm.module_graph) ~(entry_points : Esm.ModuleId.t list) : program = + let sorted = Esm.topological_sort graph in + let body_stmts = + List.concat_map sorted ~f:(fun id -> + match Esm.ModuleId.Map.find_opt id graph.modules with + | None -> [] + | Some m -> + (* Optimize namespace imports: convert `ns.field` to named imports *) + let m = optimize_namespace_imports m in + (* Build substitution map and apply to body *) + let subst = build_import_substitutions m graph.modules in + let body = apply_import_substitutions subst m.body in + (* Generate namespace bindings (only for non-optimized namespaces) *) + let namespace_stmts = generate_namespace_bindings m graph.modules in + namespace_stmts @ body) + in + (* Generate export statements for entry point modules *) + let export_stmts = + List.concat_map entry_points ~f:(fun entry_id -> + match Esm.ModuleId.Map.find_opt entry_id graph.modules with + | None -> [] + | Some m -> + StringMap.fold + (fun _export_name export acc -> + (* Get the source identifier - resolve re-exports *) + let source_ident = Esm.resolve_reexport graph.modules export in + let exported_name = Utf8_string.of_string_exn export.exported_name in + (Export (ExportNames [ source_ident, exported_name ], Parse_info.zero), N) + :: acc) + m.exports + []) + in + body_stmts @ export_stmts + +(* ========== Convenience Function ========== *) + +let bundle_modules ~parse ~resolve ~entry_points ~tree_shake:do_tree_shake : program = + let graph = Esm.build_graph ~parse ~resolve ~entry_points in + let entry_ids = List.map entry_points ~f:Esm.ModuleId.of_path in + let graph = + if do_tree_shake + then + let entry_exports = + List.fold_left entry_ids ~init:Esm.ModuleId.Map.empty ~f:(fun acc id -> + Esm.ModuleId.Map.add id StringSet.empty acc) + in + Esm_tree_shake.run graph ~entry_exports + else graph + in + bundle graph ~entry_points:entry_ids diff --git a/compiler/lib/esm_bundle.mli b/compiler/lib/esm_bundle.mli new file mode 100644 index 0000000000..abffa3c8e2 --- /dev/null +++ b/compiler/lib/esm_bundle.mli @@ -0,0 +1,53 @@ +(* Js_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(** ES module bundle generation. + + This module generates bundled JavaScript output from an ES module graph. + It merges all modules in topological order, resolves imports to their + source identifiers, and generates final export statements. +*) + +val bundle : Esm.module_graph -> entry_points:Esm.ModuleId.t list -> Javascript.program +(** [bundle graph ~entry_points] generates the final bundled program + by merging all modules in topological order. + + All module-level declarations are renamed to unique identifiers during + analysis, so no collision resolution is needed at bundle time. + + @param graph The module graph (possibly tree-shaken) + @param entry_points The entry point module IDs (determines export visibility) +*) + +val bundle_modules : + parse:(string -> Javascript.program) + -> resolve:(from:string -> string -> string option) + -> entry_points:string list + -> tree_shake:bool + -> Javascript.program +(** [bundle_modules ~parse ~resolve ~entry_points ~tree_shake] is a convenience + function that performs the complete bundling pipeline: + 1. Build the module graph (with all declarations renamed to unique identifiers) + 2. Optionally perform tree-shaking + 3. Generate the bundled output + + @param parse Function to parse a file path into a JavaScript program + @param resolve Function to resolve an import specifier relative to a module + @param entry_points List of entry point file paths + @param tree_shake Whether to perform tree-shaking +*) diff --git a/compiler/lib/esm_tree_shake.ml b/compiler/lib/esm_tree_shake.ml new file mode 100644 index 0000000000..cd30d437d3 --- /dev/null +++ b/compiler/lib/esm_tree_shake.ml @@ -0,0 +1,502 @@ +(* Js_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +open! Stdlib +open Javascript + +(* Collect free variables (unbound identifiers) in a statement *) +let collect_free_vars stmt = + let free = new Js_traverse.free in + let _ = free#statement stmt in + free#get_free + +let collect_free_vars_expr expr = + let free = new Js_traverse.free in + let _ = free#expression expr in + free#get_free + +let collect_free_vars_fun_decl decl = + let free = new Js_traverse.free in + let _ = free#fun_decl decl in + free#get_free + +let collect_free_vars_class_decl decl = + let free = new Js_traverse.free in + let _ = free#class_decl decl in + free#get_free + +(* Check if an expression might have side effects *) +let option_has ~f = function + | None -> false + | Some x -> f x + +let rec expr_has_side_effects expr = + match expr with + | EVar _ | EStr _ | EBool _ | ENum _ | ERegexp _ | EPrivName _ -> false + | EFun _ | EArrow _ | EClass _ -> false + | EArr l -> + List.exists l ~f:(function + | ElementHole -> false + | Element e | ElementSpread e -> expr_has_side_effects e) + | EObj l -> + List.exists l ~f:(function + | Property (pn, e) -> + property_name_has_side_effects pn || expr_has_side_effects e + | PropertySpread e -> expr_has_side_effects e + | PropertyMethod (pn, _) -> property_name_has_side_effects pn + | CoverInitializedName _ -> false) + | ESeq (e1, e2) -> expr_has_side_effects e1 || expr_has_side_effects e2 + | ECond (e1, e2, e3) -> + expr_has_side_effects e1 || expr_has_side_effects e2 || expr_has_side_effects e3 + | EBin (op, e1, e2) -> ( + match op with + (* Assignment operators have side effects *) + | Eq | StarEq | SlashEq | ModEq | PlusEq | MinusEq | LslEq | AsrEq | LsrEq | BandEq + | BxorEq | BorEq | OrEq | AndEq | ExpEq | CoalesceEq -> + true + (* Non-assignment operators: check operands *) + | Or | And | Bor | Bxor | Band | EqEq | NotEq | EqEqEq | NotEqEq | Lt | Le | Gt | Ge + | LtInt | LeInt | GtInt | GeInt | InstanceOf | In | Lsl | Lsr | Asr | Plus | Minus + | Mul | Div | Mod | Exp | Coalesce -> + expr_has_side_effects e1 || expr_has_side_effects e2) + | EUn (op, e) -> ( + match op with + (* These operators have side effects *) + | Delete | IncrA | DecrA | IncrB | DecrB | Await -> true + (* These operators don't have side effects themselves *) + | Not | Neg | Pl | Typeof | Void | Bnot -> expr_has_side_effects e) + | ECall _ | ECallTemplate _ | ENew _ -> true (* calls might have side effects *) + | EAccess (e1, _, e2) -> expr_has_side_effects e1 || expr_has_side_effects e2 + | EDot (e, _, _) | EDotPrivate (e, _, _) -> expr_has_side_effects e + | EAssignTarget _ -> true + | ETemplate parts -> + List.exists parts ~f:(function + | TStr _ -> false + | TExp e -> expr_has_side_effects e) + | EYield _ -> true + | CoverParenthesizedExpressionAndArrowParameterList _ + | CoverCallExpressionAndAsyncArrowHead _ -> false + +and property_name_has_side_effects pn = + match pn with + | PNI _ | PNS _ | PNN _ -> false + | PComputed e -> expr_has_side_effects e + +let rec stmt_has_side_effects stmt = + match stmt with + | Empty_statement | Debugger_statement -> false + | Block l -> List.exists l ~f:(fun (s, _) -> stmt_has_side_effects s) + | Variable_statement (_, decls) -> + List.exists decls ~f:(function + | DeclIdent (_, None) -> false + | DeclIdent (_, Some (e, _)) -> expr_has_side_effects e + | DeclPattern (_, (e, _)) -> expr_has_side_effects e) + | Function_declaration _ -> false + | Class_declaration (_, decl) -> class_decl_has_side_effects decl + | Expression_statement e -> expr_has_side_effects e + | If_statement (e, (s1, _), s2_opt) -> + expr_has_side_effects e + || stmt_has_side_effects s1 + || option_has s2_opt ~f:(fun (s2, _) -> stmt_has_side_effects s2) + | Do_while_statement ((s, _), e) -> stmt_has_side_effects s || expr_has_side_effects e + | While_statement (e, (s, _)) -> expr_has_side_effects e || stmt_has_side_effects s + | For_statement (init, cond, incr, (body, _)) -> + (match init with + | Left None -> false + | Left (Some e) -> expr_has_side_effects e + | Right (_, decls) -> + List.exists decls ~f:(function + | DeclIdent (_, None) -> false + | DeclIdent (_, Some (e, _)) -> expr_has_side_effects e + | DeclPattern (_, (e, _)) -> expr_has_side_effects e)) + || option_has cond ~f:expr_has_side_effects + || option_has incr ~f:expr_has_side_effects + || stmt_has_side_effects body + | ForIn_statement (_, e, (body, _)) + | ForOf_statement (_, e, (body, _)) + | ForAwaitOf_statement (_, e, (body, _)) -> + expr_has_side_effects e || stmt_has_side_effects body + | Continue_statement _ | Break_statement _ -> false + | Return_statement (e_opt, _) -> option_has e_opt ~f:expr_has_side_effects + | Labelled_statement (_, (s, _)) -> stmt_has_side_effects s + | Switch_statement (e, cases1, default, cases2) -> + expr_has_side_effects e + || List.exists cases1 ~f:(fun (ce, sl) -> + expr_has_side_effects ce + || List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) + || option_has default ~f:(fun sl -> + List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) + || List.exists cases2 ~f:(fun (ce, sl) -> + expr_has_side_effects ce + || List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) + | Throw_statement _ -> true + | Try_statement (b, catch, finally) -> + List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s) + || option_has catch ~f:(fun (_, b) -> + List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s)) + || option_has finally ~f:(fun b -> + List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s)) + | With_statement (e, (s, _)) -> expr_has_side_effects e || stmt_has_side_effects s + | Import _ -> true + | Export _ -> true + +and class_decl_has_side_effects decl = + option_has decl.extends ~f:expr_has_side_effects + || List.exists decl.body ~f:(function + | CEMethod (_, _, name, _) -> class_element_name_has_side_effects name + | CEField (_, _, name, init) -> + class_element_name_has_side_effects name + || option_has init ~f:(fun (e, _) -> expr_has_side_effects e) + | CEStaticBLock stmts -> + List.exists stmts ~f:(fun (s, _) -> stmt_has_side_effects s) + | CEAccessor (_, _, name, init) -> + class_element_name_has_side_effects name + || option_has init ~f:(fun (e, _) -> expr_has_side_effects e)) + +and class_element_name_has_side_effects name = + match name with + | PropName pn -> property_name_has_side_effects pn + | PrivName _ -> false + +(* Statement info for tree shaking *) +type stmt_info = + { idx : int + ; defines : IdentSet.t + ; uses : IdentSet.t (* local uses within the module *) + ; import_uses : (Esm.ModuleId.t * string * Code.Var.t) list + (* (source, export_name, local_binding_var) *) + ; has_side_effects : bool + ; stmt : statement * location + } + +(* Build a map from import binding ident to (source, export_name) *) +let build_import_map (imports : Esm.import_entry list) : + (Esm.ModuleId.t * string) Code.Var.Map.t = + List.fold_left imports ~init:Code.Var.Map.empty ~f:(fun acc import -> + List.fold_left import.Esm.bindings ~init:acc ~f:(fun acc binding -> + match binding with + | Esm.ImportNamed (orig, V v) -> + Code.Var.Map.add v (import.Esm.source, orig) acc + | Esm.ImportDefault (V v) -> + Code.Var.Map.add v (import.Esm.source, "default") acc + | Esm.ImportNamespace (V v) -> + (* Namespace imports use "*" to indicate all exports *) + Code.Var.Map.add v (import.Esm.source, "*") acc + (* S identifiers and side-effect imports don't add to the map *) + | Esm.ImportNamed (_, S _) + | Esm.ImportDefault (S _) + | Esm.ImportNamespace (S _) + | Esm.ImportSideEffect -> + acc)) + +(* Analyze a statement: extract defines, split uses into local vs import *) +let analyze_stmt import_map idx (stmt, loc) : stmt_info = + let defines, all_uses, has_side_effects = + match stmt with + | Variable_statement (_, decls) -> + let defines = + List.fold_left decls ~init:IdentSet.empty ~f:(fun acc decl -> + let ids = bound_idents_of_variable_declaration decl in + List.fold_left ids ~init:acc ~f:(fun acc id -> IdentSet.add id acc)) + in + let uses = + List.fold_left decls ~init:IdentSet.empty ~f:(fun acc decl -> + match decl with + | DeclIdent (_, None) -> acc + | DeclIdent (_, Some (e, _)) -> IdentSet.union acc (collect_free_vars_expr e) + | DeclPattern (_, (e, _)) -> IdentSet.union acc (collect_free_vars_expr e)) + in + let side_effects = + List.exists decls ~f:(function + | DeclIdent (_, None) -> false + | DeclIdent (_, Some (e, _)) -> expr_has_side_effects e + | DeclPattern (_, (e, _)) -> expr_has_side_effects e) + in + defines, uses, side_effects + | Function_declaration (id, decl) -> + let defines = IdentSet.singleton id in + let uses = collect_free_vars_fun_decl decl in + defines, uses, false + | Class_declaration (id, decl) -> + let defines = IdentSet.singleton id in + let uses = collect_free_vars_class_decl decl in + let side_effects = class_decl_has_side_effects decl in + defines, uses, side_effects + (* Statements that don't define module-level bindings *) + | Block _ | Empty_statement | Expression_statement _ | If_statement _ + | Do_while_statement _ | While_statement _ | For_statement _ | ForIn_statement _ + | ForOf_statement _ | ForAwaitOf_statement _ | Continue_statement _ | Break_statement _ + | Return_statement _ | With_statement _ | Labelled_statement _ | Switch_statement _ + | Throw_statement _ | Try_statement _ | Debugger_statement | Import _ | Export _ -> + let uses = collect_free_vars stmt in + IdentSet.empty, uses, stmt_has_side_effects stmt + in + (* Split uses into local identifiers and import references. + import_uses is a list of (source_module, export_name, local_binding_var) *) + let local_uses, import_uses = + IdentSet.fold + (fun id (local_acc, import_acc) -> + match id with + | V v -> ( + match Code.Var.Map.find_opt v import_map with + | Some (source, name) -> local_acc, (source, name, v) :: import_acc + | None -> IdentSet.add id local_acc, import_acc) + | S _ -> IdentSet.add id local_acc, import_acc) + all_uses + (IdentSet.empty, []) + in + { idx; defines; uses = local_uses; import_uses; has_side_effects; stmt = stmt, loc } + +(* Work item for the fixpoint *) +type work_item = + | MarkExport of Esm.ModuleId.t * string + | MarkIdent of Esm.ModuleId.t * ident + | MarkModuleReached of Esm.ModuleId.t + +(* Set for tracking live (module, export_name) pairs *) +module ExportKey = struct + type t = Esm.ModuleId.t * string + + let compare (m1, s1) (m2, s2) = + let c = Esm.ModuleId.compare m1 m2 in + if c <> 0 then c else String.compare s1 s2 +end + +module ExportSet = Set.Make (ExportKey) + +(* Set for tracking live (module, var) pairs *) +module VarKey = struct + type t = Esm.ModuleId.t * Code.Var.t + + let compare (m1, v1) (m2, v2) = + let c = Esm.ModuleId.compare m1 m2 in + if c <> 0 then c else Code.Var.compare v1 v2 +end + +module VarSet = Set.Make (VarKey) + +let run (graph : Esm.module_graph) ~(entry_exports : StringSet.t Esm.ModuleId.Map.t) : + Esm.module_graph = + (* Analyze all statements in all modules, storing as arrays for O(1) access *) + let module_stmts : stmt_info array Esm.ModuleId.Map.t = + Esm.ModuleId.Map.map + (fun m -> + let import_map = build_import_map m.Esm.imports in + Array.of_list + (List.mapi m.Esm.body ~f:(fun idx stmt -> analyze_stmt import_map idx stmt))) + graph.modules + in + (* Build map from (module, ident) -> stmt indices that define it *) + let def_map : int list Code.Var.Map.t Esm.ModuleId.Map.t = + Esm.ModuleId.Map.map + (fun stmts -> + Array.fold_left stmts ~init:Code.Var.Map.empty ~f:(fun acc info -> + IdentSet.fold + (fun id acc -> + match id with + | V v -> + let existing = + match Code.Var.Map.find_opt v acc with + | Some l -> l + | None -> [] + in + Code.Var.Map.add v (info.idx :: existing) acc + | S _ -> acc) + info.defines + acc)) + module_stmts + in + (* Live state - using mutable arrays for statements, immutable sets for exports/idents *) + let live_stmts : bool array Esm.ModuleId.Map.t = + Esm.ModuleId.Map.map + (fun m -> Array.make (List.length m.Esm.body) false) + graph.modules + in + let live_exports = ref ExportSet.empty in + let live_idents = ref VarSet.empty in + let reached_modules = ref Esm.ModuleId.Set.empty in + (* Worklist *) + let worklist = Queue.create () in + (* Mark a statement as live and enqueue its dependencies *) + let mark_stmt_live module_id idx = + let arr = Esm.ModuleId.Map.find module_id live_stmts in + if not arr.(idx) + then begin + arr.(idx) <- true; + (* Mark module as reached (for side-effect propagation) *) + if not (Esm.ModuleId.Set.mem module_id !reached_modules) + then begin + reached_modules := Esm.ModuleId.Set.add module_id !reached_modules; + Queue.push (MarkModuleReached module_id) worklist + end; + let stmts = Esm.ModuleId.Map.find module_id module_stmts in + let info = stmts.(idx) in + (* Enqueue local dependencies *) + IdentSet.iter (fun id -> Queue.push (MarkIdent (module_id, id)) worklist) info.uses; + (* Enqueue import dependencies and mark import bindings as live *) + List.iter info.import_uses ~f:(fun (source, name, local_var) -> + (* Mark the local import binding as live *) + live_idents := VarSet.add (module_id, local_var) !live_idents; + if String.equal name "*" + then + (* Namespace import: need all exports from source *) + let source_module = Esm.ModuleId.Map.find source graph.modules in + StringMap.iter + (fun export_name _ -> + Queue.push (MarkExport (source, export_name)) worklist) + source_module.exports + else Queue.push (MarkExport (source, name)) worklist) + end + in + (* Mark an ident as live and find statements that define it *) + let mark_ident_live module_id id = + match id with + | V v -> + if not (VarSet.mem (module_id, v) !live_idents) + then begin + live_idents := VarSet.add (module_id, v) !live_idents; + let def_map_for_module = Esm.ModuleId.Map.find module_id def_map in + match Code.Var.Map.find_opt v def_map_for_module with + | Some indices -> List.iter indices ~f:(mark_stmt_live module_id) + | None -> () + end + | S _ -> () + in + (* Resolve an export to its source, following re-export chains *) + let rec resolve_export module_id export_name = + let m = Esm.ModuleId.Map.find module_id graph.modules in + match StringMap.find_opt export_name m.Esm.exports with + | None -> None + | Some export -> ( + match export.kind with + | Esm.Export_reexport (source, orig_name) -> + let orig = if String.equal orig_name "*" then export_name else orig_name in + resolve_export source orig + | Esm.Export_var | Esm.Export_fun | Esm.Export_class -> + Some (module_id, export.local_ident)) + in + (* Mark an export as live *) + let mark_export_live module_id export_name = + if not (ExportSet.mem (module_id, export_name) !live_exports) + then begin + live_exports := ExportSet.add (module_id, export_name) !live_exports; + match resolve_export module_id export_name with + | Some (resolved_module, resolved_ident) -> + Queue.push (MarkIdent (resolved_module, resolved_ident)) worklist + | None -> () + end + in + (* Mark a module as reached: mark its side-effecting statements and propagate + through side-effect imports *) + let mark_module_reached module_id = + match Esm.ModuleId.Map.find_opt module_id graph.modules with + | None -> () + | Some m -> + (* Mark all side-effecting statements in this module *) + let stmts = Esm.ModuleId.Map.find module_id module_stmts in + Array.iter stmts ~f:(fun info -> + if info.has_side_effects then mark_stmt_live module_id info.idx); + (* Propagate to modules imported for side effects *) + List.iter m.Esm.imports ~f:(fun import -> + List.iter import.Esm.bindings ~f:(fun binding -> + match binding with + | Esm.ImportSideEffect -> + if not (Esm.ModuleId.Set.mem import.Esm.source !reached_modules) + then begin + reached_modules := + Esm.ModuleId.Set.add import.Esm.source !reached_modules; + Queue.push (MarkModuleReached import.Esm.source) worklist + end + | Esm.ImportNamed _ | Esm.ImportDefault _ | Esm.ImportNamespace _ -> ())) + in + (* Initialize: mark entry exports as live *) + Esm.ModuleId.Map.iter + (fun module_id exports -> + if StringSet.is_empty exports + then + (* Empty set = all exports of this entry module *) + let m = Esm.ModuleId.Map.find module_id graph.modules in + StringMap.iter + (fun name _ -> Queue.push (MarkExport (module_id, name)) worklist) + m.Esm.exports + else + StringSet.iter + (fun name -> Queue.push (MarkExport (module_id, name)) worklist) + exports) + entry_exports; + (* Fixpoint: process worklist until empty *) + while not (Queue.is_empty worklist) do + match Queue.pop worklist with + | MarkExport (module_id, name) -> mark_export_live module_id name + | MarkIdent (module_id, id) -> mark_ident_live module_id id + | MarkModuleReached module_id -> mark_module_reached module_id + done; + (* Build result: filter each module to keep only live statements *) + let modules = + Esm.ModuleId.Map.mapi + (fun module_id m -> + let arr = Esm.ModuleId.Map.find module_id live_stmts in + (* Keep module only if it has live statements *) + if not (Array.exists ~f:(fun x -> x) arr) + then None + else + let stmts = Esm.ModuleId.Map.find module_id module_stmts in + let body = + Array.fold_right stmts ~init:[] ~f:(fun info acc -> + if arr.(info.idx) then info.stmt :: acc else acc) + in + (* Filter exports to only those that are live *) + let exports = + StringMap.filter + (fun name _ -> ExportSet.mem (module_id, name) !live_exports) + m.Esm.exports + in + (* Filter imports to only those with live bindings *) + let imports = + List.filter_map m.Esm.imports ~f:(fun import -> + let bindings = + List.filter import.Esm.bindings ~f:(fun binding -> + match binding with + | Esm.ImportNamed (_, V v) + | Esm.ImportDefault (V v) + | Esm.ImportNamespace (V v) -> + VarSet.mem (module_id, v) !live_idents + | Esm.ImportSideEffect -> true + (* S identifiers are not tracked in live_idents *) + | Esm.ImportNamed (_, S _) + | Esm.ImportDefault (S _) + | Esm.ImportNamespace (S _) -> + false) + in + if List.is_empty bindings + then None + else Some { import with bindings }) + in + Some { m with body; exports; imports }) + graph.modules + in + let modules = + Esm.ModuleId.Map.fold + (fun id m_opt acc -> + match m_opt with + | Some m -> Esm.ModuleId.Map.add id m acc + | None -> acc) + modules + Esm.ModuleId.Map.empty + in + { graph with modules } diff --git a/compiler/lib/esm_tree_shake.mli b/compiler/lib/esm_tree_shake.mli new file mode 100644 index 0000000000..77e09e8e51 --- /dev/null +++ b/compiler/lib/esm_tree_shake.mli @@ -0,0 +1,48 @@ +(* Js_of_ocaml compiler + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, with linking exception; + * either version 2.1 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +(** Tree shaking for ES modules. + + This module implements dead code elimination for ES module graphs using a + worklist-based fixpoint algorithm. Starting from entry point exports, it + marks all reachable code as live and removes everything else. + + The algorithm works in a single pass: + 1. Mark entry exports as live + 2. For each live export, resolve to its defining identifier + 3. For each live identifier, mark its defining statement as live + 4. For each live statement, mark its used identifiers and imports as live + 5. Repeat until fixpoint + 6. Mark side-effect statements in reachable modules as live + 7. Filter modules to keep only live statements, exports, and imports + + Time complexity: O(N + V log V) where N is total AST size and V is + the number of variable definitions/uses. +*) + +open Stdlib + +val run : Esm.module_graph -> entry_exports:StringSet.t Esm.ModuleId.Map.t -> Esm.module_graph +(** [run graph ~entry_exports] performs tree shaking on the module graph. + + @param graph The module graph to shake + @param entry_exports Map from entry point module IDs to sets of export + names that should be kept. Use empty set to keep all exports + of an entry module. + @return A new module graph with unused code removed +*) diff --git a/compiler/tests-esm/dune b/compiler/tests-esm/dune new file mode 100644 index 0000000000..5830d74b19 --- /dev/null +++ b/compiler/tests-esm/dune @@ -0,0 +1,7 @@ +(library + (name esm_bundle_tests) + (modules esm_bundle) + (libraries js_of_ocaml_compiler unix) + (inline_tests) + (preprocess + (pps ppx_expect))) diff --git a/compiler/tests-esm/esm_bundle.ml b/compiler/tests-esm/esm_bundle.ml new file mode 100644 index 0000000000..12be86e979 --- /dev/null +++ b/compiler/tests-esm/esm_bundle.ml @@ -0,0 +1,958 @@ +(* Js_of_ocaml tests + * http://www.ocsigen.org/js_of_ocaml/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *) + +open Js_of_ocaml_compiler.Stdlib +open Js_of_ocaml_compiler + +let test_dir = Filename.concat (Sys.getcwd ()) "_esm_test_files" + +let ensure_dir dir = if not (Sys.file_exists dir) then Unix.mkdir dir 0o755 + +let write_file path content = + let oc = open_out_bin path in + output_string oc content; + close_out oc + +let parse_file path = + let lexer = Parse_js.Lexer.of_file path in + Parse_js.parse `Module lexer + +let normalize_path path = + (* Simple path normalization: remove . and .. components *) + let parts = String.split_on_char ~sep:'/' path in + let rec normalize acc = function + | [] -> List.rev acc + | "." :: rest -> normalize acc rest + | ".." :: rest -> ( + match acc with + | _ :: acc' -> normalize acc' rest + | [] -> normalize [] rest) + | part :: rest -> normalize (part :: acc) rest + in + String.concat ~sep:"/" (normalize [] parts) + +let resolve ~from specifier = + if Filename.is_relative specifier + then + let dir = Filename.dirname from in + let resolved = normalize_path (Filename.concat dir specifier) in + if Sys.file_exists resolved then Some resolved else None + else None + +let contains_substring haystack needle = + let needle_len = String.length needle in + let haystack_len = String.length haystack in + if needle_len > haystack_len + then false + else + let found = ref false in + for i = 0 to haystack_len - needle_len do + if (not !found) && String.equal (String.sub haystack ~pos:i ~len:needle_len) needle + then found := true + done; + !found + +let bundle_to_string program = + let buffer = Buffer.create 4096 in + let pp = Pretty_print.to_buffer buffer in + Pretty_print.set_compact pp false; + Config.Flag.disable "debuginfo"; + Config.Flag.disable "shortvar"; + (* Assign names to unnamed variables before output *) + let program = Js_assign.program program in + let _ = Js_output.program pp program in + Buffer.contents buffer + +let with_test_dir f = + ensure_dir test_dir; + let files = ref [] in + let write name content = + let path = Filename.concat test_dir name in + write_file path content; + files := path :: !files; + path + in + Fun.protect + ~finally:(fun () -> + List.iter ~f:Sys.remove !files; + Unix.rmdir test_dir) + (fun () -> f ~write) + +let%expect_test "bundle simple ES modules" = + with_test_dir + @@ fun ~write -> + let entry = + write + "single.js" + {| +export function hello() { + return "Hello"; +} +export const VALUE = 42; +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let module_count = Esm.ModuleId.Map.cardinal graph.modules in + Printf.printf "Bundled %d modules\n" module_count; + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + Bundled 1 modules + function hello(){return "Hello";} + const VALUE = 42; + export { hello }; + export { VALUE }; + |}] + +let%expect_test "bundle with imports" = + with_test_dir + @@ fun ~write -> + let _ = + write + "lib.js" + {| +export const VERSION = "1.0.0"; +export function greet(name) { + return "Hello, " + name; +} +|} + in + let entry = + write + "app.js" + {| +import { VERSION, greet } from './lib.js'; + +export function app() { + return greet("World") + " v" + VERSION; +} +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let module_count = Esm.ModuleId.Map.cardinal graph.modules in + Printf.printf "Bundled %d modules\n" module_count; + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + Bundled 2 modules + const VERSION = "1.0.0"; + function greet(name){return "Hello, " + name;} + function app(){return greet("World") + " v" + VERSION;} + export { app }; + |}] + +let%expect_test "import * as namespace" = + with_test_dir + @@ fun ~write -> + let _ = + write + "math.js" + {| +export const PI = 3.14159; +export function add(a, b) { return a + b; } +export function multiply(a, b) { return a * b; } +|} + in + let entry = + write + "app.js" + {| +import * as Math from './math.js'; + +export function calculate() { + return Math.add(1, 2) * Math.PI; +} +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let module_count = Esm.ModuleId.Map.cardinal graph.modules in + Printf.printf "Bundled %d modules\n" module_count; + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + Bundled 2 modules + const PI = 3.14159; + function add(a, b){return a + b;} + function multiply(a, b){return a * b;} + function calculate(){return add(1, 2) * PI;} + export { calculate }; + |}] + +let%expect_test "export * from re-exports" = + with_test_dir + @@ fun ~write -> + let _ = + write + "utils.js" + {| +export const UTIL_VALUE = 100; +export function utilFunc() { return "util"; } +|} + in + let _ = + write "index.js" {| +export * from './utils.js'; +export const INDEX_VALUE = 200; +|} + in + let entry = + write + "app.js" + {| +import { UTIL_VALUE, utilFunc, INDEX_VALUE } from './index.js'; + +export function main() { + return utilFunc() + UTIL_VALUE + INDEX_VALUE; +} +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let module_count = Esm.ModuleId.Map.cardinal graph.modules in + Printf.printf "Bundled %d modules\n" module_count; + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + Bundled 3 modules + const UTIL_VALUE = 100; + function utilFunc(){return "util";} + const INDEX_VALUE = 200; + function main(){return utilFunc() + UTIL_VALUE + INDEX_VALUE;} + export { main }; + |}] + +let%expect_test "import alias collision" = + with_test_dir + @@ fun ~write -> + let _ = write "lib.js" {|export const foo = 1;|} in + let _ = write "utils.js" {|export const bar = 2;|} in + let entry = + write + "app.js" + {| +import { foo as bar } from './lib.js'; +import { bar as utilsBar } from './utils.js'; + +export function use() { + return bar + utilsBar; +} +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + const bar = 2; + const foo = 1; + function use(){return foo + bar;} + export { use }; + |}] + +let%expect_test "tree shaking removes unused exports" = + with_test_dir + @@ fun ~write -> + let _ = + write + "lib.js" + {| +export const USED_CONST = "I am used"; +export const UNUSED_CONST = "I am not used"; + +export function usedFunction() { + return USED_CONST; +} + +export function unusedFunction() { + return "This function is never called"; +} + +export function anotherUnused() { + return unusedFunction(); +} +|} + in + let entry = + write + "app.js" + {| +import { usedFunction, USED_CONST } from './lib.js'; + +export function app() { + return usedFunction() + " - " + USED_CONST; +} +|} + in + (* Bundle WITH tree shaking *) + let output_shaken = + Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + in + let output_shaken_str = bundle_to_string output_shaken in + (* Bundle WITHOUT tree shaking for comparison *) + let output_full = + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:false + in + let output_full_str = bundle_to_string output_full in + Printf.printf + "=== Without tree shaking (%d bytes) ===\n" + (String.length output_full_str); + print_endline output_full_str; + Printf.printf "=== With tree shaking (%d bytes) ===\n" (String.length output_shaken_str); + print_endline output_shaken_str; + (* Verify unused code is removed *) + let has_unused_const = contains_substring output_shaken_str "UNUSED_CONST" in + let has_unused_func = contains_substring output_shaken_str "unusedFunction" in + let has_another_unused = contains_substring output_shaken_str "anotherUnused" in + Printf.printf "Tree shaking results:\n"; + Printf.printf " UNUSED_CONST removed: %b\n" (not has_unused_const); + Printf.printf " unusedFunction removed: %b\n" (not has_unused_func); + Printf.printf " anotherUnused removed: %b\n" (not has_another_unused); + [%expect + {| + === Without tree shaking (308 bytes) === + const USED_CONST = "I am used"; + const UNUSED_CONST = "I am not used"; + function usedFunction(){return USED_CONST;} + function unusedFunction(){return "This function is never called";} + function anotherUnused(){return unusedFunction();} + function app(){return usedFunction() + " - " + USED_CONST;} + export { app }; + + === With tree shaking (152 bytes) === + const USED_CONST = "I am used"; + function usedFunction(){return USED_CONST;} + function app(){return usedFunction() + " - " + USED_CONST;} + export { app }; + + Tree shaking results: + UNUSED_CONST removed: true + unusedFunction removed: true + anotherUnused removed: true + |}] + +let%expect_test "nested variable shadowing" = + with_test_dir + @@ fun ~write -> + let entry = + write + "shadow.js" + {| +export const foo = "top-level"; + +export function test() { + let foo = "shadowed"; // This should NOT be renamed + return foo; // This should refer to shadowed, not top-level +} + +export function usesTopLevel() { + return foo; // This should refer to top-level foo +} +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + const foo = "top-level"; + function test(){let foo = "shadowed"; return foo;} + function usesTopLevel(){return foo;} + export { usesTopLevel }; + export { test }; + export { foo }; + |}] + +let%expect_test "bundle writes to file" = + with_test_dir + @@ fun ~write -> + let entry = write "single.js" {|export function hello() { return "Hello"; }|} in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + let bundle_path = write "bundle.js" output in + Printf.printf "Bundle written to bundle.js\n"; + Printf.printf "Bundle size: %d bytes\n" (String.length output); + let ic = open_in_bin bundle_path in + let file_size = in_channel_length ic in + close_in ic; + Printf.printf "File size on disk: %d bytes\n" file_size; + [%expect + {| + Bundle written to bundle.js + Bundle size: 52 bytes + File size on disk: 52 bytes + |}] + +let find_runtime_dir () = + (* Navigate from test working directory to find runtime/js *) + let rec find dir = + let runtime = Filename.concat dir "runtime/js" in + if Sys.file_exists runtime && Sys.is_directory runtime + then Some runtime + else + let parent = Filename.dirname dir in + if String.equal parent dir then None else find parent + in + find (Sys.getcwd ()) + +let%expect_test "namespace optimization with bracket notation" = + with_test_dir + @@ fun ~write -> + let _ = write "lib.js" {|export const foo = 1; export const bar = 2;|} in + let entry = + write + "app.js" + {| +import * as Lib from './lib.js'; +export function test() { return Lib["foo"] + Lib["bar"]; } +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + let has_object = + contains_substring output "{foo:" || contains_substring output "{ foo:" + in + Printf.printf "Namespace object created: %b\n" has_object; + [%expect + {| + const foo = 1; + const bar = 2; + function test(){return foo + bar;} + export { test }; + + Namespace object created: false + |}] + +let%expect_test "namespace not optimized when passed as value" = + with_test_dir + @@ fun ~write -> + let _ = write "lib.js" {|export const foo = 1; export const bar = 2;|} in + let entry = + write + "app.js" + {| +import * as Lib from './lib.js'; +function useNamespace(ns) { return ns.foo; } +export function test() { return useNamespace(Lib); } +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + let has_object = + contains_substring output "{bar:" || contains_substring output "{foo:" + in + Printf.printf "Namespace object created: %b\n" has_object; + [%expect + {| + const foo = 1; + const bar = 2; + const Lib = {foo: foo, bar: bar}; + function useNamespace(ns){return ns.foo;} + function test(){return useNamespace(Lib);} + export { test }; + + Namespace object created: true + |}] + +let%expect_test "namespace not optimized with dynamic access" = + with_test_dir + @@ fun ~write -> + let _ = write "lib.js" {|export const foo = 1; export const bar = 2;|} in + let entry = + write + "app.js" + {| +import * as Lib from './lib.js'; +export function test(key) { return Lib[key]; } +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + let has_object = + contains_substring output "{bar:" || contains_substring output "{foo:" + in + Printf.printf "Namespace object created: %b\n" has_object; + [%expect + {| + const foo = 1; + const bar = 2; + const Lib = {foo: foo, bar: bar}; + function test(key){return Lib[key];} + export { test }; + + Namespace object created: true + |}] + +let%expect_test "mixed namespace and named imports" = + with_test_dir + @@ fun ~write -> + let _ = + write "lib.js" {|export const a = 1; export const b = 2; export const c = 3;|} + in + let entry = + write + "app.js" + {| +import * as Lib from './lib.js'; +import { c } from './lib.js'; +export function test() { return Lib.a + Lib.b + c; } +|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + let has_namespace_object = contains_substring output "= {" in + Printf.printf "Namespace object created: %b\n" has_namespace_object; + [%expect + {| + const a = 1; + const b = 2; + const c = 3; + function test(){return a + b + c;} + export { test }; + + Namespace object created: false + |}] + +let%expect_test "simple cyclic dependency" = + with_test_dir + @@ fun ~write -> + let _ = + write + "a.js" + {|import { b } from './b.js'; export const a = 1; export function useB() { return b + a; }|} + in + let _ = + write + "b.js" + {|import { a } from './a.js'; export const b = 2; export function useA() { return a + b; }|} + in + let entry = + write + "main.js" + {|import { useB } from './a.js'; import { useA } from './b.js'; export function main() { return useA() + useB(); }|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let module_count = Esm.ModuleId.Map.cardinal graph.modules in + Printf.printf "Bundled %d modules (with cycle)\n" module_count; + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + Bundled 3 modules (with cycle) + const b = 2; + function useA(){return a + b;} + const a = 1; + function useB(){return b + a;} + function main(){return useA() + useB();} + export { main }; + |}] + +let%expect_test "three-way cyclic dependency" = + with_test_dir + @@ fun ~write -> + let _ = + write + "a.js" + {|import { c } from './c.js'; export const a = "a"; export function fromA() { return a + c; }|} + in + let _ = + write + "b.js" + {|import { a } from './a.js'; export const b = "b"; export function fromB() { return b + a; }|} + in + let _ = + write + "c.js" + {|import { b } from './b.js'; export const c = "c"; export function fromC() { return c + b; }|} + in + let entry = + write + "main.js" + {|import { fromA } from './a.js'; import { fromB } from './b.js'; import { fromC } from './c.js'; export function main() { return fromA() + fromB() + fromC(); }|} + in + let graph = Esm.build_graph ~parse:parse_file ~resolve ~entry_points:[ entry ] in + let module_count = Esm.ModuleId.Map.cardinal graph.modules in + Printf.printf "Bundled %d modules (with 3-way cycle)\n" module_count; + let entry_id = Esm.ModuleId.of_path entry in + let bundled = Esm_bundle.bundle graph ~entry_points:[ entry_id ] in + let output = bundle_to_string bundled in + print_endline output; + [%expect + {| + Bundled 4 modules (with 3-way cycle) + const c = "c"; + function fromC(){return c + b;} + const b = "b"; + function fromB(){return b + a;} + const a = "a"; + function fromA(){return a + c;} + function main(){return fromA() + fromB() + fromC();} + export { main }; + |}] + +let%expect_test "tree shaking with destructuring exports" = + with_test_dir + @@ fun ~write -> + let _ = + write + "lib.js" + {| +const obj = { used: 1, unused: 2 }; +export const { used, unused } = obj; +|} + in + let entry = + write + "app.js" + {| +import { used } from './lib.js'; +export function app() { return used; } +|} + in + let output_shaken = + Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + in + let output_str = bundle_to_string output_shaken in + print_endline output_str; + let has_unused = contains_substring output_str "unused" in + Printf.printf "unused export removed: %b\n" (not has_unused); + [%expect + {| + const obj = {used: 1, unused: 2}; + const {used: used, unused: unused} = obj; + function app(){return used;} + export { app }; + + unused export removed: false + |}] + +let%expect_test "tree shaking preserves side-effect imports" = + with_test_dir + @@ fun ~write -> + let _ = write "logger.js" {| +export function log(msg) { console.log(msg); } +|} in + let _ = + write + "lib.js" + {| +import { log } from './logger.js'; +log("side effect"); +export const foo = 1; +|} + in + let entry = + write + "app.js" + {| +import { foo } from './lib.js'; +export function app() { return foo; } +|} + in + let output_shaken = + Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + in + let output_str = bundle_to_string output_shaken in + print_endline output_str; + let has_log = contains_substring output_str "log" in + Printf.printf "side-effect import preserved: %b\n" has_log; + [%expect + {| + function log(msg){console.log(msg);} + log("side effect"); + const foo = 1; + function app(){return foo;} + export { app }; + + side-effect import preserved: true + |}] + +let%expect_test "tree shaking removes unused imports with side-effects" = + with_test_dir + @@ fun ~write -> + let _ = write "used.js" {|export function used() { return 1; }|} in + let _ = write "unused.js" {|export function unused() { return 2; }|} in + let _ = + write + "lib.js" + {| +import { used } from './used.js'; +import { unused } from './unused.js'; +used(); +export const foo = 1; +|} + in + let entry = + write + "app.js" + {| +import { foo } from './lib.js'; +export function app() { return foo; } +|} + in + let output_shaken = + Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + in + let output_str = bundle_to_string output_shaken in + print_endline output_str; + let has_used = contains_substring output_str "used" in + let has_unused = contains_substring output_str "unused" in + Printf.printf "used import preserved: %b\n" has_used; + Printf.printf "unused import removed: %b\n" (not has_unused); + [%expect + {| + function used(){return 1;} + used(); + const foo = 1; + function app(){return foo;} + export { app }; + + used import preserved: true + unused import removed: true + |}] + +let%expect_test "tree shaking preserves side-effect-only imports" = + (* Test import './module.js' style imports that have no bindings *) + with_test_dir + @@ fun ~write -> + let _ = + write + "setup.js" + {| +console.log("setup module loaded"); +export {}; +|} + in + let _ = + write + "lib.js" + {| +import './setup.js'; +export const foo = 1; +|} + in + let entry = + write + "app.js" + {| +import { foo } from './lib.js'; +export function app() { return foo; } +|} + in + let output_shaken = + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true + in + let output_str = bundle_to_string output_shaken in + print_endline output_str; + let has_setup = contains_substring output_str "setup module loaded" in + Printf.printf "side-effect-only import preserved: %b\n" has_setup; + [%expect + {| + console.log("setup module loaded"); + const foo = 1; + function app(){return foo;} + export { app }; + + side-effect-only import preserved: true + |}] + +let%expect_test "tree shaking preserves transitive side-effect imports" = + (* Test that import './a.js' -> import './b.js' chains are preserved *) + with_test_dir + @@ fun ~write -> + let _ = + write + "deep.js" + {| +console.log("deep module"); +export {}; +|} + in + let _ = + write + "middle.js" + {| +import './deep.js'; +console.log("middle module"); +export {}; +|} + in + let _ = + write + "lib.js" + {| +import './middle.js'; +export const foo = 1; +|} + in + let entry = + write + "app.js" + {| +import { foo } from './lib.js'; +export function app() { return foo; } +|} + in + let output_shaken = + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true + in + let output_str = bundle_to_string output_shaken in + print_endline output_str; + let has_deep = contains_substring output_str "deep module" in + let has_middle = contains_substring output_str "middle module" in + Printf.printf "deep side-effect preserved: %b\n" has_deep; + Printf.printf "middle side-effect preserved: %b\n" has_middle; + [%expect + {| + console.log("deep module"); + console.log("middle module"); + const foo = 1; + function app(){return foo;} + export { app }; + + deep side-effect preserved: true + middle side-effect preserved: true + |}] + +let%expect_test "intra-module dead code elimination" = + with_test_dir + @@ fun ~write -> + let _ = write "helper.js" {|export function helper() { return 1; }|} in + let _ = + write + "lib.js" + {| +import { helper } from './helper.js'; +function internal() { return helper(); } +export function used() { return 1; } +export function unused() { return internal(); } +|} + in + let entry = + write + "app.js" + {| +import { used } from './lib.js'; +export function app() { return used(); } +|} + in + let output_shaken = + Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + in + let output_str = bundle_to_string output_shaken in + print_endline output_str; + let has_internal = contains_substring output_str "internal" in + let has_helper = contains_substring output_str "helper" in + let has_unused = contains_substring output_str "unused" in + Printf.printf "internal function removed: %b\n" (not has_internal); + Printf.printf "helper import removed: %b\n" (not has_helper); + Printf.printf "unused export removed: %b\n" (not has_unused); + [%expect + {| + function used(){return 1;} function app(){return used();} export { app }; + + internal function removed: true + helper import removed: true + unused export removed: true + |}] + +let%expect_test "diamond dependency - both paths need different exports" = + with_test_dir + @@ fun ~write -> + (* A exports x and z *) + let _ = write "a.js" {| +export function x() { return "x"; } +export function z() { return "z"; } +export function unused() { return "unused"; } +|} in + (* B depends on A.z *) + let _ = write "b.js" {| +import { z } from './a.js'; +export function y() { return z(); } +|} in + (* root depends on A.x and B.y *) + let entry = write "root.js" {| +import { x } from './a.js'; +import { y } from './b.js'; +export function main() { return x() + y(); } +|} in + let output = + Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + in + let output_str = bundle_to_string output in + print_endline output_str; + let has_x = contains_substring output_str "\"x\"" in + let has_z = contains_substring output_str "\"z\"" in + let has_unused = contains_substring output_str "unused" in + Printf.printf "x preserved (direct): %b\n" has_x; + Printf.printf "z preserved (via B): %b\n" has_z; + Printf.printf "unused removed: %b\n" (not has_unused); + [%expect + {| + function x(){return "x";} + function z(){return "z";} + function y(){return z();} + function main(){return x() + y();} + export { main }; + + x preserved (direct): true + z preserved (via B): true + unused removed: true + |}] From 25b9c5a284353b2796ed90ae26752a9b3d97b95e Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Tue, 3 Feb 2026 01:08:53 +0100 Subject: [PATCH 3/5] Compiler: Simplify api --- compiler/lib/linker.ml | 38 +++++++++++++++++++++++++++++++-- compiler/lib/parse_js.ml | 29 ------------------------- compiler/lib/parse_js.mli | 2 +- compiler/tests-js-parser/run.ml | 4 ++-- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/compiler/lib/linker.ml b/compiler/lib/linker.ml index 1fbc0f90ed..39d2f16742 100644 --- a/compiler/lib/linker.ml +++ b/compiler/lib/linker.ml @@ -254,9 +254,43 @@ module Fragment = struct let version_match = List.for_all ~f:(fun (op, str) -> op Ocaml_version.(compare current (split str)) 0) + let attach_annot p toks = + let take_annot_before = + let toks_r = ref toks in + let rec loop start_pos acc (toks : (Js_token.t * _) list) = + match toks with + | [] -> assert false + | (TAnnot a, loc) :: xs -> + loop start_pos ((a, Parse_info.t_of_pos (Loc.p1 loc)) :: acc) xs + | ((TComment _ | TCommentLineDirective _), _) :: xs -> loop start_pos acc xs + | (_, loc) :: xs -> + if Loc.cnum loc = start_pos.Lexing.pos_cnum + then ( + toks_r := toks; + List.rev acc) + else loop start_pos [] xs + in + fun start_pos -> loop start_pos [] !toks_r + in + let p = List.map p ~f:(fun (start_pos, s) -> take_annot_before start_pos, s) in + let groups = + List.group p ~f:(fun a _pred -> + match a with + | [], _ -> true + | _ :: _, _ -> false) + in + let p = + List.map groups ~f:(function + | [] -> assert false + | (annot, _) :: _ as l -> annot, List.map l ~f:snd) + in + p + let parse_from_lex ~filename lex = - let program, _ = - try Parse_js.parse' `Script lex + let program = + try + let p, toks = Parse_js.parse' `Script lex in + attach_annot p toks with Parse_js.Parsing_error pi -> let name = match pi with diff --git a/compiler/lib/parse_js.ml b/compiler/lib/parse_js.ml index 9b0eb34a87..ffb2f02910 100644 --- a/compiler/lib/parse_js.ml +++ b/compiler/lib/parse_js.ml @@ -685,35 +685,6 @@ let parse' script_or_module lex = in check_program p; let toks = State.all_tokens toks in - let take_annot_before = - let toks_r = ref toks in - let rec loop start_pos acc (toks : (Js_token.t * _) list) = - match toks with - | [] -> assert false - | (TAnnot a, loc) :: xs -> - loop start_pos ((a, Parse_info.t_of_pos (Loc.p1 loc)) :: acc) xs - | ((TComment _ | TCommentLineDirective _), _) :: xs -> loop start_pos acc xs - | (_, loc) :: xs -> - if Loc.cnum loc = start_pos.Lexing.pos_cnum - then ( - toks_r := toks; - List.rev acc) - else loop start_pos [] xs - in - fun start_pos -> loop start_pos [] !toks_r - in - let p = List.map p ~f:(fun (start_pos, s) -> take_annot_before start_pos, s) in - let groups = - List.group p ~f:(fun a _pred -> - match a with - | [], _ -> true - | _ :: _, _ -> false) - in - let p = - List.map groups ~f:(function - | [] -> assert false - | (annot, _) :: _ as l -> annot, List.map l ~f:snd) - in p, toks let parse script_or_module lex = diff --git a/compiler/lib/parse_js.mli b/compiler/lib/parse_js.mli index 13922c9b8a..ec56240118 100644 --- a/compiler/lib/parse_js.mli +++ b/compiler/lib/parse_js.mli @@ -43,7 +43,7 @@ val parse : [ `Script | `Module ] -> Lexer.t -> Javascript.program val parse' : [ `Script | `Module ] -> Lexer.t - -> ((Js_token.Annot.t * Parse_info.t) list * Javascript.program) list + -> (Lexing.position * (Javascript.statement * Javascript.location)) list * (Js_token.t * Loc.t) list val parse_expr : Lexer.t -> Javascript.expression diff --git a/compiler/tests-js-parser/run.ml b/compiler/tests-js-parser/run.ml index 256c120973..90ad532783 100644 --- a/compiler/tests-js-parser/run.ml +++ b/compiler/tests-js-parser/run.ml @@ -349,7 +349,7 @@ let () = then add unsupported else fail := (Parse (loc, content), filename) :: !fail | (p1, toks1), lexing_mode -> ( - let p1 = List.concat_map p1 ~f:snd in + let p1 = List.map p1 ~f:snd in match List.rev !errors with | [] -> ( let s = p_to_string p1 in @@ -363,7 +363,7 @@ let () = Parse_js.parse' lexing_mode lex with | p2, toks2 -> ( - let p2 = List.concat_map p2 ~f:snd in + let p2 = List.map p2 ~f:snd in match ( Poly.equal (clean_loc (normalize p1)) From 7da1e161fa1bdb16f9c404b7ee30e5af4ebd8d31 Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Tue, 3 Feb 2026 01:51:31 +0100 Subject: [PATCH 4/5] Compiler: convert script to module --- compiler/lib/esm_bundle.ml | 15 ++-- compiler/lib/esm_tree_shake.ml | 141 +++++++++++++++++++++---------- compiler/lib/esm_tree_shake.mli | 3 +- compiler/lib/linker.ml | 38 +++++++++ compiler/lib/linker.mli | 10 +++ compiler/tests-esm/esm_bundle.ml | 94 ++++++++++++--------- 6 files changed, 207 insertions(+), 94 deletions(-) diff --git a/compiler/lib/esm_bundle.ml b/compiler/lib/esm_bundle.ml index d19ba13e7a..b7942c9f0a 100644 --- a/compiler/lib/esm_bundle.ml +++ b/compiler/lib/esm_bundle.ml @@ -39,7 +39,8 @@ let build_import_substitutions match StringMap.find_opt "default" source_module.exports with | Some e -> e | None -> - failwith ("No default export in " ^ Esm.ModuleId.to_path import.Esm.source) + failwith + ("No default export in " ^ Esm.ModuleId.to_path import.Esm.source) in let source_ident = Esm.resolve_reexport all_modules export in match local_id with @@ -123,9 +124,7 @@ let optimize_namespace_imports (m : Esm.esm_module) : Esm.esm_module = match binding with | Esm.ImportNamespace (V v) -> Code.Var.Map.add v import.Esm.source acc | Esm.ImportNamespace (S _) - | Esm.ImportNamed _ - | Esm.ImportDefault _ - | Esm.ImportSideEffect -> acc)) + | Esm.ImportNamed _ | Esm.ImportDefault _ | Esm.ImportSideEffect -> acc)) in if Code.Var.Map.is_empty namespace_vars then m @@ -178,9 +177,8 @@ let optimize_namespace_imports (m : Esm.esm_module) : Esm.esm_module = (* Keep as namespace import *) [ binding ]) | Esm.ImportNamespace (S _) - | Esm.ImportNamed _ - | Esm.ImportDefault _ - | Esm.ImportSideEffect -> [ binding ]) + | Esm.ImportNamed _ | Esm.ImportDefault _ | Esm.ImportSideEffect -> + [ binding ]) in { import with bindings }) in @@ -238,7 +236,8 @@ let generate_namespace_bindings [] in Some - ( Variable_statement (Const, [ DeclIdent (local_id, Some (EObj props, N)) ]) + ( Variable_statement + (Const, [ DeclIdent (local_id, Some (EObj props, N)) ]) , N ) | Esm.ImportNamed _ | Esm.ImportDefault _ | Esm.ImportSideEffect -> None)) diff --git a/compiler/lib/esm_tree_shake.ml b/compiler/lib/esm_tree_shake.ml index cd30d437d3..4f2d7efe1f 100644 --- a/compiler/lib/esm_tree_shake.ml +++ b/compiler/lib/esm_tree_shake.ml @@ -55,8 +55,7 @@ let rec expr_has_side_effects expr = | Element e | ElementSpread e -> expr_has_side_effects e) | EObj l -> List.exists l ~f:(function - | Property (pn, e) -> - property_name_has_side_effects pn || expr_has_side_effects e + | Property (pn, e) -> property_name_has_side_effects pn || expr_has_side_effects e | PropertySpread e -> expr_has_side_effects e | PropertyMethod (pn, _) -> property_name_has_side_effects pn | CoverInitializedName _ -> false) @@ -66,14 +65,52 @@ let rec expr_has_side_effects expr = | EBin (op, e1, e2) -> ( match op with (* Assignment operators have side effects *) - | Eq | StarEq | SlashEq | ModEq | PlusEq | MinusEq | LslEq | AsrEq | LsrEq | BandEq - | BxorEq | BorEq | OrEq | AndEq | ExpEq | CoalesceEq -> - true + | Eq + | StarEq + | SlashEq + | ModEq + | PlusEq + | MinusEq + | LslEq + | AsrEq + | LsrEq + | BandEq + | BxorEq + | BorEq + | OrEq + | AndEq + | ExpEq + | CoalesceEq -> true (* Non-assignment operators: check operands *) - | Or | And | Bor | Bxor | Band | EqEq | NotEq | EqEqEq | NotEqEq | Lt | Le | Gt | Ge - | LtInt | LeInt | GtInt | GeInt | InstanceOf | In | Lsl | Lsr | Asr | Plus | Minus - | Mul | Div | Mod | Exp | Coalesce -> - expr_has_side_effects e1 || expr_has_side_effects e2) + | Or + | And + | Bor + | Bxor + | Band + | EqEq + | NotEq + | EqEqEq + | NotEqEq + | Lt + | Le + | Gt + | Ge + | LtInt + | LeInt + | GtInt + | GeInt + | InstanceOf + | In + | Lsl + | Lsr + | Asr + | Plus + | Minus + | Mul + | Div + | Mod + | Exp + | Coalesce -> expr_has_side_effects e1 || expr_has_side_effects e2) | EUn (op, e) -> ( match op with (* These operators have side effects *) @@ -117,13 +154,13 @@ let rec stmt_has_side_effects stmt = | While_statement (e, (s, _)) -> expr_has_side_effects e || stmt_has_side_effects s | For_statement (init, cond, incr, (body, _)) -> (match init with - | Left None -> false - | Left (Some e) -> expr_has_side_effects e - | Right (_, decls) -> - List.exists decls ~f:(function - | DeclIdent (_, None) -> false - | DeclIdent (_, Some (e, _)) -> expr_has_side_effects e - | DeclPattern (_, (e, _)) -> expr_has_side_effects e)) + | Left None -> false + | Left (Some e) -> expr_has_side_effects e + | Right (_, decls) -> + List.exists decls ~f:(function + | DeclIdent (_, None) -> false + | DeclIdent (_, Some (e, _)) -> expr_has_side_effects e + | DeclPattern (_, (e, _)) -> expr_has_side_effects e)) || option_has cond ~f:expr_has_side_effects || option_has incr ~f:expr_has_side_effects || stmt_has_side_effects body @@ -137,20 +174,20 @@ let rec stmt_has_side_effects stmt = | Switch_statement (e, cases1, default, cases2) -> expr_has_side_effects e || List.exists cases1 ~f:(fun (ce, sl) -> - expr_has_side_effects ce - || List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) + expr_has_side_effects ce + || List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) || option_has default ~f:(fun sl -> - List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) + List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) || List.exists cases2 ~f:(fun (ce, sl) -> - expr_has_side_effects ce - || List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) + expr_has_side_effects ce + || List.exists sl ~f:(fun (s, _) -> stmt_has_side_effects s)) | Throw_statement _ -> true | Try_statement (b, catch, finally) -> List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s) || option_has catch ~f:(fun (_, b) -> - List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s)) + List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s)) || option_has finally ~f:(fun b -> - List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s)) + List.exists b ~f:(fun (s, _) -> stmt_has_side_effects s)) | With_statement (e, (s, _)) -> expr_has_side_effects e || stmt_has_side_effects s | Import _ -> true | Export _ -> true @@ -158,15 +195,14 @@ let rec stmt_has_side_effects stmt = and class_decl_has_side_effects decl = option_has decl.extends ~f:expr_has_side_effects || List.exists decl.body ~f:(function - | CEMethod (_, _, name, _) -> class_element_name_has_side_effects name - | CEField (_, _, name, init) -> - class_element_name_has_side_effects name - || option_has init ~f:(fun (e, _) -> expr_has_side_effects e) - | CEStaticBLock stmts -> - List.exists stmts ~f:(fun (s, _) -> stmt_has_side_effects s) - | CEAccessor (_, _, name, init) -> - class_element_name_has_side_effects name - || option_has init ~f:(fun (e, _) -> expr_has_side_effects e)) + | CEMethod (_, _, name, _) -> class_element_name_has_side_effects name + | CEField (_, _, name, init) -> + class_element_name_has_side_effects name + || option_has init ~f:(fun (e, _) -> expr_has_side_effects e) + | CEStaticBLock stmts -> List.exists stmts ~f:(fun (s, _) -> stmt_has_side_effects s) + | CEAccessor (_, _, name, init) -> + class_element_name_has_side_effects name + || option_has init ~f:(fun (e, _) -> expr_has_side_effects e)) and class_element_name_has_side_effects name = match name with @@ -179,7 +215,7 @@ type stmt_info = ; defines : IdentSet.t ; uses : IdentSet.t (* local uses within the module *) ; import_uses : (Esm.ModuleId.t * string * Code.Var.t) list - (* (source, export_name, local_binding_var) *) + (* (source, export_name, local_binding_var) *) ; has_side_effects : bool ; stmt : statement * location } @@ -201,8 +237,7 @@ let build_import_map (imports : Esm.import_entry list) : | Esm.ImportNamed (_, S _) | Esm.ImportDefault (S _) | Esm.ImportNamespace (S _) - | Esm.ImportSideEffect -> - acc)) + | Esm.ImportSideEffect -> acc)) (* Analyze a statement: extract defines, split uses into local vs import *) let analyze_stmt import_map idx (stmt, loc) : stmt_info = @@ -218,7 +253,8 @@ let analyze_stmt import_map idx (stmt, loc) : stmt_info = List.fold_left decls ~init:IdentSet.empty ~f:(fun acc decl -> match decl with | DeclIdent (_, None) -> acc - | DeclIdent (_, Some (e, _)) -> IdentSet.union acc (collect_free_vars_expr e) + | DeclIdent (_, Some (e, _)) -> + IdentSet.union acc (collect_free_vars_expr e) | DeclPattern (_, (e, _)) -> IdentSet.union acc (collect_free_vars_expr e)) in let side_effects = @@ -238,11 +274,27 @@ let analyze_stmt import_map idx (stmt, loc) : stmt_info = let side_effects = class_decl_has_side_effects decl in defines, uses, side_effects (* Statements that don't define module-level bindings *) - | Block _ | Empty_statement | Expression_statement _ | If_statement _ - | Do_while_statement _ | While_statement _ | For_statement _ | ForIn_statement _ - | ForOf_statement _ | ForAwaitOf_statement _ | Continue_statement _ | Break_statement _ - | Return_statement _ | With_statement _ | Labelled_statement _ | Switch_statement _ - | Throw_statement _ | Try_statement _ | Debugger_statement | Import _ | Export _ -> + | Block _ + | Empty_statement + | Expression_statement _ + | If_statement _ + | Do_while_statement _ + | While_statement _ + | For_statement _ + | ForIn_statement _ + | ForOf_statement _ + | ForAwaitOf_statement _ + | Continue_statement _ + | Break_statement _ + | Return_statement _ + | With_statement _ + | Labelled_statement _ + | Switch_statement _ + | Throw_statement _ + | Try_statement _ + | Debugger_statement + | Import _ + | Export _ -> let uses = collect_free_vars stmt in IdentSet.empty, uses, stmt_has_side_effects stmt in @@ -480,12 +532,9 @@ let run (graph : Esm.module_graph) ~(entry_exports : StringSet.t Esm.ModuleId.Ma (* S identifiers are not tracked in live_idents *) | Esm.ImportNamed (_, S _) | Esm.ImportDefault (S _) - | Esm.ImportNamespace (S _) -> - false) + | Esm.ImportNamespace (S _) -> false) in - if List.is_empty bindings - then None - else Some { import with bindings }) + if List.is_empty bindings then None else Some { import with bindings }) in Some { m with body; exports; imports }) graph.modules diff --git a/compiler/lib/esm_tree_shake.mli b/compiler/lib/esm_tree_shake.mli index 77e09e8e51..5cb298ff61 100644 --- a/compiler/lib/esm_tree_shake.mli +++ b/compiler/lib/esm_tree_shake.mli @@ -37,7 +37,8 @@ open Stdlib -val run : Esm.module_graph -> entry_exports:StringSet.t Esm.ModuleId.Map.t -> Esm.module_graph +val run : + Esm.module_graph -> entry_exports:StringSet.t Esm.ModuleId.Map.t -> Esm.module_graph (** [run graph ~entry_exports] performs tree shaking on the module graph. @param graph The module graph to shake diff --git a/compiler/lib/linker.ml b/compiler/lib/linker.ml index 39d2f16742..6dcfcf5742 100644 --- a/compiler/lib/linker.ml +++ b/compiler/lib/linker.ml @@ -286,6 +286,44 @@ module Fragment = struct in p + let script_to_module ~runtime_import p = + let open Javascript in + List.concat_map p ~f:(fun (annots, code_fragments) -> + (* Extract provides and requires from annotations *) + let provides, requires = + List.fold_left annots ~init:(None, []) ~f:(fun (prov, reqs) ((_, a), _pi) -> + match a with + | `Provides (name, _, _) -> Some name, reqs + | `Requires names -> prov, names @ reqs + | _ -> prov, reqs) + in + (* Build import statement if there are requires *) + let import_stmt = + match requires with + | [] -> [] + | _ -> + let named = + List.map requires ~f:(fun name -> + let utf8 = Utf8_string.of_string_exn name in + utf8, ident utf8) + in + let imp = + { from = runtime_import; kind = Named (None, named); withClause = None } + in + [ Import (imp, Parse_info.zero), N ] + in + (* Build export statement if there's a provides *) + let export_stmt = + match provides with + | None -> [] + | Some name -> + let utf8 = Utf8_string.of_string_exn name in + let id = ident utf8 in + [ Export (ExportNames [ id, utf8 ], Parse_info.zero), N ] + in + let code = List.concat code_fragments in + import_stmt @ code @ export_stmt) + let parse_from_lex ~filename lex = let program = try diff --git a/compiler/lib/linker.mli b/compiler/lib/linker.mli index bc3b9b4caf..69cf50aa51 100644 --- a/compiler/lib/linker.mli +++ b/compiler/lib/linker.mli @@ -32,6 +32,16 @@ module Fragment : sig val parse_builtin : Builtins.File.t -> t list val pack : t -> t + + val attach_annot : + (Lexing.position * Javascript.statement_list) list + -> (Js_token.t * Loc.t) list + -> ((Js_token.Annot.t * Parse_info.t) list * Javascript.statement_list list) list + + val script_to_module : + runtime_import:Utf8_string.t + -> ((Js_token.Annot.t * Parse_info.t) list * Javascript.statement_list list) list + -> Javascript.program end val reset : unit -> unit diff --git a/compiler/tests-esm/esm_bundle.ml b/compiler/tests-esm/esm_bundle.ml index 12be86e979..5e16d086f0 100644 --- a/compiler/tests-esm/esm_bundle.ml +++ b/compiler/tests-esm/esm_bundle.ml @@ -313,7 +313,11 @@ export function app() { in (* Bundle WITH tree shaking *) let output_shaken = - Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true in let output_shaken_str = bundle_to_string output_shaken in (* Bundle WITHOUT tree shaking for comparison *) @@ -659,7 +663,11 @@ export function app() { return used; } |} in let output_shaken = - Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true in let output_str = bundle_to_string output_shaken in print_endline output_str; @@ -699,7 +707,11 @@ export function app() { return foo; } |} in let output_shaken = - Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true in let output_str = bundle_to_string output_shaken in print_endline output_str; @@ -740,7 +752,11 @@ export function app() { return foo; } |} in let output_shaken = - Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true in let output_str = bundle_to_string output_shaken in print_endline output_str; @@ -764,22 +780,14 @@ let%expect_test "tree shaking preserves side-effect-only imports" = (* Test import './module.js' style imports that have no bindings *) with_test_dir @@ fun ~write -> - let _ = - write - "setup.js" - {| + let _ = write "setup.js" {| console.log("setup module loaded"); export {}; -|} - in - let _ = - write - "lib.js" - {| +|} in + let _ = write "lib.js" {| import './setup.js'; export const foo = 1; -|} - in +|} in let entry = write "app.js" @@ -813,31 +821,21 @@ let%expect_test "tree shaking preserves transitive side-effect imports" = (* Test that import './a.js' -> import './b.js' chains are preserved *) with_test_dir @@ fun ~write -> - let _ = - write - "deep.js" - {| + let _ = write "deep.js" {| console.log("deep module"); export {}; -|} - in +|} in let _ = - write - "middle.js" - {| + write "middle.js" {| import './deep.js'; console.log("middle module"); export {}; |} in - let _ = - write - "lib.js" - {| + let _ = write "lib.js" {| import './middle.js'; export const foo = 1; -|} - in +|} in let entry = write "app.js" @@ -894,7 +892,11 @@ export function app() { return used(); } |} in let output_shaken = - Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true in let output_str = bundle_to_string output_shaken in print_endline output_str; @@ -917,24 +919,38 @@ let%expect_test "diamond dependency - both paths need different exports" = with_test_dir @@ fun ~write -> (* A exports x and z *) - let _ = write "a.js" {| + let _ = + write + "a.js" + {| export function x() { return "x"; } export function z() { return "z"; } export function unused() { return "unused"; } -|} in +|} + in (* B depends on A.z *) - let _ = write "b.js" {| + let _ = + write "b.js" {| import { z } from './a.js'; export function y() { return z(); } -|} in +|} + in (* root depends on A.x and B.y *) - let entry = write "root.js" {| + let entry = + write + "root.js" + {| import { x } from './a.js'; import { y } from './b.js'; export function main() { return x() + y(); } -|} in +|} + in let output = - Esm_bundle.bundle_modules ~parse:parse_file ~resolve ~entry_points:[ entry ] ~tree_shake:true + Esm_bundle.bundle_modules + ~parse:parse_file + ~resolve + ~entry_points:[ entry ] + ~tree_shake:true in let output_str = bundle_to_string output in print_endline output_str; From e6cc192736bf0642f8690fed0dc20aaae32eff6d Mon Sep 17 00:00:00 2001 From: Hugo Heuzard Date: Tue, 3 Feb 2026 02:20:46 +0100 Subject: [PATCH 5/5] Compiler: esm merge --- compiler/lib/esm_bundle.ml | 88 +++++++++++ compiler/lib/esm_bundle.mli | 12 ++ compiler/tests-esm/esm_bundle.ml | 252 +++++++++++++++++++++++++++++++ 3 files changed, 352 insertions(+) diff --git a/compiler/lib/esm_bundle.ml b/compiler/lib/esm_bundle.ml index b7942c9f0a..d1b40068cc 100644 --- a/compiler/lib/esm_bundle.ml +++ b/compiler/lib/esm_bundle.ml @@ -310,3 +310,91 @@ let bundle_modules ~parse ~resolve ~entry_points ~tree_shake:do_tree_shake : pro else graph in bundle graph ~entry_points:entry_ids + +(* ========== Module Merging ========== *) + +(* Convert an Esm.import_entry back to a Javascript.Import statement *) +let generate_import_statement (import : Esm.import_entry) : statement = + let from = Utf8_string.of_string_exn (Esm.ModuleId.to_path import.Esm.source) in + (* Collect bindings by type *) + let default_opt, named, namespace_opt, has_side_effect = + List.fold_left + import.Esm.bindings + ~init:(None, [], None, false) + ~f:(fun (def, named, ns, side) binding -> + match binding with + | Esm.ImportDefault id -> Some id, named, ns, side + | Esm.ImportNamed (orig_name, local_id) -> + def, (Utf8_string.of_string_exn orig_name, local_id) :: named, ns, side + | Esm.ImportNamespace id -> def, named, Some id, side + | Esm.ImportSideEffect -> def, named, ns, true) + in + let kind = + match default_opt, List.rev named, namespace_opt, has_side_effect with + | None, [], None, true -> SideEffect + | Some id, [], None, _ -> Default id + | def_opt, named, None, _ when not (List.is_empty named) -> Named (def_opt, named) + | def_opt, [], Some ns_id, _ -> Namespace (def_opt, ns_id) + | _ -> SideEffect (* fallback *) + in + Import ({ from; kind; withClause = None }, Parse_info.zero) + +let merge_modules ~dest (modules : Esm.esm_module list) : program = + let dest_id = Esm.ModuleId.of_path dest in + (* Build a combined map of all exports across all modules. + When an import from dest is found, we look up the export by name here. *) + let all_exports = + List.fold_left modules ~init:StringMap.empty ~f:(fun acc m -> + StringMap.fold + (fun name export acc -> + (* First module's export wins for conflicting names *) + if StringMap.mem name acc then acc else StringMap.add name export acc) + m.Esm.exports + acc) + in + (* Collect external imports (not from dest) and generate import statements *) + let import_stmts = + List.concat_map modules ~f:(fun m -> + List.filter_map m.Esm.imports ~f:(fun import -> + if Esm.ModuleId.equal import.Esm.source dest_id + then None (* Internal import - will be substituted *) + else Some (generate_import_statement import, N))) + in + (* Process each module: filter imports and apply substitutions *) + let body_stmts = + List.concat_map modules ~f:(fun m -> + (* Filter out imports from the destination module *) + let internal_imports = + List.filter m.Esm.imports ~f:(fun import -> + Esm.ModuleId.equal import.Esm.source dest_id) + in + (* Build substitution for internal imports *) + let subst = + List.fold_left internal_imports ~init:Code.Var.Map.empty ~f:(fun acc import -> + List.fold_left import.Esm.bindings ~init:acc ~f:(fun acc binding -> + match binding with + | Esm.ImportNamed (orig_name, V v) -> ( + match StringMap.find_opt orig_name all_exports with + | Some export -> Code.Var.Map.add v export.Esm.local_ident acc + | None -> acc) + | Esm.ImportDefault (V v) -> ( + match StringMap.find_opt "default" all_exports with + | Some export -> Code.Var.Map.add v export.Esm.local_ident acc + | None -> acc) + | _ -> acc)) + in + apply_import_substitutions subst m.Esm.body) + in + (* Collect all exports from all modules *) + let export_stmts = + List.concat_map modules ~f:(fun m -> + StringMap.fold + (fun _name export acc -> + let exported_name = Utf8_string.of_string_exn export.Esm.exported_name in + ( Export (ExportNames [ export.Esm.local_ident, exported_name ], Parse_info.zero) + , N ) + :: acc) + m.Esm.exports + []) + in + import_stmts @ body_stmts @ export_stmts diff --git a/compiler/lib/esm_bundle.mli b/compiler/lib/esm_bundle.mli index abffa3c8e2..ddd61f06ef 100644 --- a/compiler/lib/esm_bundle.mli +++ b/compiler/lib/esm_bundle.mli @@ -51,3 +51,15 @@ val bundle_modules : @param entry_points List of entry point file paths @param tree_shake Whether to perform tree-shaking *) + +val merge_modules : dest:string -> Esm.esm_module list -> Javascript.program +(** [merge_modules ~dest modules] merges multiple ES modules into a single program. + + - Removes imports that reference the destination file (self-imports after merge) + - Substitutes import bindings with actual exported identifiers from source modules + - Preserves all exports from all input modules + + @param dest Path of the destination/merged module (imports from this path will be removed) + @param modules List of analyzed ESM modules to merge + @return Single JavaScript program with all bodies and exports +*) diff --git a/compiler/tests-esm/esm_bundle.ml b/compiler/tests-esm/esm_bundle.ml index 5e16d086f0..4d86c755b0 100644 --- a/compiler/tests-esm/esm_bundle.ml +++ b/compiler/tests-esm/esm_bundle.ml @@ -972,3 +972,255 @@ export function main() { return x() + y(); } z preserved (via B): true unused removed: true |}] + +(* ========== merge_modules tests ========== *) + +let analyze_js ~resolve name content = + (* Parse and analyze a JS module *) + let lexer = Parse_js.Lexer.of_string content in + let program = Parse_js.parse `Module lexer in + let id = Esm.ModuleId.of_path name in + Esm.analyze_module ~resolve id program + +let%expect_test "merge_modules simple" = + (* Test merging two independent modules *) + let resolve _ = failwith "no resolution needed" in + let m1 = + analyze_js + ~resolve + "a.js" + {| +export const foo = 1; +export function hello() { return "hello"; } +|} + in + let m2 = + analyze_js + ~resolve + "b.js" + {| +export const bar = 2; +export function world() { return "world"; } +|} + in + let merged = Esm_bundle.merge_modules ~dest:"bundle.js" [ m1; m2 ] in + let output = bundle_to_string merged in + print_endline output; + [%expect + {| + const foo = 1; + function hello(){return "hello";} + const bar = 2; + function world(){return "world";} + export { hello }; + export { foo }; + export { world }; + export { bar }; + |}] + +let%expect_test "merge_modules removes self-imports" = + (* Test that imports from the destination file are removed and substituted *) + let resolve specifier = + (* Resolve ./a.js to bundle.js to simulate self-import *) + if String.equal specifier "./a.js" + then Esm.ModuleId.of_path "bundle.js" + else Esm.ModuleId.of_path specifier + in + let m1 = analyze_js ~resolve "a.js" {| +export const foo = 42; +|} in + let m2 = + analyze_js + ~resolve + "b.js" + {| +import { foo } from './a.js'; +export function useFoo() { return foo + 1; } +|} + in + let merged = Esm_bundle.merge_modules ~dest:"bundle.js" [ m1; m2 ] in + let output = bundle_to_string merged in + print_endline output; + (* Verify no import statement remains *) + let has_import = contains_substring output "import" in + Printf.printf "import removed: %b\n" (not has_import); + [%expect + {| + const foo = 42; + function useFoo(){return foo + 1;} + export { foo }; + export { useFoo }; + + import removed: true + |}] + +let%expect_test "merge_modules with default export" = + let resolve specifier = + if String.equal specifier "./a.js" + then Esm.ModuleId.of_path "bundle.js" + else Esm.ModuleId.of_path specifier + in + let m1 = + analyze_js ~resolve "a.js" {| +export default function greet() { return "hi"; } +|} + in + let m2 = + analyze_js + ~resolve + "b.js" + {| +import greet from './a.js'; +export function useGreet() { return greet() + "!"; } +|} + in + let merged = Esm_bundle.merge_modules ~dest:"bundle.js" [ m1; m2 ] in + let output = bundle_to_string merged in + print_endline output; + let has_import = contains_substring output "import" in + Printf.printf "import removed: %b\n" (not has_import); + [%expect + {| + function greet(){return "hi";} + function useGreet(){return greet() + "!";} + export { greet as default }; + export { useGreet }; + + import removed: true + |}] + +let%expect_test "merge_modules preserves all exports" = + let resolve _ = failwith "no resolution needed" in + let m1 = + analyze_js + ~resolve + "utils.js" + {| +export const A = 1; +export const B = 2; +export function helper() { return A + B; } +|} + in + let m2 = + analyze_js + ~resolve + "main.js" + {| +export const C = 3; +export function main() { return C; } +|} + in + let merged = Esm_bundle.merge_modules ~dest:"bundle.js" [ m1; m2 ] in + let output = bundle_to_string merged in + print_endline output; + (* Count exports *) + let export_count = + List.length (List.filter ~f:(fun line -> contains_substring line "export") (String.split_on_char ~sep:'\n' output)) + in + Printf.printf "Total exports: %d\n" export_count; + [%expect + {| + const A = 1; + const B = 2; + function helper(){return A + B;} + const C = 3; + function main(){return C;} + export { helper }; + export { B }; + export { A }; + export { main }; + export { C }; + + Total exports: 5 + |}] + +let%expect_test "merge_modules preserves external imports" = + (* Test that imports from external modules (not dest) are preserved *) + let resolve specifier = + if String.equal specifier "./internal.js" + then Esm.ModuleId.of_path "bundle.js" + else Esm.ModuleId.of_path specifier + in + let m1 = + analyze_js ~resolve "internal.js" {| +export const foo = 42; +|} + in + let m2 = + analyze_js + ~resolve + "main.js" + {| +import { foo } from './internal.js'; +import { external } from './external.js'; +import defaultExt from './default-ext.js'; +export function useBoth() { return foo + external + defaultExt; } +|} + in + let merged = Esm_bundle.merge_modules ~dest:"bundle.js" [ m1; m2 ] in + let output = bundle_to_string merged in + print_endline output; + (* Verify external imports are preserved *) + let has_external_import = contains_substring output "external.js" in + let has_default_import = contains_substring output "default-ext.js" in + let has_internal_import = contains_substring output "internal.js" in + Printf.printf "external.js import preserved: %b\n" has_external_import; + Printf.printf "default-ext.js import preserved: %b\n" has_default_import; + Printf.printf "internal.js import removed: %b\n" (not has_internal_import); + [%expect + {| + import { external } from "./external.js"; + import defaultExt from "./default-ext.js"; + const foo = 42; + function useBoth(){return foo + external + defaultExt;} + export { foo }; + export { useBoth }; + + external.js import preserved: true + default-ext.js import preserved: true + internal.js import removed: true + |}] + +let%expect_test "merge_modules with same-named helpers" = + (* Test that modules with identically named functions don't collide *) + let resolve _ = failwith "no resolution needed" in + let m1 = + analyze_js + ~resolve + "a.js" + {| +function helper() { return "from a"; } +export function useA() { return helper(); } +|} + in + let m2 = + analyze_js + ~resolve + "b.js" + {| +function helper() { return "from b"; } +export function useB() { return helper(); } +|} + in + let merged = Esm_bundle.merge_modules ~dest:"bundle.js" [ m1; m2 ] in + let output = bundle_to_string merged in + print_endline output; + (* Count helper functions - should be 2 distinct ones *) + let helper_count = + List.length + (List.filter + ~f:(fun line -> contains_substring line "function helper") + (String.split_on_char ~sep:'\n' output)) + in + Printf.printf "Number of helper functions: %d\n" helper_count; + [%expect + {| + function helper(){return "from a";} + function useA(){return helper();} + function helper$0(){return "from b";} + function useB(){return helper$0();} + export { useA }; + export { useB }; + + Number of helper functions: 2 + |}]