diff --git a/opam/call_graph.opam b/opam/call_graph.opam new file mode 100644 index 000000000..9da3b6370 --- /dev/null +++ b/opam/call_graph.opam @@ -0,0 +1,29 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.100.0" +synopsis: "Call graph infrastructure for opengrep" +description: + "Call graph construction, serialization, and analysis utilities." +maintainer: ["Opengrep authors"] +authors: ["Semgrep authors, Opengrep authors"] +homepage: "https://opengrep.dev" +bug-reports: "https://github.com/opengrep/opengrep/issues" +depends: [ + "dune" {>= "3.8"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/opengrep/opengrep.git" diff --git a/src/call_graph/Call_graph.ml b/src/call_graph/Call_graph.ml index 71b3f4627..3d5b244a7 100644 --- a/src/call_graph/Call_graph.ml +++ b/src/call_graph/Call_graph.ml @@ -98,11 +98,8 @@ let lookup_callee_from_graph (graph : G.t option) m "CALL_GRAPH: caller_node is None during lookup!"); None | Some g, Some caller -> - if not (G.mem_vertex g caller) then ( - Log.debug (fun m -> - m "CALL_GRAPH: Caller %s not in graph" (show_node caller)); - None - ) else + if not (G.mem_vertex g caller) then None + else let call_pos = pos_of_tok call_tok in (* Get edges coming INTO the caller (callee -> caller) *) let incoming_edges = G.pred_e g caller in @@ -115,8 +112,7 @@ let lookup_callee_from_graph (graph : G.t option) Pos.equal label.call_site call_pos) in match exact_match with - | Some edge -> - Some (G.E.src edge) + | Some edge -> Some (G.E.src edge) | None -> (* No fallback - return None so external calls use direct signature lookup. 
Previously there was a line 0 fallback that matched implicit/HOF edges, diff --git a/src/call_graph/Function_id.ml b/src/call_graph/Function_id.ml index 5b5dacf4a..5b6c7677f 100644 --- a/src/call_graph/Function_id.ml +++ b/src/call_graph/Function_id.ml @@ -20,13 +20,13 @@ let normalize_file (file : Fpath.t) : string = Fpath.to_string (Fpath.normalize file) let key ((id, tok) : t) = - if Tok.is_fake tok then - (id, "", 0, 0) - else - let file = Tok.file_of_tok tok in - let line = Tok.line_of_tok tok in - let col = Tok.col_of_tok tok in - (id, normalize_file file, line, col) + match Tok.loc_of_tok tok with + | Ok loc -> + let file = loc.pos.file in + let line = loc.pos.line in + let col = loc.pos.column in + (id, normalize_file file, line, col) + | Error _ -> (id, "", 0, 0) let hash (v : t) = Hashtbl.hash (key v) @@ -63,5 +63,7 @@ let of_il_name (n : IL.name) : t = n.IL.ident let to_file_line_col ((_, tok) : t) : string * int * int = - if Tok.is_fake tok then ("unknown", 0, 0) - else (normalize_file (Tok.file_of_tok tok), Tok.line_of_tok tok, Tok.col_of_tok tok) + match Tok.loc_of_tok tok with + | Ok loc -> + (normalize_file loc.pos.file, loc.pos.line, loc.pos.column) + | Error _ -> ("unknown", 0, 0) diff --git a/src/core_scan/Core_scan.ml b/src/core_scan/Core_scan.ml index b96136892..beabb33be 100644 --- a/src/core_scan/Core_scan.ml +++ b/src/core_scan/Core_scan.ml @@ -21,6 +21,13 @@ module MR = Mini_rule module R = Rule module Out = Semgrep_output_v1_j module TLS = Thread_local_storage +module PathOrd = struct + type t = Fpath.t + + let compare = Fpath.compare +end + +module PathMap = Map.Make (PathOrd) (*****************************************************************************) (* Purpose *) @@ -754,9 +761,24 @@ let sca_rules_filtering (target : Target.regular) (rules : Rule.t list) : (*****************************************************************************) (* build the callback for iter_targets_and_get_matches_and_exn_to_errors *) +let mk_xconf (config : 
Core_scan_config.t) + (prefilter_cache_opt : Match_env.prefilter_config) : Match_env.xconfig = + { + Match_env.config = + { Rule_options.default with taint_intrafile = config.taint_intrafile }; + equivs = parse_equivalences config.equivalences_file; + nested_formula = false; + matching_conf = config.matching_conf; + matching_explanations = config.matching_explanations; + filter_irrelevant_rules = prefilter_cache_opt; + } + let mk_target_handler (caps : < Cap.time_limit >) (config : Core_scan_config.t) (valid_rules : Rule.t list) - (prefilter_cache_opt : Match_env.prefilter_config) : target_handler = + (prefilter_cache_opt : Match_env.prefilter_config) + ?(resolved_xtargets : Xtarget.t PathMap.t option) + ?(interfile_context : Match_tainting_mode.interfile_context option) : + target_handler = function | Lockfile ({ path; kind } as lockfile) -> (* TODO: (sca) we always pass None as the manifest target here, but this @@ -792,18 +814,15 @@ let mk_target_handler (caps : < Cap.time_limit >) (config : Core_scan_config.t) (* TODO: can we skip all of this if there are no applicable rules? In particular, can we skip print_cli_progress? 
*)
-        let xtarget = Xtarget.resolve parse_and_resolve_name target in
-        let match_hook _ = () in
-        let xconf =
-          {
-            Match_env.config = { Rule_options.default with taint_intrafile = config.taint_intrafile };
-            equivs = parse_equivalences config.equivalences_file;
-            nested_formula = false;
-            matching_conf = config.matching_conf;
-            matching_explanations = config.matching_explanations;
-            filter_irrelevant_rules = prefilter_cache_opt;
-          }
+        let xtarget =
+          (* Reuse the xtarget that was resolved up front when a map was passed
+           * in; resolve only on a miss, so no throwaway value is ever built. *)
+          match Option.bind resolved_xtargets (PathMap.find_opt file) with
+          | Some xtarget -> xtarget
+          | None -> Xtarget.resolve parse_and_resolve_name target
         in
+        let match_hook _ = () in
+        let xconf = mk_xconf config prefilter_cache_opt in
         let rules, dependency_match_table = sca_rules_filtering target rules in
         let timeout =
           let caps = (caps :> < Cap.time_limit >) in
@@ -821,8 +840,8 @@ let mk_target_handler (caps : < Cap.time_limit >) (config : Core_scan_config.t)
           in
         let matches : Core_result.matches_single_file =
           (* !!Calling Match_rules!! Calling the matching engine!! *)
-          Match_rules.check ~match_hook ~timeout ~dependency_match_table xconf
-            rules xtarget
+          Match_rules.check ~match_hook ~timeout ~dependency_match_table
+            ?interfile_context xconf rules xtarget
         in
         (* Add file size when profiling is on. *)
         let matches =
@@ -882,6 +901,72 @@ let scan_exn (caps : < caps ; ..
>) (config : Core_scan_config.t) end else NoPrefiltering in + let base_xconf = mk_xconf config prefilter_cache_opt in + let interfile_rule_targets, resolved_xtargets, interfile_languages_used = + let interfile_rules = + valid_rules + |> List_.filter_map (fun rule -> + match rule.R.mode with + | `Taint _ as mode -> + let xconf_rule = + Match_env.adjust_xconfig_with_rule_options base_xconf + rule.R.options + in + if xconf_rule.config.interfile then Some { rule with mode } + else None + | _ -> None) + in + if List_.null interfile_rules then ([], None, []) + else + let resolved_xtargets = + targets + |> List.fold_left + (fun acc target -> + match target with + | Target.Regular regular -> + let xtarget = Xtarget.resolve parse_and_resolve_name regular in + PathMap.add regular.path.internal_path_to_content xtarget acc + | Target.Lockfile _ -> acc) + PathMap.empty + in + let interfile_rule_targets = + interfile_rules + |> List_.map (fun rule -> + let xtargets = + targets + |> List_.filter_map (function + | Target.Regular regular -> + let applicable = + rules_for_target ~analyzer:regular.analyzer + ~products:regular.products + ~origin:regular.path.origin + ~respect_rule_paths:config.respect_rule_paths + [ (rule :> R.rule) ] + <> [] + in + if applicable then + PathMap.find_opt regular.path.internal_path_to_content + resolved_xtargets + else None + | Target.Lockfile _ -> None) + in + (rule, xtargets)) + in + let interfile_languages_used = + interfile_rules + |> List_.map (fun rule -> rule.R.target_analyzer) + |> List.sort_uniq Stdlib.compare + in + (interfile_rule_targets, Some resolved_xtargets, interfile_languages_used) + in + let interfile_context = + match interfile_rule_targets with + | [] -> None + | _ -> + Some + (Match_tainting_mode.build_interfile_contexts base_xconf + interfile_rule_targets) + in let file_results, scanned_targets = targets |> iter_targets_and_get_matches_and_exn_to_errors @@ -889,15 +974,13 @@ let scan_exn (caps : < caps ; .. 
>) (config : Core_scan_config.t) config (mk_target_handler (caps :> < Cap.time_limit >) - config valid_rules prefilter_cache_opt) + config valid_rules prefilter_cache_opt ?resolved_xtargets + ?interfile_context) in (* TODO: Delete any lockfile-only findings whose rule produced a code+lockfile finding in that lockfile in scanned_targets? *) - - (* the OSS engine was invoked so no interfile langs *) - let interfile_languages_used = [] in let (res : Core_result.t) = Core_result.mk_result file_results (List_.map (fun r -> (r, `OSS)) valid_rules) diff --git a/src/engine/Match_env.ml b/src/engine/Match_env.ml index 5fb1cf072..8ae3e3d87 100644 --- a/src/engine/Match_env.ml +++ b/src/engine/Match_env.ml @@ -109,9 +109,12 @@ let adjust_xconfig_with_rule_options xconf options = match options with | None -> xconf.config | Some (rule_opts : Rule_options.t) -> - (* Merge rule options with existing config, preserving command-line taint_intrafile setting *) + (* Interfile tainting reuses the same summary machinery as taint_intrafile, + * so enabling it at the rule level must also enable taint_intrafile. *) { rule_opts with - taint_intrafile = xconf.config.taint_intrafile || rule_opts.taint_intrafile + taint_intrafile = + xconf.config.taint_intrafile || rule_opts.taint_intrafile + || rule_opts.interfile } in { xconf with config } diff --git a/src/engine/Match_rules.ml b/src/engine/Match_rules.ml index 9e3ae6296..4dae3ab96 100644 --- a/src/engine/Match_rules.ml +++ b/src/engine/Match_rules.ml @@ -130,20 +130,26 @@ let is_relevant_rule_for_xtarget r xconf xtarget = in let xconf = Match_env.adjust_xconfig_with_rule_options xconf r.R.options in let is_relevant = - match xconf.filter_irrelevant_rules with - | NoPrefiltering -> true - | PrefilterWithCache cache -> ( - match Analyze_rule.regexp_prefilter_of_rule ~cache:(Some cache) r with - | None -> true - | Some (prefilter_formula, func) -> - (* NOTE: If [lazy_content] is shared in > 1 thread, then this is not - * thread-safe. 
However, each [Xtarget.t] is only accessed in 1 worker - * task, so there should be no race. *) - let content = Lazy.force lazy_content in - Log.info (fun m -> - let s = Semgrep_prefilter_j.string_of_formula prefilter_formula in - m "looking for %s in %s" s !!internal_path_to_content); - func content) + if xconf.config.interfile then + (* Regex prefiltering is file-local, but interfile taint rules may split + * their source and sink across different files. Skipping the prefilter + * avoids falsely dropping files that only contain one side of the flow. *) + true + else + match xconf.filter_irrelevant_rules with + | NoPrefiltering -> true + | PrefilterWithCache cache -> ( + match Analyze_rule.regexp_prefilter_of_rule ~cache:(Some cache) r with + | None -> true + | Some (prefilter_formula, func) -> + (* NOTE: If [lazy_content] is shared in > 1 thread, then this is not + * thread-safe. However, each [Xtarget.t] is only accessed in 1 worker + * task, so there should be no race. *) + let content = Lazy.force lazy_content in + Log.info (fun m -> + let s = Semgrep_prefilter_j.string_of_formula prefilter_formula in + m "looking for %s in %s" s !!internal_path_to_content); + func content) in if not is_relevant then Log.info (fun m -> @@ -276,6 +282,7 @@ let scc_match_hook (match_hook : Core_match.t -> unit) let check ?(dependency_match_table : Match_SCA_mode.dependency_match_table option) + ?(interfile_context : Match_tainting_mode.interfile_context option) ~match_hook ~(timeout : timeout_config option) (xconf : Match_env.xconfig) (rules : Rule.rules) (xtarget : Xtarget.t) : Core_result.matches_single_file = @@ -319,6 +326,7 @@ let check taint_rules_groups |> List.concat_map (fun taint_rules -> Match_tainting_mode.check_rules ~match_hook + ?interfile_context ~per_rule_boilerplate_fn:per_rule_boilerplate_fn_opt taint_rules xconf xtarget) in diff --git a/src/engine/Match_rules.mli b/src/engine/Match_rules.mli index ac952aafa..e16bff1aa 100644 --- a/src/engine/Match_rules.mli 
+++ b/src/engine/Match_rules.mli @@ -35,6 +35,7 @@ type timeout_config = { *) val check : ?dependency_match_table:Match_SCA_mode.dependency_match_table -> + ?interfile_context:Match_tainting_mode.interfile_context -> match_hook:(Core_match.t -> unit) -> timeout:timeout_config option -> Match_env.xconfig -> diff --git a/src/engine/Match_taint_spec.ml b/src/engine/Match_taint_spec.ml index 41712fb81..2d9e40684 100644 --- a/src/engine/Match_taint_spec.ml +++ b/src/engine/Match_taint_spec.ml @@ -441,13 +441,14 @@ let mk_taint_spec_match_preds rule matches = let default_effect_handler _fun_name new_effects = new_effects -let taint_config_of_rule ~per_file_formula_cache +let taint_config_of_rule ~per_file_formula_cache ?(require_source_sink = true) ?(handle_effects = default_effect_handler) xconf lang file ast_and_errors ({ mode = `Taint spec; _ } as rule : R.taint_rule) = match spec_matches_of_taint_rule ~per_file_formula_cache xconf !!file ast_and_errors rule with - | { sinks = []; _ }, _ - | { sources = []; _ }, _ -> None + | ({ sinks = []; _ }, _ | { sources = []; _ }, _) + when require_source_sink -> + None | spec_matches, expls -> let xconf = Match_env.adjust_xconfig_with_rule_options xconf rule.options in let options = xconf.config in diff --git a/src/engine/Match_taint_spec.mli b/src/engine/Match_taint_spec.mli index 587b38711..6c460e395 100644 --- a/src/engine/Match_taint_spec.mli +++ b/src/engine/Match_taint_spec.mli @@ -31,6 +31,7 @@ val hook_mk_taint_spec_match_preds : *) val taint_config_of_rule : per_file_formula_cache:Formula_cache.t -> + ?require_source_sink:bool -> ?handle_effects:Taint_rule_inst.effects_handler (** Use 'handle_effects' to e.g. apply hash-consing (see 'Deep_tainting'), or to do some side-effect if needed. 
diff --git a/src/engine/Match_tainting_mode.ml b/src/engine/Match_tainting_mode.ml index c33104e10..1adfc1de9 100644 --- a/src/engine/Match_tainting_mode.ml +++ b/src/engine/Match_tainting_mode.ml @@ -40,6 +40,12 @@ module Log = Log_tainting.Log module Effect = Shape_and_sig.Effect module Effects = Shape_and_sig.Effects module Signature = Shape_and_sig.Signature +module Shape = Shape_and_sig.Shape +module CanonicalBindingMap = Map.Make (struct + type t = string list + + let compare = Stdlib.compare +end) type fun_info = { name : IL.name; @@ -50,6 +56,291 @@ type fun_info = { is_lambda_assignment : bool; } +type interfile_rule_context = { + signature_db : Shape_and_sig.signature_database; + builtin_signature_db : Shape_and_sig.builtin_signature_database; + call_graph : Call_graph.G.t; + imported_global_index : Shape.cell list CanonicalBindingMap.t; +} + +type interfile_context = interfile_rule_context Rule_ID.Map.t + +type project_target = { + xtarget : Xtarget.t; + ast : G.program; + taint_inst : Taint_rule_inst.t; + spec_matches : Match_taint_spec.spec_matches; + ctx : AST_to_IL.ctx; + object_mappings : (G.name * G.name) list; + info_map : fun_info Shape_and_sig.FunctionMap.t; +} + +type project_fun_info = { + target : project_target; + info : fun_info; +} + +let add_imported_global_binding imported_global_index canonical cell = + CanonicalBindingMap.update canonical + (function + | Some cells when List.exists (Shape.equal_cell cell) cells -> Some cells + | Some cells -> Some (cell :: cells) + | None -> Some [ cell ]) + imported_global_index + +let rec starts_with_segments ~prefix segments = + match (prefix, segments) with + | [], _ -> true + | _, [] -> false + | p :: prefix_rest, s :: segments_rest -> + String.equal p s && starts_with_segments ~prefix:prefix_rest segments_rest + +let lookup_imported_global_binding ?current_file imported_global_index canonical + = + Graph_from_AST.canonical_lookup_candidates ?current_file canonical + |> List.find_map (fun 
candidate -> + match CanonicalBindingMap.find_opt candidate imported_global_index with + | Some [ cell ] -> Some cell + | Some _ + | None -> + None) + +let add_tainted_bindings_for_file imported_global_index ~(file : Fpath.t) + (env : Lval_env.env) = + let module_candidates = Graph_from_AST.module_candidates_of_path file in + Lval_env.seq_of_tainted env + |> Seq.fold_left + (fun acc (name, cell) -> + if Tok.is_fake (snd name.IL.ident) then acc + else + module_candidates + |> List.fold_left + (fun acc module_name -> + add_imported_global_binding acc + (module_name @ [ fst name.IL.ident ]) + cell) + acc) + imported_global_index + +let add_top_level_imported_global_bindings lookup_index + (imported_global_index : Shape.cell list CanonicalBindingMap.t) + ~(file : Fpath.t) (ast : G.program) = + let module_candidates = Graph_from_AST.module_candidates_of_path file in + let add_module_binding acc local_name cell = + module_candidates + |> List.fold_left + (fun acc module_name -> + add_imported_global_binding acc (module_name @ [ local_name ]) cell) + acc + in + ast + |> List.fold_left + (fun acc stmt -> + match stmt.G.s with + | G.DirectiveStmt directive -> ( + match directive.G.d with + | G.ImportFrom (_, G.DottedName xs, imported_names) -> + imported_names + |> List.fold_left + (fun acc (id, alias_opt) -> + let local_name = + match alias_opt with + | Some (alias, _id_info) -> fst alias + | None -> fst id + in + let canonical = G.dotted_to_canonical (xs @ [ id ]) in + match + lookup_imported_global_binding ~current_file:file + lookup_index canonical + with + | Some cell -> add_module_binding acc local_name cell + | None -> acc) + acc + | G.ImportAll (_, G.DottedName xs, _) -> + let resolved_targets = + Graph_from_AST.canonical_lookup_candidates + ~current_file:file + (G.dotted_to_canonical xs) + in + CanonicalBindingMap.fold + (fun canonical cells acc -> + match cells with + | [ cell ] -> ( + match + List.find_opt + (fun target -> + starts_with_segments ~prefix:target 
canonical) + resolved_targets + with + | Some target -> ( + match + List_.drop (List.length target) canonical + with + | [ local_name ] -> + add_module_binding acc local_name cell + | _ -> acc) + | None -> acc) + | _ -> acc) + lookup_index acc + | _ -> acc) + | _ -> acc) + imported_global_index + +let imported_global_index_equal left right = + CanonicalBindingMap.equal (List.equal Shape.equal_cell) left right + +let build_imported_global_index + ~(signature_db : Shape_and_sig.signature_database) + ~(builtin_signature_db : Shape_and_sig.builtin_signature_database) + ~(call_graph : Call_graph.G.t) (project_targets : project_target list) = + let base_index = + project_targets + |> List.fold_left + (fun acc (target : project_target) -> + let file = target.xtarget.path.internal_path_to_content in + let stmts = + AST_to_IL.stmt target.taint_inst.lang (G.stmt1 target.ast) + in + let cfg, lambdas = CFG_build.cfg_of_stmts stmts in + let top_level_name = + Graph_from_AST.top_level_il_name ~current_file:file target.ast + in + let _top_effects, end_mapping = + D.fixpoint target.taint_inst ~name:top_level_name + ~signature_db ~builtin_signature_db ~call_graph + IL.{ params = []; cfg; lambdas } + in + let exit_env = end_mapping.(cfg.exit).Dataflow_core.out_env in + add_tainted_bindings_for_file acc ~file exit_env) + CanonicalBindingMap.empty + in + let rec close_over_top_level_imports imported_global_index = + let next_index = + project_targets + |> List.fold_left + (fun acc (target : project_target) -> + add_top_level_imported_global_bindings imported_global_index acc + ~file:target.xtarget.path.internal_path_to_content target.ast) + imported_global_index + in + if imported_global_index_equal imported_global_index next_index then + imported_global_index + else close_over_top_level_imports next_index + in + close_over_top_level_imports base_index + +let imported_global_env_of_ast ~(file : Fpath.t) + (imported_global_index : Shape.cell list CanonicalBindingMap.t) + (ast : 
G.program) = + let env = ref Lval_env.empty in + let imported_module_bindings = + let bindings = ref SMap.empty in + let visitor = + object + inherit [_] G.iter as super + + method! visit_directive env directive = + (match directive.G.d with + | G.ImportAs (_, G.DottedName (head :: _ as xs), alias_opt) -> + let local_name = + match alias_opt with + | Some (alias, _id_info) -> fst alias + | None -> fst head + in + bindings := SMap.add local_name (G.dotted_to_canonical xs) !bindings + | _ -> ()); + super#visit_directive env directive + end + in + visitor#visit_program () ast; + !bindings + in + let add_imported_global_path (base_id, base_id_info) field_path canonical = + match + lookup_imported_global_binding ~current_file:file imported_global_index + canonical + with + | None -> () + | Some (Shape.Cell (xtaint, shape)) -> + let base_name = AST_to_IL.var_of_id_info base_id base_id_info in + let offset = + field_path + |> List_.map (fun (field_id, field_id_info) -> + T.Ofld (AST_to_IL.var_of_id_info field_id field_id_info)) + in + env := + Lval_env.add_shape base_name offset (Xtaint.to_taints xtaint) shape + !env + in + let add_imported_global id id_info canonical = + add_imported_global_path (id, id_info) [] canonical + in + let rec dot_access_path (expr : G.expr) = + match expr.G.e with + | G.N (G.Id (id, id_info)) -> Some ((id, id_info), []) + | G.DotAccess (base_expr, _, G.FN (G.Id (field_id, field_id_info))) -> ( + match dot_access_path base_expr with + | None -> None + | Some (base, field_path) -> + Some (base, field_path @ [ (field_id, field_id_info) ])) + | _ -> None + in + let visitor = + object + inherit [_] G.iter_no_id_info as super + + method! 
visit_expr env_ expr = + (match expr.G.e with + | G.N (G.Id (id, id_info)) -> ( + match !(id_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) -> + add_imported_global id id_info canonical + | Some _ -> + () + | None -> + add_imported_global id id_info [ fst id ]) + | G.N + (G.IdQualified + { name_last = (id, _typeargsTODO); name_info; _ }) -> ( + match !(name_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) -> + add_imported_global id name_info canonical + | Some _ -> + () + | None -> + ()) + | G.DotAccess (_, _, G.FN (G.Id _)) -> ( + match dot_access_path expr with + | Some ((base_id, base_id_info), ((_field_id, _) :: _ as field_path)) + -> ( + let canonical_module_opt = + match !(base_id_info.G.id_resolved) with + | Some (G.ImportedModule canonical, _sid) + | Some (G.ImportedEntity canonical, _sid) -> + Some canonical + | Some _ + | None -> + SMap.find_opt (fst base_id) imported_module_bindings + in + match canonical_module_opt with + | Some canonical -> + let canonical = + canonical @ (field_path |> List_.map (fun (id, _) -> fst id)) + in + add_imported_global_path (base_id, base_id_info) field_path + canonical + | None -> + ()) + | Some _ + | None -> + ()) + | _ -> ()); + super#visit_expr env_ expr + end + in + visitor#visit_program () ast; + !env + (*****************************************************************************) (* Prelude *) (*****************************************************************************) @@ -276,6 +567,110 @@ let get_arity params info lang = in List.length filtered_params +let filtered_signature_params params info lang = + match (lang, info.class_name_str) with + (* Python methods: filter out 'self' and 'cls' params *) + | Lang.Python, Some _ -> + List.filter + (function + | G.Param { pname = Some (("self" | "cls"), _); _ } -> false + | _ -> true) + params + (* Go methods: filter out ParamReceiver *) + | Lang.Go, Some _ -> + List.filter + (function + | G.ParamReceiver _ -> false + | _ -> 
true)
+        params
+  | _ -> params
+
+(* Walks the given list from its head, prepending every parameter that has
+ * both a name and a default value to [acc]; stops at the first parameter
+ * that does not, or at the end of the list. *)
+let rec trailing_default_params acc = function
+  | G.Param { pname = Some id; pinfo = id_info; pdefault = Some default; _ }
+    :: rest ->
+      trailing_default_params ((id, id_info, default) :: acc) rest
+  | [] -> acc
+  | _ :: _ -> acc
+
+(** [python_trailing_default_params params info] is the run of defaulted
+    parameters that ends the parameter list (after
+    [filtered_signature_params] has been applied for Python), as
+    [(id, id_info, default)] triples in declaration order.
+
+    [trailing_default_params] is fed the reversed list and prepends to its
+    accumulator, so its result is already in declaration order and must not
+    be reversed a second time: [extract_single_arity_signatures] takes the
+    last [omitted_count] entries of this list as the parameters that a
+    shorter call leaves out. *)
+let python_trailing_default_params params info =
+  filtered_signature_params params info Lang.Python
+  |> List.rev
+  |> trailing_default_params []
+
+let default_assignment_stmt (id, id_info, default_expr) =
+  G.Assign (G.N (G.Id (id, id_info)) |> G.e, Tok.fake_tok (snd id) "=", default_expr)
+  |> G.e |> G.exprstmt
+
+let prepend_default_assignments defaults (fbody : G.function_body) :
+    G.function_body =
+  let body_stmt = H.funcbody_to_stmt fbody in
+  let prologue = defaults |> List_.map default_assignment_stmt in
+  G.FBStmt (G.Block (Tok.unsafe_fake_bracket (prologue @ [ body_stmt ])) |> G.s)
+
+let extract_single_arity_signatures ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx)
+    ~(taint_inst : Taint_rule_inst.t) ~(ast : G.program)
+    ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option)
+    ~(call_graph : Call_graph.G.t) (info : fun_info)
+    (db : Shape_and_sig.signature_database) :
+    Shape_and_sig.signature_database =
+  let params = Tok.unbracket info.fdef.fparams in
+  let arity = get_arity params info lang in
+  let extract_signature_for cfg arity db =
+    fst
+      (Taint_signature_extractor.extract_signature_with_file_context
+         ~arity:(Shape_and_sig.Arity_exact arity) ~db ?builtin_signature_db
+         taint_inst ~name:info.name
+         ~method_properties:info.method_properties
+         ~call_graph:(Some call_graph) cfg ast)
+  in
+  let updated_db =
+    extract_signature_for info.cfg arity db
+  in
+  let updated_db =
+    if Lang.equal lang Lang.Python then
+      let default_suffix = python_trailing_default_params params info in
+      default_suffix
+      |> List_.mapi (fun i _ -> i + 1)
+      |> List.fold_left
+           (fun acc omitted_count ->
+             let kept_params =
+               List_.take (List.length params - omitted_count) params
+             in
+             let
kept_arity = arity - omitted_count in
+             let omitted_defaults =
+               default_suffix
+               |> List_.drop (List.length default_suffix - omitted_count)
+             in
+             let synthetic_fdef : G.function_definition =
+               {
+                 info.fdef with
+                 G.fparams = Tok.unsafe_fake_bracket kept_params;
+                 fbody = prepend_default_assignments omitted_defaults info.fdef.fbody;
+               }
+             in
+             let fdef_il =
+               AST_to_IL.function_definition lang ~ctx synthetic_fdef
+             in
+             let cfg = CFG_build.cfg_of_fdef fdef_il in
+             extract_signature_for cfg kept_arity acc)
+           updated_db
+    else updated_db
+  in
+  if Lang.equal lang Lang.Kotlin && arity >= 1 then
+    let last_param_is_lambda =
+      match List.rev params with
+      | G.Param { G.ptype = Some { t = G.TyFun _; _ }; _ } :: _ -> true
+      | _ -> false
+    in
+    if last_param_is_lambda then
+      (* The last parameter is function-typed: register the unchanged CFG a
+       * second time under [arity - 1] (presumably for call sites that pass
+       * the lambda outside the argument list -- TODO confirm). Going through
+       * the local helper keeps the extraction arguments identical to the
+       * other two call sites in this function. *)
+      extract_signature_for info.cfg (arity - 1) updated_db
+    else updated_db
+  else updated_db
+
 (** Convert a Case pattern back into a [G.parameter list] for per-arity
     signature extraction (Clojure multi-arity / Elixir multi-clause).
*) let params_of_case_pattern (pat : G.pattern) : G.parameter list = @@ -372,9 +767,300 @@ let extract_multi_arity_cases (fdef : G.function_definition) : | _ -> Some sorted) | _ -> None +let build_ast_ctx (ast : G.program) : AST_to_IL.ctx = + let ctx = ref AST_to_IL.empty_ctx in + Visit_function_defs.visit + (fun opt_ent _fdef -> + match opt_ent with + | Some { name = EN (Id (n, _)); _ } -> + ctx := AST_to_IL.add_entity_name !ctx n + | __else__ -> ()) + ast; + !ctx + +let collect_fun_info_map ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) + (ast : G.program) : fun_info Shape_and_sig.FunctionMap.t = + let add_info info info_map = + let fn_id = Function_id.of_il_name info.name in + if Shape_and_sig.FunctionMap.mem fn_id info_map then info_map + else Shape_and_sig.FunctionMap.add fn_id info info_map + in + Visit_function_defs.fold_with_parent_path + (fun info_map opt_ent parent_path fdef -> + match fst fdef.fkind with + | LambdaKind + | Arrow -> ( + match opt_ent with + | None -> info_map + | Some ent -> ( + match AST_to_IL.name_of_entity ent with + | None -> info_map + | Some name -> + let class_name_str = + match parent_path with + | Some class_il :: _ -> Some (fst class_il.IL.ident) + | _ -> None + in + let fdef_il = + AST_to_IL.function_definition lang ~ctx fdef + in + let cfg = CFG_build.cfg_of_fdef fdef_il in + let info = + { + name; + class_name_str; + method_properties = []; + cfg; + fdef; + is_lambda_assignment = true; + } + in + add_info info info_map)) + | Function + | Method + | BlockCases -> ( + match Option.bind opt_ent AST_to_IL.name_of_entity with + | None -> info_map + | Some name -> + let go_receiver_name = + match lang with + | Lang.Go -> Graph_from_AST.extract_go_receiver_type fdef + | _ -> None + in + let class_name_str = + match go_receiver_name with + | Some recv_name -> Some recv_name + | None -> ( + match parent_path with + | Some class_il :: _ -> Some (fst class_il.IL.ident) + | _ -> None) + in + let method_properties = + match fst fdef.fkind with + 
| Method -> + Taint_signature_extractor.extract_method_properties fdef + | Function + | LambdaKind + | Arrow + | BlockCases -> + [] + in + let fdef_il = + AST_to_IL.function_definition lang ~ctx fdef + in + let cfg = CFG_build.cfg_of_fdef fdef_il in + let info = + { + name; + class_name_str; + method_properties; + cfg; + fdef; + is_lambda_assignment = false; + } + in + add_info info info_map)) + Shape_and_sig.FunctionMap.empty ast + +let add_signatures_for_fun_info ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) + ~(taint_inst : Taint_rule_inst.t) ~(ast : G.program) + ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option) + ~(call_graph : Call_graph.G.t) (info : fun_info) + (db : Shape_and_sig.signature_database) : + Shape_and_sig.signature_database = + match extract_multi_arity_cases info.fdef with + | Some arity_cases -> + List.fold_left + (fun acc_db (case_params, case_body, arity) -> + let synthetic_fdef : G.function_definition = + { + G.fparams = Tok.unsafe_fake_bracket case_params; + frettype = None; + fkind = info.fdef.G.fkind; + fbody = case_body; + } + in + let fdef_il = + AST_to_IL.function_definition lang ~ctx synthetic_fdef + in + let cfg = CFG_build.cfg_of_fdef fdef_il in + let db', _sig = + Taint_signature_extractor.extract_signature_with_file_context + ~arity ~db:acc_db ?builtin_signature_db taint_inst + ~name:info.name + ~method_properties:info.method_properties + ~call_graph:(Some call_graph) cfg ast + in + db') + db arity_cases + | None -> + extract_single_arity_signatures ~lang ~ctx ~taint_inst ~ast + ?builtin_signature_db ~call_graph info db + +let check_function_defs_for_matches ~(lang : Lang.t) ~(ctx : AST_to_IL.ctx) + ~(taint_inst : Taint_rule_inst.t) ~(glob_env : Taint_lval_env.t) + ?(signature_db : Shape_and_sig.signature_database option) + ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option) + ?(call_graph : Call_graph.G.t option) + ~(record_matches : Shape_and_sig.Effects.t -> unit) (ast : G.program) : unit 
= + let info_map = collect_fun_info_map ~lang ~ctx ast in + Shape_and_sig.FunctionMap.iter + (fun _fn_id info -> + if not info.is_lambda_assignment then ( + Log.info (fun m -> + m + "Match_tainting_mode:\n\ + --------------------\n\ + Checking func def: %s\n\ + --------------------" + (IL.str_of_name info.name)); + let _flow, fdef_effects, _mapping = + check_fundef taint_inst info.name ctx ~glob_env + ?class_name:info.class_name_str ?signature_db + ?builtin_signature_db ?call_graph info.fdef + in + record_matches fdef_effects)) + info_map + +let build_interfile_rule_context (xconf : Match_env.xconfig) + (rule : R.taint_rule) (xtargets : Xtarget.t list) : + interfile_rule_context option = + match Xlang.to_lang rule.R.target_analyzer with + | Error _ -> None + | Ok lang -> + let builtin_signature_db = + Builtin_models.create_all_builtin_models lang + in + let project_targets = + xtargets + |> List_.filter_map (fun (xtarget : Xtarget.t) -> + let file = xtarget.path.internal_path_to_content in + let ast, _skipped_tokens = lazy_force xtarget.lazy_ast_and_errors in + let per_file_formula_cache = + Formula_cache.mk_specialized_formula_cache [ rule ] + in + let* taint_inst, spec_matches, _expls = + Match_taint_spec.taint_config_of_rule + ~per_file_formula_cache ~require_source_sink:false xconf + lang file (ast, []) rule + in + let ctx = build_ast_ctx ast in + let object_mappings = + Taint_signature_extractor.detect_object_initialization ast + taint_inst.lang + in + let info_map = collect_fun_info_map ~lang ~ctx ast in + Some + { + xtarget; + ast; + taint_inst; + spec_matches; + ctx; + object_mappings; + info_map; + }) + in + let project_graph_inputs = + project_targets + |> List_.map (fun target -> + (target.xtarget.path, target.ast, target.object_mappings)) + in + let call_graph = + Graph_from_AST.build_project_call_graph ~lang project_graph_inputs + in + let source_functions = + project_targets + |> List.concat_map (fun target -> + let source_ranges = + 
target.spec_matches.sources + |> List_.map (fun (rwm, _src) -> rwm.Range_with_metavars.r) + in + Graph_from_AST.find_functions_containing_ranges ~lang + ~current_file:target.xtarget.path.internal_path_to_content + target.ast source_ranges) + |> List.sort_uniq Function_id.compare + in + let sink_functions = + project_targets + |> List.concat_map (fun target -> + let sink_ranges = + target.spec_matches.sinks + |> List_.map (fun (rwm, _sink) -> rwm.Range_with_metavars.r) + in + Graph_from_AST.find_functions_containing_ranges ~lang + ~current_file:target.xtarget.path.internal_path_to_content + target.ast sink_ranges) + |> List.sort_uniq Function_id.compare + in + let relevant_graph = + Graph_reachability.compute_relevant_subgraph call_graph + ~sources:source_functions ~sinks:sink_functions + in + let analysis_order = + Call_graph.Topo.fold (fun fn acc -> fn :: acc) relevant_graph [] + |> List.rev + in + let base_db = + Builtin_models.init_signature_database None + in + let object_mappings = + project_targets + |> List.concat_map (fun target -> target.object_mappings) + in + let initial_signature_db = + Shape_and_sig.add_object_mappings base_db object_mappings + in + let project_info_map = + project_targets + |> List.fold_left + (fun acc target -> + Shape_and_sig.FunctionMap.fold + (fun fn_id info acc -> + if Shape_and_sig.FunctionMap.mem fn_id acc then acc + else + Shape_and_sig.FunctionMap.add fn_id { target; info } acc) + target.info_map acc) + Shape_and_sig.FunctionMap.empty + in + let signature_db = + List.fold_left + (fun db node -> + match Shape_and_sig.FunctionMap.find_opt node project_info_map with + | None -> db + | Some { target; info } -> + add_signatures_for_fun_info ~lang ~ctx:target.ctx + ~taint_inst:target.taint_inst ~ast:target.ast + ?builtin_signature_db:(Some builtin_signature_db) + ~call_graph:relevant_graph info db) + initial_signature_db analysis_order + in + let imported_global_index = + build_imported_global_index ~signature_db 
~builtin_signature_db + ~call_graph:relevant_graph project_targets + in + Some + { + signature_db; + builtin_signature_db; + call_graph = relevant_graph; + imported_global_index; + } + +let build_interfile_contexts (xconf : Match_env.xconfig) + (rule_targets : (R.taint_rule * Xtarget.t list) list) : interfile_context = + rule_targets + |> List.fold_left + (fun acc (rule, xtargets) -> + match build_interfile_rule_context xconf rule xtargets with + | None -> acc + | Some context -> Rule_ID.Map.add (fst rule.R.id) context acc) + Rule_ID.Map.empty + let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook ?(signature_db : Shape_and_sig.signature_database option) ?(builtin_signature_db : Shape_and_sig.builtin_signature_database option) + ?(interfile_rule_context : interfile_rule_context option) ?(shared_call_graph : (Call_graph.G.t * (G.name * G.name) list) option = None) (xconf : Match_env.xconfig) (xtarget : Xtarget.t) = @@ -423,27 +1109,35 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook (* TODO: 'debug_taint' should just be part of 'res' * (i.e., add a "debugging" field to 'Report.match_result'). *) match - Match_taint_spec.taint_config_of_rule ~per_file_formula_cache xconf lang - file (ast, []) rule + Match_taint_spec.taint_config_of_rule ~per_file_formula_cache + ~require_source_sink:(not xconf.config.interfile) xconf lang file + (ast, []) rule with | None -> (None, None) | Some (taint_inst, spec_matches, expls) -> - (* FIXME: This is no longer needed, now we can just check the type 'n'. 
*) - let ctx = ref AST_to_IL.empty_ctx in - Visit_function_defs.visit - (fun opt_ent _fdef -> - match opt_ent with - | Some { name = EN (Id (n, _)); _ } -> - ctx := AST_to_IL.add_entity_name !ctx n - | __else__ -> ()) - ast; + let ctx = build_ast_ctx ast in - let glob_env, glob_effects = Taint_input_env.mk_file_env taint_inst ast in + let file_glob_env, glob_effects = Taint_input_env.mk_file_env taint_inst ast in + let imported_glob_env = + match interfile_rule_context with + | Some context -> + imported_global_env_of_ast ~file context.imported_global_index ast + | None -> Lval_env.empty + in + let glob_env = Lval_env.union file_glob_env imported_glob_env in record_matches glob_effects; + let builtin_signature_db = + match interfile_rule_context with + | Some context -> Some context.builtin_signature_db + | None -> builtin_signature_db + in (* Only use signature database if cross-function taint analysis is enabled *) - let final_signature_db, relevant_graph = - if taint_inst.options.taint_intrafile then ( + let final_signature_db, relevant_graph, needs_function_match_pass = + match interfile_rule_context with + | Some context -> + (Some context.signature_db, Some context.call_graph, true) + | None when taint_inst.options.taint_intrafile -> ( (* Detect object initialization mappings for this file *) let object_mappings = Taint_signature_extractor.detect_object_initialization ast @@ -483,7 +1177,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook in let fdef_il = AST_to_IL.function_definition taint_inst.lang - ~ctx:!ctx fdef + ~ctx fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in let info = @@ -530,7 +1224,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook [] in let fdef_il = - AST_to_IL.function_definition taint_inst.lang ~ctx:!ctx + AST_to_IL.function_definition taint_inst.lang ~ctx fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in @@ -636,7 +1330,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) 
match_hook if info.is_lambda_assignment then updated_db else begin let _flow, fdef_effects, _mapping = - check_fundef taint_inst info.name !ctx ~glob_env + check_fundef taint_inst info.name ctx ~glob_env ?class_name:info.class_name_str ~signature_db:updated_db ?builtin_signature_db ?call_graph:(Some relevant_graph) info.fdef @@ -662,7 +1356,7 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook } in let fdef_il = - AST_to_IL.function_definition lang ~ctx:!ctx + AST_to_IL.function_definition lang ~ctx synthetic_fdef in let cfg = CFG_build.cfg_of_fdef fdef_il in @@ -678,41 +1372,9 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook in run_check_fundef_if_needed info updated_db | None -> - (* Single-arity path (unchanged logic) *) - let params = Tok.unbracket info.fdef.fparams in - let arity = get_arity params info lang in - let updated_db, _signature = - Taint_signature_extractor.extract_signature_with_file_context - ~arity:(Shape_and_sig.Arity_exact arity) ~db - ?builtin_signature_db taint_inst ~name:info.name - ~method_properties:info.method_properties - ~call_graph:(Some relevant_graph) info.cfg ast - in - (* For Kotlin, if the last parameter is a lambda (function type), - * also extract signature with arity-1 to handle trailing lambda syntax: - * f(a, b) vs f(a) { b } *) let updated_db = - if Lang.equal lang Lang.Kotlin && arity >= 1 then - let last_param_is_lambda = - match List.rev params with - | G.Param { G.ptype = Some { t = G.TyFun _; _ }; _ } :: _ - -> - true - | _ -> false - in - if last_param_is_lambda then - let db', _ = - Taint_signature_extractor - .extract_signature_with_file_context - ~arity:(Shape_and_sig.Arity_exact (arity - 1)) - ~db:updated_db ?builtin_signature_db taint_inst - ~name:info.name - ~method_properties:info.method_properties - ~call_graph:(Some relevant_graph) info.cfg ast - in - db' - else updated_db - else updated_db + extract_single_arity_signatures ~lang ~ctx ~taint_inst ~ast + 
?builtin_signature_db ~call_graph:relevant_graph info db in run_check_fundef_if_needed info updated_db in @@ -746,8 +1408,8 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook (* Skip the "remaining functions" phase entirely - if a function isn't in the relevant subgraph, we don't need to analyze it *) let final_signature_db = signature_db_after_order in - (Some final_signature_db, Some relevant_graph)) - else ( + (Some final_signature_db, Some relevant_graph, false)) + | None -> (* Cross-function taint analysis disabled: use main branch behavior *) Visit_function_defs.visit (fun opt_ent fdef -> @@ -776,13 +1438,17 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook --------------------" (IL.str_of_name name)); let _flow, fdef_effects, _mapping = - check_fundef taint_inst name !ctx ~glob_env + check_fundef taint_inst name ctx ~glob_env ?builtin_signature_db fdef in record_matches fdef_effects) ast; - (None, None)) + (None, None, false) in + if needs_function_match_pass then + check_function_defs_for_matches ~lang ~ctx ~taint_inst ~glob_env + ?signature_db:final_signature_db ?builtin_signature_db + ?call_graph:relevant_graph ~record_matches ast; (* Check execution of statements during object initialization. 
*) Visit_class_defs.visit @@ -799,7 +1465,8 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook let stmts = AST_to_IL.stmt taint_inst.lang fields in let cfg, lambdas = CFG_build.cfg_of_stmts stmts in let init_effects, _mapping = - Dataflow_tainting.fixpoint taint_inst ?name:opt_name + Dataflow_tainting.fixpoint taint_inst ~in_env:imported_glob_env + ?name:opt_name ?signature_db:final_signature_db ?builtin_signature_db ?call_graph:relevant_graph IL.{ params = []; cfg; lambdas } @@ -816,11 +1483,11 @@ let check_rule per_file_formula_cache (rule : R.taint_rule) match_hook let xs = AST_to_IL.stmt taint_inst.lang (G.stmt1 ast) in let cfg, lambdas = CFG_build.cfg_of_stmts xs in let top_level_name = - let fake_tok = Tok.unsafe_fake_tok "" in - IL.{ ident = ("", fake_tok); sid = G.SId.unsafe_default; id_info = G.empty_id_info () } + Graph_from_AST.top_level_il_name ~current_file:file ast in let top_effects, _mapping = - Dataflow_tainting.fixpoint taint_inst ~name:top_level_name + Dataflow_tainting.fixpoint taint_inst ~in_env:imported_glob_env + ~name:top_level_name ?signature_db:final_signature_db ?builtin_signature_db ?call_graph:relevant_graph IL.{ params = []; cfg; lambdas } @@ -864,6 +1531,7 @@ let check_rules ~match_hook R.rule -> (unit -> Core_profiling.rule_profiling Core_result.match_result option) -> Core_profiling.rule_profiling Core_result.match_result option) + ?(interfile_context : interfile_context option) (rules : R.taint_rule list) (xconf : Match_env.xconfig) (xtarget : Xtarget.t) : Core_profiling.rule_profiling Core_result.match_result list = @@ -871,11 +1539,11 @@ let check_rules ~match_hook (Dataflow_tainting.reset_constructor (); match rules with | rule :: _ -> ( - (* Check if any rule has taint_intrafile enabled *) let has_taint_intrafile = - match rule.options with - | Some opts -> opts.taint_intrafile - | None -> xconf.config.taint_intrafile + let xconf_rule = + Match_env.adjust_xconfig_with_rule_options xconf rule.R.options + 
in + xconf_rule.config.taint_intrafile in if has_taint_intrafile then (* Warn for unsupported languages *) @@ -923,19 +1591,27 @@ let check_rules ~match_hook Formula_cache.mk_specialized_formula_cache rules in + let has_rule_interfile_context rule = + match interfile_context with + | Some contexts -> Rule_ID.Map.mem (fst rule.R.id) contexts + | None -> false + in + (* Collect all languages that have rules with taint_intrafile enabled *) let langs_needing_call_graph = rules |> List.fold_left (fun acc rule -> - let xconf_rule = - Match_env.adjust_xconfig_with_rule_options xconf rule.R.options - in - if xconf_rule.config.taint_intrafile then - match Xlang.to_lang rule.R.target_analyzer with - | Ok lang -> LangSet.add lang acc - | Error _ -> acc - else acc) + let xconf_rule = + Match_env.adjust_xconfig_with_rule_options xconf rule.R.options + in + if xconf_rule.config.taint_intrafile + && not (has_rule_interfile_context rule) + then + match Xlang.to_lang rule.R.target_analyzer with + | Ok lang -> LangSet.add lang acc + | Error _ -> acc + else acc) LangSet.empty in @@ -967,12 +1643,24 @@ let check_rules ~match_hook let results = rules |> List.filter_map (fun rule -> - let xconf = + let rule_xconf = Match_env.adjust_xconfig_with_rule_options xconf rule.R.options in - (* Only pass call graph and builtin db if taint_intrafile is enabled for this rule *) + let rule_interfile_context = + match (rule_xconf.config.interfile, interfile_context) with + | true, Some contexts -> + Rule_ID.Map.find_opt (fst rule.R.id) contexts + | _ -> None + in + let needs_shared_intrafile_setup = + rule_xconf.config.taint_intrafile + && Option.is_none rule_interfile_context + in + (* Only pass shared per-file intrafile state to rules that still + * need it. Interfile rules with a prebuilt project context should + * reuse that context directly. 
*) let rule_shared_call_graph, rule_builtin_signature_db = - if xconf.config.taint_intrafile then + if needs_shared_intrafile_setup then match Xlang.to_lang rule.R.target_analyzer with | Ok lang -> ( LangMap.find_opt lang call_graph_by_lang, @@ -993,7 +1681,9 @@ let check_rules ~match_hook let report, _signature_db = check_rule per_file_formula_cache rule match_hook ?builtin_signature_db:rule_builtin_signature_db - ~shared_call_graph:rule_shared_call_graph xconf xtarget + ?interfile_rule_context:rule_interfile_context + ~shared_call_graph:rule_shared_call_graph rule_xconf + xtarget in report))) in diff --git a/src/engine/Match_tainting_mode.mli b/src/engine/Match_tainting_mode.mli index b2869655c..341685e72 100644 --- a/src/engine/Match_tainting_mode.mli +++ b/src/engine/Match_tainting_mode.mli @@ -1,3 +1,14 @@ +module CanonicalBindingMap : Map.S with type key = string list + +type interfile_rule_context = { + signature_db : Shape_and_sig.signature_database; + builtin_signature_db : Shape_and_sig.builtin_signature_database; + call_graph : Call_graph.G.t; + imported_global_index : Shape_and_sig.Shape.cell list CanonicalBindingMap.t; +} + +type interfile_context = interfile_rule_context Rule_ID.Map.t + val hook_setup_hook_function_taint_signature : (Rule.taint_rule -> Taint_rule_inst.t -> Xtarget.t -> unit) option ref (** This is used for intra-file inter-procedural taint-tracking, and the idea is @@ -14,6 +25,11 @@ val hook_setup_hook_function_taint_signature : * (or we could infer a signature for them too...). 
*) +val build_interfile_contexts : + Match_env.xconfig -> + (Rule.taint_rule * Xtarget.t list) list -> + interfile_context + val check_fundef : Taint_rule_inst.t -> IL.name (** entity being analyzed *) -> @@ -38,6 +54,7 @@ val check_rule : (Core_match.t list -> Core_match.t list) -> ?signature_db:Shape_and_sig.signature_database -> ?builtin_signature_db:Shape_and_sig.builtin_signature_database -> + ?interfile_rule_context:interfile_rule_context -> ?shared_call_graph:(Call_graph.G.t * (AST_generic.name * AST_generic.name) list) option -> Match_env.xconfig -> Xtarget.t -> @@ -52,6 +69,7 @@ val check_rules : (Rule.rule -> (unit -> Core_profiling.rule_profiling Core_result.match_result option) -> Core_profiling.rule_profiling Core_result.match_result option) -> + ?interfile_context:interfile_context -> Rule.taint_rule list -> Match_env.xconfig -> Xtarget.t -> diff --git a/src/engine/tests/Unit_engine.ml b/src/engine/tests/Unit_engine.ml index ad8ea81e2..bb660c401 100644 --- a/src/engine/tests/Unit_engine.ml +++ b/src/engine/tests/Unit_engine.ml @@ -612,6 +612,3720 @@ let filter_irrelevant_rules_tests () = (* Tainting tests *) (*****************************************************************************) +let interfile_taint_tests () = + let write_interfile_rule ?(extra_sections = "") rule_file = + UFile.write_file rule_file + ({|rules: +- id: interfile-python + languages: + - python + severity: ERROR + message: Interfile taint + mode: taint + options: + interfile: true + pattern-sources: + - pattern: tainted(...) + pattern-sinks: + - pattern: sink(...) 
+|} + ^ extra_sections) + in + let parse_taint_rule rule_file = + match Parse_rule.parse rule_file |> Result.get_ok with + | [ ({ R.mode = `Taint _ as mode; _ } as rule) ] -> { rule with mode } + | _ -> Alcotest.fail "expected a single taint rule" + in + let mk_interfile_checker ?(rule_extra_sections = "") root files = + let _ = Domain.DLS.set cache (Hashtbl.create 101) in + let rule_file = root / "rule.yaml" in + write_interfile_rule ~extra_sections:rule_extra_sections rule_file; + let rule = parse_taint_rule rule_file in + let xlang = Xlang.of_lang Lang.Python in + let xtarget_of_file = Test_engine.xtarget_of_file xlang in + let xtargets = files |> List_.map xtarget_of_file in + let xconf = + { + Match_env.default_xconfig with + filter_irrelevant_rules = Match_env.PrefilterWithCache cache; + } + in + let interfile_context = + Match_tainting_mode.build_interfile_contexts xconf [ (rule, xtargets) ] + in + fun file -> + let xtarget = xtarget_of_file file in + Match_rules.check ~match_hook:(fun _pm -> ()) ~timeout:None + ~interfile_context xconf [ (rule :> R.rule) ] xtarget + in + let match_locations (check_file : Fpath.t -> Core_result.matches_single_file) + file = + let (res : Core_result.matches_single_file) = check_file file in + res.matches |> List_.map TCM.location_of_pm + in + let check_single_match ~name ~file ~line matches = + Alcotest.(check int) (spf "one finding for %s" name) 1 (List.length matches); + let actual_file, actual_line = List.hd matches in + Alcotest.(check bool) (spf "match is reported on %s" name) true + (Fpath.equal file actual_file); + Alcotest.(check int) (spf "match line is correct for %s" name) line + actual_line + in + let check_match_lines ~name ~file ~lines matches = + Alcotest.(check int) + (spf "expected number of findings for %s" name) + (List.length lines) (List.length matches); + let actual_lines = + matches + |> List_.map (fun (actual_file, actual_line) -> + Alcotest.(check bool) (spf "match is reported on %s" name) true + 
(Fpath.equal file actual_file); + actual_line) + |> List.sort compare + in + Alcotest.(check (list int)) (spf "match lines are correct for %s" name) + (List.sort compare lines) actual_lines + in + let check_no_matches ~name matches = + Alcotest.(check int) (spf "no findings for %s" name) 0 (List.length matches) + in + [ + t "interfile taint across direct imported sources" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file app_file + {|from source import source + +def run(): + sink(source()) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the direct imported source sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across imported module-level values" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from source import payload + +def run(): + sink(payload) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the imported module-level value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across directly aliased imported module-level values" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from source import payload as imported_payload + +def run(): + sink(imported_payload) # ruleid: interfile-python +|}; 
+ let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match + ~name:"the directly aliased imported module-level value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint does not overtaint imported safe module-level values" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +safe_payload = "safe" +|}; + UFile.write_file app_file + {|from source import safe_payload + +def run(): + sink(safe_payload) +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the imported safe module-level value sink" + app_matches)); + t "interfile taint across python imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helpers.py" in + let app_file = root / "app.py" in + let safe_app_file = root / "safe_app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file app_file + {|import helpers + +def run(): + sink(helpers.helper()) # ruleid: interfile-python +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file safe_app_file + {|import helpers + +def run(): + sink("safe") +|}; + let check_file = + mk_interfile_checker root + [ source_file; helper_file; app_file; safe_app_file ] + in + let app_matches = + let res = check_file app_file in + res.matches |> List_.map TCM.location_of_pm + in + let safe_app_matches = + let res = check_file safe_app_file in + res.matches |> List_.map TCM.location_of_pm + in + Alcotest.(check int) "one interfile finding" 1 + (List.length app_matches); + let actual_file, actual_line = List.hd 
app_matches in + Alcotest.(check bool) "match is reported on the sink file" true + (Fpath.equal app_file actual_file); + Alcotest.(check int) "match is reported on the sink line" 4 + actual_line; + Alcotest.(check int) "no finding in safe file" 0 + (List.length safe_app_matches))); + t "interfile taint across local imports inside imported helpers" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(): + from source import source + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match + ~name:"the helper sink reached through an upstream local import" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across local module imports inside imported helpers" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(): + import source as source_mod + return source_mod.source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match + ~name: + "the helper sink reached through an upstream local module import" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across local 
relative imports inside imported helpers" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = pkg_dir / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(): + from .source import source + return source() +|}; + UFile.write_file app_file + {|from .helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match + ~name: + "the helper sink reached through an upstream local relative import" + ~file:app_file ~line:4 app_matches)); + t "interfile taint is independent of scan input order" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ app_file; helper_file; source_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the reversed scan-order sink" ~file:app_file + ~line:4 app_matches)); + t "interfile taint across python package imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "src" / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / 
"__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = pkg_dir / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from pkg.source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg.helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = + let res = check_file app_file in + res.matches |> List_.map TCM.location_of_pm + in + Alcotest.(check int) "one package interfile finding" 1 + (List.length app_matches); + let actual_file, actual_line = List.hd app_matches in + Alcotest.(check bool) "match is reported on the package sink file" + true (Fpath.equal app_file actual_file); + Alcotest.(check int) "match is reported on the package sink line" 4 + actual_line)); + t "interfile taint across aliased module imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|import helper as helper_mod + +def run(): + sink(helper_mod.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = + let res = check_file app_file in + res.matches |> List_.map TCM.location_of_pm + in + Alcotest.(check int) "one aliased module interfile finding" 1 + (List.length app_matches); + let actual_file, actual_line = List.hd app_matches in + Alcotest.(check bool) + "match is reported on the aliased module sink file" true + 
(Fpath.equal app_file actual_file); + Alcotest.(check int) + "match is reported on the aliased module sink line" 4 actual_line)); + t "interfile taint across aliased function imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper as run_helper + +def run(): + sink(run_helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = + let res = check_file app_file in + res.matches |> List_.map TCM.location_of_pm + in + Alcotest.(check int) "one aliased function interfile finding" 1 + (List.length app_matches); + let actual_file, actual_line = List.hd app_matches in + Alcotest.(check bool) + "match is reported on the aliased function sink file" true + (Fpath.equal app_file actual_file); + Alcotest.(check int) + "match is reported on the aliased function sink line" 4 + actual_line)); + t "interfile taint across aliased upstream imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source as source_fn + +def helper(): + return source_fn() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased 
upstream helper sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across python relative imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = pkg_dir / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from .helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = + let res = check_file app_file in + res.matches |> List_.map TCM.location_of_pm + in + Alcotest.(check int) "one relative import interfile finding" 1 + (List.length app_matches); + let actual_file, actual_line = List.hd app_matches in + Alcotest.(check bool) + "match is reported on the relative import sink file" true + (Fpath.equal app_file actual_file); + Alcotest.(check int) + "match is reported on the relative import sink line" 4 actual_line)); + t "interfile taint across python parent relative imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + let subpkg_dir = pkg_dir / "subpkg" in + UFile.make_directories subpkg_dir; + let pkg_init = pkg_dir / "__init__.py" in + let subpkg_init = subpkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = subpkg_dir / "helper.py" in + let app_file = subpkg_dir / "app.py" in + UFile.write_file pkg_init ""; + UFile.write_file subpkg_init ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from ..source import source + 
+def helper(): + return source() +|}; + UFile.write_file app_file + {|from .helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ pkg_init; subpkg_init; source_file; helper_file; app_file ] + in + let app_matches = + let res = check_file app_file in + res.matches |> List_.map TCM.location_of_pm + in + Alcotest.(check int) + "one parent relative import interfile finding" 1 + (List.length app_matches); + let actual_file, actual_line = List.hd app_matches in + Alcotest.(check bool) + "match is reported on the parent relative import sink file" true + (Fpath.equal app_file actual_file); + Alcotest.(check int) + "match is reported on the parent relative import sink line" 4 + actual_line)); + t "interfile taint across aliased relative function imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = pkg_dir / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from .helper import helper as run_helper + +def run(): + sink(run_helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased relative helper sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across package re-exports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file 
= pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .helper import helper +|}; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + (* app.py imports helper through the re-export in pkg/__init__.py; expect exactly one finding, on line 4 of app.py. Uses the shared assertion helpers like the neighbouring cases. *) + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the package re-export sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across package re-exported module-level values" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .source import payload +|}; + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from pkg import payload + +def run(): + sink(payload) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ init_file; source_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the package re-exported value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased package re-exported module-level values" + (fun () -> + Testutil_files.with_tempdir
~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .source import payload as exported_payload +|}; + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from pkg import exported_payload + +def run(): + sink(exported_payload) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ init_file; source_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased package re-exported value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased symbol package re-exports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .helper import helper +|}; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|import pkg as pkg_mod + +def run(): + sink(pkg_mod.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased package re-export sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across package submodule imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let 
source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg import helper + +def run(): + sink(helper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the package submodule sink" ~file:app_file + ~line:4 app_matches)); + t "interfile taint across aliased package submodule imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg import helper as helper_mod + +def run(): + sink(helper_mod.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased package submodule sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased dotted package module imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let 
helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|import pkg.helper as helper_mod + +def run(): + sink(helper_mod.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased dotted package module sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased package re-exports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .helper import helper as exported_helper +|}; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg import exported_helper as run_helper + +def run(): + sink(run_helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased package re-exported sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased dotted package symbol imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir /
"source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg.helper import helper as run_helper + +def run(): + sink(run_helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased dotted package helper sink" + ~file:app_file ~line:4 app_matches)); + (* A static method on an imported class returns source(); calling it as Helper.helper() inside sink must yield exactly one finding, on line 4 of app.py. Uses the shared assertion helpers like the neighbouring cases. *) + t "interfile taint across imported class methods" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + @staticmethod + def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import Helper + +def run(): + sink(Helper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the imported class-method sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across imported classmethod parameters" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file =
root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + @classmethod + def run(cls, value): + sink(value) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner.run(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + (* NOTE(review): the checker is run on app.py, yet the single expected finding is located at the sink in helper.py (line 4); confirm that a finding for a callee sink is meant to surface when checking the caller. *) + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the imported classmethod-parameter sink" + ~file:helper_file ~line:4 helper_matches)); + (* An instance method on an imported class returns source(); calling it as Helper().helper() inside sink must yield exactly one finding, on line 4 of app.py. Uses the shared assertion helpers like the neighbouring cases. *) + t "interfile taint across imported instance methods" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + def helper(self): + return source() +|}; + UFile.write_file app_file + {|from helper import Helper + +def run(): + sink(Helper().helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the imported instance-method sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint does not overtaint safe instance methods from mixed classes" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let
source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + def helper(self): + return source() + + def safe_helper(self): + return "safe" +|}; + UFile.write_file app_file + {|from helper import Helper + +def run(): + sink(Helper().safe_helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches + ~name:"the safe instance method imported from a mixed-taint class" + app_matches)); + t "interfile taint across aliased imported instance methods" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + def helper(self): + return source() +|}; + UFile.write_file app_file + {|from helper import Helper as ImportedHelper + +def run(): + sink(ImportedHelper().helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased imported instance-method sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased imported class methods" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + @staticmethod + def 
helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import Helper as ImportedHelper + +def run(): + sink(ImportedHelper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased imported class-method sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across module-qualified imported class methods" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + @staticmethod + def helper(): + return source() +|}; + UFile.write_file app_file + {|import helper + +def run(): + sink(helper.Helper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match + ~name:"the module-qualified imported class-method sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across module-qualified imported instance methods" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +class Helper: + def helper(self): + return source() +|}; + UFile.write_file app_file + {|import helper + +def run(): + sink(helper.Helper().helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = 
match_locations check_file app_file in + check_single_match + ~name:"the module-qualified imported instance-method sink" + ~file:app_file ~line:4 app_matches)); + (* pkg1/util.py and pkg2/util.py both define helper, but only the pkg2 one returns source(); importing from pkg2.util must yield exactly one finding, on line 4 of app.py. NOTE(review): pkg1_init and pkg2_init are written to disk but not listed in the checker inputs; confirm this is deliberate. *) + t "interfile taint resolves the correct package when sibling modules share a basename" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let pkg1_dir = root / "pkg1" in + let pkg2_dir = root / "pkg2" in + let pkg1_init = pkg1_dir / "__init__.py" in + let pkg2_init = pkg2_dir / "__init__.py" in + let pkg1_util = pkg1_dir / "util.py" in + let pkg2_util = pkg2_dir / "util.py" in + let app_file = root / "app.py" in + UFile.make_directories pkg1_dir; + UFile.make_directories pkg2_dir; + UFile.write_file pkg1_init ""; + UFile.write_file pkg2_init ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file pkg1_util + {|def helper(): + return "safe" +|}; + UFile.write_file pkg2_util + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg2.util import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ source_file; pkg1_util; pkg2_util; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the imported package helper sink" + ~file:app_file ~line:4 app_matches)); + (* Mirror case: importing helper from pkg1.util, whose helper returns a constant, must yield no finding even though pkg2/util.py returns source(). *) + t "interfile taint does not overtaint sibling packages that share a basename" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let pkg1_dir = root / "pkg1" in + let pkg2_dir = root / "pkg2" in + let pkg1_init = pkg1_dir /
"__init__.py" in + let pkg2_init = pkg2_dir / "__init__.py" in + let pkg1_util = pkg1_dir / "util.py" in + let pkg2_util = pkg2_dir / "util.py" in + let app_file = root / "app.py" in + UFile.make_directories pkg1_dir; + UFile.make_directories pkg2_dir; + UFile.write_file pkg1_init ""; + UFile.write_file pkg2_init ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file pkg1_util + {|def helper(): + return "safe" +|}; + UFile.write_file pkg2_util + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg1.util import helper + +def run(): + sink(helper()) +|}; + let check_file = + mk_interfile_checker root + [ source_file; pkg1_util; pkg2_util; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches + ~name:"the safe sibling package helper sink" app_matches)); + t "interfile taint does not overtaint sibling packages with relative imports" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg1_dir = root / "pkg1" in + let pkg2_dir = root / "pkg2" in + let pkg1_init = pkg1_dir / "__init__.py" in + let pkg2_init = pkg2_dir / "__init__.py" in + let pkg1_util = pkg1_dir / "util.py" in + let pkg2_util = pkg2_dir / "util.py" in + let pkg1_app = pkg1_dir / "app.py" in + UFile.make_directories pkg1_dir; + UFile.make_directories pkg2_dir; + UFile.write_file pkg1_init ""; + UFile.write_file pkg2_init ""; + UFile.write_file pkg1_util + {|def helper(): + return "safe" +|}; + UFile.write_file pkg2_util + {|def helper(): + return tainted() +|}; + UFile.write_file pkg1_app + {|from .util import helper + +def run(): + sink(helper()) +|}; + let check_file = + mk_interfile_checker root + [ pkg1_init; pkg2_init; pkg1_util; pkg2_util; pkg1_app ] + in + let app_matches = + let res = check_file pkg1_app in + res.matches |> List_.map TCM.location_of_pm +
in + Alcotest.(check int) + "no interfile finding through the safe relative import helper" 0 + (List.length app_matches))); + t "interfile taint across dotted package module imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|import pkg.helper + +def run(): + sink(pkg.helper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the dotted package module sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across multi-hop python imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let relay_file = root / "relay.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file relay_file + {|from helper import helper + +def relay(): + return helper() +|}; + UFile.write_file app_file + {|from relay import relay + +def run(): + sink(relay()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ source_file; helper_file; relay_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the multi-hop sink" ~file:app_file ~line:4 + 
app_matches)); + t "interfile taint reports all sink files that share a tainted helper" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_one_file = root / "app_one.py" in + let app_two_file = root / "app_two.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_one_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + UFile.write_file app_two_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ source_file; helper_file; app_one_file; app_two_file ] + in + let app_one_matches = match_locations check_file app_one_file in + let app_two_matches = match_locations check_file app_two_file in + check_single_match ~name:"the first shared-helper sink" + ~file:app_one_file ~line:4 app_one_matches; + check_single_match ~name:"the second shared-helper sink" + ~file:app_two_file ~line:4 app_two_matches)); + t "interfile taint reports multiple sink locations in one file" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the repeated sink file" 
+ ~file:app_file ~lines:[ 4; 5 ] app_matches)); + t "interfile taint reaches sinks at module top level" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the top-level sink" ~file:app_file + ~line:3 app_matches)); + t "interfile taint does not overtaint sibling helpers defined in the same module" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() + +def safe_helper(): + return "safe" +|}; + UFile.write_file app_file + {|from helper import safe_helper + +def run(): + sink(safe_helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches + ~name:"the safe helper imported from a mixed-taint module" app_matches)); + t "interfile taint resolves aliased imports even when the original name is shadowed locally" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; 
+ UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper as imported_helper + +def helper(): + return "safe" + +def run(): + sink(helper()) + sink(imported_helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match + ~name:"the aliased helper sink after local shadowing" ~file:app_file + ~line:8 app_matches)); + t "interfile taint does not overtaint when a local definition shadows an imported helper" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def helper(): + return "safe" + +def run(): + sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the locally shadowed helper sink" + app_matches)); + t "interfile taint does not overtaint when an imported helper is shadowed by a parameter" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(helper): + sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; 
helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the parameter-shadowed helper sink" + app_matches)); + t "interfile taint does not overtaint when an imported helper is shadowed in local scope" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def safe_helper(): + return "safe" + +def run(): + helper = safe_helper + sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the locally rebound helper sink" + app_matches)); + t "interfile taint does not overtaint when an imported helper is rebound" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def safe_helper(): + return "safe" + +helper = safe_helper + +def run(): + sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the rebound imported helper sink" + app_matches)); + t "interfile taint does not overtaint when an imported module alias is rebound" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / 
"source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|import helper as helper_mod + +class SafeModule: + @staticmethod + def helper(): + return "safe" + +helper_mod = SafeModule + +def run(): + sink(helper_mod.helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the rebound imported module alias sink" + app_matches)); + t "interfile taint does not report when upstream modules are outside the scan inputs" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) +|}; + let check_file = mk_interfile_checker root [ app_file ] in + let app_matches = match_locations check_file app_file in + check_no_matches + ~name:"the sink with upstream modules excluded from the scan inputs" + app_matches)); + t "interfile taint does not report when the source module is outside the scan inputs" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + 
sink(helper()) +|}; + let check_file = mk_interfile_checker root [ helper_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_no_matches + ~name:"the sink with the source module excluded from the scan inputs" + app_matches)); + t "interfile taint across relative module imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = pkg_dir / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from . import helper + +def run(): + sink(helper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the relative module import sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across wildcard imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import * + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the wildcard import sink" ~file:app_file + ~line:4 app_matches)); + t "interfile 
taint across wildcard-imported module-level values" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from source import * + +def run(): + sink(payload) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the wildcard-imported value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across wildcard package re-exports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = root / "app.py" in + UFile.make_directories pkg_dir; + UFile.write_file init_file + {|from .helper import helper +|}; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from pkg import * + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the wildcard package re-export sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across wildcard package re-exported module-level values" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let app_file = root / "app.py" in + UFile.make_directories pkg_dir; + UFile.write_file init_file + {|from .source import payload +|}; + 
UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from pkg import * + +def run(): + sink(payload) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ init_file; source_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the wildcard package re-exported value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint does not overtaint wildcard-imported safe helpers from mixed modules" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() + +def safe_helper(): + return "safe" +|}; + UFile.write_file app_file + {|from helper import * + +def run(): + sink(safe_helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the wildcard-imported safe helper sink" + app_matches)); + t "interfile taint does not overtaint wildcard-imported safe module-level values from mixed modules" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +safe_payload = "safe" +|}; + UFile.write_file app_file + {|from source import * + +def run(): + sink(safe_payload) +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the wildcard-imported safe value sink" + app_matches)); + t "interfile taint only reports sinks before a wildcard-imported module-level value is rebound" + (fun () -> 
+ Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from source import * + +sink(payload) # ruleid: interfile-python + +payload = "safe" + +sink(payload) +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the rebound wildcard-imported value sinks" + ~file:app_file ~lines:[ 3 ] app_matches)); + t "interfile taint across local symbol imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file app_file + {|def run(): + from source import source + sink(source()) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the local symbol import sink" + ~file:app_file ~line:3 app_matches)); + t "interfile taint across local imported module-level values" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|def run(): + from source import payload + sink(payload) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the local imported value sink" + ~file:app_file ~line:3 app_matches)); + t "interfile taint across local aliased symbol imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file 
= root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file app_file + {|def run(): + from source import source as imported_source + sink(imported_source()) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the local aliased symbol import sink" + ~file:app_file ~line:3 app_matches)); + t "interfile taint across local aliased imported module-level values" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|def run(): + from source import payload as imported_payload + sink(imported_payload) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the local aliased imported value sink" + ~file:app_file ~line:3 app_matches)); + t "interfile taint across local module imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|def run(): + import helper + sink(helper.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the local module import sink" + ~file:app_file ~line:3 app_matches)); + t "interfile taint across local aliased module imports" (fun () -> + 
Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|def run(): + import helper as helper_mod + sink(helper_mod.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the local aliased module import sink" + ~file:app_file ~line:3 app_matches)); + t "interfile taint only reports local-imported helper sinks before a rebind" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|def safe_helper(): + return "safe" + +def run(): + from helper import helper + sink(helper()) # ruleid: interfile-python + helper = safe_helper + sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the locally rebound helper sinks" + ~file:app_file ~lines:[ 6 ] app_matches)); + t "interfile taint only reports local-imported module-level value sinks before a rebind" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|def run(): + from source import payload 
+ sink(payload) # ruleid: interfile-python + payload = "safe" + sink(payload) +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the locally rebound imported-value sinks" + ~file:app_file ~lines:[ 3 ] app_matches)); + t "interfile taint across imported module-level values via module attributes" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|import source + +def run(): + sink(source.payload) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the imported module attribute sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint across aliased imported module-level values" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|import source as source_mod + +def run(): + sink(source_mod.payload) # ruleid: interfile-python +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased imported module value sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint keeps imported module attributes symbol-specific" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +safe_payload = "safe" +|}; + UFile.write_file app_file + {|import source + +def run(): + sink(source.safe_payload) 
+|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the safe imported module attribute sink" + app_matches)); + t "interfile taint across package-imported module-level attributes" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .source import payload +|}; + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|import pkg + +def run(): + sink(pkg.payload) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ init_file; source_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the package-imported value attribute sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint keeps package-imported module-level attributes symbol-specific" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let app_file = root / "app.py" in + UFile.write_file init_file + {|from .source import payload, safe_payload +|}; + UFile.write_file source_file + {|payload = tainted() +safe_payload = "safe" +|}; + UFile.write_file app_file + {|import pkg + +def run(): + sink(pkg.safe_payload) +|}; + let check_file = + mk_interfile_checker root [ init_file; source_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the safe package-imported value attribute sink" + app_matches)); + t "interfile taint keeps same-named imported symbols distinct across modules" + (fun () -> + 
Testutil_files.with_tempdir ~chdir:true (fun root -> + let tainted_source_file = root / "tainted_source.py" in + let safe_source_file = root / "safe_source.py" in + let app_file = root / "app.py" in + UFile.write_file tainted_source_file + {|def build(): + return tainted() +|}; + UFile.write_file safe_source_file + {|def build(): + return "safe" +|}; + UFile.write_file app_file + {|from tainted_source import build as tainted_build +from safe_source import build as safe_build + +def run(): + sink(safe_build()) + sink(tainted_build()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ tainted_source_file; safe_source_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the same-named imported symbol sink" + ~file:app_file ~line:6 app_matches)); + t "interfile taint keeps mixed named imports symbol-specific" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() + +def safe_helper(): + return "safe" +|}; + UFile.write_file app_file + {|from helper import helper, safe_helper + +def run(): + sink(safe_helper()) + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the mixed named-import sink" + ~file:app_file ~line:5 app_matches)); + t "interfile taint only reports sinks before an imported helper is rebound" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file 
source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +sink(helper()) # ruleid: interfile-python + +def safe_helper(): + return "safe" + +helper = safe_helper + +sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the rebound helper sinks" + ~file:app_file ~lines:[ 3 ] app_matches)); + t "interfile taint only reports sinks before an imported module alias is rebound" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|import helper as helper_mod + +sink(helper_mod.helper()) # ruleid: interfile-python + +class SafeModule: + @staticmethod + def helper(): + return "safe" + +helper_mod = SafeModule + +sink(helper_mod.helper()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the rebound module-alias sinks" + ~file:app_file ~lines:[ 3 ] app_matches)); + t "interfile taint only reports sinks before an imported module-level value is rebound" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from source import payload + +sink(payload) # ruleid: interfile-python + +payload = "safe" + +sink(payload) +|}; + let check_file = 
mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_match_lines ~name:"the rebound imported-value sinks" + ~file:app_file ~lines:[ 3 ] app_matches)); + t "interfile taint does not overtaint when an imported module-level value is shadowed by a parameter" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|payload = tainted() +|}; + UFile.write_file app_file + {|from source import payload + +def run(payload): + sink(payload) +|}; + let check_file = mk_interfile_checker root [ source_file; app_file ] in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the parameter-shadowed imported-value sink" + app_matches)); + t "interfile taint across aliased relative module imports" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg_dir = root / "pkg" in + UFile.make_directories pkg_dir; + let init_file = pkg_dir / "__init__.py" in + let source_file = pkg_dir / "source.py" in + let helper_file = pkg_dir / "helper.py" in + let app_file = pkg_dir / "app.py" in + UFile.write_file init_file ""; + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from .source import source + +def helper(): + return source() +|}; + UFile.write_file app_file + {|from . 
import helper as helper_mod + +def run(): + sink(helper_mod.helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ init_file; source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the aliased relative module sink" + ~file:app_file ~line:4 app_matches)); + t "interfile taint does not overtaint cyclic imports without a tainted path" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let helper_file = root / "helper.py" in + let relay_file = root / "relay.py" in + let app_file = root / "app.py" in + UFile.write_file helper_file + {|from relay import relay + +def helper(): + return relay() + +def noop(): + return "safe" +|}; + UFile.write_file relay_file + {|from helper import noop + +def relay(): + noop() + return "safe" +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) +|}; + let check_file = + mk_interfile_checker root [ helper_file; relay_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches ~name:"the safe cyclic-import sink" app_matches)); + t "interfile taint handles cyclic imports without losing the tainted flow" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let relay_file = root / "relay.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from relay import relay + +def helper(): + return relay() + +def noop(): + return "safe" +|}; + UFile.write_file relay_file + {|from helper import noop +from source import source + +def relay(): + noop() + return source() +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) # ruleid: interfile-python +|}; + let check_file = + mk_interfile_checker root + [ 
source_file; helper_file; relay_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match ~name:"the cyclic-import sink" ~file:app_file + ~line:4 app_matches)); + t "interfile taint across imported helper parameters" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(value): + sink(value) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match ~name:"the imported helper-parameter sink" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint across imported helper default parameters" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(value=source()): + sink(value) +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + helper() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the imported helper sink reached through a tainted default" + ~file:helper_file ~line:4 helper_matches)); + t + "interfile taint does not overtaint imported helper default parameters when callers override them" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / 
"source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def helper(value=source()): + sink(value) +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + helper("safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the imported helper sink when a tainted default is overridden" + helper_matches)); + t "interfile taint across module-qualified imported helper parameters" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(value): + sink(value) +|}; + UFile.write_file app_file + {|from source import source +import helper as helper_mod + +def run(): + helper_mod.helper(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the module-qualified imported helper-parameter sink" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint maps imported helper arguments by position" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper("safe", source()) +|}; + 
let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the imported helper sink reached through the second argument" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint keeps imported helper argument positions distinct" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(source(), "safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the imported helper sink when only a non-sunk parameter is tainted" + helper_matches)); + t "interfile taint maps imported helper keyword arguments to parameters" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(second=source(), first="safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name: + "the imported helper sink reached through a keyword-mapped argument" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint keeps 
imported helper keyword arguments parameter-specific" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(first=source(), second="safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name: + "the imported helper sink when only a non-sunk keyword argument is tainted" + helper_matches)); + t "interfile taint maps imported helper keyword argument relays" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def sink_value(danger): + sink(danger) + +def helper(value): + sink_value(danger=value) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name: + "the imported helper sink reached through a keyword relay" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint maps imported variadic helper arguments by position" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + 
return tainted() +|}; + UFile.write_file helper_file + {|def helper(*args): + sink(args[1]) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper("safe", source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the imported variadic helper sink reached through the second argument" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint maps imported keyword-only helper arguments after variadics" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(first, *args, y): + sink(first) + sink(y) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(source(), "safe", y=source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_match_lines + ~name: + "the imported variadic helper sinks reached through the positional and keyword-only arguments" + ~file:helper_file ~lines:[ 2; 3 ] helper_matches)); + t "interfile taint respects sanitizers at imported helper call sites" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let sanitizer_file = root / "sanitizer.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file sanitizer_file + {|def sanitize(value): + return "safe" +|}; + UFile.write_file helper_file + {|def helper(value): + sink(value) +|}; + UFile.write_file 
app_file + {|from helper import helper +from sanitizer import sanitize +from source import source + +def run(): + helper(sanitize(source())) +|}; + let check_file = + mk_interfile_checker + ~rule_extra_sections: + {| pattern-sanitizers: + - pattern: sanitize(...) +|} + root [ source_file; sanitizer_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the imported helper sink sanitized by the caller" + helper_matches)); + t "interfile taint respects sanitizers in imported helper returns" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|from source import source + +def sanitize(value): + return "safe" + +def helper(): + return sanitize(source()) +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + sink(helper()) +|}; + let check_file = + mk_interfile_checker + ~rule_extra_sections: + {| pattern-sanitizers: + - pattern: sanitize(...) 
+|} + root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_no_matches + ~name:"the imported helper return sanitized before the sink" + app_matches)); + t "interfile taint respects sanitizers in imported helper parameters" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def sanitize(value): + return "safe" + +def helper(value): + sink(sanitize(value)) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(source()) +|}; + let check_file = + mk_interfile_checker + ~rule_extra_sections: + {| pattern-sanitizers: + - pattern: sanitize(...) +|} + root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the imported helper-parameter sink sanitized across files" + helper_matches)); + t "interfile taint does not overtaint imported helper parameters without a tainted caller" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(value): + sink(value) +|}; + UFile.write_file app_file + {|from helper import helper + +def run(): + helper("safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the helper-parameter sink with only safe callers" + helper_matches)); + t "interfile taint reports imported helper parameters with mixed callers" + (fun () -> + 
Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(value): + sink(value) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run_safe(): + helper("safe") + +def run_tainted(): + helper(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match ~name:"the helper-parameter sink with mixed callers" + ~file:helper_file ~line:2 helper_matches)); + t "interfile taint does not overtaint when imported helper parameters are rebound" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|def helper(value): + value = "safe" + sink(value) +|}; + UFile.write_file app_file + {|from source import source +from helper import helper + +def run(): + helper(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the rebound helper-parameter sink from a tainted caller" + helper_matches)); + t "interfile taint across imported instance-method parameters" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, value): + sink(value) +|}; + 
UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner().run(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match ~name:"the imported instance-method parameter sink" + ~file:helper_file ~line:3 helper_matches)); + t "interfile taint maps imported instance-method arguments by position" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner().run("safe", source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name: + "the imported instance-method sink reached through the second argument" + ~file:helper_file ~line:3 helper_matches)); + t "interfile taint keeps imported instance-method argument positions distinct" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner().run(source(), "safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file 
app_file in + check_no_matches + ~name: + "the imported instance-method sink when only a non-sunk parameter is tainted" + helper_matches)); + t "interfile taint maps imported instance-method keyword arguments to parameters" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner().run(second=source(), first="safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name: + "the imported instance-method sink reached through a keyword-mapped argument" + ~file:helper_file ~line:3 helper_matches)); + t "interfile taint keeps imported instance-method keyword arguments parameter-specific" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, first, second): + sink(second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner().run(first=source(), second="safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name: + "the imported instance-method sink when only a non-sunk keyword argument is tainted" + helper_matches)); + t "interfile taint across stored imported instance-method 
parameters" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, value): + sink(value) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner() + runner.run(source()) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the stored imported instance-method parameter sink" + ~file:helper_file ~line:3 helper_matches)); + t "interfile taint does not report stored imported instance-method parameters with safe input" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def run(self, value): + sink(value) +|}; + UFile.write_file app_file + {|from helper import Runner + +def run(): + runner = Runner() + runner.run("safe") +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the stored imported instance-method sink with safe input" + helper_matches)); + t "interfile taint across imported constructor state" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def 
__init__(self, value): + self.value = value + + def run(self): + sink(self.value) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner(source()).run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match ~name:"the imported constructor-state sink" + ~file:helper_file ~line:6 helper_matches)); + t "interfile taint across aliased imported constructor state" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, value): + self.value = value + + def run(self): + sink(self.value) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner as ImportedRunner + +def run(): + ImportedRunner(source()).run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the aliased imported constructor-state sink" + ~file:helper_file ~line:6 helper_matches)); + t "interfile taint across module-qualified imported constructor state" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, value): + self.value = value + + def run(self): + sink(self.value) +|}; + UFile.write_file app_file + {|from source import source +import helper as helper_mod + +def run(): + helper_mod.Runner(source()).run() +|}; + let 
check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the module-qualified imported constructor-state sink" + ~file:helper_file ~line:6 helper_matches)); + t "interfile taint across stored imported constructor state" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, value): + self.value = value + + def run(self): + sink(self.value) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner(source()) + runner.run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the stored imported constructor-state sink" + ~file:helper_file ~line:6 helper_matches)); + t "interfile taint does not report stored imported constructor state with safe input" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, value): + self.value = value + + def run(self): + sink(self.value) +|}; + UFile.write_file app_file + {|from helper import Runner + +def run(): + runner = Runner("safe") + runner.run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the stored imported constructor-state sink with 
safe input" + helper_matches)); + t "interfile taint exposes stored imported constructor fields to callers" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, value): + self.value = value +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner(source()) + sink(runner.value) +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let app_matches = match_locations check_file app_file in + check_single_match + ~name:"the caller-side sink for stored imported constructor state" + ~file:app_file ~line:6 app_matches)); + t "interfile taint maps imported constructor keyword arguments to fields" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, first, second): + self.first = first + self.second = second + + def run(self): + sink(self.second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner(second=source(), first="safe") + runner.run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the imported constructor sink reached through a keyword field" + ~file:helper_file ~line:7 helper_matches)); + t "interfile taint maps imported constructor arguments by position" (fun () -> + Testutil_files.with_tempdir ~chdir:true 
(fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, first, second): + self.first = first + self.second = second + + def run(self): + sink(self.second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner("safe", source()) + runner.run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_single_match + ~name:"the imported constructor sink reached through the second positional field" + ~file:helper_file ~line:7 helper_matches)); + t "interfile taint keeps imported constructor argument positions distinct" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, first, second): + self.first = first + self.second = second + + def run(self): + sink(self.second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner(source(), "safe") + runner.run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name: + "the imported constructor sink when only a non-sunk positional field is tainted" + helper_matches)); + t "interfile taint keeps imported constructor keyword arguments field-specific" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file 
= root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, first, second): + self.first = first + self.second = second + + def run(self): + sink(self.second) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + runner = Runner(first=source(), second="safe") + runner.run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name: + "the imported constructor sink when only a non-sunk keyword field is tainted" + helper_matches)); + t "interfile taint does not overtaint overwritten imported constructor state" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let helper_file = root / "helper.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def source(): + return tainted() +|}; + UFile.write_file helper_file + {|class Runner: + def __init__(self, value): + self.value = value + + def run(self): + self.value = "safe" + sink(self.value) +|}; + UFile.write_file app_file + {|from source import source +from helper import Runner + +def run(): + Runner(source()).run() +|}; + let check_file = + mk_interfile_checker root [ source_file; helper_file; app_file ] + in + let helper_matches = match_locations check_file app_file in + check_no_matches + ~name:"the overwritten constructor-state sink from a tainted caller" + helper_matches)); + t "interfile taint across multi-hop imported parameter relays" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let source_file = root / "source.py" in + let sink_helper_file = root / "sink_helper.py" in + let relay_file = root / "relay.py" in + let app_file = root / "app.py" in + UFile.write_file source_file + {|def 
source(): + return tainted() +|}; + UFile.write_file sink_helper_file + {|def sink_value(value): + sink(value) +|}; + UFile.write_file relay_file + {|from sink_helper import sink_value + +def relay(value): + sink_value(value) +|}; + UFile.write_file app_file + {|from source import source +from relay import relay + +def run(): + relay(source()) +|}; + let check_file = + mk_interfile_checker root + [ source_file; sink_helper_file; relay_file; app_file ] + in + let sink_matches = match_locations check_file app_file in + check_single_match ~name:"the multi-hop imported relay sink" + ~file:sink_helper_file ~line:2 sink_matches)); + ] + +let interfile_project_graph_tests () = + let build_project_graph files = + let xlang = Xlang.of_lang Lang.Python in + let xtarget_of_file = Test_engine.xtarget_of_file xlang in + let project_inputs = + files + |> List_.map (fun file -> + let xtarget = xtarget_of_file file in + let ast, _ = Lazy.force xtarget.lazy_ast_and_errors in + (xtarget.path, ast, [])) + in + Graph_from_AST.build_project_call_graph ~lang:Lang.Python project_inputs + in + let count_vertices_by_name graph name = + Call_graph.G.fold_vertex + (fun vertex count -> + if String.equal (Function_id.show vertex) name then count + 1 else count) + graph 0 + in + let count_vertices graph = + Call_graph.G.fold_vertex (fun _ count -> count + 1) graph 0 + in + [ + t "project call graph keeps distinct top-level nodes per file" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let file_a = root / "a.py" in + let file_b = root / "b.py" in + UFile.write_file file_a + {|def f(): + return 1 +|}; + UFile.write_file file_b + {|def g(): + return 2 +|}; + let graph = build_project_graph [ file_a; file_b ] in + Alcotest.(check int) "one top-level node per file" 2 + (count_vertices_by_name graph ""))); + t "project call graph keeps distinct class-init nodes per file" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let file_a = root / "a.py" in + let file_b 
= root / "b.py" in + UFile.write_file file_a + {|class Widget: + data = 1 +|}; + UFile.write_file file_b + {|class Widget: + data = 2 +|}; + let graph = build_project_graph [ file_a; file_b ] in + Alcotest.(check int) "one class-init node per file" 2 + (count_vertices_by_name graph "Class:Widget"))); + t "project call graph keeps distinct function nodes per file" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let file_a = root / "a.py" in + let file_b = root / "b.py" in + UFile.write_file file_a + {|def helper(): + return 1 +|}; + UFile.write_file file_b + {|def helper(): + return 2 +|}; + let graph = build_project_graph [ file_a; file_b ] in + Alcotest.(check int) "one function node per file" 2 + (count_vertices_by_name graph "helper"))); + t "project call graph keeps same-basename module functions distinct across directories" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg1_dir = root / "pkg1" in + let pkg2_dir = root / "pkg2" in + let pkg1_util = pkg1_dir / "util.py" in + let pkg2_util = pkg2_dir / "util.py" in + UFile.make_directories pkg1_dir; + UFile.make_directories pkg2_dir; + UFile.write_file pkg1_util + {|def helper(): + return 1 +|}; + UFile.write_file pkg2_util + {|def helper(): + return 2 +|}; + let graph = build_project_graph [ pkg1_util; pkg2_util ] in + Alcotest.(check int) + "one top-level node per same-basename module" 2 + (count_vertices_by_name graph ""); + Alcotest.(check int) + "one helper node per same-basename module" 2 + (count_vertices_by_name graph "helper"))); + t "project call graph keeps distinct method nodes per file" (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let file_a = root / "a.py" in + let file_b = root / "b.py" in + UFile.write_file file_a + {|class Widget: + def helper(self): + return 1 +|}; + UFile.write_file file_b + {|class Widget: + def helper(self): + return 2 +|}; + let graph = build_project_graph [ file_a; file_b ] in + Alcotest.(check int) 
"top-level, class-init, and method nodes stay per-file" + 6 (count_vertices graph))); + t "project call graph keeps same-named methods distinct across classes in one file" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let file = root / "a.py" in + UFile.write_file file + {|class First: + def helper(self): + return 1 + +class Second: + def helper(self): + return 2 +|}; + let graph = build_project_graph [ file ] in + Alcotest.(check int) + "top-level, both class-init nodes, and both methods are preserved" + 5 (count_vertices graph))); + t "project call graph keeps same-basename class methods distinct across directories" + (fun () -> + Testutil_files.with_tempdir ~chdir:true (fun root -> + let pkg1_dir = root / "pkg1" in + let pkg2_dir = root / "pkg2" in + let pkg1_util = pkg1_dir / "util.py" in + let pkg2_util = pkg2_dir / "util.py" in + UFile.make_directories pkg1_dir; + UFile.make_directories pkg2_dir; + UFile.write_file pkg1_util + {|class Widget: + def helper(self): + return 1 +|}; + UFile.write_file pkg2_util + {|class Widget: + def helper(self): + return 2 +|}; + let graph = build_project_graph [ pkg1_util; pkg2_util ] in + Alcotest.(check int) + "top-level, class-init, and method nodes stay per same-basename file" + 6 (count_vertices graph))); + ] + let lang_tainting_tests () = let taint_tests_path = tests_path / "tainting_rules" in let lang_specs = @@ -805,6 +4519,8 @@ let tests () = lang_autofix_tests ~polyglot_pattern_path; eval_regression_tests (); filter_irrelevant_rules_tests (); + interfile_taint_tests (); + interfile_project_graph_tests (); maturity_tests (); full_rule_taint_maturity_tests (); full_rule_regression_tests (); diff --git a/src/tainting/Dataflow_tainting.ml b/src/tainting/Dataflow_tainting.ml index 2975f5ff4..993535322 100644 --- a/src/tainting/Dataflow_tainting.ml +++ b/src/tainting/Dataflow_tainting.ml @@ -633,10 +633,8 @@ let effects_of_call_func_arg fun_exp fun_shape args_taints = (S.show_shape fun_shape)); [] 
-let get_signature_for_object graph caller_node db method_name obj arity = +let get_signature_for_object graph caller_node db method_name ~call_tok arity = (* Method call: obj.method() *) - (* Use obj's token (start of call expression) to match edge labels *) - let call_tok = snd obj.ident in (* First try to look up via call graph to get the correct node with definition token *) match Call_graph.lookup_callee_from_graph graph (Option.map Function_id.of_il_name caller_node) call_tok with @@ -659,6 +657,46 @@ let try_builtin_fallback env func_name arity result = builtin_result | None -> None) +let structural_call_tok_of_fun_exp fun_exp = + match fun_exp.e with + | Fetch + { base = Var name; rev_offset = [] } + when not (Tok.is_fake (snd name.ident)) -> + Some (snd name.ident) + | Fetch { rev_offset = { o = Dot name; _ } :: _; _ } + when not (Tok.is_fake (snd name.ident)) -> + Some (snd name.ident) + | Fetch { base = VarSpecial (_, tok); rev_offset = [] } + when not (Tok.is_fake tok) -> + Some tok + | _ -> None + +let call_tok_of_fun_exp ~default_tok fun_exp = + match structural_call_tok_of_fun_exp fun_exp with + | Some tok -> tok + | None -> ( + match fun_exp.eorig with + | SameAs orig_exp -> ( + match AST_generic_helpers.ii_of_any (G.E orig_exp) with + | tok :: _ when not (Tok.is_fake tok) -> tok + | _ -> default_tok) + | Related orig_any -> ( + match AST_generic_helpers.ii_of_any orig_any with + | tok :: _ when not (Tok.is_fake tok) -> tok + | _ -> default_tok) + | NoOrig -> default_tok) + +let dotted_fetch_path ({ base; rev_offset } : IL.lval) = + let rec collect_offsets acc = function + | [] -> Some (List.rev acc) + | { IL.o = Dot name; _ } :: rest -> collect_offsets (name :: acc) rest + | _ -> None + in + match (base, List.rev rev_offset) with + | Var base_name, ((_ :: _) as offsets) -> + Option.map (fun names -> (base_name, names)) (collect_offsets [] offsets) + | _ -> None + let lookup_signature_with_object_context env fun_exp arity = Log.debug (fun m -> m 
"TAINT_SIG_LOOKUP: Looking up %s with arity %d" @@ -674,26 +712,14 @@ let lookup_signature_with_object_context env fun_exp arity = (* Try to look up via call graph using the ORIGINAL AST token position. This handles temp variables like _tmp:N which have eorig pointing to the actual callback reference in the original AST. *) - let call_tok = - match fun_exp.eorig with - | SameAs orig_exp -> - (* Use first token from original AST expression *) - (match AST_generic_helpers.ii_of_any (G.E orig_exp) with - | tok :: _ when not (Tok.is_fake tok) -> tok - | _ -> snd name.ident) - | Related orig_any -> - (* Related contains G.any, extract tokens directly *) - (match AST_generic_helpers.ii_of_any orig_any with - | tok :: _ when not (Tok.is_fake tok) -> tok - | _ -> snd name.ident) - | NoOrig -> snd name.ident - in - (match + let call_tok = call_tok_of_fun_exp ~default_tok:(snd name.ident) fun_exp in + let graph_lookup = Call_graph.lookup_callee_from_graph env.call_graph (Option.map Function_id.of_il_name env.func.name) call_tok - with + in + (match graph_lookup with | Some callee_node -> Shape_and_sig.(lookup_signature db callee_node arity) | None -> @@ -705,6 +731,46 @@ let lookup_signature_with_object_context env fun_exp arity = let func_name = fst name.ident in let result = Shape_and_sig.lookup_signature db (Function_id.of_il_name name) arity in try_builtin_fallback env func_name arity result) + | Fetch ({ base = Var _; rev_offset = _ :: _ :: _ } as lval) -> ( + match dotted_fetch_path lval with + | Some (base_name, offset_names) -> ( + let call_tok = + call_tok_of_fun_exp ~default_tok:(snd base_name.ident) fun_exp + in + match + Call_graph.lookup_callee_from_graph env.call_graph + (Option.map Function_id.of_il_name env.func.name) + call_tok + with + | Some callee_node -> + Shape_and_sig.(lookup_signature db callee_node arity) + | None -> ( + match List_.init_and_last_opt offset_names with + | Some (_, last_name) -> + let qualified_name_str = + String.concat "." 
+ (fst base_name.ident + :: List_.map (fun name -> fst name.ident) offset_names) + in + let qualified_name = + { + ident = (qualified_name_str, snd last_name.ident); + sid = last_name.sid; + id_info = last_name.id_info; + } + in + let result = + Shape_and_sig.lookup_signature db + (Function_id.of_il_name qualified_name) + arity + in + let result = + try_builtin_fallback env qualified_name_str arity result + in + try_builtin_fallback env (fst last_name.ident) arity + result + | None -> None)) + | None -> None) | Fetch { base = VarSpecial ((Self | This), self_tok); @@ -713,8 +779,10 @@ let lookup_signature_with_object_context env fun_exp arity = when Option.is_some env.class_name -> ( (* Method call on self/this: self.method() or this.method() *) (* First try to look up via call graph to get the correct fn_id *) - (* Use self_tok (start of call expression) to match edge labels *) - let call_tok = self_tok in + let call_tok = + if Tok.is_fake (snd method_name.ident) then self_tok + else snd method_name.ident + in match Call_graph.lookup_callee_from_graph env.call_graph @@ -732,7 +800,7 @@ let lookup_signature_with_object_context env fun_exp arity = env.func.name db (Function_id.of_il_name method_name) - obj + ~call_tok:(call_tok_of_fun_exp ~default_tok:(snd obj.ident) fun_exp) arity with | Some _ as result -> result @@ -758,6 +826,53 @@ let lookup_signature env fun_exp = (Option.fold ~none:"" ~some:Call_graph.show_node (Option.map Function_id.of_il_name env.func.name))); lookup_signature_with_object_context env fun_exp +let receiver_lval_for_constructor_call env lval_opt fun_exp = + let looks_like_constructor_name name = + List.mem name + (Object_initialization.get_constructor_names env.taint_inst.lang) + in + let bare_call_name_looks_like_constructor name = + (not (Object_initialization.uses_new_keyword env.taint_inst.lang)) + && String.length name > 0 + && Char.equal (Char.uppercase_ascii name.[0]) name.[0] + in + match lval_opt with + | None -> None + | Some 
receiver_lval -> + let call_tok_opt = + match fun_exp.e with + | Fetch { base = Var name; rev_offset = [] } -> + Some (call_tok_of_fun_exp ~default_tok:(snd name.ident) fun_exp) + | Fetch { base = VarSpecial ((Self | This), self_tok); rev_offset = _ } -> + Some (call_tok_of_fun_exp ~default_tok:self_tok fun_exp) + | Fetch { base = Var obj; rev_offset = _ } -> + Some (call_tok_of_fun_exp ~default_tok:(snd obj.ident) fun_exp) + | _ -> None + in + let resolved_constructor = + match (env.func.name, call_tok_opt) with + | Some caller_name, Some call_tok -> ( + match + Call_graph.lookup_callee_from_graph env.call_graph + (Some (Function_id.of_il_name caller_name)) + call_tok + with + | Some callee_node -> + looks_like_constructor_name (Call_graph.show_node callee_node) + | None -> false) + | _ -> false + in + let syntactic_fallback = + match fun_exp.e with + | Fetch { base = Var name; rev_offset = [] } -> + let callee_name = fst name.ident in + looks_like_constructor_name callee_name + || bare_call_name_looks_like_constructor callee_name + | _ -> false + in + if resolved_constructor || syntactic_fallback then Some receiver_lval + else None + (*****************************************************************************) (* Lambdas *) (*****************************************************************************) @@ -1536,7 +1651,7 @@ let check_tainted_var env (var : IL.name) : Taints.t * S.shape * Lval_env.t = 2) Are there any effects that occur within the function due to taints being input into the function body, from the calling context? 
*) -let check_function_call env fun_exp args +let check_function_call ?receiver_lval env fun_exp args (args_taints : (Taints.t * S.shape) argument list) ?(_implicit_lambda : (IL.exp * IL.function_definition) option = None) () : (Taints.t * S.shape * Lval_env.t) option = @@ -1562,7 +1677,7 @@ let check_function_call env fun_exp args in let* call_effects = Sig_inst.instantiate_function_signature env.lval_env fun_sig - ~callee:fun_exp ~args:(Some args) args_taints + ~callee:fun_exp ~args:(Some args) args_taints ?receiver_lval ~lookup_sig:lookup_sig_fn () in Log.debug (fun m -> @@ -1964,8 +2079,11 @@ let call_with_intrafile lval_opt e env args instr = (all_args_taints, S.Bot, lval_env))) | None -> (* No implicit lambda, try unified constructor execution *) + let receiver_lval = + receiver_lval_for_constructor_call env lval_opt e + in let check_function_call_wrapper env' e' args' args_taints' = - check_function_call env' e' args' args_taints' () + check_function_call ?receiver_lval env' e' args' args_taints' () in match Object_initialization.execute_unified_constructor e args args_taints @@ -1976,7 +2094,10 @@ let call_with_intrafile lval_opt e env args instr = (* Regular function call processing *) Log.debug (fun m -> m "INTRAFILE: Checking function call %s" (Display_IL.string_of_exp e)); - match check_function_call { env with lval_env } e args args_taints () with + match + check_function_call ?receiver_lval { env with lval_env } e args + args_taints () + with | Some (call_taints, shape, lval_env) -> Log.debug (fun m -> m ~tags:sigs_tag "- Instantiating %s: returns %s & %s" @@ -2091,7 +2212,7 @@ let call_with_intrafile lval_opt e env args instr = in (all_call_taints, shape, lval_env) -let new_with_intrafile env _result_lval _ty args constructor = +let new_with_intrafile env result_lval _ty args constructor = (* 'New' with reference to constructor - use constructor signatures *) let args_taints, all_args_taints, lval_env = check_function_call_arguments env args @@ 
-2099,7 +2220,8 @@ let new_with_intrafile env _result_lval _ty args constructor = let call_result = (* Try unified constructor execution first *) let check_function_call_wrapper env' e' args' args_taints' = - check_function_call env' e' args' args_taints' () + check_function_call ~receiver_lval:result_lval env' e' args' args_taints' + () in match Object_initialization.execute_unified_constructor constructor args @@ -2107,7 +2229,8 @@ let new_with_intrafile env _result_lval _ty args constructor = with | Some (call_taints, shape, lval_env) -> Some (call_taints, shape, lval_env) | None -> - check_function_call { env with lval_env } constructor args args_taints () + check_function_call ~receiver_lval:result_lval + { env with lval_env } constructor args args_taints () in match call_result with | Some (call_taints, shape, lval_env) -> (call_taints, shape, lval_env) diff --git a/src/tainting/Graph_from_AST.ml b/src/tainting/Graph_from_AST.ml index ab39972f0..e22bab9b1 100644 --- a/src/tainting/Graph_from_AST.ml +++ b/src/tainting/Graph_from_AST.ml @@ -3,6 +3,12 @@ module G = AST_generic module Log = Log_call_graph.Log (* *open Shape_and_sig *) module Reachable = Graph_reachability +module CanonicalMap = Map.Make (struct + type t = string list + + let compare = Stdlib.compare +end) +module StringMap = Map.Make (String) (* Function identifier as a path from outermost to innermost scope. * For example: @@ -34,6 +40,21 @@ type func_info = { fdef : G.function_definition; } +type project_file = { + path : Target.path; + ast : G.program; + object_mappings : (G.name * G.name) list; + funcs : func_info list; +} + +type import_binding_state = + | Imported + | Shadowed + +type import_lookup_scope = + | FinalModuleState + | TopLevelAtCallSite of Tok.t + (* Position-aware equality for fn_id paths. Compares function identifiers using both name AND source position (file, line, column) via Function_id.equal. 
*) let equal_with_pos f1 f2 = @@ -61,6 +82,413 @@ let fn_id_to_node (fn_id : fn_id) : node option = (* Equality for fn_id using compare_fn_id *) let equal_fn_id f1 f2 = Int.equal (compare_fn_id f1 f2) 0 +let normalize_file = Fun.compose Fpath.to_string Fpath.normalize + +let matches_current_file current_file (func : func_info) = + match fn_id_to_node func.fn_id with + | Some node -> + let file, _, _ = Function_id.to_file_line_col node in + String.equal file (normalize_file current_file) + | None -> false + +let rec suffixes = function + | [] -> [] + | (_ :: xs as segments) -> segments :: suffixes xs + +let full_module_path_of_path (path : Fpath.t) = + match List.rev (Fpath.segs (Fpath.normalize path)) with + | [] -> [] + | base :: rev_dirs -> + let dirs = List.rev rev_dirs in + let _, base_name, _ = Filename_.dbe_of_filename_noext_ok base in + match base_name with + | "__init__" + | "index" -> dirs + | _ -> dirs @ [ base_name ] + +let module_candidates_of_path (path : Fpath.t) = + full_module_path_of_path path |> suffixes + |> List.filter (fun segments -> segments <> []) + |> List.sort_uniq Stdlib.compare + +let is_relative_segment segment = + (not (String.equal segment "")) + && String.for_all (fun c -> Char.equal c '.') segment + +let split_relative_canonical canonical = + let rec aux depth = function + | segment :: rest + when String.equal segment "" || is_relative_segment segment -> + let depth_incr = Int.max 1 (String.length segment) in + aux (depth + depth_incr) rest + | rest -> (depth, rest) + in + aux 0 canonical + +let take n xs = + let rec aux n acc = function + | _ when n <= 0 -> List.rev acc + | [] -> List.rev acc + | x :: rest -> aux (n - 1) (x :: acc) rest + in + aux n [] xs + +let drop_last_n n xs = + take (Int.max 0 (List.length xs - n)) xs + +let dedup_canonicals canonicals = + let rec aux seen acc = function + | [] -> List.rev acc + | canonical :: rest -> + if List.mem canonical seen then aux seen acc rest + else aux (canonical :: seen) (canonical 
:: acc) rest + in + aux [] [] canonicals + +let canonical_lookup_candidates ?(current_file : Fpath.t option) canonical = + let relative_depth, target_segments = split_relative_canonical canonical in + if Int.equal relative_depth 0 then + match current_file with + | None -> [ canonical ] + | Some file -> + let current_modules = module_candidates_of_path file in + current_modules + |> List_.map (fun current_module -> current_module @ canonical) + |> fun qualified -> dedup_canonicals (canonical :: qualified) + else + let fallback = [ target_segments ] in + match current_file with + | None -> fallback + | Some file -> + let levels_up = Int.max 0 (relative_depth - 1) in + let current_modules = + module_candidates_of_path file + |> List.sort (fun xs ys -> + match Int.compare (List.length ys) (List.length xs) with + | 0 -> Stdlib.compare xs ys + | order -> order) + in + current_modules + |> List_.filter_map (fun current_module -> + match List.rev current_module with + | [] -> None + | _current_name :: rev_package -> + let package = List.rev rev_package in + Some (drop_last_n levels_up package @ target_segments)) + |> fun resolved -> dedup_canonicals (resolved @ fallback) + +let lookup_imported_entity ?current_file imported_entity_index canonical = + canonical_lookup_candidates ?current_file canonical + |> List.find_map (fun candidate -> + match CanonicalMap.find_opt candidate imported_entity_index with + | Some [ func ] -> Some func.fn_id + | Some _ + | None -> + None) + +let lookup_imported_func ?current_file imported_entity_index canonical = + canonical_lookup_candidates ?current_file canonical + |> List.find_map (fun candidate -> + match CanonicalMap.find_opt candidate imported_entity_index with + | Some [ func ] -> Some func + | Some _ + | None -> + None) + +let add_imported_entity imported_entity_index canonical func = + CanonicalMap.update canonical + (function + | Some funcs -> Some (func :: funcs) + | None -> Some [ func ]) + imported_entity_index + +let 
class_name_of_constructor_fn_id ~(lang : Lang.t) (fn_id : fn_id) : + G.name option = + match fn_id with + | [ Some cls; Some meth ] -> + let class_name = fst cls.IL.ident in + let method_name = fst meth.IL.ident in + if Object_initialization.is_constructor lang method_name (Some class_name) + then Some (G.Id ((class_name, snd cls.IL.ident), G.empty_id_info ())) + else None + | _ -> None + +let imported_class_name ~(lang : Lang.t) ~imported_entity_index + ~(canonical : string list) ~(default_tok : Tok.t) ?current_file : + unit -> G.name option = + fun () -> + let rec has_prefix prefix segments = + match (prefix, segments) with + | [], _ -> true + | _, [] -> false + | p :: prefix_rest, s :: segments_rest -> + String.equal p s && has_prefix prefix_rest segments_rest + in + match + Option.bind + (lookup_imported_entity ?current_file imported_entity_index canonical) + (class_name_of_constructor_fn_id ~lang) + with + | Some _ as result -> result + | None -> + let candidate_matches = + canonical_lookup_candidates ?current_file canonical + |> List.exists (fun candidate -> + CanonicalMap.exists + (fun indexed_canonical _funcs -> + has_prefix candidate indexed_canonical + && List.length indexed_canonical > List.length candidate) + imported_entity_index) + in + if candidate_matches then + match List_.last_opt canonical with + | Some class_name -> + Some (G.Id ((class_name, default_tok), G.empty_id_info ())) + | None -> None + else None + +let imported_constructor_class_name ~(lang : Lang.t) ~imported_entity_index + (expr : G.expr) ?current_file : unit -> G.name option = + fun () -> + let default_tok = + match AST_generic_helpers.ii_of_any (G.E expr) with + | tok :: _ -> tok + | [] -> Tok.unsafe_fake_tok "" + in + match expr.G.e with + | G.Call (callee, _) -> ( + match callee.G.e with + | G.N (G.Id (_, id_info)) -> ( + match !(id_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) -> + imported_class_name ~lang ~imported_entity_index ~canonical + ~default_tok 
?current_file () + | _ -> None) + | G.N (G.IdQualified ({ name_last = _; name_info; _ } as qualified_info)) -> ( + match !(name_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) -> + imported_class_name ~lang ~imported_entity_index ~canonical + ~default_tok ?current_file () + | _ -> + let canonical = + AST_generic_helpers.dotted_ident_of_name + (G.IdQualified qualified_info) + |> List_.map fst + in + imported_class_name ~lang ~imported_entity_index ~canonical + ~default_tok ?current_file ()) + | G.DotAccess ({ e = G.N (G.Id ((obj_name, _), obj_info)); _ }, _, G.FN (G.Id ((id, _), _))) -> ( + match !(obj_info.G.id_resolved) with + | Some (G.ImportedModule canonical_module, _sid) -> + imported_class_name ~lang ~imported_entity_index + ~canonical:(canonical_module @ [ id ]) + ~default_tok ?current_file () + | Some (G.ImportedEntity canonical_entity, _sid) -> + imported_class_name ~lang ~imported_entity_index + ~canonical:(canonical_entity @ [ id ]) + ~default_tok ?current_file () + | _ -> + imported_class_name ~lang ~imported_entity_index + ~canonical:[ obj_name; id ] ~default_tok ?current_file ()) + | _ -> None) + | _ -> None + +let detect_imported_object_initialization ~(lang : Lang.t) ~imported_entity_index + (ast : G.program) ?current_file : unit -> (G.name * G.name) list = + fun () -> + let object_mappings = ref [] in + let add_mapping var_name init_expr = + match + imported_constructor_class_name ~lang ~imported_entity_index init_expr + ?current_file () + with + | Some cls -> object_mappings := (var_name, cls) :: !object_mappings + | None -> () + in + let visitor = + object + inherit [_] G.iter as super + + method! 
visit_stmt () stmt = + (match stmt.G.s with + | G.ExprStmt (expr, _) -> ( + match expr.G.e with + | G.Assign (lval_expr, _, rval_expr) + | G.AssignOp (lval_expr, _, rval_expr) -> ( + match lval_expr.G.e with + | G.N var_name -> add_mapping var_name rval_expr + | _ -> ()) + | _ -> ()) + | _ -> ()); + super#visit_stmt () stmt + + method! visit_definition () def = + (match def with + | entity, G.VarDef var_def -> ( + match (entity.G.name, var_def.G.vinit) with + | G.EN var_name, Some init_expr -> add_mapping var_name init_expr + | _ -> ()) + | _ -> ()); + super#visit_definition () def + end + in + visitor#visit_program () ast; + !object_mappings + +let starts_with_segments ~prefix segments = + let rec aux prefix segments = + match (prefix, segments) with + | [], _ -> true + | _, [] -> false + | p :: prefix_rest, s :: segments_rest -> + String.equal p s && aux prefix_rest segments_rest + in + aux prefix segments + +let stmt_bytepos (stmt : G.stmt) = + AST_generic_helpers.ii_of_any (G.S stmt) + |> List.find_opt (fun tok -> not (Tok.is_fake tok)) + |> Option.map Tok.bytepos_of_tok + +let simple_entity_name (ent : G.entity) = + match ent.G.name with + | G.EN (G.Id ((name, _), _)) -> Some name + | _ -> None + +let imported_local_names (directive : G.directive) = + match directive.G.d with + | G.ImportFrom (_, _, imported_names) -> + imported_names + |> List_.map (function + | _id, Some (alias, _id_info) -> fst alias + | id, None -> fst id) + | G.ImportAs (_, G.DottedName (head :: _), alias_opt) -> ( + match alias_opt with + | Some (alias, _id_info) -> [ fst alias ] + | None -> [ fst head ]) + | _ -> [] + +let add_import_binding_event acc name pos state = + StringMap.update name + (function + | Some events -> Some ((pos, state) :: events) + | None -> Some [ (pos, state) ]) + acc + +let add_import_binding_events acc (stmt : G.stmt) = + match stmt_bytepos stmt with + | None -> acc + | Some pos -> ( + match stmt.G.s with + | G.DirectiveStmt directive -> + imported_local_names 
directive + |> List.fold_left + (fun acc name -> add_import_binding_event acc name pos Imported) + acc + | G.DefStmt (ent, (G.FuncDef _ | G.ClassDef _ | G.VarDef _)) -> ( + match simple_entity_name ent with + | Some name -> add_import_binding_event acc name pos Shadowed + | None -> acc) + | G.ExprStmt + ({ e = G.Assign ({ e = G.N (G.Id ((name, _), _)); _ }, _, _); _ }, _) -> + add_import_binding_event acc name pos Shadowed + | _ -> acc) + +let finalize_import_binding_timeline timeline = + timeline |> StringMap.map List.rev + +let build_import_binding_timeline (ast : G.program) = + ast |> List.fold_left add_import_binding_events StringMap.empty + |> finalize_import_binding_timeline + +let build_local_import_binding_timeline (fdef : G.function_definition) = + let timeline = ref StringMap.empty in + let body_stmt = AST_generic_helpers.funcbody_to_stmt fdef.G.fbody in + let visitor = + object + inherit [_] G.iter_no_id_info as super + + method! visit_stmt env stmt = + timeline := add_import_binding_events !timeline stmt; + match stmt.G.s with + | G.DefStmt (_, (G.FuncDef _ | G.ClassDef _)) -> () + | _ -> super#visit_stmt env stmt + end + in + visitor#visit_stmt () body_stmt; + finalize_import_binding_timeline !timeline + +let latest_import_binding_state_before events pos = + let rec aux latest = function + | [] -> latest + | (event_pos, state) :: rest -> + if event_pos > pos then latest else aux (Some state) rest + in + aux None events + +let is_active_import_binding import_binding_timeline ~name ~scope = + match StringMap.find_opt name import_binding_timeline with + | None -> true + | Some [] -> true + | Some events -> ( + match scope with + | FinalModuleState -> ( + match List_.last_opt events with + | Some (_, Imported) -> true + | Some (_, Shadowed) -> false + | None -> true) + | TopLevelAtCallSite tok -> + if Tok.is_fake tok then true + else + match latest_import_binding_state_before events (Tok.bytepos_of_tok tok) with + | Some Imported -> true + | Some Shadowed 
-> false + | None -> true) + +let local_import_binding_state_before local_import_binding_timeline ~name tok = + match StringMap.find_opt name local_import_binding_timeline with + | None + | Some [] -> + None + | Some events -> + if Tok.is_fake tok then None + else + latest_import_binding_state_before events (Tok.bytepos_of_tok tok) + +let is_active_import_binding_for_call import_binding_timeline + local_import_binding_timeline ~name ~scope ~call_tok = + match + local_import_binding_state_before local_import_binding_timeline ~name + call_tok + with + | Some Imported -> true + | Some Shadowed -> false + | None -> + is_active_import_binding import_binding_timeline ~name ~scope + +let dotted_name_segments_of_expr (expr : G.expr) = + AST_generic_helpers.name_of_dot_access expr + |> Option.map AST_generic_helpers.dotted_ident_of_name + |> Option.map (List_.map fst) + +let rec callsite_tok_of_callee_expr (expr : G.expr) : Tok.t option = + match expr.G.e with + | G.N (G.Id ((_name, tok), _id_info)) when not (Tok.is_fake tok) -> Some tok + | G.N (G.IdQualified { name_last = (_, _typeargsTODO); name_info; _ }) -> ( + match AST_generic_helpers.ii_of_any (G.E expr) with + | tok :: _ when not (Tok.is_fake tok) -> Some tok + | _ -> ( + match !(name_info.G.id_resolved) with + | _ -> None)) + | G.DotAccess (_obj, _, G.FN (G.Id ((_name, tok), _id_info))) + when not (Tok.is_fake tok) -> + Some tok + | G.Call (callee, _) -> callsite_tok_of_callee_expr callee + | _ -> ( + match AST_generic_helpers.ii_of_any (G.E expr) with + | tok :: _ when not (Tok.is_fake tok) -> Some tok + | _ -> None) + (* Extract Go receiver type from method *) let extract_go_receiver_type (fdef : G.function_definition) : string option = let params = Tok.unbracket fdef.fparams in @@ -140,81 +568,182 @@ let dedup_fn_ids (ids : (fn_id * Tok.t) list) : (fn_id * Tok.t) list = (* Helper function to identify the callee fn_id from a call expression's callee *) let identify_callee ?(object_mappings = []) ?(all_funcs = 
[]) - ?(caller_parent_path = []) ?(call_arity : int option) (callee : G.expr) : fn_id option = + ?(import_binding_timeline = StringMap.empty) + ?(local_import_binding_timeline = StringMap.empty) + ?(imported_entity_index = CanonicalMap.empty) ?(current_file : Fpath.t option) + ?(caller_parent_path = []) ?(call_arity : int option) + ?call_tok ?(import_lookup_scope = FinalModuleState) (callee : G.expr) : + fn_id option = (* Extract class from caller_parent_path if present *) let current_class = match caller_parent_path with | Some cls :: _ -> Some cls | _ -> None in + let is_import_binding_active name = + match call_tok with + | Some tok -> + is_active_import_binding_for_call import_binding_timeline + local_import_binding_timeline ~name ~scope:import_lookup_scope + ~call_tok:tok + | None -> + is_active_import_binding import_binding_timeline ~name + ~scope:import_lookup_scope + in + let is_local_function func = + match current_file with + | Some file -> matches_current_file file func + | None -> true + in + let resolve_local_function_call callee_name_str = + let nested_match = + List.find_opt + (fun f -> + match List_.init_and_last_opt f.fn_id with + | Some (f_parent, Some name) + when String.equal (fst name.IL.ident) callee_name_str -> + equal_with_pos f_parent caller_parent_path + | _ -> false) + all_funcs + in + match nested_match with + | Some f -> + Log.debug (fun m -> + m "CALL_EXTRACT: Found nested function %s in same scope" + callee_name_str); + Some f.fn_id + | None -> ( + match current_class with + | Some class_name -> + let class_name_str = fst class_name.IL.ident in + let method_match = + List.find_opt + (fun f -> + is_local_function f + && + match f.fn_id with + | [ Some c; Some m ] -> + fst c.IL.ident = class_name_str + && fst m.IL.ident = callee_name_str + | _ -> false) + all_funcs + in + let all_names = + all_funcs |> List.map (fun f -> show_fn_id f.fn_id) + |> String.concat ", " + in + Log.debug (fun m -> + m + "CALL_EXTRACT: In class %s, call to %s, 
checking %d funcs, \ + method_exists=%b, ALL: [%s]" + class_name_str callee_name_str (List.length all_funcs) + (Option.is_some method_match) all_names); + (match method_match with + | Some f -> Some f.fn_id + | None -> + let free_fn_match = + List.find_opt + (fun f -> + is_local_function f + && + match f.fn_id with + | [ None; Some name ] -> + fst name.IL.ident = callee_name_str + | _ -> false) + all_funcs + in + Option.map (fun f -> f.fn_id) free_fn_match) + | None -> + let free_fn_match = + List.find_opt + (fun f -> + is_local_function f + && + match f.fn_id with + | [ None; Some name ] -> + fst name.IL.ident = callee_name_str + | _ -> false) + all_funcs + in + Option.map (fun f -> f.fn_id) free_fn_match) + in + let resolve_method_call_in_class class_name_str method_name_str = + let method_matches = + List.filter + (fun f -> + is_local_function f + && + match f.fn_id with + | [ Some c; Some m ] -> + String.equal (fst c.IL.ident) class_name_str + && String.equal (fst m.IL.ident) method_name_str + | _ -> false) + all_funcs + in + match method_matches with + | [ single_match ] -> Some single_match.fn_id + | [] -> None + | _ -> ( + match call_arity with + | Some arity -> + let arity_matches = + List.filter + (fun f -> Int.equal (get_func_arity f.fdef) arity) + method_matches + in + (match arity_matches with + | [ single_match ] -> Some single_match.fn_id + | _ -> None) + | None -> None) + in match callee.G.e with (* Simple function call: foo() *) - | G.N (G.Id ((id, _), _id_info)) -> - let callee_name_str = id in - (* First check if it's a nested function in the same scope. - Use position-aware match to distinguish same-named parent functions. 
*) - let nested_match = - List.find_opt (fun f -> - match List_.init_and_last_opt f.fn_id with - | Some (f_parent, Some name) when String.equal (fst name.IL.ident) callee_name_str -> - equal_with_pos f_parent caller_parent_path - | _ -> false - ) all_funcs - in - begin - match nested_match with - | Some f -> - Log.debug (fun m -> m "CALL_EXTRACT: Found nested function %s in same scope" callee_name_str); - Some f.fn_id - | None -> - (* For class-based languages, foo() might be an implicit this.foo() call. - Check if a method with this name exists in the current class. *) - match current_class with - | Some class_name -> - let class_name_str = fst class_name.IL.ident in - (* Check if this method exists in the class - use string matching *) - let method_match = List.find_opt (fun f -> - match f.fn_id with - | [Some c; Some m] when fst c.IL.ident = class_name_str && fst m.IL.ident = callee_name_str -> true - | _ -> false - ) all_funcs in - (* Debug: show all function names *) - let all_names = - all_funcs - |> List.map (fun f -> show_fn_id f.fn_id) - |> String.concat ", " - in - Log.debug (fun m -> m "CALL_EXTRACT: In class %s, call to %s, checking %d funcs, method_exists=%b, ALL: [%s]" - class_name_str callee_name_str (List.length all_funcs) (Option.is_some method_match) all_names); - (match method_match with - | Some f -> Some f.fn_id - | None -> - (* It's a free function call, not a method - use string matching *) - let free_fn_match = List.find_opt (fun f -> - match f.fn_id with - | [None; Some name] when fst name.IL.ident = callee_name_str -> true - | _ -> false - ) all_funcs in - Option.map (fun f -> f.fn_id) free_fn_match) - | None -> - (* Top-level free function - use string matching *) - let free_fn_match = - List.find_opt (fun f -> - match f.fn_id with - | [None; Some name] when fst name.IL.ident = callee_name_str -> true - | _ -> false - ) all_funcs in - Option.map (fun f -> f.fn_id) free_fn_match - end + | G.N (G.Id ((id, _), id_info)) -> ( + match 
resolve_local_function_call id with + | Some _ as result -> result + | None -> ( + match !(id_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) when is_import_binding_active id -> + lookup_imported_entity ?current_file imported_entity_index + canonical + | Some (G.ImportedModule _, _sid) -> None + | None when is_import_binding_active id -> + lookup_imported_entity ?current_file imported_entity_index + [ id ] + | Some _ + | None -> + None)) (* Qualified call: Module.foo() *) - | G.N (G.IdQualified { name_last = (id, _), _; _ }) -> - let callee_name_str = id in - (* Use string matching to find the qualified function *) - let qualified_match = List.find_opt (fun f -> - match f.fn_id with - | [None; Some name] when fst name.IL.ident = callee_name_str -> true - | _ -> false - ) all_funcs in - Option.map (fun f -> f.fn_id) qualified_match + | G.N + (G.IdQualified + ({ name_last = (id, _typeargsTODO); name_info; _ } as qualified_info)) + -> ( + match !(name_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) -> + lookup_imported_entity ?current_file imported_entity_index canonical + | Some _ + | None -> + let qualified_name = + AST_generic_helpers.dotted_ident_of_name + (G.IdQualified qualified_info) + in + let canonical = qualified_name |> List_.map fst in + match lookup_imported_entity ?current_file imported_entity_index canonical with + | Some _ as result -> result + | None -> + let callee_name_str = fst id in + (* Use string matching to find the qualified function *) + let qualified_match = + List.find_opt + (fun f -> + is_local_function f + && + match f.fn_id with + | [ None; Some name ] -> + fst name.IL.ident = callee_name_str + | _ -> false) + all_funcs + in + Option.map (fun f -> f.fn_id) qualified_match) (* Method call: this.method() or self.method() *) | G.DotAccess ( { e = G.IdSpecial ((G.This | G.Self), _); _ }, @@ -227,6 +756,7 @@ let identify_callee ?(object_mappings = []) ?(all_funcs = []) let class_name_str = fst 
class_name.IL.ident in (* Find all methods matching class and name *) let method_matches = List.filter (fun f -> + is_local_function f && match f.fn_id with | [Some c; Some m] when fst c.IL.ident = class_name_str && fst m.IL.ident = method_name_str -> true | _ -> false @@ -246,48 +776,139 @@ let identify_callee ?(object_mappings = []) ?(all_funcs = []) | _ -> None) (* Still 0 or multiple matches *) | None -> None)) (* No arity info, can't disambiguate *) | None -> None) - (* Method call: obj.method() - look up obj's class *) | G.DotAccess - ( { e = G.N (G.Id ((obj_name, _), _)); _ }, + ({ e = G.DotAccess _; _ } as obj, _, G.FN (G.Id ((id, _), _id_info))) + -> ( + match dotted_name_segments_of_expr obj with + | Some canonical_module -> + lookup_imported_entity ?current_file imported_entity_index + (canonical_module @ [ id ]) + | None -> None) + | G.DotAccess + ( { e = G.N (G.Id ((_obj_name, _), obj_info)); _ }, _, - G.FN (G.Id ((id, _), _id_info)) ) -> + G.FN (G.Id ((id, _), _id_info)) ) -> ( + match !(obj_info.G.id_resolved) with + | Some (G.ImportedModule canonical_module, _sid) + when is_import_binding_active _obj_name -> + lookup_imported_entity ?current_file imported_entity_index + (canonical_module @ [ id ]) + | Some (G.ImportedEntity canonical_entity, _sid) + when is_import_binding_active _obj_name -> + lookup_imported_entity ?current_file imported_entity_index + (canonical_entity @ [ id ]) + | Some _ + | None -> + let method_name_str = id in + (* First try: treat obj as a module name and look up in + imported entities. This handles bare `import runner` in + Python where the naming pass does not set ImportedModule. 
*) + let module_lookup = + if is_import_binding_active _obj_name then + lookup_imported_entity ?current_file imported_entity_index + [ _obj_name; method_name_str ] + else None + in + (match module_lookup with + | Some _ as result -> result + | None -> + (* Fall back: look up obj's class in object_mappings *) + let obj_class_opt = + object_mappings + |> List.find_opt (fun (var_name, _class_name) -> + match var_name with + | G.Id ((var_str, _), _) -> var_str = _obj_name + | _ -> false) + |> Option.map (fun (_var_name, class_name) -> class_name) + in + (match obj_class_opt with + | Some class_name -> + let class_name_str = match class_name with + | G.Id ((str, _), _) -> str + | _ -> "" + in + let imported_method = + lookup_imported_entity ?current_file imported_entity_index + [ class_name_str; method_name_str ] + in + (* Find all methods matching class and name *) + let method_matches = List.filter (fun f -> + is_local_function f && + match f.fn_id with + | [Some c; Some m] when fst c.IL.ident = class_name_str && fst m.IL.ident = method_name_str -> true + | _ -> false + ) all_funcs in + (match imported_method with + | Some _ as result -> result + | None -> + match method_matches with + | [single_match] -> Some single_match.fn_id (* Exactly one match by name *) + | [] -> None + | _ -> + (* Multiple matches - filter by arity if available *) + (match call_arity with + | Some arity -> + let arity_matches = List.filter (fun f -> + Int.equal (get_func_arity f.fdef) arity + ) method_matches in + (match arity_matches with + | [single_match] -> Some single_match.fn_id + | _ -> None) (* Still 0 or multiple matches *) + | None -> None)) (* No arity info, can't disambiguate *) + | None -> None))) + | G.DotAccess + ({ e = G.Call (constructor_callee, _); _ }, _, + G.FN (G.Id ((id, _), _id_info))) -> ( let method_name_str = id in - (* Look up obj's class in object_mappings *) - let obj_class_opt = - object_mappings - |> List.find_opt (fun (var_name, _class_name) -> - match var_name 
with - | G.Id ((var_str, _), _) -> var_str = obj_name - | _ -> false) - |> Option.map (fun (_var_name, class_name) -> class_name) + let imported_method = + let lookup canonical = + lookup_imported_entity ?current_file imported_entity_index + (canonical @ [ method_name_str ]) + in + match constructor_callee.G.e with + | G.N (G.Id ((_class_name, _), class_info)) -> ( + match !(class_info.G.id_resolved) with + | Some (G.ImportedEntity canonical_entity, _sid) + | Some (G.ImportedModule canonical_entity, _sid) -> + lookup canonical_entity + | Some _ + | None -> + None) + | G.N + (G.IdQualified + ({ name_last = _; name_info; _ } as qualified_info)) -> ( + match !(name_info.G.id_resolved) with + | Some (G.ImportedEntity canonical_entity, _sid) + | Some (G.ImportedModule canonical_entity, _sid) -> + lookup canonical_entity + | Some _ + | None -> + let canonical = + AST_generic_helpers.dotted_ident_of_name + (G.IdQualified qualified_info) + |> List_.map fst + in + lookup canonical) + | G.DotAccess _ -> ( + match dotted_name_segments_of_expr constructor_callee with + | Some canonical -> lookup canonical + | None -> None) + | _ -> None in - (match obj_class_opt with - | Some class_name -> - let class_name_str = match class_name with - | G.Id ((str, _), _) -> str - | _ -> "" + match imported_method with + | Some _ as result -> result + | None -> + let class_name_str_opt = + match constructor_callee.G.e with + | G.N (G.Id ((class_name, _), _)) -> Some class_name + | _ -> + (match dotted_name_segments_of_expr constructor_callee with + | Some canonical -> List_.last_opt canonical + | None -> None) in - (* Find all methods matching class and name *) - let method_matches = List.filter (fun f -> - match f.fn_id with - | [Some c; Some m] when fst c.IL.ident = class_name_str && fst m.IL.ident = method_name_str -> true - | _ -> false - ) all_funcs in - (match method_matches with - | [single_match] -> Some single_match.fn_id (* Exactly one match by name *) - | [] -> None - | _ -> - (* 
Multiple matches - filter by arity if available *) - (match call_arity with - | Some arity -> - let arity_matches = List.filter (fun f -> - Int.equal (get_func_arity f.fdef) arity - ) method_matches in - (match arity_matches with - | [single_match] -> Some single_match.fn_id - | _ -> None) (* Still 0 or multiple matches *) - | None -> None)) (* No arity info, can't disambiguate *) - | None -> None) + Option.bind class_name_str_opt (fun class_name_str -> + resolve_method_call_in_class class_name_str method_name_str)) + (* Method call: obj.method() - look up obj's class *) | _ -> Log.debug (fun m -> m "CALL_EXTRACT: Unmatched call pattern: %s" @@ -296,9 +917,14 @@ let identify_callee ?(object_mappings = []) ?(all_funcs = []) (* Extract all calls from a function body and resolve them to fn_ids *) let extract_calls ?(object_mappings = []) ?(all_funcs = []) ?(caller_parent_path = []) + ?(import_binding_timeline = StringMap.empty) + ?(imported_entity_index = CanonicalMap.empty) ?(current_file : Fpath.t option) (fdef : G.function_definition) : (fn_id * Tok.t) list = Log.debug (fun m -> m "CALL_EXTRACT: Starting extraction for function"); let calls = ref [] in + let local_import_binding_timeline = + build_local_import_binding_timeline fdef + in (* Check if an argument is an unresolved Id that could be a function call. * In Ruby, `foo(bar)` where `bar` is a method is actually `foo(bar())`. * If id_resolved is None and we can identify it as a function, add it as a call. 
*) @@ -311,7 +937,14 @@ let extract_calls ?(object_mappings = []) ?(all_funcs = []) ?(caller_parent_path (match !(id_info.G.id_resolved) with | None -> (* Unresolved - try to identify it as a function *) - (match identify_callee ~object_mappings ~all_funcs ~caller_parent_path arg_exp with + (match + identify_callee ~object_mappings ~all_funcs + ~import_binding_timeline + ~local_import_binding_timeline + ~imported_entity_index ?current_file ~caller_parent_path + ~call_tok:tok + arg_exp + with | Some fn_id -> Log.debug (fun m -> m "CALL_EXTRACT: Found unresolved Id that is a function, adding as implicit call"); calls := (fn_id, tok) :: !calls @@ -329,14 +962,22 @@ let extract_calls ?(object_mappings = []) ?(all_funcs = []) ?(caller_parent_path | G.Call (callee, args) -> let (_, args_list, _) = args in let call_arity = List.length args_list in - (match identify_callee ~object_mappings ~all_funcs ~caller_parent_path ~call_arity callee with - | Some fn_id -> - (* Extract token from the call expression for edge label *) - let tok = + let tok = + match callsite_tok_of_callee_expr callee with + | Some tok -> tok + | None -> ( match AST_generic_helpers.ii_of_any (G.E e) with | tok :: _ -> tok - | [] -> Tok.unsafe_fake_tok "" - in + | [] -> Tok.unsafe_fake_tok "") + in + (match + identify_callee ~object_mappings ~all_funcs + ~import_binding_timeline + ~local_import_binding_timeline + ~imported_entity_index ?current_file ~caller_parent_path + ~call_arity ~call_tok:tok callee + with + | Some fn_id -> calls := (fn_id, tok) :: !calls | None -> ()); (* Check arguments for unresolved function calls (Ruby-style) *) @@ -355,20 +996,30 @@ let extract_calls ?(object_mappings = []) ?(all_funcs = []) ?(caller_parent_path (* Extract calls from top-level statements (outside any function). This returns a list of (callee_fn_id, call_tok) pairs. 
*) -let extract_toplevel_calls ?(object_mappings = []) ?(all_funcs = []) (ast : G.program) : (fn_id * Tok.t) list = +let extract_toplevel_calls ?(object_mappings = []) ?(all_funcs = []) + ?(import_binding_timeline = StringMap.empty) + ?(imported_entity_index = CanonicalMap.empty) ?(current_file : Fpath.t option) + (ast : G.program) : (fn_id * Tok.t) list = Log.debug (fun m -> m "CALL_EXTRACT: Starting extraction for top-level statements"); let calls = ref [] in (* Build a set of byte ranges covered by function bodies *) let func_ranges = ref [] in - List.iter (fun func -> - let body_stmt = AST_generic_helpers.funcbody_to_stmt func.fdef.G.fbody in - match AST_generic_helpers.range_of_any_opt (G.S body_stmt) with - | Some (loc_start, loc_end) -> - let range = Range.range_of_token_locations loc_start loc_end in - func_ranges := (range.start, range.end_) :: !func_ranges - | None -> ()) - all_funcs; + let funcs_in_current_file = + match current_file with + | Some file -> + all_funcs |> List.filter (matches_current_file file) + | None -> all_funcs + in + List.iter + (fun func -> + let body_stmt = AST_generic_helpers.funcbody_to_stmt func.fdef.G.fbody in + match AST_generic_helpers.range_of_any_opt (G.S body_stmt) with + | Some (loc_start, loc_end) -> + let range = Range.range_of_token_locations loc_start loc_end in + func_ranges := (range.start, range.end_) :: !func_ranges + | None -> ()) + funcs_in_current_file; (* Check if a position is inside any function body *) let is_inside_function pos = @@ -390,13 +1041,22 @@ let extract_toplevel_calls ?(object_mappings = []) ?(all_funcs = []) (ast : G.pr in if call_pos >= 0 && not (is_inside_function call_pos) then ( (* Top-level call - no class context *) - match identify_callee ~object_mappings ~all_funcs ~caller_parent_path:[] callee with - | Some fn_id -> - let tok = + let tok = + match callsite_tok_of_callee_expr callee with + | Some tok -> tok + | None -> ( match AST_generic_helpers.ii_of_any (G.E e) with | tok :: _ -> tok 
- | [] -> Tok.unsafe_fake_tok "" - in + | [] -> Tok.unsafe_fake_tok "") + in + match + identify_callee ~object_mappings ~all_funcs + ~import_binding_timeline + ~imported_entity_index ?current_file ~caller_parent_path:[] + ~call_tok:tok ~import_lookup_scope:(TopLevelAtCallSite tok) + callee + with + | Some fn_id -> Log.debug (fun m -> m "CALL_EXTRACT: Found top-level call to %s" (show_fn_id fn_id)); calls := (fn_id, tok) :: !calls | None -> () @@ -452,6 +1112,7 @@ let extract_callback_from_arg (arg_expr : G.expr) : (IL.name * Tok.t * IL.name o (* Helper to identify a callback fn_id, checking nested functions in same scope first *) let identify_callback ?(all_funcs = []) ?(caller_parent_path = []) + ?(imported_entity_index = CanonicalMap.empty) ?(current_file : Fpath.t option) (callback_name : IL.name) : fn_id option = let callback_name_str = fst callback_name.IL.ident in (* Extract class from caller_parent_path if present *) @@ -459,6 +1120,16 @@ let identify_callback ?(all_funcs = []) ?(caller_parent_path = []) | Some cls :: _ -> Some cls | _ -> None in + let is_local_function func = + match current_file with + | Some file -> matches_current_file file func + | None -> true + in + match !(callback_name.IL.id_info.G.id_resolved) with + | Some (G.ImportedEntity canonical, _sid) -> + lookup_imported_entity ?current_file imported_entity_index canonical + | Some _ + | None -> (* First check if it's a nested function in the same scope - position-aware match *) let nested_match = List.find_opt (fun f -> @@ -479,6 +1150,7 @@ let identify_callback ?(all_funcs = []) ?(caller_parent_path = []) | Some cls -> let class_name_str = fst cls.IL.ident in List.find_opt (fun f -> + is_local_function f && match f.fn_id with | [Some c; Some m] when fst c.IL.ident = class_name_str && fst m.IL.ident = callback_name_str -> true | _ -> false @@ -493,6 +1165,7 @@ let identify_callback ?(all_funcs = []) ?(caller_parent_path = []) | None -> (* Check for top-level function - match by string name 
*) let top_level_match = List.find_opt (fun f -> + is_local_function f && match f.fn_id with | [None; Some name] when fst name.IL.ident = callback_name_str -> true | _ -> false @@ -508,7 +1181,10 @@ let identify_callback ?(all_funcs = []) ?(caller_parent_path = []) (* Try to identify a callback from a G.argument, returning fn_id, token, and optional _tmp node. The _tmp node is present for Elixir ShortLambda to create the intermediate wrapper node. *) -let try_identify_callback_arg ~all_funcs ~caller_parent_path (arg : G.argument) : (fn_id * Tok.t * IL.name option) option = +let try_identify_callback_arg ~all_funcs ~caller_parent_path + ?(imported_entity_index = CanonicalMap.empty) + ?(current_file : Fpath.t option) (arg : G.argument) : + (fn_id * Tok.t * IL.name option) option = match arg with | G.Arg expr -> (* Also handle this.foo pattern *) @@ -520,7 +1196,8 @@ let try_identify_callback_arg ~all_funcs ~caller_parent_path (arg : G.argument) (match callback_opt with | Some (callback_name, tok, tmp_opt) -> (* Use real token from the callback argument *) - identify_callback ~all_funcs ~caller_parent_path callback_name + identify_callback ~all_funcs ~imported_entity_index ?current_file + ~caller_parent_path callback_name |> Option.map (fun fn_id -> (fn_id, tok, tmp_opt)) | None -> None) | _ -> None @@ -528,8 +1205,13 @@ let try_identify_callback_arg ~all_funcs ~caller_parent_path (arg : G.argument) (* Extract HOF callbacks from a single call expression. Returns list of (fn_id, tok, tmp_opt) where tmp_opt is the _tmp node for ShortLambda. 
*) let extract_hof_callbacks_from_call ~method_hofs ~function_hofs ~all_funcs - ~caller_parent_path (callee : G.expr) (args : G.arguments) : (fn_id * Tok.t * IL.name option) list = - let try_arg arg = try_identify_callback_arg ~all_funcs ~caller_parent_path arg in + ~caller_parent_path ?(imported_entity_index = CanonicalMap.empty) + ?(current_file : Fpath.t option) (callee : G.expr) (args : G.arguments) : + (fn_id * Tok.t * IL.name option) list = + let try_arg arg = + try_identify_callback_arg ~all_funcs ~imported_entity_index ?current_file + ~caller_parent_path arg + in let try_arg_at_index idx = match List.nth_opt (Tok.unbracket args) idx with | Some arg -> try_arg arg @@ -560,7 +1242,8 @@ let extract_hof_callbacks_from_call ~method_hofs ~function_hofs ~all_funcs (* Extract HOF callbacks, returning (fn_id, tok, tmp_opt) tuples. tmp_opt is Some IL.name for ShortLambda callbacks that need a _tmp intermediate node. *) let extract_hof_callbacks ?(_object_mappings = []) ?(all_funcs = []) - ?(caller_parent_path = []) + ?(imported_entity_index = CanonicalMap.empty) + ?(current_file : Fpath.t option) ?(caller_parent_path = []) ~(lang : Lang.t) (fdef : G.function_definition) : (fn_id * Tok.t * IL.name option) list = let hof_configs = (Lang_config.get lang).hof_configs in let method_hofs = @@ -584,7 +1267,8 @@ let extract_hof_callbacks ?(_object_mappings = []) ?(all_funcs = []) (match e.G.e with | G.Call (callee, args) -> let found = extract_hof_callbacks_from_call - ~method_hofs ~function_hofs ~all_funcs ~caller_parent_path + ~method_hofs ~function_hofs ~all_funcs ~imported_entity_index + ?current_file ~caller_parent_path callee args in callbacks := found @ !callbacks @@ -595,34 +1279,70 @@ let extract_hof_callbacks ?(_object_mappings = []) ?(all_funcs = []) v#visit_function_definition () fdef; !callbacks +let collect_functions ~(lang : Lang.t) (ast : G.program) : func_info list = + Visit_function_defs.fold_with_parent_path + (fun funcs opt_ent parent_path fdef -> + match 
fn_id_of_entity ~lang opt_ent parent_path fdef with + | Some fn_id -> + let func = { fn_id; entity = opt_ent; fdef } in + func :: funcs + | None -> funcs) + [] ast + +let first_program_tok ?current_file (ast : G.program) : Tok.t = + match current_file with + | Some file -> Tok.first_tok_of_file file + | None -> ( + match AST_generic_helpers.ii_of_any (G.Pr ast) |> List.find_opt Tok.is_origintok with + | Some tok -> tok + | None -> Tok.unsafe_fake_tok "") + +let top_level_il_name ?current_file (ast : G.program) : IL.name = + let tok = first_program_tok ?current_file ast in + IL. + { + ident = ("", tok); + sid = G.SId.unsafe_default; + id_info = AST_generic.empty_id_info (); + } + +let class_init_il_name (class_g_name : G.name) : IL.name = + let class_il_name = AST_to_IL.var_of_name class_g_name in + let class_str, class_tok = class_il_name.IL.ident in + IL. + { + ident = ("Class:" ^ class_str, class_tok); + sid = G.SId.unsafe_default; + id_info = G.empty_id_info (); + } + (* Build call graph - Visit_function_defs handles regular functions, arrow functions, and lambda assignments like const x = () => {} *) -let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) - : Call_graph.G.t = +let build_call_graph_with_context ~(lang : Lang.t) ?(object_mappings = []) + ?(all_funcs : func_info list option) + ?(imported_entity_index = CanonicalMap.empty) + ?(current_file : Fpath.t option) (ast : G.program) : Call_graph.G.t = let graph = Call_graph.G.create () in + let import_binding_timeline = build_import_binding_timeline ast in + let is_local_function func = + match current_file with + | Some file -> matches_current_file file func + | None -> true + in (* Create a special top_level node to represent code outside functions *) let top_level_node : node = - let fake_tok = Tok.unsafe_fake_tok "" in - let il_name = IL.{ ident = ("", fake_tok); sid = G.SId.unsafe_default; id_info = AST_generic.empty_id_info () } in - Function_id.of_il_name il_name + 
Function_id.of_il_name (top_level_il_name ?current_file ast) in Call_graph.G.add_vertex graph top_level_node; - let funcs = - Visit_function_defs.fold_with_parent_path - (fun funcs opt_ent parent_path fdef -> - match fn_id_of_entity ~lang opt_ent parent_path fdef with - | Some fn_id -> - let func = { fn_id; entity = opt_ent; fdef } in - (* Add vertex using the node (last element of fn_id) *) - (match fn_id_to_node fn_id with - | Some node -> Call_graph.G.add_vertex graph node - | None -> ()); - func :: funcs - | None -> funcs) - [] ast - in + let local_funcs = collect_functions ~lang ast in + let funcs = Option.value ~default:local_funcs all_funcs in + funcs + |> List.iter (fun func -> + match fn_id_to_node func.fn_id with + | Some node -> Call_graph.G.add_vertex graph node + | None -> ()); (* Visit all calls in the AST, tracking the current function context *) Visit_function_defs.visit_with_parent_path (fun opt_ent parent_path fdef -> @@ -637,7 +1357,10 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) (* Extract calls - class context is already in fn_id *) let callee_calls = - extract_calls ~object_mappings ~all_funcs:funcs ~caller_parent_path:fn_id fdef + extract_calls ~object_mappings ~all_funcs:funcs + ~import_binding_timeline + ~imported_entity_index ?current_file ~caller_parent_path:fn_id + fdef in (* Add labeled edges for each call - edge from callee to caller for bottom-up analysis *) @@ -653,7 +1376,9 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) (* Extract HOF callbacks and add edges: callback -> caller (or callback -> _tmp -> caller for ShortLambda) *) let callback_calls = - extract_hof_callbacks ~_object_mappings:object_mappings ~all_funcs:funcs ~caller_parent_path:fn_id ~lang fdef + extract_hof_callbacks ~_object_mappings:object_mappings + ~all_funcs:funcs ~imported_entity_index ?current_file + ~caller_parent_path:fn_id ~lang fdef in (* Add labeled edges for each callback - edge 
from callback to caller for bottom-up analysis. For ShortLambda, create intermediate _tmp node: callback -> _tmp -> caller *) @@ -677,7 +1402,11 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) ast; (* Extract calls from top-level code (outside any function) and add edges to *) - let toplevel_calls = extract_toplevel_calls ~object_mappings ~all_funcs:funcs ast in + let toplevel_calls = + extract_toplevel_calls ~object_mappings ~all_funcs:funcs + ~import_binding_timeline + ~imported_entity_index ?current_file ast + in List.iter (fun (callee_fn_id, call_tok) -> match fn_id_to_node callee_fn_id with @@ -704,7 +1433,8 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) in Visit_function_defs.fold_toplevel_calls (fun acc _call_e callee args -> let found = extract_hof_callbacks_from_call - ~method_hofs ~function_hofs ~all_funcs:funcs ~caller_parent_path:[] + ~method_hofs ~function_hofs ~all_funcs:funcs + ~imported_entity_index ?current_file ~caller_parent_path:[] callee args in found @ acc @@ -751,6 +1481,8 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) let other_class_name_str = Option.map (fun n -> fst n.IL.ident) other_class_opt in + is_local_function other + && (not (Object_initialization.is_constructor lang other_name other_class_name_str)) @@ -782,9 +1514,7 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) let class_str = fst class_il_name.IL.ident in (* Create Class:* node *) let class_init_node : node = - let fake_tok = Tok.unsafe_fake_tok ("Class:" ^ class_str) in - let il_name = IL.{ ident = ("Class:" ^ class_str, fake_tok); sid = G.SId.unsafe_default; id_info = G.empty_id_info () } in - Function_id.of_il_name il_name + Function_id.of_il_name (class_init_il_name class_g_name) in Call_graph.G.add_vertex graph class_init_node; @@ -792,6 +1522,7 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) let 
class_methods = List.filter (fun func -> + is_local_function func && let func_class_opt = match func.fn_id with class_opt :: _ -> class_opt | [] -> None in match func_class_opt with | Some func_class_il_name -> @@ -818,9 +1549,225 @@ let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) graph +let build_call_graph ~(lang : Lang.t) ?(object_mappings = []) (ast : G.program) + : Call_graph.G.t = + build_call_graph_with_context ~lang ~object_mappings ast + +let importable_segments_of_fn_id (fn_id : fn_id) = + match fn_id with + | [ None; Some name ] -> Some [ fst name.IL.ident ] + | [ Some cls; Some meth ] -> + Some [ fst cls.IL.ident; fst meth.IL.ident ] + | _ -> None + +let constructor_import_alias_segments ~(lang : Lang.t) (fn_id : fn_id) = + match fn_id with + | [ Some cls; Some meth ] -> + let class_name = fst cls.IL.ident in + let method_name = fst meth.IL.ident in + if Object_initialization.is_constructor lang method_name (Some class_name) + then Some [ class_name ] + else None + | _ -> None + +let collect_imported_aliases (ast : G.program) : + (string list * string) list = + let aliases = ref [] in + let visitor = + object + inherit [_] G.iter as super + + method! visit_directive env directive = + (match directive.G.d with + | G.ImportFrom (_, G.DottedName xs, imported_names) -> + imported_names + |> List.iter (function + | id, Some (alias, _id_info) -> + aliases := + (G.dotted_to_canonical (xs @ [ id ]), fst alias) + :: !aliases + | id, None -> + aliases := + (G.dotted_to_canonical (xs @ [ id ]), fst id) :: !aliases) + | G.ImportAs (_, G.DottedName xs, Some (alias, _id_info)) -> + aliases := (G.dotted_to_canonical xs, fst alias) :: !aliases + | _ -> ()); + super#visit_directive env directive + end + in + visitor#visit_program () ast; + !aliases + +let collect_imported_wildcards (ast : G.program) : string list list = + let wildcards = ref [] in + let visitor = + object + inherit [_] G.iter as super + + method! 
visit_directive env directive = + (match directive.G.d with + | G.ImportAll (_, G.DottedName xs, _) -> + wildcards := G.dotted_to_canonical xs :: !wildcards + | _ -> ()); + super#visit_directive env directive + end + in + visitor#visit_program () ast; + !wildcards + +let build_imported_entity_index ~(lang : Lang.t) (files : project_file list) = + let module_candidates_for_file file = + (* Keep every suffix of the full filesystem path. Trimming the shared + * prefix drops package segments when all files live under the same + * package tree, which breaks package import resolution. *) + module_candidates_of_path file.path.internal_path_to_content + |> List.sort_uniq Stdlib.compare + in + let base_index = + List.fold_left + (fun acc file -> + let module_candidates = module_candidates_for_file file in + file.funcs + |> List.fold_left + (fun acc func -> + let entity_segments = + importable_segments_of_fn_id func.fn_id |> Option.to_list + in + let entity_segments = + match constructor_import_alias_segments ~lang func.fn_id with + | Some alias_segments when not (List.mem alias_segments entity_segments) + -> + alias_segments :: entity_segments + | Some _ + | None -> + entity_segments + in + entity_segments + |> List.fold_left + (fun acc entity_segments -> + module_candidates + |> List.fold_left + (fun acc module_name -> + let canonical = module_name @ entity_segments in + add_imported_entity acc canonical func) + acc) + acc) + acc) + CanonicalMap.empty files + in + List.fold_left + (fun acc file -> + let module_candidates = module_candidates_for_file file in + let acc = + collect_imported_aliases file.ast + |> List.fold_left + (fun acc (canonical_target, local_name) -> + let acc = + match + lookup_imported_func + ~current_file:file.path.internal_path_to_content base_index + canonical_target + with + | None -> acc + | Some func -> + module_candidates + |> List.fold_left + (fun acc module_name -> + add_imported_entity acc (module_name @ [ local_name ]) + func) + acc + in + 
CanonicalMap.fold + (fun canonical funcs acc -> + if + List.length funcs <> 1 + || List.length canonical <= List.length canonical_target + || not + (starts_with_segments ~prefix:canonical_target + canonical) + then acc + else + let tail = + List_.drop (List.length canonical_target) canonical + in + let func = List.hd funcs in + module_candidates + |> List.fold_left + (fun acc module_name -> + add_imported_entity acc + (module_name @ (local_name :: tail)) + func) + acc) + base_index acc) + acc + in + collect_imported_wildcards file.ast + |> List.fold_left + (fun acc canonical_target -> + let resolved_targets = + canonical_lookup_candidates + ~current_file:file.path.internal_path_to_content canonical_target + in + (* Expand wildcard imports from the alias-augmented index built so + * far, not just the raw base index. Package __init__.py re-exports + * are added in the alias pass above, and wildcard imports like + * `from pkg import *` need to see those bindings too. *) + CanonicalMap.fold + (fun canonical funcs acc -> + if List.length funcs <> 1 then acc + else + match List.find_opt (fun target -> starts_with_segments ~prefix:target canonical) resolved_targets with + | None -> acc + | Some target -> ( + match List_.drop (List.length target) canonical with + | [ local_name ] -> + let func = List.hd funcs in + module_candidates + |> List.fold_left + (fun acc module_name -> + add_imported_entity acc + (module_name @ [ local_name ]) + func) + acc + | _ -> acc)) + acc acc) + acc) + base_index files + +let build_project_call_graph ~(lang : Lang.t) + (files : (Target.path * G.program * (G.name * G.name) list) list) : + Call_graph.G.t = + let files = + files + |> List_.map (fun (path, ast, object_mappings) -> + { path; ast; object_mappings; funcs = collect_functions ~lang ast }) + in + let graph = Call_graph.G.create () in + let imported_entity_index = build_imported_entity_index ~lang files in + let all_funcs = files |> List.concat_map (fun file -> file.funcs) in + files + 
|> List.iter (fun file -> + let imported_object_mappings = + detect_imported_object_initialization ~lang ~imported_entity_index + file.ast ~current_file:file.path.internal_path_to_content () + in + let object_mappings = + file.object_mappings @ imported_object_mappings + in + let local_graph = + build_call_graph_with_context ~lang + ~object_mappings + ~all_funcs ~imported_entity_index + ~current_file:file.path.internal_path_to_content file.ast + in + Call_graph.G.iter_vertex (Call_graph.G.add_vertex graph) local_graph; + Call_graph.G.iter_edges_e (Call_graph.G.add_edge_e graph) local_graph); + graph + (* Identify functions that contain byte ranges (from pattern matches) *) -let find_functions_containing_ranges ~(lang : Lang.t) (ast : G.program) - (ranges : Range.t list) : Function_id.t list = +let find_functions_containing_ranges ~(lang : Lang.t) + ?(current_file : Fpath.t option) (ast : G.program) (ranges : Range.t list) : + Function_id.t list = (* Hash table to track ALL functions containing each range, along with function size *) let range_to_funcs : (Range.t, (fn_id * int) list) Hashtbl.t = Hashtbl.create 10 in List.iter (fun range -> Hashtbl.add range_to_funcs range []) ranges; @@ -869,11 +1816,8 @@ let find_functions_containing_ranges ~(lang : Lang.t) (ast : G.program) (* This class contains this range - add it to the list *) match !current_class with | Some class_g_name -> - let class_il_name = AST_to_IL.var_of_name class_g_name in - let class_str = fst class_il_name.IL.ident in let class_node_name = - let fake_tok = Tok.unsafe_fake_tok ("Class:" ^ class_str) in - Some IL.{ ident = ("Class:" ^ class_str, fake_tok); sid = G.SId.unsafe_default; id_info = AST_generic.empty_id_info () } + Some (class_init_il_name class_g_name) in let class_fn_id = [None; class_node_name] in let existing = Hashtbl.find range_to_funcs range in @@ -943,12 +1887,7 @@ let find_functions_containing_ranges ~(lang : Lang.t) (ast : G.program) let funcs_list = Hashtbl.find range_to_funcs 
range in if List.is_empty funcs_list then (* No function contains this range - it's at top level *) - let top_level_name = - let fake_tok = Tok.unsafe_fake_tok "" in - Some IL.{ ident = ("", fake_tok); - sid = G.SId.unsafe_default; - id_info = AST_generic.empty_id_info () } - in + let top_level_name = Some (top_level_il_name ?current_file ast) in let top_level_fn_id = [None; top_level_name] in if List.mem top_level_fn_id matching_funcs then matching_funcs diff --git a/src/tainting/Graph_from_AST.mli b/src/tainting/Graph_from_AST.mli index 532ae4889..7620704fa 100644 --- a/src/tainting/Graph_from_AST.mli +++ b/src/tainting/Graph_from_AST.mli @@ -1,11 +1,28 @@ (* Special case for go *) val extract_go_receiver_type : AST_generic.function_definition -> string option +val top_level_il_name : + ?current_file:Fpath.t -> AST_generic.program -> IL.name + +val module_candidates_of_path : Fpath.t -> string list list + +val canonical_lookup_candidates : + ?current_file:Fpath.t -> string list -> string list list + val build_call_graph : lang : Lang.t -> ?object_mappings : (AST_generic.name * AST_generic.name) list -> AST_generic.program -> Call_graph.G.t +val build_project_call_graph : + lang:Lang.t -> + (Target.path * AST_generic.program * (AST_generic.name * AST_generic.name) list) list -> + Call_graph.G.t + val find_functions_containing_ranges : - lang : Lang.t -> AST_generic.program -> Range.t list -> Function_id.t list + lang : Lang.t -> + ?current_file:Fpath.t -> + AST_generic.program -> + Range.t list -> + Function_id.t list diff --git a/src/tainting/Sig_inst.ml b/src/tainting/Sig_inst.ml index 6e1398266..894bd8e04 100644 --- a/src/tainting/Sig_inst.ml +++ b/src/tainting/Sig_inst.ml @@ -440,6 +440,26 @@ let%test _ = Option.equal (=|=) (func {name = ""; index = 2}) (Some 2) && Option.equal (=|=) (func {name = ""; index = 3}) (Some 3) +let uses_implicit_receiver (fun_exp : IL.exp) = + match fun_exp.e with + | Fetch + { + base = (Var _ | VarSpecial ((Self | This), _)); + 
rev_offset = { o = Dot _; _ } :: _; + } -> + true + | _ -> false + +let adjust_fparams_for_implicit_receiver ?receiver_lval (fun_exp : IL.exp) + (fparams : Signature.params) num_args = + match fparams with + | Signature.P ("self" | "cls" | "this") :: rest + when + (uses_implicit_receiver fun_exp || Option.is_some receiver_lval) + && Int.equal (List.length fparams) (num_args + 1) -> + rest + | _ -> fparams + let combine_rest_args_exp (es : IL.exp list) : IL.exp = let e = IL.Composite (IL.CList, Tok.unsafe_fake_bracket es) in let eorig = @@ -461,8 +481,13 @@ let combine_rest_args_exp (es : IL.exp list) : IL.exp = TODO(shapes): This is needed for stuff that is not yet fully adapted to shapes, in theory we should only need 'instantiate_lval_using_shape'. *) -let instantiate_lval_using_actual_exps (fun_exp : IL.exp) fparams args_exps - (tlval : T.lval) : (IL.name * T.offset list * T.tainted_token) option = +let instantiate_lval_using_actual_exps ?receiver_lval (fun_exp : IL.exp) fparams + args_exps (tlval : T.lval) : + (IL.name * T.offset list * T.tainted_token) option = + let fparams = + adjust_fparams_for_implicit_receiver ?receiver_lval fun_exp fparams + (List.length args_exps) + in (* Error handling *) let log_error () = Log.err (fun m -> @@ -502,6 +527,12 @@ let instantiate_lval_using_actual_exps (fun_exp : IL.exp) fparams args_exps Some (var, offset @ tlval.offset, snd obj.ident) | __else__ -> None) | BThis -> ( + match receiver_lval with + | Some receiver_lval -> ( + let* var, receiver_offset = Lval_env.normalize_lval receiver_lval in + Some (var, receiver_offset @ tlval.offset, snd var.ident)) + | None -> + ( (* A field of the callee object, e.g.: @@ -548,7 +579,7 @@ let instantiate_lval_using_actual_exps (fun_exp : IL.exp) fparams args_exps Some (var, offset @ tlval.offset, snd method_.ident)) | __else__ -> log_error (); - None) + None)) (* HACK(implicit-taint-variables-in-env): * We have a function call with a taint variable, corresponding to a global or @@ 
-631,14 +662,23 @@ let combine_rest_args_taint (ts : (Taints.t * shape) list) : Taints.t * shape = in (taints, shape) -let instantiate_lval_using_shape lval_env fparams (fun_exp : IL.exp) args_taints - lval : (Taints.t * shape) option = +let instantiate_lval_using_shape ?receiver_lval lval_env fparams + (fun_exp : IL.exp) args_taints lval : (Taints.t * shape) option = + let fparams = + adjust_fparams_for_implicit_receiver ?receiver_lval fun_exp fparams + (List.length args_taints) + in let { T.base; offset } = lval in let* base, offset = match base with | T.BArg pos -> Some (`Arg pos, offset) | BThis -> ( (* TODO: Should we refactor this with 'instantiate_lval_using_actual_exps' ? *) + match receiver_lval with + | Some receiver_lval -> ( + let* var, receiver_offset = Lval_env.normalize_lval receiver_lval in + Some (`Var var, receiver_offset @ offset)) + | None -> ( match fun_exp with | { e = Fetch { base = Var obj; rev_offset = [ { o = Dot _method; _ } ] }; @@ -654,7 +694,7 @@ let instantiate_lval_using_shape lval_env fparams (fun_exp : IL.exp) args_taints | [] -> Some (`Var var, offset) | Ofld var :: offset -> Some (`Var var, offset) | (Oint _ | Ostr _ | Oany) :: _ -> None) - | __else__ -> None) + | __else__ -> None)) | BGlob var -> Some (`Var var, offset) in let* base_taints, base_shape = @@ -671,10 +711,11 @@ let instantiate_lval_using_shape lval_env fparams (fun_exp : IL.exp) args_taints Shape.find_in_shape_poly ~taints:base_taints offset base_shape (* What is the taint denoted by 'sig_lval' ? 
*) -let instantiate_lval lval_env fparams fun_exp args_exps +let instantiate_lval ?receiver_lval lval_env fparams fun_exp args_exps (args_taints : (Taints.t * shape) IL.argument list) (sig_lval : T.lval) = match - instantiate_lval_using_shape lval_env fparams fun_exp args_taints sig_lval + instantiate_lval_using_shape ?receiver_lval lval_env fparams fun_exp + args_taints sig_lval with | Some (taints, shape) -> Some (taints, shape) | None -> ( @@ -692,8 +733,8 @@ let instantiate_lval lval_env fparams fun_exp args_exps * see 'lval_of_sig_lval'. *) let* var, offset, _obj = - instantiate_lval_using_actual_exps fun_exp fparams args_exps - sig_lval + instantiate_lval_using_actual_exps ?receiver_lval fun_exp fparams + args_exps sig_lval in let lval_taints, shape = match Lval_env.find_poly lval_env var offset with @@ -717,6 +758,7 @@ let instantiate_lval lval_env fparams fun_exp args_exps let rec instantiate_function_signature lval_env (taint_sig : Signature.t) ~callee ~(args : _ option) (args_taints : (Taints.t * shape) IL.argument list) + ?receiver_lval ?(lookup_sig : (IL.exp -> int -> Signature.t option) option) ?(depth : int = 0) () : call_effects option = Log.debug (fun m -> @@ -737,7 +779,8 @@ let rec instantiate_function_signature lval_env (taint_sig : Signature.t) So we will isolate this as a specific step to be applied as necessary. 
*) let opt_taints_shape = - instantiate_lval lval_env taint_sig.params callee args args_taints lval + instantiate_lval ?receiver_lval lval_env taint_sig.params callee args + args_taints lval in Log.debug (fun m -> m ~tags:sigs_tag "- Instantiating %s: %s -> %s" @@ -877,8 +920,8 @@ let rec instantiate_function_signature lval_env (taint_sig : Signature.t) (T.show_lval dst_sig_lval)); None | Some args -> - instantiate_lval_using_actual_exps callee taint_sig.params args - dst_sig_lval + instantiate_lval_using_actual_exps ?receiver_lval callee + taint_sig.params args dst_sig_lval in let taints = taints diff --git a/src/tainting/Sig_inst.mli b/src/tainting/Sig_inst.mli index d35a5d727..17eec7913 100644 --- a/src/tainting/Sig_inst.mli +++ b/src/tainting/Sig_inst.mli @@ -22,6 +22,7 @@ val instantiate_function_signature : callee:IL.exp -> args:IL.exp IL.argument list option (** actual arguments *) -> (Taint.Taint_set.t * Shape_and_sig.Shape.shape) IL.argument list -> + ?receiver_lval:IL.lval -> ?lookup_sig:(IL.exp -> int -> Shape_and_sig.Signature.t option) -> ?depth:int -> unit ->