Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions opam/call_graph.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "1.100.0"
synopsis: "Call graph infrastructure for opengrep"
description:
"Call graph construction, serialization, and analysis utilities."
maintainer: ["Opengrep authors"]
authors: ["Semgrep authors, Opengrep authors"]
homepage: "https://opengrep.dev"
bug-reports: "https://github.com/opengrep/opengrep/issues"
depends: [
"dune" {>= "3.8"}
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/opengrep/opengrep.git"
10 changes: 3 additions & 7 deletions src/call_graph/Call_graph.ml
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,8 @@ let lookup_callee_from_graph (graph : G.t option)
m "CALL_GRAPH: caller_node is None during lookup!");
None
| Some g, Some caller ->
if not (G.mem_vertex g caller) then (
Log.debug (fun m ->
m "CALL_GRAPH: Caller %s not in graph" (show_node caller));
None
) else
if not (G.mem_vertex g caller) then None
else
let call_pos = pos_of_tok call_tok in
(* Get edges coming INTO the caller (callee -> caller) *)
let incoming_edges = G.pred_e g caller in
Expand All @@ -115,8 +112,7 @@ let lookup_callee_from_graph (graph : G.t option)
Pos.equal label.call_site call_pos)
in
match exact_match with
| Some edge ->
Some (G.E.src edge)
| Some edge -> Some (G.E.src edge)
| None ->
(* No fallback - return None so external calls use direct signature lookup.
Previously there was a line 0 fallback that matched implicit/HOF edges,
Expand Down
20 changes: 11 additions & 9 deletions src/call_graph/Function_id.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ let normalize_file (file : Fpath.t) : string =
Fpath.to_string (Fpath.normalize file)

(* Comparison/hash key for a function id: the name component paired with the
 * normalized file path, line and column of its token.
 *
 * The location is obtained through [Tok.loc_of_tok]; when that returns
 * [Error _] (the token carries no usable location) the placeholder
 * position [("", 0, 0)] is used, so such ids are distinguished by name only.
 *)
let key ((id, tok) : t) =
  match Tok.loc_of_tok tok with
  | Ok loc ->
      let file = loc.pos.file in
      let line = loc.pos.line in
      let col = loc.pos.column in
      (* Normalize so that equivalent spellings of a path yield the same key. *)
      (id, normalize_file file, line, col)
  | Error _ -> (id, "", 0, 0)

(* Hash a function id through its [key], so the hash is derived from exactly
 * the components that [key] exposes. *)
let hash (v : t) = v |> key |> Hashtbl.hash

Expand Down Expand Up @@ -63,5 +63,7 @@ let of_il_name (n : IL.name) : t =
n.IL.ident

(* Return [(file, line, column)] for the token of a function id.
 *
 * The file path is normalized with [normalize_file]. When [Tok.loc_of_tok]
 * returns [Error _] the result is [("unknown", 0, 0)].
 *)
let to_file_line_col ((_, tok) : t) : string * int * int =
  match Tok.loc_of_tok tok with
  | Ok loc -> (normalize_file loc.pos.file, loc.pos.line, loc.pos.column)
  | Error _ -> ("unknown", 0, 0)
119 changes: 101 additions & 18 deletions src/core_scan/Core_scan.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ module MR = Mini_rule
module R = Rule
module Out = Semgrep_output_v1_j
module TLS = Thread_local_storage
(* [Fpath.t] packaged as an ordered type, as required by [Map.Make]. *)
module PathOrd = struct
  type t = Fpath.t

  let compare (left : t) (right : t) : int = Fpath.compare left right
end

(* Finite maps keyed by file path. *)
module PathMap = Map.Make (PathOrd)

(*****************************************************************************)
(* Purpose *)
Expand Down Expand Up @@ -754,9 +761,24 @@ let sca_rules_filtering (target : Target.regular) (rules : Rule.t list) :
(*****************************************************************************)

(* Build the [Match_env.xconfig] used for matching from the scan
 * configuration:
 * - the rule options start from [Rule_options.default], with
 *   [taint_intrafile] taken from [config];
 * - equivalences are parsed from [config.equivalences_file];
 * - [matching_conf] and [matching_explanations] are copied from [config];
 * - [prefilter_cache_opt] becomes the [filter_irrelevant_rules] setting.
 *)
let mk_xconf (config : Core_scan_config.t)
    (prefilter_cache_opt : Match_env.prefilter_config) : Match_env.xconfig =
  {
    Match_env.config =
      { Rule_options.default with taint_intrafile = config.taint_intrafile };
    equivs = parse_equivalences config.equivalences_file;
    nested_formula = false;
    matching_conf = config.matching_conf;
    matching_explanations = config.matching_explanations;
    filter_irrelevant_rules = prefilter_cache_opt;
  }

(* build the callback for iter_targets_and_get_matches_and_exn_to_errors *)
let mk_target_handler (caps : < Cap.time_limit >) (config : Core_scan_config.t)
(valid_rules : Rule.t list)
(prefilter_cache_opt : Match_env.prefilter_config) : target_handler =
(prefilter_cache_opt : Match_env.prefilter_config)
?(resolved_xtargets : Xtarget.t PathMap.t option)
?(interfile_context : Match_tainting_mode.interfile_context option) :
target_handler =
function
| Lockfile ({ path; kind } as lockfile) ->
(* TODO: (sca) we always pass None as the manifest target here, but this
Expand Down Expand Up @@ -792,18 +814,15 @@ let mk_target_handler (caps : < Cap.time_limit >) (config : Core_scan_config.t)

(* TODO: can we skip all of this if there are no applicable
rules? In particular, can we skip print_cli_progress? *)
let xtarget = Xtarget.resolve parse_and_resolve_name target in
let match_hook _ = () in
let xconf =
{
Match_env.config = { Rule_options.default with taint_intrafile = config.taint_intrafile };
equivs = parse_equivalences config.equivalences_file;
nested_formula = false;
matching_conf = config.matching_conf;
matching_explanations = config.matching_explanations;
filter_irrelevant_rules = prefilter_cache_opt;
}
let xtarget =
match resolved_xtargets with
| Some xtargets ->
PathMap.find_opt file xtargets
|> Option.value ~default:(Xtarget.resolve parse_and_resolve_name target)
| None -> Xtarget.resolve parse_and_resolve_name target
in
let match_hook _ = () in
let xconf = mk_xconf config prefilter_cache_opt in
let rules, dependency_match_table = sca_rules_filtering target rules in
let timeout =
let caps = (caps :> < Cap.time_limit >) in
Expand All @@ -821,8 +840,8 @@ let mk_target_handler (caps : < Cap.time_limit >) (config : Core_scan_config.t)
in
let matches : Core_result.matches_single_file =
(* !!Calling Match_rules!! Calling the matching engine!! *)
Match_rules.check ~match_hook ~timeout ~dependency_match_table xconf
rules xtarget
Match_rules.check ~match_hook ~timeout ~dependency_match_table
?interfile_context xconf rules xtarget
in
(* Add file size when profiling is on. *)
let matches =
Expand Down Expand Up @@ -882,22 +901,86 @@ let scan_exn (caps : < caps ; .. >) (config : Core_scan_config.t)
end
else NoPrefiltering
in
let base_xconf = mk_xconf config prefilter_cache_opt in
let interfile_rule_targets, resolved_xtargets, interfile_languages_used =
let interfile_rules =
valid_rules
|> List_.filter_map (fun rule ->
match rule.R.mode with
| `Taint _ as mode ->
let xconf_rule =
Match_env.adjust_xconfig_with_rule_options base_xconf
rule.R.options
in
if xconf_rule.config.interfile then Some { rule with mode }
else None
| _ -> None)
in
if List_.null interfile_rules then ([], None, [])
else
let resolved_xtargets =
targets
|> List.fold_left
(fun acc target ->
match target with
| Target.Regular regular ->
let xtarget = Xtarget.resolve parse_and_resolve_name regular in
PathMap.add regular.path.internal_path_to_content xtarget acc
| Target.Lockfile _ -> acc)
PathMap.empty
in
let interfile_rule_targets =
interfile_rules
|> List_.map (fun rule ->
let xtargets =
targets
|> List_.filter_map (function
| Target.Regular regular ->
let applicable =
rules_for_target ~analyzer:regular.analyzer
~products:regular.products
~origin:regular.path.origin
~respect_rule_paths:config.respect_rule_paths
[ (rule :> R.rule) ]
<> []
in
if applicable then
PathMap.find_opt regular.path.internal_path_to_content
resolved_xtargets
else None
| Target.Lockfile _ -> None)
in
(rule, xtargets))
in
let interfile_languages_used =
interfile_rules
|> List_.map (fun rule -> rule.R.target_analyzer)
|> List.sort_uniq Stdlib.compare
in
(interfile_rule_targets, Some resolved_xtargets, interfile_languages_used)
in
let interfile_context =
match interfile_rule_targets with
| [] -> None
| _ ->
Some
(Match_tainting_mode.build_interfile_contexts base_xconf
interfile_rule_targets)
in
let file_results, scanned_targets =
targets
|> iter_targets_and_get_matches_and_exn_to_errors
(caps :> < Cap.fork ; Cap.memory_limit >)
config
(mk_target_handler
(caps :> < Cap.time_limit >)
config valid_rules prefilter_cache_opt)
config valid_rules prefilter_cache_opt ?resolved_xtargets
?interfile_context)
in

(* TODO: Delete any lockfile-only findings whose rule produced a code+lockfile
finding in that lockfile in scanned_targets?
*)

(* the OSS engine was invoked so no interfile langs *)
let interfile_languages_used = [] in
let (res : Core_result.t) =
Core_result.mk_result file_results
(List_.map (fun r -> (r, `OSS)) valid_rules)
Expand Down
7 changes: 5 additions & 2 deletions src/engine/Match_env.ml
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,12 @@ let adjust_xconfig_with_rule_options xconf options =
match options with
| None -> xconf.config
| Some (rule_opts : Rule_options.t) ->
(* Merge rule options with existing config, preserving command-line taint_intrafile setting *)
(* Interfile tainting reuses the same summary machinery as taint_intrafile,
* so enabling it at the rule level must also enable taint_intrafile. *)
{ rule_opts with
taint_intrafile = xconf.config.taint_intrafile || rule_opts.taint_intrafile
taint_intrafile =
xconf.config.taint_intrafile || rule_opts.taint_intrafile
|| rule_opts.interfile
}
in
{ xconf with config }
Expand Down
36 changes: 22 additions & 14 deletions src/engine/Match_rules.ml
Original file line number Diff line number Diff line change
Expand Up @@ -130,20 +130,26 @@ let is_relevant_rule_for_xtarget r xconf xtarget =
in
let xconf = Match_env.adjust_xconfig_with_rule_options xconf r.R.options in
let is_relevant =
match xconf.filter_irrelevant_rules with
| NoPrefiltering -> true
| PrefilterWithCache cache -> (
match Analyze_rule.regexp_prefilter_of_rule ~cache:(Some cache) r with
| None -> true
| Some (prefilter_formula, func) ->
(* NOTE: If [lazy_content] is shared in > 1 thread, then this is not
* thread-safe. However, each [Xtarget.t] is only accessed in 1 worker
* task, so there should be no race. *)
let content = Lazy.force lazy_content in
Log.info (fun m ->
let s = Semgrep_prefilter_j.string_of_formula prefilter_formula in
m "looking for %s in %s" s !!internal_path_to_content);
func content)
if xconf.config.interfile then
(* Regex prefiltering is file-local, but interfile taint rules may split
* their source and sink across different files. Skipping the prefilter
* avoids falsely dropping files that only contain one side of the flow. *)
true
else
match xconf.filter_irrelevant_rules with
| NoPrefiltering -> true
| PrefilterWithCache cache -> (
match Analyze_rule.regexp_prefilter_of_rule ~cache:(Some cache) r with
| None -> true
| Some (prefilter_formula, func) ->
(* NOTE: If [lazy_content] is shared in > 1 thread, then this is not
* thread-safe. However, each [Xtarget.t] is only accessed in 1 worker
* task, so there should be no race. *)
let content = Lazy.force lazy_content in
Log.info (fun m ->
let s = Semgrep_prefilter_j.string_of_formula prefilter_formula in
m "looking for %s in %s" s !!internal_path_to_content);
func content)
in
if not is_relevant then
Log.info (fun m ->
Expand Down Expand Up @@ -276,6 +282,7 @@ let scc_match_hook (match_hook : Core_match.t -> unit)

let check
?(dependency_match_table : Match_SCA_mode.dependency_match_table option)
?(interfile_context : Match_tainting_mode.interfile_context option)
~match_hook ~(timeout : timeout_config option) (xconf : Match_env.xconfig)
(rules : Rule.rules) (xtarget : Xtarget.t) : Core_result.matches_single_file
=
Expand Down Expand Up @@ -319,6 +326,7 @@ let check
taint_rules_groups
|> List.concat_map (fun taint_rules ->
Match_tainting_mode.check_rules ~match_hook
?interfile_context
~per_rule_boilerplate_fn:per_rule_boilerplate_fn_opt
taint_rules xconf xtarget)
in
Expand Down
1 change: 1 addition & 0 deletions src/engine/Match_rules.mli
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type timeout_config = {
*)
val check :
?dependency_match_table:Match_SCA_mode.dependency_match_table ->
?interfile_context:Match_tainting_mode.interfile_context ->
match_hook:(Core_match.t -> unit) ->
timeout:timeout_config option ->
Match_env.xconfig ->
Expand Down
7 changes: 4 additions & 3 deletions src/engine/Match_taint_spec.ml
Original file line number Diff line number Diff line change
Expand Up @@ -441,13 +441,14 @@ let mk_taint_spec_match_preds rule matches =

let default_effect_handler _fun_name new_effects = new_effects

let taint_config_of_rule ~per_file_formula_cache
let taint_config_of_rule ~per_file_formula_cache ?(require_source_sink = true)
?(handle_effects = default_effect_handler) xconf lang file ast_and_errors
({ mode = `Taint spec; _ } as rule : R.taint_rule) =
match spec_matches_of_taint_rule ~per_file_formula_cache xconf !!file
ast_and_errors rule with
| { sinks = []; _ }, _
| { sources = []; _ }, _ -> None
| ({ sinks = []; _ }, _ | { sources = []; _ }, _)
when require_source_sink ->
None
| spec_matches, expls ->
let xconf = Match_env.adjust_xconfig_with_rule_options xconf rule.options in
let options = xconf.config in
Expand Down
1 change: 1 addition & 0 deletions src/engine/Match_taint_spec.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ val hook_mk_taint_spec_match_preds :
*)
val taint_config_of_rule :
per_file_formula_cache:Formula_cache.t ->
?require_source_sink:bool ->
?handle_effects:Taint_rule_inst.effects_handler
(** Use 'handle_effects' to e.g. apply hash-consing (see 'Deep_tainting'), or
to do some side-effect if needed.
Expand Down
Loading