diff --git a/docs/publications/galaxy.png b/docs/publications/galaxy.png new file mode 100644 index 00000000..01850974 Binary files /dev/null and b/docs/publications/galaxy.png differ diff --git a/docs/publications/snakemake.jpg b/docs/publications/snakemake.jpg new file mode 100644 index 00000000..624907d0 Binary files /dev/null and b/docs/publications/snakemake.jpg differ diff --git a/docs/publications/table2.png b/docs/publications/table2.png new file mode 100644 index 00000000..26da25ce Binary files /dev/null and b/docs/publications/table2.png differ diff --git a/docs/publications/workflow/rube-goldberg-software.png b/docs/publications/workflow/rube-goldberg-software.png new file mode 100644 index 00000000..e6f1965f Binary files /dev/null and b/docs/publications/workflow/rube-goldberg-software.png differ diff --git a/docs/usenix.sty b/docs/usenix.sty deleted file mode 100644 index 7ea4a2fe..00000000 --- a/docs/usenix.sty +++ /dev/null @@ -1,131 +0,0 @@ -% https://www.usenix.org/sites/default/files/usenix-2020-09.sty - -% usenix.sty - to be used with latex2e for USENIX. -% To use this style file, look at the template usenix2019_v3.1.tex -% -% $Id: usenix.sty,v 1.2 2005/02/16 22:30:47 maniatis Exp $ -% -% The following definitions are modifications of standard article.sty -% definitions, arranged to do a better job of matching the USENIX -% guidelines. -% It will automatically select two-column mode and the Times-Roman -% font. -% -% 2018-12-19 [for ATC'19]: add packages to help embed all fonts in -% pdf; to improve appearance (hopefully); to make refs and citations -% clickable in pdf -% -% 2020-09-21 file updated to comment out flushend and make it optional - -% -% USENIX papers are two-column. -% Times-Roman font is nice if you can get it (requires NFSS, -% which is in latex2e. 
- -\if@twocolumn\else\input twocolumn.sty\fi -\usepackage{mathptmx} % times roman, including math (where possible) - -% hopefully embeds all fonts in pdf -\usepackage[T1]{fontenc} -\usepackage[utf8]{inputenc} -\usepackage{pslatex} - -% appearance -\usepackage[kerning,spacing]{microtype} % more compact and arguably nicer - -% Uncomment the following line if you want the columns of the last page -% equal in size. But note that doing so may cause issues with some -% document-generating tools. -% \usepackage{flushend} - -% refs and bib -%\usepackage{cite} % order multiple entries in \cite{...} -\usepackage{breakurl} % break too-long urls in refs -\usepackage{url} % allow \url in bibtex for clickable links -\usepackage{xcolor} % color definitions, to be use for... -\usepackage[]{hyperref} % ...clickable refs within pdf... -\hypersetup{ % ...like so - colorlinks, - linkcolor={green!80!black}, - citecolor={red!70!black}, - urlcolor={blue!70!black} -} - -% -% USENIX wants margins of: 0.75" sides, 1" bottom, and 1" top. -% 0.33" gutter between columns. -% Gives active areas of 7" x 9" -% -\setlength{\textheight}{9.0in} -\setlength{\columnsep}{0.33in} -\setlength{\textwidth}{7.00in} - -\setlength{\topmargin}{0.0in} - -\setlength{\headheight}{0.0in} - -\setlength{\headsep}{0.0in} - -\addtolength{\oddsidemargin}{-0.25in} -\addtolength{\evensidemargin}{-0.25in} - -% USENIX wants no page numbers for camera-ready papers, so that they can -% number them themselves. But submitted papers should have page numbers -% for the reviewers' convenience. -% -% -% \pagestyle{empty} - -% -% USENIX titles are in 14-point bold type, with no date, and with no -% change in the empty page headers. The whole author section is 12 point -% italic--- you must use {\rm } around the actual author names to get -% them in roman. 
-% -\def\maketitle{\par - \begingroup - \renewcommand\thefootnote{\fnsymbol{footnote}}% - \def\@makefnmark{\hbox to\z@{$\m@th^{\@thefnmark}$\hss}}% - \long\def\@makefntext##1{\parindent 1em\noindent - \hbox to1.8em{\hss$\m@th^{\@thefnmark}$}##1}% - \if@twocolumn - \twocolumn[\@maketitle]% - \else \newpage - \global\@topnum\z@ - \@maketitle \fi\@thanks - \endgroup - \setcounter{footnote}{0}% - \let\maketitle\relax - \let\@maketitle\relax - \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} - -\def\@maketitle{\newpage - \vbox to 3.5in{ - \vspace*{\fill} - \vskip 2em - \begin{center}% - {\Large\textbf{\@title} \par}% - \vskip 0.375in minus 0.300in - {\large\it - \lineskip .5em - \begin{tabular}[t]{c}\@author - \end{tabular}\par}% - \end{center}% - \par - \vspace*{\fill} -% \vskip 1.5em - } -} - -% -% The abstract is preceded by a 12-pt bold centered heading -\def\abstract{\begin{center}% -{\large\textbf{\abstractname}\vspace{-.5em}\vspace{\z@}}% -\end{center}} -\def\endabstract{} - -% -% Main section titles are 12-pt bold. Others can be same or smaller. -% -\def\section{\@startsection {section}{1}{\z@}{-3.5ex plus-1ex minus - -.2ex}{2.3ex plus.2ex}{\reset@font\large\bf}} diff --git a/docs/usenix_template.tex b/docs/usenix_template.tex deleted file mode 100644 index ef7fd312..00000000 --- a/docs/usenix_template.tex +++ /dev/null @@ -1,151 +0,0 @@ -% https://www.usenix.org/sites/default/files/usenix2019_v3.1.tex - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Template for USENIX papers. -% -% History: -% -% - TEMPLATE for Usenix papers, specifically to meet requirements of -% USENIX '05. originally a template for producing IEEE-format -% articles using LaTeX. written by Matthew Ward, CS Department, -% Worcester Polytechnic Institute. adapted by David Beazley for his -% excellent SWIG paper in Proceedings, Tcl 96. turned into a -% smartass generic template by De Clarke, with thanks to both the -% above pioneers. 
Use at your own risk. Complaints to /dev/null. -% Make it two column with no page numbering, default is 10 point. -% -% - Munged by Fred Douglis 10/97 to -% separate the .sty file from the LaTeX source template, so that -% people can more easily include the .sty file into an existing -% document. Also changed to more closely follow the style guidelines -% as represented by the Word sample file. -% -% - Note that since 2010, USENIX does not require endnotes. If you -% want foot of page notes, don't include the endnotes package in the -% usepackage command, below. -% - This version uses the latex2e styles, not the very ancient 2.09 -% stuff. -% -% - Updated July 2018: Text block size changed from 6.5" to 7" -% -% - Updated Dec 2018 for ATC'19: -% -% * Revised text to pass HotCRP's auto-formatting check, with -% hotcrp.settings.submission_form.body_font_size=10pt, and -% hotcrp.settings.submission_form.line_height=12pt -% -% * Switched from \endnote-s to \footnote-s to match Usenix's policy. -% -% * \section* => \begin{abstract} ... \end{abstract} -% -% * Make template self-contained in terms of bibtex entries, to allow -% this file to be compiled. (And changing refs style to 'plain'.) -% -% * Make template self-contained in terms of figures, to -% allow this file to be compiled. -% -% * Added packages for hyperref, embedding fonts, and improving -% appearance. -% -% * Removed outdated text. 
-% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\documentclass[letterpaper,twocolumn,10pt]{article} - -% See https://github.com/jgm/pandoc/blob/main/data/templates/default.latex - -\usepackage[dvipsnames,table]{xcolor} -\usepackage{usenix} -\usepackage{amsmath} -\usepackage{calc} -\usepackage{printlen} -\usepackage{textcomp} % provide euro and other symbols -% \usepackage{lmodern} -\usepackage{cleveref} -\usepackage{physics} -\usepackage{enumitem} -\usepackage{longtable} -%\usepackage{subcaption} -\usepackage{booktabs} -\usepackage{array} -\usepackage{relsize} -\usepackage{etoolbox} -\usepackage{csquotes} -\usepackage{bookmark} -%\usepackage{xurl} -\usepackage{svg} -\usepackage[normalem]{ulem} -$if(linestretch)$\usepackage{setspace}$endif$ -$if(verbatim-in-note)$\usepackage{fancyvrb}$endif$ -$if(listings)$\usepackage{listings}$endif$ -$if(verbatim-in-note)$\usepackage{fancyvrb}$endif$ - -$if(highlighting-macros)$$highlighting-macros$$endif$ - -% \usepackage{bibcop} - -\RequirePackage[l2tabu, orthodox]{nag} - -\usepackage{graphicx} - -% avoid problems with \sout in headers with hyperref: -\pdfstringdefDisableCommands{\renewcommand{\sout}{}} - -%\setlength{\emergencystretch}{3em} % prevent overfull lines -%\providecommand{\tightlist}{\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} -\providecommand{\tightlist}{} -% \setlist{topsep=0pt, leftmargin=*} -% \setitemize{noitemsep,topsep=0pt,parsep=0pt,partopsep=0pt} - -$for(header-includes)$ -$header-includes$ -$endfor$ - -\newcommand{\bibliofont}{\footnotesize} - -% Gets rid of microtype warning: -\microtypecontext{spacing=nonfrench} - -\RequirePackage[ - datamodel=acmdatamodel, - style=acmnumeric, % use style=acmauthoryear for publications that require it -]{biblatex} -$for(bibliography)$ -\addbibresource{$bibliography$} -$endfor$ - -%don't want date printed -\date{} - -% make title bold and 14 pt font (Latex default is non-bold, 16 pt) -\title{\Large \textbf{$title$}} - 
-$if(anonymous)$ -\author{(Anonymous)} -$else$ -\author{ -$for(author)$ -{\textrm{$author.name$}}\\ -$if(author.affiliation)$$author.affiliation.institution$$endif$$sep$ -\and -$endfor$ -} -$endif$ - -\begin{document} - -\maketitle - -\begin{abstract} -$abstract$ -\end{abstract} - -$if(abstract_only)$ -$else$ - -$body$ - -$endif$ - -\end{document} diff --git a/libprobe/Makefile b/libprobe/Makefile index 8a0d6ef0..a8f56033 100644 --- a/libprobe/Makefile +++ b/libprobe/Makefile @@ -139,6 +139,11 @@ clean: rm --force --recursive $(BUILD_DIR)/ $(GENERATED_FILES) compile_commands.json generated/* .PHONY: clean +shallow-clean: + mkdir --parents $(BUILD_DIR)/ + rm --force --recursive $(BUILD_DIR)/ +.PHONY: shallow-clean + format: # Yes, this clang-formats the generated sources # But, they aren't checked into VCS, so I don't care diff --git a/probe_py/probe_py/workflow.py b/probe_py/probe_py/workflow.py new file mode 100644 index 00000000..0dda9382 --- /dev/null +++ b/probe_py/probe_py/workflow.py @@ -0,0 +1,209 @@ +from __future__ import annotations +import collections.abc +import dataclasses +import enum +import itertools +import pathlib +import shlex +import typing +import networkx +from . import dataflow_graph +from . import graph_utils +from . import headers as ops +from . import ptypes +from . 
import util + + +It: typing.TypeAlias = collections.abc.Iterable +Seq: typing.TypeAlias = collections.abc.Sequence +Map: typing.TypeAlias = collections.abc.Mapping + + +class WorkflowType(enum.StrEnum): + SNAKEMAKE = enum.auto() + + +@dataclasses.dataclass(frozen=True) +class Rule: + inputs: It[pathlib.Path] + outputs: It[pathlib.Path] + cwd: pathlib.Path + exe: pathlib.Path + argv: Seq[bytes] + env: It[bytes] + + def deduplicate(self) -> Rule: + return Rule( + tuple(frozenset(self.inputs)), + tuple(frozenset(self.outputs)), + self.cwd, + self.exe, + self.argv, + self.env + ) + + def filter(self: Rule, predicate: typing.Callable[[pathlib.Path], bool]) -> Rule: + return Rule( + tuple(filter(predicate, self.inputs)), + tuple(filter(predicate, self.outputs)), + self.cwd, + self.exe, + self.argv, + self.env + ) + + @staticmethod + def combine(main: Rule, children: It[Rule]) -> Rule: + return Rule( + tuple([*main.inputs, *itertools.chain.from_iterable(child.inputs for child in children)]), + tuple([*main.outputs, *itertools.chain.from_iterable(child.outputs for child in children)]), + main.cwd, + main.exe, + main.argv, + main.env, + ) + + +def dataflow_graph_to_workflow( + probe_log: ptypes.ProbeLog, + dfg: dataflow_graph.DataflowGraph, + is_important_path: typing.Callable[[pathlib.Path], bool], + is_important_cmd: typing.Callable[[Seq[bytes]], bool], + inodes_to_paths: Map[ptypes.Inode, It[pathlib.Path]], +) -> It[Rule]: + exec_pair_to_quads: Map[ptypes.ExecPair, It[dataflow_graph.Quads]] = util.groupby_dict( + [ + node + for node in dfg.nodes() + if isinstance(node, dataflow_graph.Quads) + ], + key_func=lambda quads: next(iter(quads)).exec_pair(), + ) + + all_rules = {} + for exec_pair, quadss in exec_pair_to_quads.items(): + inputs = frozenset( + path + for quads in quadss + for pred in dfg.predecessors(quads) + if isinstance(pred, dataflow_graph.IVNs) + for ivn in pred + for path in inodes_to_paths.get(ivn.inode, []) + ) + outputs = frozenset( + path + for quads in 
quadss + for succ in dfg.successors(quads) + if isinstance(succ, dataflow_graph.IVNs) + for ivn in succ + for path in inodes_to_paths.get(ivn.inode, []) + ) + init_quad = ptypes.OpQuad(exec_pair.pid, exec_pair.exec_no, exec_pair.pid.main_thread(), 0) + init_op_data = probe_log.get_op(init_quad).data + assert isinstance(init_op_data, ops.InitExecEpoch) + all_rules[exec_pair] = Rule( + tuple(inputs), + tuple(outputs), + pathlib.Path(init_op_data.cwd.path.decode()), + pathlib.Path(init_op_data.exe.path.decode()), + tuple(init_op_data.argv), + tuple(init_op_data.env), + ) + del exec_pair + + exec_pair_graph = get_exec_pair_graph(probe_log) + root_exec_pair = ptypes.ExecPair(probe_log.get_root_pid(), ptypes.ExecNo(0)) + traversal = graph_utils.search_with_pruning(exec_pair_graph, root_exec_pair, breadth_first=True) + rules: list[Rule] = [] + for curr_exec_pair in traversal: + assert curr_exec_pair is not None + if curr_exec_pair in all_rules: + main_rule = all_rules[curr_exec_pair] + important_inputs = list(filter(is_important_path, main_rule.inputs)) + important_outputs = list(filter(is_important_path, main_rule.outputs)) + # print("main_rule:", main_rule.argv[0]) + # print(" important cmd:", is_important_cmd(main_rule.argv)) + # print(" ", list(filter(is_important_path, main_rule.inputs))) + # print(" ", list(filter(is_important_path, main_rule.outputs))) + if is_important_cmd(main_rule.argv) or important_inputs or important_outputs: + # print(" keep") + child_rules = [] + # We want to take main_rule, but we need to subsume all child rules + for child_exec_pair in networkx.descendants(exec_pair_graph, curr_exec_pair): + # print(" child:", all_rules[child_exec_pair].argv[0]) + child_rules.append(all_rules[child_exec_pair]) + main_rule = Rule.combine(main_rule, child_rules).filter(is_important_path) + doubled_paths = frozenset(main_rule.inputs) & frozenset(main_rule.outputs) + main_rule = main_rule.filter(lambda path: path not in doubled_paths).deduplicate() + 
rules.append(main_rule) + traversal.send(False) + else: + # print(" ditch") + traversal.send(True) + else: + traversal.send(True) + return rules + + +def to_source( + workflow_type: WorkflowType, + directory: pathlib.Path, + rules: It[Rule], + include_env: bool, +) -> None: + { + WorkflowType.SNAKEMAKE: to_snakemake + }[workflow_type](directory, rules, include_env) + + +def to_snakemake( + directory: pathlib.Path, + rules: It[Rule], + include_env: bool +) -> None: + lines = [] + lines.append(b"# Generated by PROBE") + for rule in rules: + lines.append(b"rule " + rule.argv[0] + b"_" + str(hash(rule)).encode() + b":") + lines.append(b" input:") + for input in rule.inputs: + lines.append(b" \"" + str(input).encode() + b"\"") + lines.append(b" output:") + for output in rule.outputs: + lines.append(b" \"" + str(output).encode() + b"\"") + lines.append(b" shell:") + lines.append(b" " + repr(shlex.join([ + "env", + "--chdir", + str(rule.cwd), + *([env.decode() for env in rule.env] if include_env else []), + *(argv.decode() for argv in rule.argv) + ])).encode()) + lines.append(b"") + (directory / "Snakefile").write_bytes(b"\n".join(lines)) + + +def get_exec_pair_graph(probe_log: ptypes.ProbeLog) -> networkx.DiGraph[ptypes.ExecPair]: + graph: networkx.DiGraph[ptypes.ExecPair] = networkx.DiGraph() + for quad, op in probe_log.ops(): + current = quad.exec_pair() + graph.add_node(current) + if isinstance(op.data, ops.Clone) and op.data.task_type == ops.TaskType.PID and op.ferrno == 0: + target = ptypes.ExecPair( + ptypes.Pid(op.data.task_id), + ptypes.ExecNo(0), + ) + graph.add_edge(current, target) + elif isinstance(op.data, ops.Spawn) and op.ferrno == 0: + target = ptypes.ExecPair( + ptypes.Pid(op.data.child_pid), + ptypes.ExecNo(0), + ) + graph.add_edge(current, target) + elif isinstance(op.data, ops.Exec) and op.ferrno == 0: + target = ptypes.ExecPair( + quad.pid, + ptypes.ExecNo(quad.exec_no + 1), + ) + graph.add_edge(current, target) + return graph diff --git 
a/tests/ltrace_parser.py b/tests/ltrace_parser.py new file mode 100644 index 00000000..1a1413e7 --- /dev/null +++ b/tests/ltrace_parser.py @@ -0,0 +1,324 @@ +import collections +import dataclasses +import datetime +import fnmatch +import pathlib +import re +import shlex +import subprocess + + +@dataclasses.dataclass +class Symbol: + name: str + library: str | None + +@dataclasses.dataclass +class AtomicFunctionCall: + symbol: Symbol + args: str + ret: str + +@dataclasses.dataclass +class UnfinishedFunctionCall: + symbol: Symbol + args: str + +@dataclasses.dataclass +class FunctionReturn: + symbol: Symbol + args: str | None + ret: str | None + call: UnfinishedFunctionCall | None + +@dataclasses.dataclass +class NoReturn: + symbol: Symbol + ret: str + +@dataclasses.dataclass +class TransferCall: + call: str + +@dataclasses.dataclass +class Exit: + status: int + +@dataclasses.dataclass +class Signal: + signal: str + description: str + +@dataclasses.dataclass +class State: + stack: collections.abc.Mapping[int, list[UnfinishedFunctionCall]] = dataclasses.field( + default_factory=lambda: collections.defaultdict(list) + ) + + +Event = UnfinishedFunctionCall | AtomicFunctionCall | FunctionReturn | NoReturn | TransferCall | Signal | Exit + + +def parse( + log: collections.abc.Iterable[str], +) -> collections.abc.Iterator[tuple[State, int, Event]]: + state = State() + lineno = 0 + for line in log: + lineno += 1 + line = line.strip() + pid_match = re.match(r"(\d+) ( *)(.*)$", line) + if pid_match: + pid = int(pid_match.group(1)) + stack = state.stack[pid] + #indentation = len(pid_match.group(2)) + line = pid_match.group(3) + if symbol_match := re.match(r"([a-zA-Z0-9._-]+)(@[a-zA-Z0-9._-]*)?\((.*)$", line): + symbol = Symbol( + symbol_match.group(1), + symbol_match.group(2)[1:] if symbol_match.group(2) else None, + ) + line = symbol_match.group(3) + if atomic_function_call_match := re.match(r"(.*)\) += (.*)$", line): + yield state, pid, AtomicFunctionCall( + symbol, + 
atomic_function_call_match.group(1), + atomic_function_call_match.group(2), + ) + elif unfinished_function_call_match := re.match(r"(.*) $", line): + unfinished_function_call = UnfinishedFunctionCall( + symbol, + unfinished_function_call_match.group(1), + ) + stack.append(unfinished_function_call) + yield state, pid, unfinished_function_call + elif noreturn_call_match := re.match(r"(.*) $", line): + stack.clear() + noreturn = NoReturn( + symbol, + noreturn_call_match.group(1), + ) + stack.clear() + yield state, pid, noreturn + else: + raise RuntimeError(f"Line {lineno}: Could not parse:\n{line!r}") + elif exited_match := re.match(r"\+\+\+ exited \(status (\d+)\) \+\+\+$", line): + yield state, pid, Exit(int(exited_match.group(1))) + elif function_return_match := re.match(r"<\.\.\. (.*) resumed> (.+)?\) +=(?: (.*))?$", line): + #name = function_return_match.group(1) + args = function_return_match.group(2) + ret = function_return_match.group(3) + if stack: + call = stack.pop() + else: + call = None + #assert call.symbol.name == name, (call.symbol.name, name) + yield state, pid, FunctionReturn(symbol, args, ret, call) + elif transfer_call_match := re.match("--- Called (.*) ---$", line): + stack.clear() + yield state, pid, TransferCall(transfer_call_match.group(1)) + elif transfer_call_match := re.match("--- (SIG[A-Z12]*) (.*) ---$", line): + stack.clear() + yield state, pid, Signal(transfer_call_match.group(1), transfer_call_match.group(1)) + else: + raise RuntimeError(f"Line {lineno}: Could not parse:\n{line!r}") + else: + raise RuntimeError(f"Line {lineno}: Could not parse:\n{line!r}") + + +def run_ltrace( + cmd: list[str], + exclude: list[str], + timeout: float, +) -> tuple[bool, int, list[tuple[State, int, Event]]]: + exclude_flag = "*@SYS+*@libc.so.6" + ("-" if exclude else "") + "-".join(exclude) + tmpfile = pathlib.Path("ltrace.log") + real_cmd = ["ltrace", "-A5", "-f", "--indent=1", "-s4096", "-S", "-L", "-x", exclude_flag, "--output", str(tmpfile), *cmd] + 
print(shlex.join(real_cmd)) + try: + proc = subprocess.run( + real_cmd, + timeout=timeout, + check=False, + ) + except subprocess.TimeoutExpired: + with tmpfile.open() as tmpfile_obj: + return (True, 0, list(parse(tmpfile_obj))) + else: + with tmpfile.open() as tmpfile_obj: + return (False, proc.returncode, list(parse(tmpfile_obj))) + + +if __name__ == "__main__": + command: list[str] = [ + "bash", + "-c", + '../../../examples/echo.exe hi > test_file && ../../../examples/cat.exe test_file > test_file2', + ] + + timeout = 5 + excluded_functions = [ + # Libc internals + #"_*@libc.so.6", # not this, __openat_2 would be matched! + "__str*@libc.so.6", + "__mem*@libc.so.6", + "__pthread*@libc.so.6", + "___pthread*@libc.so.6", + "__GI_*@libc.so.6", + "_IO_*@libc.so.6", + "__cxa_*@libc.so.6", + "__wctob@libc.so.6", + "__btowc@libc.so.6", + "__free@libc.so.6", + "__gconv*@libc.so.6", + "__getdelim*@libc.so.6", + "__tsearch*@libc.so.6", + "__tfind*@libc.so.6", + + + # Libc stuff we don't care about + "wctob@libc.so.6", + "bcmp@libc.so.6", + "btowc@libc.so.6", + "wctype*@libc.so.6", + "*textdomain@libc.so.6", + "alloc_*@libc.so.6", + "sysconf@libc.so.6", + "getpagesize@libc.so.6", + "str*@libc.so.6", + "*strto*@libc.so.6", + "mb*@libc.so.6", + "mem*@libc.so.6", + "*_module@libc.so.6", + "*alias2@libc.so.6", + "*alloc@libc.so.6", + "*fput*@libc.so.6", + "*fget*@libc.so.6", + "*flush*@libc.so.6", + "free@libc.so.6", + "*free@libc.so.6", + "*printf*@libc.so.6", + "qsort*@libc.so.6", + "alias*@libc.so.6", + "tfind@libc.so.6", + "tsearch@libc.so.6", + "pthread_*@libc.so.6", + "sig*@libc.so.6", + "?etdelim@libc.so.6", + "*locale@libc.so.6", + "sem_*@libc.so.6", + "index@libc.so.6", + + # Ignore libc and syscalls + "rt_sig*@*", + "brk@*", + "sbrk@*", + "mprotect@*", + "*prctl@*", + "read*@*", + "write*@*", + "pread*@*", + "pwrite*@*", + "prlimit*@*", + "rlimit*@*", + "ioctl*@*", + "futex*@*", + "restart_syscall@SYS", + "getc@*", + "putc@*", + "clock_gettime@*", + + # These look like 
GCC leftovers? + "*.constprop.*", + "*.isra.*", + "*.part.*", + "*.localalias.*", + ] + included_functions = [ + "getenv@*", + "*access@*", + "*accessat@*", + "*stat@*", + "*statat@*", + "*open@*", + "*openat@*", + "*close@*", + "*seek@*", + "mmap@*", + "munmap@*", + "getenv@libc.so.6", + ] + + continue_ltracing = True + while continue_ltracing: + start = datetime.datetime.now() + timeout, returncode, events = run_ltrace(command, excluded_functions, timeout) + end = datetime.datetime.now() + if timeout: + print("Timeout reached") + else: + print(f"Process exited status {returncode} in {(end - start).total_seconds():.1f}seconds") + + function_calls = collections.Counter[tuple[str, str | None]]() + n_events = 0 + for _state, _pid, event in events: + n_events += 1 + if isinstance(event, AtomicFunctionCall | UnfinishedFunctionCall): + function_calls[(event.symbol.name, event.symbol.library)] += 1 + print("N lines", n_events) + print("N unique functions", len(function_calls)) + + if not function_calls: + break + + in_function_list = True + for (function_call, lib), count in function_calls.most_common(50): + tag = function_call + ("@" + lib if lib else "") + including = any( + fnmatch.fnmatch(tag, included_function) + for included_function in included_functions + ) + excluding = any( + fnmatch.fnmatch(tag, excluded_function) + for excluded_function in excluded_functions + ) + print(count, tag, f"{including=}", f"{excluding=}") + if not including and not excluding: + in_function_selection = True + while in_function_selection: + match input("> "): + case "exclude": + excluded_functions.append(tag) + in_function_selection = False + case "exclude*": + excluded_functions.append(input()) + in_function_selection = False + case "include": + included_functions.append(tag) + in_function_selection = False + case "include*": + included_functions.append(input()) + in_function_selection = False + case "run": + in_function_selection = False + in_function_list = False + 
continue_function_call_elimination = False + case "timeout": + print(timeout) + timeout_str = input(timeout) + try: + timeout = int(timeout_str) + except Exception as exc: + print(exc) + case "quit": + in_function_selection = False + in_function_list = False + continue_ltracing = False + print("excluded:", excluded_functions) + print("included:", included_functions) + print("command:", command) + print("timeout:", timeout) + case _: + print("What?") + if not in_function_list: + break