Added ClarIOn codes. #67
Conversation
There was a problem hiding this comment.
Pull request overview
Note
Copilot was unable to run its full agentic suite in this review.
Adds ClarIOn utilities for generating, persisting, and comparing call trees, plus a helper script to split trace files by PID.
Changes:
- Introduces a shared call tree node type and binary serialization/deserialization helpers.
- Adds two new binaries: one to build/deduplicate call trees and another to diff/visualize two saved call trees.
- Updates build system to compile the new binaries and install the split script.
Reviewed changes
Copilot reviewed 5 out of 5 changed files in this pull request and generated 16 comments.
Show a summary per file
| File | Description |
|---|---|
| src/dftracer/utils/utilities/call_tree/call_tree.h | Adds shared Node type and binary save/load helpers for call trees. |
| src/dftracer/utils/binaries/clarIOn_comparator.cpp | New CLI tool to diff two call trees and emit text or Graphviz .dot output. |
| src/dftracer/utils/binaries/clarIOn_calltree.cpp | New CLI tool to parse traces, build/deduplicate a call tree, and write outputs. |
| src/dftracer/utils/binaries/ClarIOn_split.sh | New script to split an input trace archive into per-PID gzip files. |
| src/CMakeLists.txt | Wires new binaries into the build and copies the split script to the bin dir. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| for (const Node* cb : b->children) | ||
| children_b[cb->name] = cb; | ||
|
|
||
| std::unordered_set<std::string> visited; |
| for (const Node* r : roots_b) map_b[r->name] = r; | ||
|
|
||
| std::vector<DiffNode*> result; | ||
| std::unordered_set<std::string> visited; |
| #include <thread> | ||
|
|
||
| using namespace dftracer::utils; | ||
| using namespace dftracer::utils::utilities::indexer::internal; | ||
| using namespace dftracer::utils::utilities::reader::internal; | ||
| #include <algorithm> | ||
| #include <cmath> | ||
| #include <cstdio> | ||
| #include <cstring> | ||
| #include <functional> | ||
| #include <iostream> | ||
| #include <limits> | ||
| #include <memory> | ||
| #include <string> | ||
| #include <unordered_map> | ||
| #include <vector> | ||
|
|
| .scan<'g', double>(); | ||
| program.add_argument("--dot") | ||
| .help("Output a .dot file for visualization (optional)") | ||
| .default_value<bool>(true); |
| " fillcolor=\"#6dcf94\", fontcolor=\"#FFFFFF\"," | ||
| " fontsize=10, shape=box];\n" |
| fread(&name_len, sizeof(name_len), 1, f); | ||
| n->name.resize(name_len); | ||
| fread(n->name.data(), 1, name_len, f); | ||
| fread(&n->dur, sizeof(n->dur), 1, f); |
| struct Node { | ||
| long long ts, dur, ts_end; | ||
| int depth = 0, count = 1; | ||
| std::string name; | ||
| long long max, min; | ||
| std::size_t structural_hash = 0; | ||
| std::vector<Node*> children; | ||
| std::unordered_map<std::size_t, Node*> child_map; | ||
| }; |
| static constexpr uint32_t CALLTREE_MAGIC = 0xCA117EE1; | ||
| static constexpr uint32_t CALLTREE_VERSION = 0x00000001; | ||
|
|
||
| static void write_node(FILE* f, const Node* n) { |
| pipes[pid] = "gzip > " dir "/output-" pid ".pfw.gz" | ||
| } | ||
| print | pipes[pid] | ||
| } |
| target_set_warnings(${bin_exec}) | ||
| configure_file( | ||
| ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/binaries/clarIOn_split.sh | ||
| ${CMAKE_BINARY_DIR}/bin/ClarIOn_split.sh | ||
| COPYONLY | ||
| ) |
| program.add_description( | ||
| "ClarIOn utility for reading and creating call trees from compressed files (GZIP, " | ||
| "TAR.GZ)"); | ||
| program.add_argument("file") | ||
| .help("Compressed file to process (GZIP, TAR.GZ)") | ||
| .required(); | ||
| program.add_argument("-i", "--index") | ||
| .help("Index file to use") | ||
| .default_value<std::string>(""); | ||
| program.add_argument("-c", "--checkpoint-size") | ||
| .help("Checkpoint size for indexing in bytes (default: " + | ||
| default_checkpoint_size_str + ")") | ||
| .scan<'d', std::size_t>() | ||
| .default_value( | ||
| static_cast<std::size_t>(Indexer::DEFAULT_CHECKPOINT_SIZE)); | ||
| program.add_argument("-f", "--force-rebuild") | ||
| .help("Force rebuild index") | ||
| .flag(); | ||
| program.add_argument("--check").help("Check if index is valid").flag(); | ||
| program.add_argument("--read-buffer-size") | ||
| .help("Size of the read buffer in bytes (default: 1MB)") | ||
| .default_value<std::size_t>(1 * 1024 * 1024) | ||
| .scan<'d', std::size_t>(); | ||
| program.add_argument("--index-dir") | ||
| .help("Directory to store index files (default: system temp directory)") | ||
| .default_value<std::string>(""); | ||
| program.add_argument("-t", "--time-exclusive") | ||
| .help("Exclusive timing when building tree (default: inclusive)") | ||
| .flag(); | ||
| program.add_argument("-o", "--output") | ||
| .help("Output type: 'text' or 'binary' (default: text)") | ||
| .default_value<std::string>("text"); | ||
| try { | ||
| program.parse_args(argc, argv); | ||
| } catch (const std::exception &err) { | ||
| DFTRACER_UTILS_LOG_ERROR("Error occurred: %s", err.what()); | ||
| std::cerr << program; | ||
| return 1; | ||
| } | ||
|
|
||
| std::string gz_path = program.get<std::string>("file"); | ||
| std::string index_path = program.get<std::string>("--index"); | ||
| std::size_t checkpoint_size = Indexer::DEFAULT_CHECKPOINT_SIZE; // default 1MB |
There was a problem hiding this comment.
Can you follow the pattern used by the other binaries and extend cli::ArgParse?
| std::vector<NodeList> per_thread_nodes(line_ranges.size()); | ||
| std::vector<std::thread> threads; | ||
| for (std::size_t i = 0; i < line_ranges.size(); ++i) { | ||
| auto [start_line, end_line] = line_ranges[i]; | ||
| threads.emplace_back(line_worker, | ||
| start_line, | ||
| end_line, | ||
| std::ref(per_thread_nodes[i]));} | ||
|
|
||
| // 4. Join threads | ||
| for (auto &t : threads) { | ||
| t.join(); | ||
| } |
There was a problem hiding this comment.
We have a coroutine-based pipeline and executor that can do this. Can you reuse that pattern?
rayandrew
left a comment
There was a problem hiding this comment.
We also have TraceReader now to do this, which I optimized greatly using coroutines.
Please look at the API.
ClarIOn consists of three main files:
the splitter, the call tree builder, and the comparator.
One header file is also added for the structures used in these files.