Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,37 @@ make
## Usage

For comprehensive help, use `dooked --help`

### Runtime regex checks

Pass `--checks <file>` or `--check-config <file>` to run custom regex checks
against collected fields and print alerts when they match. The checks file can
be a JSON object with a `checks` array or the array itself:

```json
{
"checks": [
{
"field": "domain",
"regex": "dev|test",
"alert": "domain name contains an environment marker",
"ignore_case": true
},
{
"field": "response_body",
"regex": "copyright 2025",
"alert": "page may contain an outdated copyright banner",
"ignore_case": true
}
]
}
```

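Each check requires `field`, `regex`, and `alert`; `pattern` is accepted as an
alias for `regex`, and `ignore_case` is optional (it defaults to `false`).
Patterns use ECMAScript regex syntax and match anywhere in the field value, so
anchor them with `^` and `$` when the whole value must match.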

Supported fields are `domain`, `domain_name`, `type`, `record_type`, `info`,
`rdata`, `ttl`, `content_length`, `http_code`, `code_string`, and
`http_status`. Field names are case-insensitive, and `-` or spaces are treated
as `_`. `http_code` matches the numeric status code, while `code_string` and
`http_status` match its string form. Page content can be checked with
`response_body`, `body`, `page_content`, or `content`; dooked keeps at most the
first 64 KiB in memory for matching and does not write page content to the JSON
output.
11 changes: 11 additions & 0 deletions dooked/include/cli_preprocessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "dns/dns_resolver.hpp"
#include "utils/io_utils.hpp"
#include <regex>
#include <thread>

// maximum sockets to open regardless of the number of threads
Expand All @@ -19,6 +20,7 @@ struct cli_args_t {
std::string resolver_filename{};
std::string output_filename{};
std::string input_filename{};
std::string regex_checks_filename{};

int file_type{};
int post_http_request{};
Expand All @@ -27,6 +29,14 @@ struct cli_args_t {
bool include_date{false};
};

// One user-supplied regex check, as loaded from the checks file.
// Member order matters: the struct is filled by aggregate initialisation.
struct regex_check_t {
  // Normalised name of the field the check applies to.
  std::string field;
  // Pattern text the expression was compiled from.
  std::string pattern;
  // Message reported when the expression matches.
  std::string alert;
  // Whether the expression was compiled case-insensitively.
  bool ignore_case = false;
  // Compiled form of `pattern`.
  std::regex expression;
};

struct runtime_args_t {
std::optional<resolver_list_t> resolvers{};
opt_domain_list_t names{};
Expand All @@ -36,6 +46,7 @@ struct runtime_args_t {
http_process_e http_request_time_{};
int thread_count{};
int content_length{-1};
std::vector<regex_check_t> regex_checks{};
};

void run_program(cli_args_t const &cli_args);
Expand Down
13 changes: 9 additions & 4 deletions dooked/include/utils/containers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <mutex>
#include <optional>
#include <queue>
#include <string>
#include <vector>

namespace dooked {
Expand All @@ -31,6 +32,7 @@ template <typename T> class circular_queue_t {
// Outcome of one HTTP probe.
struct http_response_t {
  // Content length recorded for the response.
  int content_length_ = 0;
  // HTTP status code recorded for the response.
  int http_status_ = 0;
  // Page content retained for regex checks (the README states that at most
  // the first 64 KiB is kept).
  std::string response_body_;
};

template <typename ValueType> struct http_dns_response_t {
Expand All @@ -52,9 +54,11 @@ template <typename ValueType> class map_container_t {
}

void insert_impl(std::string const &name, int const len,
int const http_status) {
int const http_status,
std::string const &response_body = {}) {
map_[name].http_result_.content_length_ = len;
map_[name].http_result_.http_status_ = http_status;
map_[name].http_result_.response_body_ = response_body;
}

public:
Expand All @@ -74,12 +78,13 @@ template <typename ValueType> class map_container_t {
append_impl(key, value);
}

void insert(std::string const &name, int const len, int const http_status) {
void insert(std::string const &name, int const len, int const http_status,
std::string const &response_body = {}) {
if (!opt_mutex_) {
return insert_impl(name, len, http_status);
return insert_impl(name, len, http_status, response_body);
}
std::lock_guard<std::mutex> lock_g{*opt_mutex_};
insert_impl(name, len, http_status);
insert_impl(name, len, http_status, response_body);
}
// only used by main thread, after all "computations" has been
// done. There's no need for locks here.
Expand Down
1 change: 1 addition & 0 deletions dooked/include/utils/exceptions.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <stdexcept>
#include <string>

namespace dooked {

Expand Down
203 changes: 202 additions & 1 deletion dooked/source/cli_preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
#include "utils/exceptions.hpp"
#include "utils/random_utils.hpp"
#include "utils/string_utils.hpp"
#include <algorithm>
#include <boost/asio/io_context.hpp>
#include <boost/asio/thread_pool.hpp>
#include <cctype>
#include <fstream>
#include <set>
#include <spdlog/spdlog.h>

Expand All @@ -18,6 +21,196 @@ namespace dooked {
namespace net = boost::asio;
using namespace fmt::v7::literals;

// Canonicalises a field name from the checks file: `-` and spaces become `_`
// and every other character is lower-cased.
std::string normalize_check_field(std::string field) {
  for (auto &symbol : field) {
    bool const is_separator = symbol == '-' || symbol == ' ';
    // tolower needs an unsigned char value to stay defined for bytes >= 0x80.
    symbol = is_separator ? '_'
                          : static_cast<char>(std::tolower(
                                static_cast<unsigned char>(symbol)));
  }
  return field;
}

// Tells whether `field` names a per-domain value (domain name, HTTP result or
// page content) rather than a per-DNS-record value.
bool is_domain_check_field(std::string const &field) {
  static constexpr char const *domain_fields[] = {
      "domain",      "domain_name",   "content_length", "http_code",
      "code_string", "http_status",   "response_body",  "body",
      "page_content", "content"};
  for (auto const *candidate : domain_fields) {
    if (field == candidate) {
      return true;
    }
  }
  return false;
}

// Tells whether `field` names a value that is read from each DNS record.
bool is_record_check_field(std::string const &field) {
  static constexpr char const *record_fields[] = {"type", "record_type",
                                                  "info", "rdata", "ttl"};
  for (auto const *candidate : record_fields) {
    if (field == candidate) {
      return true;
    }
  }
  return false;
}

// A field is supported when it is either a per-domain or a per-record field.
bool is_supported_check_field(std::string const &field) {
  if (is_domain_check_field(field)) {
    return true;
  }
  return is_record_check_field(field);
}

// Produces the text a per-domain check is matched against.
//
// field:       normalised field name.
// domain_name: name of the domain being examined.
// http_result: HTTP probe outcome stored for that domain.
// Returns the value as a string, or std::nullopt for an unknown field.
std::optional<std::string>
domain_check_value(std::string const &field, std::string const &domain_name,
                   http_response_t const &http_result) {
  bool const wants_name = field == "domain" || field == "domain_name";
  if (wants_name) {
    return domain_name;
  }

  if (field == "content_length") {
    return std::to_string(http_result.content_length_);
  }

  // `http_code` is the numeric status; the two names below go through
  // code_string() instead.
  if (field == "http_code") {
    return std::to_string(http_result.http_status_);
  }
  bool const wants_status_text =
      field == "code_string" || field == "http_status";
  if (wants_status_text) {
    return code_string(http_result.http_status_);
  }

  bool const wants_body = field == "response_body" || field == "body" ||
                          field == "page_content" || field == "content";
  if (wants_body) {
    return http_result.response_body_;
  }

  return std::nullopt;
}

// Produces the text a per-record check is matched against.
//
// field:  normalised field name.
// record: DNS record being examined.
// Returns the value as a string, or std::nullopt for an unknown field.
std::optional<std::string>
record_check_value(std::string const &field, probe_result_t const &record) {
  bool const wants_type = field == "type" || field == "record_type";
  if (wants_type) {
    return dns_record_type_to_str(record.type);
  }

  bool const wants_rdata = field == "info" || field == "rdata";
  if (wants_rdata) {
    return record.rdata;
  }

  if (field == "ttl") {
    return std::to_string(record.ttl);
  }

  return std::nullopt;
}

// Makes a matched value safe and short enough to embed in a one-line alert.
//
// value: raw field value; may be untrusted page content.
// Returns at most 240 bytes of `value` with every ASCII control character
// replaced by a space, followed by "..." when the value was shortened.
std::string alert_value(std::string value) {
  constexpr std::size_t max_alert_value_length = 240;

  // Shorten first so that only the part that is kept gets sanitised.
  bool const truncated = value.size() > max_alert_value_length;
  if (truncated) {
    std::size_t cut = max_alert_value_length;
    // Do not cut through a UTF-8 sequence: step back over continuation bytes
    // (10xxxxxx). A sequence has at most three of them, which also bounds the
    // walk on malformed input.
    for (int step = 0; step < 3 && cut > 0 &&
                       (static_cast<unsigned char>(value[cut]) & 0xC0) == 0x80;
         ++step) {
      --cut;
    }
    value.resize(cut);
  }

  // Neutralise every control byte, not just newlines and tabs: the value can
  // come from a remote page and ESC/NUL must not reach the terminal or log.
  for (auto &ch : value) {
    auto const byte = static_cast<unsigned char>(ch);
    if (byte < 0x20 || byte == 0x7F) {
      ch = ' ';
    }
  }

  if (truncated) {
    value += "...";
  }
  return value;
}

// Emits the alert for a per-domain check that matched.
//
// check:       the check that matched.
// domain_name: domain the value belongs to.
// value:       matched value; it is passed through alert_value() before
//              being logged.
void report_regex_match(regex_check_t const &check,
                        std::string const &domain_name,
                        std::string const &value) {
  auto const printable = alert_value(value);
  spdlog::warn("[REGEX][{}][{}] {} (value: `{}`)", check.field, domain_name,
               check.alert, printable);
}

// Emits the alert for a per-record check that matched; the record type is
// added to the log prefix.
//
// check:       the check that matched.
// domain_name: domain the record belongs to.
// record:      DNS record whose value matched.
// value:       matched value; it is passed through alert_value() before
//              being logged.
void report_regex_match(regex_check_t const &check,
                        std::string const &domain_name,
                        probe_result_t const &record,
                        std::string const &value) {
  auto const record_type = dns_record_type_to_str(record.type);
  auto const printable = alert_value(value);
  spdlog::warn("[REGEX][{}][{}][{}] {} (value: `{}`)", check.field,
               domain_name, record_type, check.alert, printable);
}

void run_regex_checks(map_container_t<probe_result_t> const &result_map,
std::vector<regex_check_t> const &checks) {
if (checks.empty()) {
return;
}

for (auto const &result_pair : result_map.cresult()) {
auto const &domain_name = result_pair.first;
auto const &domain_result = result_pair.second;
for (auto const &check : checks) {
if (is_domain_check_field(check.field)) {
auto const value =
domain_check_value(check.field, domain_name,
domain_result.http_result_);
if (value && std::regex_search(*value, check.expression)) {
report_regex_match(check, domain_name, *value);
}
continue;
}

for (auto const &record : domain_result.dns_result_list_) {
auto const value = record_check_value(check.field, record);
if (value && std::regex_search(*value, check.expression)) {
report_regex_match(check, domain_name, record, *value);
}
}
}
}
}

std::optional<std::vector<regex_check_t>>
load_regex_checks(std::string const &filename) {
if (filename.empty()) {
return std::vector<regex_check_t>{};
}

std::ifstream input_file(filename);
if (!input_file) {
spdlog::error("Unable to open regex checks file `{}`", filename);
return std::nullopt;
}

try {
json root{};
input_file >> root;
json const *checks_json = nullptr;
if (root.is_array()) {
checks_json = &root;
} else if (root.is_object() && root.contains("checks") &&
root["checks"].is_array()) {
checks_json = &root["checks"];
}

if (!checks_json) {
spdlog::error("Regex checks file must be an array or contain a `checks` "
"array");
return std::nullopt;
}

std::vector<regex_check_t> checks{};
for (auto const &item : *checks_json) {
if (!item.is_object()) {
spdlog::error("Each regex check must be a JSON object");
return std::nullopt;
}

auto field = normalize_check_field(item.value("field", ""));
auto pattern = item.value("regex", item.value("pattern", ""));
auto alert = item.value("alert", "");
auto const ignore_case = item.value("ignore_case", false);

if (field.empty() || pattern.empty() || alert.empty()) {
spdlog::error("Each regex check requires `field`, `regex`, and "
"`alert`");
return std::nullopt;
}
if (!is_supported_check_field(field)) {
spdlog::error("Unsupported regex check field `{}`", field);
return std::nullopt;
}

auto options = std::regex_constants::ECMAScript;
if (ignore_case) {
options |= std::regex_constants::icase;
}
checks.push_back({field, pattern, alert, ignore_case,
std::regex(pattern, options)});
}
return checks;
} catch (std::regex_error const &e) {
spdlog::error("Invalid regex in checks file `{}`: {}", filename, e.what());
} catch (std::exception const &e) {
spdlog::error("Unable to parse regex checks file `{}`: {}", filename,
e.what());
}
return std::nullopt;
}

void compare_http_result(int const base_cl, json_data_t const &prev_http_result,
http_response_t const &current_result) {
auto const current_req_cl = current_result.content_length_;
Expand Down Expand Up @@ -339,7 +532,7 @@ void start_name_checking(runtime_args_t &&rt_args) {

// if we deferred HTTP/S "probe", now is the time to get to it
if (deferring) {
io_context.reset();
io_context.restart();
thread_pool.emplace(thread_count);
rt_args.names.emplace(std::move(*deferred_names_));
for (std::size_t index = 0; index < thread_count; ++index) {
Expand All @@ -350,6 +543,8 @@ void start_name_checking(runtime_args_t &&rt_args) {
}
thread_pool->join();
}
run_regex_checks(result_map, rt_args.regex_checks);

if (!silent) {
spdlog::info("Writing JSON output");
}
Expand Down Expand Up @@ -380,6 +575,12 @@ void start_name_checking(runtime_args_t &&rt_args) {

void run_program(cli_args_t const &cli_args) {
runtime_args_t rt_args{};
auto regex_checks = load_regex_checks(cli_args.regex_checks_filename);
if (!regex_checks) {
return;
}
rt_args.regex_checks = std::move(*regex_checks);

// settle resolvers.
std::vector<std::string> resolver_strings{};
if (cli_args.resolver_filename.empty()) {
Expand Down
Loading