From 19ed2a371b9b9afdbe1fbdba4f48eff436932a47 Mon Sep 17 00:00:00 2001 From: kanjk0olb0y-a11y Date: Tue, 12 May 2026 13:27:12 +0700 Subject: [PATCH] Add runtime regex checks --- README.md | 30 ++++ dooked/include/cli_preprocessor.hpp | 10 ++ dooked/include/utils/containers.hpp | 13 +- dooked/source/cli_preprocessor.cpp | 173 ++++++++++++++++++++++++ dooked/source/dns/dns_resolver.cpp | 10 +- dooked/source/http/requests_handler.cpp | 14 ++ dooked/source/http/resolver.cpp | 10 +- dooked/source/main.cpp | 2 + 8 files changed, 248 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index f1a761c..407101f 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,33 @@ make ## Usage For comprehensive help, use `dooked --help` + +### Runtime regex checks + +Use `--checks <file>` to alert on result fields at runtime. The checks file can +be either a JSON array or an object with a `checks` array: + +```json +{ + "checks": [ + { + "field": "domain", + "regex": "dev|test", + "alert": "domain contains an environment marker", + "ignore_case": true + }, + { + "field": "info", + "regex": "v=spf1", + "alert": "SPF TXT record found", + "ignore_case": true + } + ] +} +``` + +Supported fields are `domain`, `domain_name`, `type`, `record_type`, `info`, +`rdata`, `ttl`, `content_length`, `http_code`, `code_string`, and +`http_status`. You can also check HTTP page content with `response_body`, +`body`, `page_content`, or `content`; dooked keeps at most the first 64 KiB in +memory for matching and does not write page content to the JSON output. 
diff --git a/dooked/include/cli_preprocessor.hpp b/dooked/include/cli_preprocessor.hpp index 43fa1ba..0ce9850 100644 --- a/dooked/include/cli_preprocessor.hpp +++ b/dooked/include/cli_preprocessor.hpp @@ -2,6 +2,7 @@ #include "dns/dns_resolver.hpp" #include "utils/io_utils.hpp" +#include #include // maximum sockets to open regardless of the number of threads @@ -19,6 +20,7 @@ struct cli_args_t { std::string resolver_filename{}; std::string output_filename{}; std::string input_filename{}; + std::string regex_checks_filename{}; int file_type{}; int post_http_request{}; @@ -27,6 +29,13 @@ struct cli_args_t { bool include_date{false}; }; +struct regex_check_t { + std::string field{}; + std::string pattern{}; + std::string alert{}; + std::regex expression{}; +}; + struct runtime_args_t { std::optional resolvers{}; opt_domain_list_t names{}; @@ -36,6 +45,7 @@ struct runtime_args_t { http_process_e http_request_time_{}; int thread_count{}; int content_length{-1}; + std::vector regex_checks{}; }; void run_program(cli_args_t const &cli_args); diff --git a/dooked/include/utils/containers.hpp b/dooked/include/utils/containers.hpp index 7c955f6..b59d01c 100644 --- a/dooked/include/utils/containers.hpp +++ b/dooked/include/utils/containers.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace dooked { @@ -31,6 +32,7 @@ template class circular_queue_t { struct http_response_t { int content_length_{}; int http_status_{}; + std::string response_body_{}; }; template struct http_dns_response_t { @@ -52,9 +54,11 @@ template class map_container_t { } void insert_impl(std::string const &name, int const len, - int const http_status) { + int const http_status, + std::string const &response_body = {}) { map_[name].http_result_.content_length_ = len; map_[name].http_result_.http_status_ = http_status; + map_[name].http_result_.response_body_ = response_body; } public: @@ -74,12 +78,13 @@ template class map_container_t { append_impl(key, value); } - void 
insert(std::string const &name, int const len, int const http_status) { + void insert(std::string const &name, int const len, int const http_status, + std::string const &response_body = {}) { if (!opt_mutex_) { - return insert_impl(name, len, http_status); + return insert_impl(name, len, http_status, response_body); } std::lock_guard lock_g{*opt_mutex_}; - insert_impl(name, len, http_status); + insert_impl(name, len, http_status, response_body); } // only used by main thread, after all "computations" has been // done. There's no need for locks here. diff --git a/dooked/source/cli_preprocessor.cpp b/dooked/source/cli_preprocessor.cpp index c08d7fb..65f7c01 100644 --- a/dooked/source/cli_preprocessor.cpp +++ b/dooked/source/cli_preprocessor.cpp @@ -4,8 +4,12 @@ #include "utils/exceptions.hpp" #include "utils/random_utils.hpp" #include "utils/string_utils.hpp" +#include #include #include +#include +#include +#include #include #include @@ -18,6 +22,169 @@ namespace dooked { namespace net = boost::asio; using namespace fmt::v7::literals; +std::string normalize_regex_check_field(std::string field) { + std::transform(field.begin(), field.end(), field.begin(), [](char ch) { + if (ch == '-' || ch == ' ') { + return '_'; + } + return (char)std::tolower((unsigned char)ch); + }); + return field; +} + +bool is_supported_regex_check_field(std::string const &field) { + return field == "domain" || field == "domain_name" || + field == "type" || field == "record_type" || field == "info" || + field == "rdata" || field == "ttl" || + field == "content_length" || field == "http_code" || + field == "code_string" || field == "http_status" || + field == "response_body" || field == "body" || + field == "page_content" || field == "content"; +} + +std::optional +domain_regex_check_value(std::string const &field, + std::string const &domain_name, + http_response_t const &http_result) { + if (field == "domain" || field == "domain_name") { + return domain_name; + } + if (field == 
"content_length") { + return std::to_string(http_result.content_length_); + } + if (field == "http_code") { + return std::to_string(http_result.http_status_); + } + if (field == "code_string" || field == "http_status") { + return code_string(http_result.http_status_); + } + if (field == "response_body" || field == "body" || + field == "page_content" || field == "content") { + return http_result.response_body_; + } + return std::nullopt; +} + +std::optional +record_regex_check_value(std::string const &field, + probe_result_t const &record) { + if (field == "type" || field == "record_type") { + return dns_record_type_to_str(record.type); + } + if (field == "info" || field == "rdata") { + return record.rdata; + } + if (field == "ttl") { + return std::to_string(record.ttl); + } + return std::nullopt; +} + +std::string truncate_regex_check_value(std::string value) { + constexpr std::size_t max_value_size = 120; + if (value.size() <= max_value_size) { + return value; + } + value.resize(max_value_size); + return value + "..."; +} + +void log_regex_alert(std::string const &domain_name, + regex_check_t const &check, + std::string const &matched_value) { + spdlog::warn("[ALERT][{}][{}] {} (matched: {})", domain_name, check.field, + check.alert, truncate_regex_check_value(matched_value)); +} + +std::optional> +load_regex_checks(std::string const &filename) { + if (filename.empty()) { + return std::vector{}; + } + + std::ifstream input_file(filename); + if (!input_file) { + spdlog::error("unable to open regex checks file `{}`", filename); + return std::nullopt; + } + + try { + auto const document = json::parse(input_file); + auto const &checks_json = + document.is_object() && document.contains("checks") + ? 
document.at("checks") + : document; + if (!checks_json.is_array()) { + spdlog::error("regex checks file must contain a JSON array or a checks array"); + return std::nullopt; + } + + std::vector checks{}; + for (auto const &check_json : checks_json) { + if (!check_json.is_object()) { + spdlog::error("each regex check must be a JSON object"); + return std::nullopt; + } + + auto field = normalize_regex_check_field( + check_json.value("field", "")); + auto const pattern = check_json.value("regex", ""); + auto const alert = check_json.value("alert", ""); + auto const ignore_case = check_json.value("ignore_case", false); + + if (field.empty() || pattern.empty() || alert.empty()) { + spdlog::error("each regex check requires field, regex, and alert"); + return std::nullopt; + } + if (!is_supported_regex_check_field(field)) { + spdlog::error("unsupported regex check field `{}`", field); + return std::nullopt; + } + + auto flags = std::regex_constants::ECMAScript; + if (ignore_case) { + flags |= std::regex_constants::icase; + } + checks.push_back({std::move(field), pattern, alert, + std::regex(pattern, flags)}); + } + return checks; + } catch (std::regex_error const &e) { + spdlog::error("invalid regex in checks file: {}", e.what()); + } catch (std::exception const &e) { + spdlog::error("unable to parse regex checks file: {}", e.what()); + } + return std::nullopt; +} + +void run_regex_checks(map_container_t const &result_map, + std::vector const &checks) { + if (checks.empty()) { + return; + } + + for (auto const &result_pair : result_map.cresult()) { + auto const &domain_name = result_pair.first; + auto const &domain_result = result_pair.second; + for (auto const &check : checks) { + if (auto value = domain_regex_check_value( + check.field, domain_name, domain_result.http_result_); + value && std::regex_search(*value, check.expression)) { + log_regex_alert(domain_name, check, *value); + } + } + + for (auto const &record : domain_result.dns_result_list_) { + for (auto const 
&check : checks) { + if (auto value = record_regex_check_value(check.field, record); + value && std::regex_search(*value, check.expression)) { + log_regex_alert(domain_name, check, *value); + } + } + } + } +} + void compare_http_result(int const base_cl, json_data_t const &prev_http_result, http_response_t const ¤t_result) { auto const current_req_cl = current_result.content_length_; @@ -353,6 +520,7 @@ void start_name_checking(runtime_args_t &&rt_args) { if (!silent) { spdlog::info("Writing JSON output"); } + run_regex_checks(result_map, rt_args.regex_checks); write_json_result(result_map, rt_args); // compare old with new result -- only if we had previous record @@ -404,6 +572,11 @@ void run_program(cli_args_t const &cli_args) { if (!read_input_file(cli_args, rt_args)) { return; } + if (auto checks = load_regex_checks(cli_args.regex_checks_filename)) { + rt_args.regex_checks = std::move(*checks); + } else { + return; + } // try opening an output file { std::string filename{}; diff --git a/dooked/source/dns/dns_resolver.cpp b/dooked/source/dns/dns_resolver.cpp index 851745f..5f5ec73 100644 --- a/dooked/source/dns/dns_resolver.cpp +++ b/dooked/source/dns/dns_resolver.cpp @@ -417,11 +417,11 @@ void custom_resolver_socket_t::http_result_obtained( switch (rt) { case response_type_e::bad_request: { - result_map_.insert(name_, content_length, 400); + result_map_.insert(name_, content_length, 400, response_string); return dns_continue_probe(); } case response_type_e::forbidden: { - result_map_.insert(name_, content_length, 403); + result_map_.insert(name_, content_length, 403, response_string); return dns_continue_probe(); } case response_type_e::cannot_resolve_name: { @@ -447,11 +447,11 @@ void custom_resolver_socket_t::http_result_obtained( return send_https_request(response_string); } case response_type_e::not_found: { // HTTP(S) 404 - result_map_.insert(name_, content_length, 404); + result_map_.insert(name_, content_length, 404, response_string); return 
dns_continue_probe(); } case response_type_e::ok: { - result_map_.insert(name_, content_length, 200); + result_map_.insert(name_, content_length, 200, response_string); return dns_continue_probe(); } case response_type_e::recv_timed_out: { // retry, wait timeout @@ -477,7 +477,7 @@ void custom_resolver_socket_t::http_result_obtained( return send_https_request(response_string); } case response_type_e::server_error: { - result_map_.insert(name_, content_length, 503); + result_map_.insert(name_, content_length, 503, response_string); return dns_continue_probe(); } default: { diff --git a/dooked/source/http/requests_handler.cpp b/dooked/source/http/requests_handler.cpp index d21a592..52d1aa8 100644 --- a/dooked/source/http/requests_handler.cpp +++ b/dooked/source/http/requests_handler.cpp @@ -10,6 +10,14 @@ extern bool silent; namespace dooked { +std::string response_body_for_checks(std::string const &body) { + constexpr std::size_t max_body_size = 64 * 1024; + if (body.size() > max_body_size) { + return body.substr(0, max_body_size); + } + return body; +} + http_request_handler_t::http_request_handler_t(net::io_context &io_context, std::string domain_name) : io_{io_context}, domain_{std::move(domain_name)} {} @@ -167,6 +175,9 @@ void http_request_handler_t::on_data_received( #endif // _DEBUG response_int = response_type_e::unknown_response; } + if (status_code_simple != 3) { + response_string = response_body_for_checks(response_->body()); + } int content_length{}; if (response_->has_content_length()) { @@ -388,6 +399,9 @@ void https_request_handler_t::on_data_received( } else { response_int = response_type_e::unknown_response; } + if (status_code_simple != 3) { + response_string = response_body_for_checks(response_->body()); + } int content_length = 0; if (response_->has_content_length()) { diff --git a/dooked/source/http/resolver.cpp b/dooked/source/http/resolver.cpp index 95332a4..35ae835 100644 --- a/dooked/source/http/resolver.cpp +++ 
b/dooked/source/http/resolver.cpp @@ -65,11 +65,11 @@ void http_resolver_t::tcp_request_result(response_type_e const rt, std::string const &response_string) { switch (rt) { case response_type_e::bad_request: { - result_map_.insert(name_, content_length, 400); + result_map_.insert(name_, content_length, 400, response_string); return send_next_request(); } case response_type_e::forbidden: { - result_map_.insert(name_, content_length, 403); + result_map_.insert(name_, content_length, 403, response_string); return send_next_request(); } case response_type_e::cannot_resolve_name: { @@ -97,11 +97,11 @@ void http_resolver_t::tcp_request_result(response_type_e const rt, return send_https_request(response_string); } case response_type_e::not_found: { // HTTP(S) 404 - result_map_.insert(name_, content_length, 404); + result_map_.insert(name_, content_length, 404, response_string); return send_next_request(); } case response_type_e::ok: { - result_map_.insert(name_, content_length, 200); + result_map_.insert(name_, content_length, 200, response_string); return send_next_request(); } case response_type_e::recv_timed_out: { // retry, wait timeout @@ -122,7 +122,7 @@ void http_resolver_t::tcp_request_result(response_type_e const rt, return switch_ssl_method(response_string); } case response_type_e::server_error: { - result_map_.insert(name_, content_length, 503); + result_map_.insert(name_, content_length, 503, response_string); return send_next_request(); } default: { diff --git a/dooked/source/main.cpp b/dooked/source/main.cpp index cf29460..8232560 100644 --- a/dooked/source/main.cpp +++ b/dooked/source/main.cpp @@ -31,6 +31,8 @@ int main(int argc, char **argv) { app.add_option("-t,--threads", cli_args.thread_count, "total threads to use(default: " + std::to_string(DOOKED_SUPPORTED_THREADS) + ")"); + app.add_option("--checks", cli_args.regex_checks_filename, + "JSON file with runtime regex checks to run against results"); app.add_option( "-c,--content-length", 
cli_args.content_length, "show content lengths that changed more than --content-length");