From 9fdae46f9d7a56d28497d68ebf224fa0b5560846 Mon Sep 17 00:00:00 2001 From: Orange Studio Date: Fri, 15 May 2026 17:45:59 +0800 Subject: [PATCH] Add runtime regex check config --- README.md | 29 +++ dooked/CMakeLists.txt | 2 + dooked/include/checks/regex_checks.hpp | 28 +++ dooked/include/cli_preprocessor.hpp | 3 + dooked/include/utils/containers.hpp | 12 +- dooked/source/checks/regex_checks.cpp | 251 ++++++++++++++++++++++++ dooked/source/cli_preprocessor.cpp | 13 ++ dooked/source/dns/dns_resolver.cpp | 10 +- dooked/source/http/requests_handler.cpp | 19 ++ dooked/source/http/resolver.cpp | 10 +- dooked/source/main.cpp | 2 + 11 files changed, 365 insertions(+), 14 deletions(-) create mode 100644 dooked/include/checks/regex_checks.hpp create mode 100644 dooked/source/checks/regex_checks.cpp diff --git a/README.md b/README.md index f1a761c..246b36b 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,32 @@ make ## Usage For comprehensive help, use `dooked --help` + +### Runtime regex checks + +Use `--checks` or `--check-config` to load custom notification checks from a +JSON file. A config may be a JSON array or an object with a `checks` array: + +```json +{ + "checks": [ + { + "field": "domain", + "regex": "(dev|test)", + "alert": "domain contains an environment marker", + "ignore_case": true + }, + { + "field": "body", + "regex": "Copyright 2020", + "alert": "outdated copyright banner" + } + ] +} +``` + +Supported fields are `domain`, DNS fields (`type`, `info`/`rdata`, `ttl`), +HTTP fields (`http_code`, `code_string`, `content_length`), and response body +aliases (`body`, `response_body`, `page_content`, `content`). Response bodies +are kept only in memory for matching, capped at 64 KiB per request, and are not +written to the JSON output file. 
diff --git a/dooked/CMakeLists.txt b/dooked/CMakeLists.txt index c43ff38..f3a451a 100644 --- a/dooked/CMakeLists.txt +++ b/dooked/CMakeLists.txt @@ -67,6 +67,7 @@ set(SRC_FILES ./source/dns/dns_resolver.cpp ./source/http/resolver.cpp ./source/http/requests_handler.cpp + ./source/checks/regex_checks.cpp ./source/utils/constants.cpp ./source/utils/io_utils.cpp ./source/utils/string_utils.cpp @@ -84,6 +85,7 @@ set(HEADERS_FILES ./include/dns/dns_resolver.hpp ./include/http/resolver.hpp ./include/http/requests_handler.hpp + ./include/checks/regex_checks.hpp ./include/utils/constants.hpp ./include/utils/containers.hpp ./include/utils/dns_utils.hpp diff --git a/dooked/include/checks/regex_checks.hpp b/dooked/include/checks/regex_checks.hpp new file mode 100644 index 0000000..8c65a95 --- /dev/null +++ b/dooked/include/checks/regex_checks.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include "utils/containers.hpp" +#include "utils/probe_result.hpp" +#include +#include +#include +#include + +namespace dooked { + +struct regex_check_t { + std::string field{}; + std::string pattern{}; + std::string alert{}; + std::regex compiled_pattern{}; + bool body_field{false}; +}; + +using regex_check_list_t = std::vector; + +std::optional +load_regex_checks(std::string const &filename, std::string &error_message); + +void run_regex_checks(map_container_t const &result_map, + regex_check_list_t const &checks); + +} // namespace dooked diff --git a/dooked/include/cli_preprocessor.hpp b/dooked/include/cli_preprocessor.hpp index 43fa1ba..2e4c835 100644 --- a/dooked/include/cli_preprocessor.hpp +++ b/dooked/include/cli_preprocessor.hpp @@ -1,5 +1,6 @@ #pragma once +#include "checks/regex_checks.hpp" #include "dns/dns_resolver.hpp" #include "utils/io_utils.hpp" #include @@ -19,6 +20,7 @@ struct cli_args_t { std::string resolver_filename{}; std::string output_filename{}; std::string input_filename{}; + std::string check_config_filename{}; int file_type{}; int post_http_request{}; @@ -33,6 +35,7 @@ 
struct runtime_args_t { std::optional> previous_data{}; std::unique_ptr output_file{}; std::string output_filename{}; + std::optional regex_checks{}; http_process_e http_request_time_{}; int thread_count{}; int content_length{-1}; diff --git a/dooked/include/utils/containers.hpp b/dooked/include/utils/containers.hpp index 7c955f6..1f92eb6 100644 --- a/dooked/include/utils/containers.hpp +++ b/dooked/include/utils/containers.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace dooked { @@ -31,6 +32,7 @@ template class circular_queue_t { struct http_response_t { int content_length_{}; int http_status_{}; + std::string body_{}; }; template struct http_dns_response_t { @@ -52,9 +54,10 @@ template class map_container_t { } void insert_impl(std::string const &name, int const len, - int const http_status) { + int const http_status, std::string const &body) { map_[name].http_result_.content_length_ = len; map_[name].http_result_.http_status_ = http_status; + map_[name].http_result_.body_ = body; } public: @@ -74,12 +77,13 @@ template class map_container_t { append_impl(key, value); } - void insert(std::string const &name, int const len, int const http_status) { + void insert(std::string const &name, int const len, int const http_status, + std::string const &body = {}) { if (!opt_mutex_) { - return insert_impl(name, len, http_status); + return insert_impl(name, len, http_status, body); } std::lock_guard lock_g{*opt_mutex_}; - insert_impl(name, len, http_status); + insert_impl(name, len, http_status, body); } // only used by main thread, after all "computations" has been // done. There's no need for locks here. 
diff --git a/dooked/source/checks/regex_checks.cpp b/dooked/source/checks/regex_checks.cpp
new file mode 100644
index 0000000..a2bab32
--- /dev/null
+++ b/dooked/source/checks/regex_checks.cpp
@@ -0,0 +1,280 @@
+#include "checks/regex_checks.hpp"
+#include "utils/constants.hpp"
+#include <algorithm>
+#include <cctype>
+#include <exception>
+#include <fstream>
+#include <nlohmann/json.hpp>
+#include <spdlog/spdlog.h>
+
+namespace dooked {
+namespace {
+
+using json = nlohmann::json;
+
+// Returns `value` with every character passed through std::tolower.
+std::string lowercase(std::string value) {
+  std::transform(value.begin(), value.end(), value.begin(), [](char ch) {
+    return static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
+  });
+  return value;
+}
+
+// Maps a user-supplied field name (case-insensitive, '-' treated as '_') to
+// the canonical name used by the matchers; std::nullopt if it is unknown.
+std::optional<std::string> canonical_field(std::string field) {
+  field = lowercase(std::move(field));
+  std::replace(field.begin(), field.end(), '-', '_');
+
+  if (field == "domain" || field == "domain_name" || field == "name") {
+    return "domain";
+  }
+  if (field == "type" || field == "record_type") {
+    return "type";
+  }
+  if (field == "info" || field == "rdata" || field == "data") {
+    return "rdata";
+  }
+  if (field == "ttl") {
+    return "ttl";
+  }
+  if (field == "http_code" || field == "status" || field == "status_code") {
+    return "http_code";
+  }
+  if (field == "code_string" || field == "status_text") {
+    return "code_string";
+  }
+  if (field == "content_length") {
+    return "content_length";
+  }
+  if (field == "body" || field == "content" || field == "page_content" ||
+      field == "response_body" || field == "http_body") {
+    return "body";
+  }
+  return std::nullopt;
+}
+
+bool is_body_field(std::string const &field) { return field == "body"; }
+
+// Returns object[key] when present and a JSON string, std::nullopt otherwise.
+std::optional<std::string> json_string_value(json const &object,
+                                             char const *key) {
+  auto const iter = object.find(key);
+  if (iter == object.end() || !iter->is_string()) {
+    return std::nullopt;
+  }
+  return iter->get<std::string>();
+}
+
+// Replaces CR/LF with spaces and truncates to 120 bytes (appending "...") so
+// that a matched value stays on a single log line.
+std::string preview_value(std::string value) {
+  constexpr std::size_t max_preview_size = 120;
+  std::replace(value.begin(), value.end(), '\n', ' ');
+  std::replace(value.begin(), value.end(), '\r', ' ');
+  if (value.size() > max_preview_size) {
+    value.resize(max_preview_size);
+    value += "...";
+  }
+  return value;
+}
+
+// Logs a warning when `value` matches the check's compiled regex.
+// An empty `value` is never matched, whatever the pattern.
+void report_match(regex_check_t const &check, std::string const &domain,
+                  std::string const &value) {
+  if (value.empty()) {
+    return;
+  }
+
+  std::smatch match;
+  if (!std::regex_search(value, match, check.compiled_pattern)) {
+    return;
+  }
+
+  auto matched = match.empty() ? value : match.str(0);
+  spdlog::warn("[REGEX][{}][{}] {} (matched: `{}`)", check.field, domain,
+               check.alert, preview_value(std::move(matched)));
+}
+
+// Returns the HTTP attribute named by canonical `field` as text, or an empty
+// string when `field` is not one of the HTTP fields handled here.
+std::string http_field_value(http_response_t const &response,
+                             std::string const &field) {
+  if (field == "http_code") {
+    return std::to_string(response.http_status_);
+  }
+  if (field == "code_string") {
+    return code_string(response.http_status_);
+  }
+  if (field == "content_length") {
+    return std::to_string(response.content_length_);
+  }
+  if (field == "body") {
+    return response.body_;
+  }
+  return {};
+}
+
+// Returns the DNS record attribute named by canonical `field` as text, or an
+// empty string when `field` is not one of the DNS fields handled here.
+std::string dns_field_value(probe_result_t const &record,
+                            std::string const &field) {
+  if (field == "type") {
+    return dns_record_type_to_str(record.type);
+  }
+  if (field == "rdata") {
+    return record.rdata;
+  }
+  if (field == "ttl") {
+    return std::to_string(record.ttl);
+  }
+  return {};
+}
+
+} // namespace
+
+// Parses the JSON check config at `filename` and compiles every entry.
+// Accepts either a top-level array or an object holding a "checks" array.
+// On any failure returns std::nullopt and describes it in `error_message`.
+std::optional<regex_check_list_t>
+load_regex_checks(std::string const &filename, std::string &error_message) {
+  std::ifstream input_file(filename);
+  if (!input_file) {
+    error_message = "unable to open check config: " + filename;
+    return std::nullopt;
+  }
+
+  json parsed;
+  try {
+    input_file >> parsed;
+  } catch (std::exception const &e) {
+    error_message = "invalid JSON check config: " + std::string(e.what());
+    return std::nullopt;
+  }
+
+  json checks_json;
+  if (parsed.is_array()) {
+    checks_json = parsed;
+  } else if (parsed.is_object() && parsed.contains("checks") &&
+             parsed["checks"].is_array()) {
+    checks_json = parsed["checks"];
+  } else {
+    error_message = "check config must be an array or an object with a checks "
+                    "array";
+    return std::nullopt;
+  }
+
+  regex_check_list_t checks;
+  std::size_t index = 0;
+  for (auto const &check_json : checks_json) {
+    ++index;
+    if (!check_json.is_object()) {
+      error_message = "check #" + std::to_string(index) + " must be an object";
+      return std::nullopt;
+    }
+
+    auto raw_field = json_string_value(check_json, "field");
+    auto raw_pattern = json_string_value(check_json, "regex");
+    if (!raw_pattern) {
+      raw_pattern = json_string_value(check_json, "pattern");
+    }
+    auto raw_alert = json_string_value(check_json, "alert");
+    if (!raw_alert) {
+      raw_alert = json_string_value(check_json, "message");
+    }
+
+    if (!raw_field || raw_field->empty()) {
+      error_message = "check #" + std::to_string(index) +
+                      " is missing a field value";
+      return std::nullopt;
+    }
+    if (!raw_pattern || raw_pattern->empty()) {
+      error_message = "check #" + std::to_string(index) +
+                      " is missing a regex value";
+      return std::nullopt;
+    }
+    if (!raw_alert || raw_alert->empty()) {
+      error_message = "check #" + std::to_string(index) +
+                      " is missing an alert value";
+      return std::nullopt;
+    }
+
+    auto field = canonical_field(*raw_field);
+    if (!field) {
+      error_message = "check #" + std::to_string(index) +
+                      " uses an unsupported field: " + *raw_field;
+      return std::nullopt;
+    }
+
+    // Read the flag by hand: json::value() throws type_error when the key
+    // holds a non-boolean, and nothing in this function would catch that.
+    bool ignore_case = false;
+    auto const flag_iter = check_json.find("ignore_case");
+    if (flag_iter != check_json.end()) {
+      if (!flag_iter->is_boolean()) {
+        error_message = "check #" + std::to_string(index) +
+                        " has a non-boolean ignore_case value";
+        return std::nullopt;
+      }
+      ignore_case = flag_iter->get<bool>();
+    }
+    if (check_json.contains("case_sensitive") &&
+        check_json["case_sensitive"].is_boolean()) {
+      ignore_case = !check_json["case_sensitive"].get<bool>();
+    }
+
+    auto flags = std::regex_constants::ECMAScript;
+    if (ignore_case) {
+      flags |= std::regex_constants::icase;
+    }
+
+    try {
+      checks.push_back({*field, *raw_pattern, *raw_alert,
+                        std::regex(*raw_pattern, flags),
+                        is_body_field(*field)});
+    } catch (std::regex_error const &e) {
+      error_message = "check #" + std::to_string(index) +
+                      " has an invalid regex: " + e.what();
+      return std::nullopt;
+    }
+  }
+
+  if (checks.empty()) {
+    error_message = "check config does not contain any checks";
+    return std::nullopt;
+  }
+  return checks;
+}
+
+// Evaluates every check against every entry of `result_map`: the domain name
+// and HTTP result once per domain, the DNS fields once per DNS record.
+// Matches are reported as spdlog warnings; nothing is returned.
+void run_regex_checks(map_container_t<probe_result_t> const &result_map,
+                      regex_check_list_t const &checks) {
+  for (auto const &result_pair : result_map.cresult()) {
+    auto const &domain = result_pair.first;
+    auto const &response = result_pair.second;
+
+    for (auto const &check : checks) {
+      if (check.field == "domain") {
+        report_match(check, domain, domain);
+      } else if (check.field == "http_code" || check.field == "code_string" ||
+                 check.field == "content_length" || check.field == "body") {
+        report_match(check, domain,
+                     http_field_value(response.http_result_, check.field));
+      }
+    }
+
+    for (auto const &record : response.dns_result_list_) {
+      for (auto const &check : checks) {
+        if (check.field == "type" || check.field == "rdata" ||
+            check.field == "ttl") {
+          report_match(check, domain, dns_field_value(record, check.field));
+        }
+      }
+    }
+  }
+}
+
+} // namespace dooked
diff --git a/dooked/source/cli_preprocessor.cpp b/dooked/source/cli_preprocessor.cpp
index c08d7fb..7043dd7 100644
--- a/dooked/source/cli_preprocessor.cpp
+++ b/dooked/source/cli_preprocessor.cpp
@@ -354,6 +354,9 @@ void start_name_checking(runtime_args_t &&rt_args) {
     spdlog::info("Writing JSON output");
   }
   write_json_result(result_map, rt_args);
+  if (rt_args.regex_checks) {
+    run_regex_checks(result_map, *rt_args.regex_checks);
+  }
 
   // compare old with new result -- only if we had previous record
   if (rt_args.previous_data) {
@@ -380,6 +383,16 @@ void start_name_checking(runtime_args_t &&rt_args) {
 
 void run_program(cli_args_t const &cli_args) {
   runtime_args_t rt_args{};
+
+  if (!cli_args.check_config_filename.empty()) {
+    std::string check_error;
+    auto checks = load_regex_checks(cli_args.check_config_filename, check_error);
+    if (!checks) {
+      return spdlog::error(check_error);
+    }
+    rt_args.regex_checks.emplace(std::move(*checks));
+  }
+
   // settle resolvers.
std::vector resolver_strings{}; if (cli_args.resolver_filename.empty()) { diff --git a/dooked/source/dns/dns_resolver.cpp b/dooked/source/dns/dns_resolver.cpp index 851745f..5f5ec73 100644 --- a/dooked/source/dns/dns_resolver.cpp +++ b/dooked/source/dns/dns_resolver.cpp @@ -417,11 +417,11 @@ void custom_resolver_socket_t::http_result_obtained( switch (rt) { case response_type_e::bad_request: { - result_map_.insert(name_, content_length, 400); + result_map_.insert(name_, content_length, 400, response_string); return dns_continue_probe(); } case response_type_e::forbidden: { - result_map_.insert(name_, content_length, 403); + result_map_.insert(name_, content_length, 403, response_string); return dns_continue_probe(); } case response_type_e::cannot_resolve_name: { @@ -447,11 +447,11 @@ void custom_resolver_socket_t::http_result_obtained( return send_https_request(response_string); } case response_type_e::not_found: { // HTTP(S) 404 - result_map_.insert(name_, content_length, 404); + result_map_.insert(name_, content_length, 404, response_string); return dns_continue_probe(); } case response_type_e::ok: { - result_map_.insert(name_, content_length, 200); + result_map_.insert(name_, content_length, 200, response_string); return dns_continue_probe(); } case response_type_e::recv_timed_out: { // retry, wait timeout @@ -477,7 +477,7 @@ void custom_resolver_socket_t::http_result_obtained( return send_https_request(response_string); } case response_type_e::server_error: { - result_map_.insert(name_, content_length, 503); + result_map_.insert(name_, content_length, 503, response_string); return dns_continue_probe(); } default: { diff --git a/dooked/source/http/requests_handler.cpp b/dooked/source/http/requests_handler.cpp index d21a592..a505bbc 100644 --- a/dooked/source/http/requests_handler.cpp +++ b/dooked/source/http/requests_handler.cpp @@ -9,6 +9,17 @@ extern bool no_bytes_count; extern bool silent; namespace dooked { +namespace { + +std::string 
capped_response_body(std::string const &body) { + constexpr std::size_t max_body_bytes = 64 * 1024; + if (body.size() <= max_body_bytes) { + return body; + } + return body.substr(0, max_body_bytes); +} + +} // namespace http_request_handler_t::http_request_handler_t(net::io_context &io_context, std::string domain_name) @@ -138,6 +149,7 @@ void http_request_handler_t::on_data_received( if (status_code_simple == 2) { response_int = response_type_e::ok; + response_string = capped_response_body(response_->body()); } else if (status_code_simple == 3) { // redirected response_string = (*response_)[http::field::location].to_string(); if (response_string.empty()) { @@ -150,6 +162,7 @@ void http_request_handler_t::on_data_received( } } } else if (status_code_simple == 4) { + response_string = capped_response_body(response_->body()); if (http_status_code == 404) { response_int = response_type_e::not_found; } else if (http_status_code == 400) { @@ -159,6 +172,7 @@ void http_request_handler_t::on_data_received( } } else if (status_code_simple == 5) { response_int = response_type_e::server_error; + response_string = capped_response_body(response_->body()); } else { #ifdef _DEBUG if (!silent) { @@ -166,6 +180,7 @@ void http_request_handler_t::on_data_received( } #endif // _DEBUG response_int = response_type_e::unknown_response; + response_string = capped_response_body(response_->body()); } int content_length{}; @@ -364,6 +379,7 @@ void https_request_handler_t::on_data_received( if (status_code_simple == 2) { response_int = response_type_e::ok; + response_string = capped_response_body(response_->body()); } else if (status_code_simple == 3) { // redirected response_string = (*response_)[http::field::location].to_string(); if (response_string.empty()) { @@ -376,6 +392,7 @@ void https_request_handler_t::on_data_received( } } } else if (status_code_simple == 4) { + response_string = capped_response_body(response_->body()); if (status_code == 404) { response_int = 
response_type_e::not_found; } else if (status_code == 400) { @@ -385,8 +402,10 @@ void https_request_handler_t::on_data_received( } } else if (status_code_simple == 5) { response_int = response_type_e::server_error; + response_string = capped_response_body(response_->body()); } else { response_int = response_type_e::unknown_response; + response_string = capped_response_body(response_->body()); } int content_length = 0; diff --git a/dooked/source/http/resolver.cpp b/dooked/source/http/resolver.cpp index 95332a4..35ae835 100644 --- a/dooked/source/http/resolver.cpp +++ b/dooked/source/http/resolver.cpp @@ -65,11 +65,11 @@ void http_resolver_t::tcp_request_result(response_type_e const rt, std::string const &response_string) { switch (rt) { case response_type_e::bad_request: { - result_map_.insert(name_, content_length, 400); + result_map_.insert(name_, content_length, 400, response_string); return send_next_request(); } case response_type_e::forbidden: { - result_map_.insert(name_, content_length, 403); + result_map_.insert(name_, content_length, 403, response_string); return send_next_request(); } case response_type_e::cannot_resolve_name: { @@ -97,11 +97,11 @@ void http_resolver_t::tcp_request_result(response_type_e const rt, return send_https_request(response_string); } case response_type_e::not_found: { // HTTP(S) 404 - result_map_.insert(name_, content_length, 404); + result_map_.insert(name_, content_length, 404, response_string); return send_next_request(); } case response_type_e::ok: { - result_map_.insert(name_, content_length, 200); + result_map_.insert(name_, content_length, 200, response_string); return send_next_request(); } case response_type_e::recv_timed_out: { // retry, wait timeout @@ -122,7 +122,7 @@ void http_resolver_t::tcp_request_result(response_type_e const rt, return switch_ssl_method(response_string); } case response_type_e::server_error: { - result_map_.insert(name_, content_length, 503); + result_map_.insert(name_, content_length, 503, 
response_string); return send_next_request(); } default: { diff --git a/dooked/source/main.cpp b/dooked/source/main.cpp index cf29460..b5772e9 100644 --- a/dooked/source/main.cpp +++ b/dooked/source/main.cpp @@ -41,6 +41,8 @@ int main(int argc, char **argv) { "defers http request until after all DNS requests have been completed"); app.add_flag("--compare-cl", compare_cl, "compare content-length of HTTP requests"); + app.add_option("--checks,--check-config", cli_args.check_config_filename, + "load runtime regex checks from a JSON config file"); app.add_flag("--nbc", no_bytes_count, "in case `content-length` is missing in an HTTP header field,"