Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,32 @@ make
## Usage

For comprehensive help, use `dooked --help`

### Runtime regex checks

Use `--checks` or `--check-config` to load custom notification checks from a
JSON file. A config may be a JSON array or an object with a `checks` array:

```json
{
"checks": [
{
"field": "domain",
"regex": "(dev|test)",
"alert": "domain contains an environment marker",
"ignore_case": true
},
{
"field": "body",
"regex": "Copyright 2020",
"alert": "outdated copyright banner"
}
]
}
```

Supported fields are `domain`, DNS fields (`type`, `info`/`rdata`, `ttl`),
HTTP fields (`http_code`, `code_string`, `content_length`), and response body
aliases (`body`, `response_body`, `http_body`, `page_content`, `content`).
Field names are matched case-insensitively, and `-` may be written in place of
`_`. Response bodies are kept only in memory for matching, capped at 64 KiB per
request, and are not written to the JSON output file.
2 changes: 2 additions & 0 deletions dooked/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ set(SRC_FILES
./source/dns/dns_resolver.cpp
./source/http/resolver.cpp
./source/http/requests_handler.cpp
./source/checks/regex_checks.cpp
./source/utils/constants.cpp
./source/utils/io_utils.cpp
./source/utils/string_utils.cpp
Expand All @@ -84,6 +85,7 @@ set(HEADERS_FILES
./include/dns/dns_resolver.hpp
./include/http/resolver.hpp
./include/http/requests_handler.hpp
./include/checks/regex_checks.hpp
./include/utils/constants.hpp
./include/utils/containers.hpp
./include/utils/dns_utils.hpp
Expand Down
28 changes: 28 additions & 0 deletions dooked/include/checks/regex_checks.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include "utils/containers.hpp"
#include "utils/probe_result.hpp"
#include <optional>
#include <regex>
#include <string>
#include <vector>

namespace dooked {

// One user-supplied regex check, as produced by load_regex_checks().
// Member order matters: instances are built with positional brace
// initialisation.
struct regex_check_t {
// Canonical name of the field the pattern is applied to (e.g. "domain",
// "rdata", "http_code", "body").
std::string field{};
// Regex source text exactly as it appeared in the config file.
std::string pattern{};
// Message logged when the pattern matches.
std::string alert{};
// `pattern` compiled once at load time so it is not rebuilt per match.
std::regex compiled_pattern{};
// True when `field` is the canonical "body" field.
bool body_field{false};
};

using regex_check_list_t = std::vector<regex_check_t>;

// Reads the JSON check configuration stored in `filename` and compiles every
// entry into a regex_check_t.
//
// Returns the list of checks on success. On failure (unreadable file, invalid
// JSON, malformed or unsupported entry, invalid regex, or no checks at all)
// returns std::nullopt and stores a human-readable reason in `error_message`.
std::optional<regex_check_list_t>
load_regex_checks(std::string const &filename, std::string &error_message);

// Applies every check in `checks` to every entry of `result_map` (the domain
// name, its HTTP result and each of its DNS records) and logs a warning for
// each value the check's pattern matches.
void run_regex_checks(map_container_t<probe_result_t> const &result_map,
regex_check_list_t const &checks);

} // namespace dooked
3 changes: 3 additions & 0 deletions dooked/include/cli_preprocessor.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include "checks/regex_checks.hpp"
#include "dns/dns_resolver.hpp"
#include "utils/io_utils.hpp"
#include <thread>
Expand All @@ -19,6 +20,7 @@ struct cli_args_t {
std::string resolver_filename{};
std::string output_filename{};
std::string input_filename{};
std::string check_config_filename{};

int file_type{};
int post_http_request{};
Expand All @@ -33,6 +35,7 @@ struct runtime_args_t {
std::optional<std::vector<json_data_t>> previous_data{};
std::unique_ptr<std::ofstream> output_file{};
std::string output_filename{};
std::optional<regex_check_list_t> regex_checks{};
http_process_e http_request_time_{};
int thread_count{};
int content_length{-1};
Expand Down
12 changes: 8 additions & 4 deletions dooked/include/utils/containers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <mutex>
#include <optional>
#include <queue>
#include <string>
#include <vector>

namespace dooked {
Expand All @@ -31,6 +32,7 @@ template <typename T> class circular_queue_t {
struct http_response_t {
int content_length_{};
int http_status_{};
std::string body_{};
};

template <typename ValueType> struct http_dns_response_t {
Expand All @@ -52,9 +54,10 @@ template <typename ValueType> class map_container_t {
}

void insert_impl(std::string const &name, int const len,
int const http_status) {
int const http_status, std::string const &body) {
map_[name].http_result_.content_length_ = len;
map_[name].http_result_.http_status_ = http_status;
map_[name].http_result_.body_ = body;
}

public:
Expand All @@ -74,12 +77,13 @@ template <typename ValueType> class map_container_t {
append_impl(key, value);
}

void insert(std::string const &name, int const len, int const http_status) {
void insert(std::string const &name, int const len, int const http_status,
std::string const &body = {}) {
if (!opt_mutex_) {
return insert_impl(name, len, http_status);
return insert_impl(name, len, http_status, body);
}
std::lock_guard<std::mutex> lock_g{*opt_mutex_};
insert_impl(name, len, http_status);
insert_impl(name, len, http_status, body);
}
// only used by the main thread, after all "computations" have been
// done. There's no need for locks here.
Expand Down
251 changes: 251 additions & 0 deletions dooked/source/checks/regex_checks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
#include "checks/regex_checks.hpp"
#include "utils/constants.hpp"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <nlohmann/json.hpp>
#include <spdlog/spdlog.h>
#include <sstream>

namespace dooked {
namespace {

using json = nlohmann::json;

// Returns a copy of `value` in which every byte has been passed through
// std::tolower.
std::string lowercase(std::string value) {
  for (char &current : value) {
    // std::tolower requires a value representable as unsigned char, so widen
    // through unsigned char before the call and narrow back afterwards.
    auto const as_unsigned = static_cast<unsigned char>(current);
    current = static_cast<char>(std::tolower(as_unsigned));
  }
  return value;
}

// Maps a user-facing field name (any letter case, '-' or '_' as separator)
// onto the canonical name used by the matching code. Returns std::nullopt when
// the name is not a recognised alias.
std::optional<std::string> canonical_field(std::string field) {
  struct alias_entry_t {
    char const *alias;
    char const *canonical;
  };
  static constexpr alias_entry_t known_aliases[] = {
      {"domain", "domain"},
      {"domain_name", "domain"},
      {"name", "domain"},
      {"type", "type"},
      {"record_type", "type"},
      {"info", "rdata"},
      {"rdata", "rdata"},
      {"data", "rdata"},
      {"ttl", "ttl"},
      {"http_code", "http_code"},
      {"status", "http_code"},
      {"status_code", "http_code"},
      {"code_string", "code_string"},
      {"status_text", "code_string"},
      {"content_length", "content_length"},
      {"body", "body"},
      {"content", "body"},
      {"page_content", "body"},
      {"response_body", "body"},
      {"http_body", "body"},
  };

  // Normalise spelling first so the table only needs lowercase snake_case.
  std::string normalized = lowercase(std::move(field));
  std::replace(normalized.begin(), normalized.end(), '-', '_');

  for (auto const &entry : known_aliases) {
    if (normalized == entry.alias) {
      return std::string(entry.canonical);
    }
  }
  return std::nullopt;
}

// Tells whether a canonical field name designates the HTTP response body.
bool is_body_field(std::string const &field) {
  return field.compare("body") == 0;
}

// Fetches `key` from `object` as a string. Yields std::nullopt when the key is
// missing or its value is not a JSON string.
std::optional<std::string> json_string_value(json const &object,
                                             char const *key) {
  auto const position = object.find(key);
  bool const is_usable = position != object.end() && position->is_string();
  if (is_usable) {
    return position->get<std::string>();
  }
  return std::nullopt;
}

// Produces a short, single-line rendering of `value` that is safe to embed in
// a log message.
//
// The text comes from DNS records and HTTP response bodies, i.e. from remote
// parties. Every ASCII control byte (including CR/LF, TAB, NUL, ESC and DEL)
// is therefore replaced by a space so a matched value can neither forge extra
// log lines nor smuggle terminal escape sequences into the output. Values
// longer than 120 bytes are cut to 120 bytes and suffixed with "...".
std::string preview_value(std::string value) {
  constexpr std::size_t max_preview_size = 120;

  std::replace_if(
      value.begin(), value.end(),
      [](char ch) {
        // Explicit ranges instead of std::iscntrl keep this locale-independent
        // and leave bytes >= 0x80 (UTF-8 payload) untouched.
        auto const byte = static_cast<unsigned char>(ch);
        return byte < 0x20 || byte == 0x7f;
      },
      ' ');

  if (value.size() > max_preview_size) {
    value.resize(max_preview_size);
    value += "...";
  }
  return value;
}

// Runs `check` against `value` and, when the pattern is found anywhere in it,
// logs a warning carrying the check's field, the domain, the alert text and a
// sanitised preview of the matched text.
//
// Empty values are skipped outright, so a pattern can never fire on a field
// that has no content.
void report_match(regex_check_t const &check, std::string const &domain,
                  std::string const &value) {
  if (value.empty()) {
    return;
  }

  std::smatch match;
  if (!std::regex_search(value, match, check.compiled_pattern)) {
    return;
  }

  // A successful regex_search always fills sub-match 0 with the overall match,
  // so no fallback to the whole value is needed here.
  spdlog::warn("[REGEX][{}][{}] {} (matched: `{}`)", check.field, domain,
               check.alert, preview_value(match.str(0)));
}

// Renders the HTTP-derived field named by the canonical `field` as text.
// Any name other than "http_code", "code_string", "content_length" or "body"
// produces an empty string.
std::string http_field_value(http_response_t const &response,
                             std::string const &field) {
  std::string rendered{};
  if (field == "body") {
    rendered = response.body_;
  } else if (field == "content_length") {
    rendered = std::to_string(response.content_length_);
  } else if (field == "code_string") {
    rendered = code_string(response.http_status_);
  } else if (field == "http_code") {
    rendered = std::to_string(response.http_status_);
  }
  return rendered;
}

// Renders the DNS-record field named by the canonical `field` as text.
// Any name other than "type", "rdata" or "ttl" produces an empty string.
std::string dns_field_value(probe_result_t const &record,
                            std::string const &field) {
  std::string rendered{};
  if (field == "ttl") {
    rendered = std::to_string(record.ttl);
  } else if (field == "rdata") {
    rendered = record.rdata;
  } else if (field == "type") {
    rendered = dns_record_type_to_str(record.type);
  }
  return rendered;
}

} // namespace

std::optional<regex_check_list_t>
load_regex_checks(std::string const &filename, std::string &error_message) {
std::ifstream input_file(filename);
if (!input_file) {
error_message = "unable to open check config: " + filename;
return std::nullopt;
}

json parsed;
try {
input_file >> parsed;
} catch (std::exception const &e) {
error_message = "invalid JSON check config: " + std::string(e.what());
return std::nullopt;
}

json checks_json;
if (parsed.is_array()) {
checks_json = parsed;
} else if (parsed.is_object() && parsed.contains("checks") &&
parsed["checks"].is_array()) {
checks_json = parsed["checks"];
} else {
error_message = "check config must be an array or an object with a checks "
"array";
return std::nullopt;
}

regex_check_list_t checks;
std::size_t index = 0;
for (auto const &check_json : checks_json) {
++index;
if (!check_json.is_object()) {
error_message = "check #" + std::to_string(index) + " must be an object";
return std::nullopt;
}

auto raw_field = json_string_value(check_json, "field");
auto raw_pattern = json_string_value(check_json, "regex");
if (!raw_pattern) {
raw_pattern = json_string_value(check_json, "pattern");
}
auto raw_alert = json_string_value(check_json, "alert");
if (!raw_alert) {
raw_alert = json_string_value(check_json, "message");
}

if (!raw_field || raw_field->empty()) {
error_message = "check #" + std::to_string(index) +
" is missing a field value";
return std::nullopt;
}
if (!raw_pattern || raw_pattern->empty()) {
error_message = "check #" + std::to_string(index) +
" is missing a regex value";
return std::nullopt;
}
if (!raw_alert || raw_alert->empty()) {
error_message = "check #" + std::to_string(index) +
" is missing an alert value";
return std::nullopt;
}

auto field = canonical_field(*raw_field);
if (!field) {
error_message = "check #" + std::to_string(index) +
" uses an unsupported field: " + *raw_field;
return std::nullopt;
}

bool ignore_case = check_json.value("ignore_case", false);
if (check_json.contains("case_sensitive") &&
check_json["case_sensitive"].is_boolean()) {
ignore_case = !check_json["case_sensitive"].get<bool>();
}

auto flags = std::regex_constants::ECMAScript;
if (ignore_case) {
flags |= std::regex_constants::icase;
}

try {
checks.push_back({*field, *raw_pattern, *raw_alert,
std::regex(*raw_pattern, flags),
is_body_field(*field)});
} catch (std::regex_error const &e) {
error_message = "check #" + std::to_string(index) +
" has an invalid regex: " + e.what();
return std::nullopt;
}
}

if (checks.empty()) {
error_message = "check config does not contain any checks";
return std::nullopt;
}
return checks;
}

// Evaluates every check against every probed domain in `result_map`.
//
// Per domain, the domain-name and HTTP-level checks run first (in the order
// the checks were configured); afterwards each DNS record of that domain is
// tested against the DNS-level checks. Matches are reported by report_match().
void run_regex_checks(map_container_t<probe_result_t> const &result_map,
                      regex_check_list_t const &checks) {
  auto const targets_http = [](std::string const &name) {
    return name == "http_code" || name == "code_string" ||
           name == "content_length" || name == "body";
  };
  auto const targets_dns = [](std::string const &name) {
    return name == "type" || name == "rdata" || name == "ttl";
  };

  for (auto const &entry : result_map.cresult()) {
    auto const &domain_name = entry.first;
    auto const &probe = entry.second;

    // Pass 1: checks that need only the domain name or the HTTP result.
    for (auto const &check : checks) {
      if (check.field == "domain") {
        report_match(check, domain_name, domain_name);
        continue;
      }
      if (targets_http(check.field)) {
        report_match(check, domain_name,
                     http_field_value(probe.http_result_, check.field));
      }
    }

    // Pass 2: checks that inspect individual DNS records.
    for (auto const &dns_record : probe.dns_result_list_) {
      for (auto const &check : checks) {
        if (!targets_dns(check.field)) {
          continue;
        }
        report_match(check, domain_name,
                     dns_field_value(dns_record, check.field));
      }
    }
  }
}

} // namespace dooked
Loading