Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,66 @@ make
## Usage

For comprehensive help, use `dooked --help`

### History tracking

When a previous JSON output file is passed back as input, dooked now carries
record history forward into the next JSON output:

- `first-seen`: when this DNS record first appeared in dooked output
- `last-seen`: when this DNS record was last observed
- `seen`: how many runs have observed the same domain/type/value tuple

The history metadata makes load-balanced records less noisy because an IP can
rotate out of a single run without losing when it was first and last observed.
Records from previous JSON input are preserved in the next output when they are
not observed in the current run, with their previous `last-seen` value intact.

Additional reporting flags:

```sh
dooked -i previous.json --fs
dooked -i previous.json --ls 2
dooked -i previous.json --lsd 05/16/2026
```

- `--fs` reports DNS records that are first seen in the current run.
- `--ls <days>` reports records from the previous JSON that are missing from
the current run and were last seen at least `<days>` days ago.
- `--lsd <MM/DD/YYYY>` reports missing records last seen on or before the
supplied US-formatted date.

### Runtime regex checks

Use `--checks <file.json>` to load custom regex alerts at runtime. The config
can be either an array or an object with a `checks` array:

```json
{
"checks": [
{
"field": "domain",
"regex": "(dev|test)",
"alert": "development-looking domain",
"ignore_case": true
},
{
"field": "page_content",
"regex": "Copyright 2024",
"alert": "outdated copyright banner"
},
{
"field": "content_length",
"regex": "^[1-9][0-9]{5,}$",
"alert": "large response body"
}
]
}
```

Supported fields are `domain`, `domain_name`, `type`, `rdata`, `info`, `ttl`,
`http_code`, `code_string`, `content_length`, `body`, `page_content`,
`response_body`, `first-seen`, `first_seen`, `last-seen`, `last_seen`, and
`seen`. Use `"field": "*"` to run a regex against every supported field.
Response bodies are capped in memory for runtime checks and are not written
into JSON output.
17 changes: 17 additions & 0 deletions dooked/include/cli_preprocessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "dns/dns_resolver.hpp"
#include "utils/io_utils.hpp"
#include <regex>
#include <thread>

// maximum sockets to open regardless of the number of threads
Expand All @@ -19,12 +20,23 @@ struct cli_args_t {
std::string resolver_filename{};
std::string output_filename{};
std::string input_filename{};
std::string last_seen_date{};
std::string check_config_filename{};

int file_type{};
int post_http_request{};
int thread_count{};
int content_length{-1};
int last_seen_days{-1};
bool include_date{false};
bool show_first_seen{false};
};

// One regex check applied at runtime. The README describes these as custom
// regex alerts loaded with `--checks <file.json>`, each config entry carrying
// a "field", a "regex" and an "alert".
struct regex_check_t {
// name of the field the check targets (e.g. "domain"); the README also
// mentions "*" for every supported field
std::string field{};
// regex object used for matching — presumably compiled from the config's
// "regex" entry; TODO confirm against the loader
std::regex pattern{};
// presumably the original, uncompiled regex text kept for reporting —
// TODO confirm against the loader
std::string pattern_text{};
// alert text associated with this check
std::string alert{};
};

struct runtime_args_t {
Expand All @@ -33,9 +45,14 @@ struct runtime_args_t {
std::optional<std::vector<json_data_t>> previous_data{};
std::unique_ptr<std::ofstream> output_file{};
std::string output_filename{};
std::string run_timestamp{};
std::string last_seen_date{};
std::vector<regex_check_t> regex_checks{};
http_process_e http_request_time_{};
int thread_count{};
int content_length{-1};
int last_seen_days{-1};
bool show_first_seen{false};
};

void run_program(cli_args_t const &cli_args);
Expand Down
12 changes: 8 additions & 4 deletions dooked/include/utils/containers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <mutex>
#include <optional>
#include <queue>
#include <string>
#include <vector>

namespace dooked {
Expand All @@ -31,6 +32,7 @@ template <typename T> class circular_queue_t {
// HTTP result stored per name; all three members are filled in by
// map_container_t::insert_impl.
struct http_response_t {
// content length recorded for the response (zero-initialised by default)
int content_length_{};
// HTTP status code recorded for the response (zero-initialised by default)
int http_status_{};
// response body text; empty unless a body is supplied on insert.
// NOTE(review): the README states bodies are capped in memory and not
// written to JSON output — neither is enforced by this struct itself.
std::string response_body_{};
};

template <typename ValueType> struct http_dns_response_t {
Expand All @@ -52,9 +54,10 @@ template <typename ValueType> class map_container_t {
}

void insert_impl(std::string const &name, int const len,
int const http_status) {
int const http_status, std::string const &body) {
map_[name].http_result_.content_length_ = len;
map_[name].http_result_.http_status_ = http_status;
map_[name].http_result_.response_body_ = body;
}

public:
Expand All @@ -74,12 +77,13 @@ template <typename ValueType> class map_container_t {
append_impl(key, value);
}

void insert(std::string const &name, int const len, int const http_status) {
void insert(std::string const &name, int const len, int const http_status,
std::string const &body = {}) {
if (!opt_mutex_) {
return insert_impl(name, len, http_status);
return insert_impl(name, len, http_status, body);
}
std::lock_guard<std::mutex> lock_g{*opt_mutex_};
insert_impl(name, len, http_status);
insert_impl(name, len, http_status, body);
}
// only used by main thread, after all "computations" have been
// done. There's no need for locks here.
Expand Down
1 change: 1 addition & 0 deletions dooked/include/utils/exceptions.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <stdexcept>
#include <string>

namespace dooked {

Expand Down
77 changes: 76 additions & 1 deletion dooked/include/utils/io_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <map>
#include <nlohmann/json.hpp>
#include <optional>
#include <set>
#include <sstream>

namespace dooked {
Expand All @@ -26,14 +28,26 @@ void trim(std::string &);
struct json_data_t {
std::string domain_name{};
std::string rdata{};
std::string first_seen{};
std::string last_seen{};
int ttl{};
int http_code{};
int content_length{};
int seen{1};
dns_record_type_e type{};

static json_data_t serialize(std::string const &d, int const len,
int const http_code,
json::object_t &json_object) {
auto const get_string = [&](char const *primary, char const *fallback) {
if (auto iter = json_object.find(primary); iter != json_object.end()) {
return iter->second.get<json::string_t>();
}
if (auto iter = json_object.find(fallback); iter != json_object.end()) {
return iter->second.get<json::string_t>();
}
return std::string{};
};
json_data_t data{};
data.domain_name = d;
data.type =
Expand All @@ -42,6 +56,11 @@ struct json_data_t {
data.ttl = json_object["ttl"].get<json::number_integer_t>();
data.content_length = len;
data.http_code = http_code;
data.first_seen = get_string("first-seen", "first_seen");
data.last_seen = get_string("last-seen", "last_seen");
if (auto iter = json_object.find("seen"); iter != json_object.end()) {
data.seen = iter->second.get<json::number_integer_t>();
}
return data;
}
};
Expand All @@ -54,6 +73,25 @@ struct jd_domain_comparator_t {

namespace detail {

// Builds the lookup key for a DNS record: the domain, the textual record
// type and the rdata, joined with newline separators.
inline std::string record_key(std::string const &domain,
                              dns_record_type_e const type,
                              std::string const &rdata) {
  std::string key{domain};
  key += "\n";
  key += dns_record_type_to_str(type);
  key += "\n";
  key += rdata;
  return key;
}

inline std::map<std::string, json_data_t>
previous_record_map(std::optional<std::vector<json_data_t>> const &previous) {
std::map<std::string, json_data_t> records{};
if (!previous) {
return records;
}
for (auto const &record : *previous) {
records[record_key(record.domain_name, record.type, record.rdata)] =
record;
}
return records;
}

template <typename DnsType, typename RtType>
void write_json_result_impl(map_container_t<DnsType> const &result_map,
RtType const &rt_args) {
Expand All @@ -67,10 +105,47 @@ void write_json_result_impl(map_container_t<DnsType> const &result_map,
}

json::array_t list;
auto const previous_records = previous_record_map(rt_args.previous_data);
auto const now = rt_args.run_timestamp;
for (auto const &result_pair : result_map.cresult()) {
json::object_t internal_object;
auto &http_result = result_pair.second.http_result_;
internal_object["dns_probe"] = result_pair.second.dns_result_list_;
json::array_t dns_probe{};
std::set<std::string> written_record_keys{};
for (auto const &record : result_pair.second.dns_result_list_) {
auto const key = record_key(result_pair.first, record.type, record.rdata);
written_record_keys.insert(key);
json::object_t record_json{};
record_json["ttl"] = record.ttl;
record_json["type"] = dns_record_type_to_str(record.type);
record_json["info"] = record.rdata;

auto const previous_iter = previous_records.find(key);
if (previous_iter == previous_records.end()) {
record_json["first-seen"] = now;
record_json["seen"] = 1;
} else {
auto const &previous = previous_iter->second;
record_json["first-seen"] =
previous.first_seen.empty() ? now : previous.first_seen;
record_json["seen"] = previous.seen + 1;
}
record_json["last-seen"] = now;
dns_probe.push_back(std::move(record_json));
}
for (auto const &[key, previous] : previous_records) {
if (previous.domain_name != result_pair.first ||
written_record_keys.count(key) != 0) {
continue;
}
dns_probe.push_back({{"ttl", previous.ttl},
{"type", dns_record_type_to_str(previous.type)},
{"info", previous.rdata},
{"first-seen", previous.first_seen},
{"last-seen", previous.last_seen},
{"seen", previous.seen}});
}
internal_object["dns_probe"] = std::move(dns_probe);
internal_object["content_length"] = http_result.content_length_;
internal_object["http_code"] = http_result.http_status_;
internal_object["code_string"] = code_string(http_result.http_status_);
Expand Down
Loading