diff --git a/README.md b/README.md index f1a761c..a8a5ad7 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,24 @@ make ## Usage For comprehensive help, use `dooked --help` + +### DNS record history + +When a previous JSON output is passed back through `--input-file`, dooked now +keeps history fields on each DNS record: + +- `first-seen`: first scan time where the record was observed +- `last-seen`: most recent scan time where the record was observed +- `seen`: number of scans where the record has appeared + +Records that are missing from the latest scan are preserved in the next JSON +output with their previous `last-seen` value. This makes load-balanced records +easier to track when IP addresses rotate between runs. + +Useful reporting flags: + +``` +--fs report records seen for the first time +--ls 2 report records missing since at least 2 days ago +--lsd 05/01/2026 report records missing since a US date +``` diff --git a/dooked/include/cli_preprocessor.hpp b/dooked/include/cli_preprocessor.hpp index 43fa1ba..81ab225 100644 --- a/dooked/include/cli_preprocessor.hpp +++ b/dooked/include/cli_preprocessor.hpp @@ -2,6 +2,7 @@ #include "dns/dns_resolver.hpp" #include "utils/io_utils.hpp" +#include #include // maximum sockets to open regardless of the number of threads @@ -24,7 +25,10 @@ struct cli_args_t { int post_http_request{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; bool include_date{false}; + bool report_first_seen{false}; + std::string last_seen_date{}; }; struct runtime_args_t { @@ -36,6 +40,8 @@ struct runtime_args_t { http_process_e http_request_time_{}; int thread_count{}; int content_length{-1}; + bool report_first_seen{false}; + std::optional last_seen_before{}; }; void run_program(cli_args_t const &cli_args); diff --git a/dooked/include/utils/io_utils.hpp b/dooked/include/utils/io_utils.hpp index 829b09e..46a6711 100644 --- a/dooked/include/utils/io_utils.hpp +++ b/dooked/include/utils/io_utils.hpp @@ -26,6 +26,9 @@ void trim(std::string &); struct json_data_t { std::string domain_name{}; std::string rdata{}; + std::string first_seen{}; + std::string last_seen{}; + int seen{}; int ttl{}; int http_code{}; int content_length{}; @@ -40,6 +43,18 @@ struct json_data_t { dns_str_to_record_type(json_object["type"].get()); data.rdata = json_object["info"].get(); data.ttl = json_object["ttl"].get(); + if (auto const iter = json_object.find("first-seen"); + iter != json_object.end() && iter->second.is_string()) { + data.first_seen = iter->second.get(); + } + if (auto const iter = json_object.find("last-seen"); + iter != json_object.end() && iter->second.is_string()) { + data.last_seen = iter->second.get(); + } + if (auto const iter = json_object.find("seen"); + iter != json_object.end() && iter->second.is_number_integer()) { + data.seen = iter->second.get(); + } data.content_length = len; data.http_code = http_code; return data; diff --git a/dooked/include/utils/probe_result.hpp b/dooked/include/utils/probe_result.hpp index 07211c6..4d2bf51 100644 --- a/dooked/include/utils/probe_result.hpp +++ b/dooked/include/utils/probe_result.hpp @@ -10,6 +10,9 @@ bool case_insensitive_compare(std::string const &, std::string const &); struct probe_result_t { std::string rdata{}; + std::string first_seen{}; + std::string last_seen{}; + int seen{}; dns_record_type_e type{}; // RR TYPE (2 octets) std::uint32_t ttl{}; // time to live(4 octets) diff --git a/dooked/source/cli_preprocessor.cpp b/dooked/source/cli_preprocessor.cpp index c08d7fb..01725ad 100644 --- a/dooked/source/cli_preprocessor.cpp +++ b/dooked/source/cli_preprocessor.cpp @@ -4,10 +4,18 @@ #include "utils/exceptions.hpp" #include "utils/random_utils.hpp" #include "utils/string_utils.hpp" +#include #include #include +#include +#include +#include #include #include +#include +#include +#include +#include // defined (and assigned to) in main.cpp extern bool silent; @@ -18,6 +26,205 @@ namespace dooked { namespace net = boost::asio; using namespace fmt::v7::literals; +namespace { + +std::string history_timestamp(std::time_t const timestamp) { + std::string output{}; + if (timet_to_string(output, static_cast(timestamp), + "%Y-%m-%d %H:%M:%S")) { + return output; + } + return {}; +} + +std::string normalize_history_key(std::string value) { + std::transform(value.begin(), value.end(), value.begin(), + [](unsigned char c) { return std::tolower(c); }); + return value; +} + +std::string history_key(std::string const &domain_name, + dns_record_type_e const record_type, + std::string const &rdata) { + return "{}\x1f{}\x1f{}"_format(normalize_history_key(domain_name), + static_cast(record_type), + normalize_history_key(rdata)); +} + +std::string history_key(json_data_t const &record) { + return history_key(record.domain_name, record.type, record.rdata); +} + +std::string history_key(std::string const &domain_name, + probe_result_t const &record) { + return history_key(domain_name, record.type, record.rdata); +} + +std::optional parse_timestamp(std::string const &input, + char const *format) { + std::tm parsed{}; + parsed.tm_isdst = -1; + std::istringstream stream{input}; + stream >> std::get_time(&parsed, format); + if (stream.fail()) { + return std::nullopt; + } + auto const timestamp = std::mktime(&parsed); + if (timestamp == static_cast(-1)) { + return std::nullopt; + } + return timestamp; +} + +std::optional parse_history_timestamp(std::string const &input) { + if (input.empty()) { + return std::nullopt; + } + if (auto const parsed = parse_timestamp(input, "%Y-%m-%d %H:%M:%S")) { + return parsed; + } + return parse_timestamp(input, "%Y-%m-%d"); +} + +std::optional parse_us_timestamp(std::string const &input) { + if (auto const parsed = parse_timestamp(input, "%m/%d/%Y %H:%M:%S")) { + return parsed; + } + if (auto const parsed = parse_timestamp(input, "%m/%d/%Y %H:%M")) { + return parsed; + } + return parse_timestamp(input, "%m/%d/%Y"); +} + +probe_result_t previous_to_probe_result(json_data_t const &previous, + std::string const &fallback_time) { + probe_result_t result{}; + result.rdata = previous.rdata; + result.first_seen = previous.first_seen; + result.last_seen = previous.last_seen; + result.seen = previous.seen; + result.type = previous.type; + result.ttl = static_cast(previous.ttl); + + if (result.last_seen.empty()) { + result.last_seen = fallback_time; + } + if (result.first_seen.empty()) { + result.first_seen = result.last_seen; + } + if (result.seen <= 0) { + result.seen = 1; + } + return result; +} + +bool should_report_last_seen(json_data_t const &record, + runtime_args_t const &rt_args) { + if (!rt_args.last_seen_before) { + return false; + } + auto const last_seen = parse_history_timestamp(record.last_seen); + return !last_seen || *last_seen <= *rt_args.last_seen_before; +} + +void report_first_seen(runtime_args_t const &rt_args, std::string const &domain, + probe_result_t const &record) { + if (rt_args.report_first_seen) { + spdlog::info("[FIRST-SEEN][{}][{}] `{}`", domain, + dns_record_type_to_str(record.type), record.rdata); + } +} + +void report_last_seen(runtime_args_t const &rt_args, json_data_t const &record) { + if (should_report_last_seen(record, rt_args)) { + auto const when = record.last_seen.empty() ? "unknown" : record.last_seen; + spdlog::info("[LAST-SEEN][{}][{}] `{}` last seen {}", record.domain_name, + dns_record_type_to_str(record.type), record.rdata, when); + } +} + +void merge_history(std::vector const *previous_result, + map_container_t ¤t_result, + runtime_args_t const &rt_args, std::time_t const now) { + auto const timestamp = history_timestamp(now); + auto ¤t_data_map = current_result.result(); + std::unordered_map previous_by_key{}; + std::unordered_set current_keys{}; + + if (previous_result) { + previous_by_key.reserve(previous_result->size()); + for (auto const &record : *previous_result) { + previous_by_key.emplace(history_key(record), &record); + } + } + + for (auto &[domain_name, domain_info] : current_data_map) { + for (auto &record : domain_info.dns_result_list_) { + auto const key = history_key(domain_name, record); + current_keys.insert(key); + auto const previous_iter = previous_by_key.find(key); + if (previous_iter != previous_by_key.end()) { + auto const &previous = *previous_iter->second; + record.first_seen = previous.first_seen.empty() + ? (previous.last_seen.empty() + ? timestamp + : previous.last_seen) + : previous.first_seen; + record.seen = previous.seen > 0 ? previous.seen + 1 : 2; + } else { + record.first_seen = timestamp; + record.seen = 1; + report_first_seen(rt_args, domain_name, record); + } + record.last_seen = timestamp; + } + } + + if (!previous_result) { + return; + } + + for (auto const &previous : *previous_result) { + auto const key = history_key(previous); + if (current_keys.find(key) != current_keys.end()) { + continue; + } + + report_last_seen(rt_args, previous); + auto &domain_info = current_data_map[previous.domain_name]; + if (domain_info.http_result_.http_status_ == 0) { + domain_info.http_result_.content_length_ = previous.content_length; + domain_info.http_result_.http_status_ = previous.http_code; + } + domain_info.dns_result_list_.push_back( + previous_to_probe_result(previous, timestamp)); + current_keys.insert(key); + } +} + +std::optional resolve_last_seen_cutoff(cli_args_t const &cli_args) { + if (!cli_args.last_seen_date.empty()) { + return parse_us_timestamp(cli_args.last_seen_date); + } + if (cli_args.last_seen_days >= 0) { + return std::time(nullptr) - + static_cast(cli_args.last_seen_days) * 24 * 60 * 60; + } + return std::nullopt; +} + +void sort_dns_results(map_container_t &result_map) { + for (auto &result_pair : result_map.result()) { + std::sort(result_pair.second.dns_result_list_.begin(), + result_pair.second.dns_result_list_.end(), + [](auto const &a, auto const &b) { + return std::tie(a.type, a.rdata) < std::tie(b.type, b.rdata); + }); + } +} + +} // namespace + void compare_http_result(int const base_cl, json_data_t const &prev_http_result, http_response_t const ¤t_result) { auto const current_req_cl = current_result.content_length_; @@ -350,10 +557,7 @@ void start_name_checking(runtime_args_t &&rt_args) { } thread_pool->join(); } - if (!silent) { - spdlog::info("Writing JSON output"); - } - write_json_result(result_map, rt_args); + auto const now = std::time(nullptr); // compare old with new result -- only if we had previous record if (rt_args.previous_data) { @@ -365,17 +569,18 @@ void start_name_checking(runtime_args_t &&rt_args) { return std::tie(a.domain_name, a.type) < std::tie(b.domain_name, b.type); }); - auto &result = result_map.result(); - for (auto &res : result) { - std::sort(res.second.dns_result_list_.begin(), - res.second.dns_result_list_.end(), - [](auto const &a, auto const &b) { - return std::tie(a.type, a.rdata) < std::tie(b.type, b.rdata); - }); - } - return compare_results(*rt_args.previous_data, result_map, - rt_args.content_length); + sort_dns_results(result_map); + compare_results(previous_data, result_map, rt_args.content_length); + merge_history(&previous_data, result_map, rt_args, now); + } else { + merge_history(nullptr, result_map, rt_args, now); } + + sort_dns_results(result_map); + if (!silent) { + spdlog::info("Writing JSON output"); + } + write_json_result(result_map, rt_args); } void run_program(cli_args_t const &cli_args) { @@ -400,6 +605,13 @@ void run_program(cli_args_t const &cli_args) { } } + rt_args.report_first_seen = cli_args.report_first_seen; + rt_args.last_seen_before = resolve_last_seen_cutoff(cli_args); + if (!cli_args.last_seen_date.empty() && !rt_args.last_seen_before) { + return spdlog::error("invalid --lsd date `{}`; expected MM/DD/YYYY", + cli_args.last_seen_date); + } + // read input file if (!read_input_file(cli_args, rt_args)) { return; diff --git a/dooked/source/main.cpp b/dooked/source/main.cpp index cf29460..d90bf58 100644 --- a/dooked/source/main.cpp +++ b/dooked/source/main.cpp @@ -34,6 +34,12 @@ int main(int argc, char **argv) { app.add_option( "-c,--content-length", cli_args.content_length, "show content lengths that changed more than --content-length"); + app.add_flag("--fs", cli_args.report_first_seen, + "show DNS records observed for the first time"); + app.add_option("--ls", cli_args.last_seen_days, + "show DNS records not seen since this many days ago"); + app.add_option("--lsd", cli_args.last_seen_date, + "show DNS records not seen since a US date (MM/DD/YYYY)"); app.add_flag("-d,--include-date", cli_args.include_date, "append present datetime(-ddMMyyyy_hhmmss) in output name"); app.add_flag( diff --git a/dooked/source/utils/io_utils.cpp b/dooked/source/utils/io_utils.cpp index a1bd5d3..0772911 100644 --- a/dooked/source/utils/io_utils.cpp +++ b/dooked/source/utils/io_utils.cpp @@ -5,7 +5,10 @@ namespace dooked { void to_json(json &j, probe_result_t const &record) { j = json{{"ttl", record.ttl}, {"type", dns_record_type_to_str(record.type)}, - {"info", record.rdata}}; + {"info", record.rdata}, + {"first-seen", record.first_seen}, + {"last-seen", record.last_seen}, + {"seen", record.seen}}; } bool is_text_file(std::string const &file_extension) {