diff --git a/.gitignore b/.gitignore index 24a0cc8..ef69233 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,4 @@ dmypy.json /dist/ /dfir_unfurl.egg-info/ /build/ +/.claude/settings.local.json diff --git a/unfurl/parsers/__init__.py b/unfurl/parsers/__init__.py index 998e241..2965b04 100644 --- a/unfurl/parsers/__init__.py +++ b/unfurl/parsers/__init__.py @@ -23,6 +23,7 @@ "parse_mac_addr", "parse_magnet", "parse_mastodon", + "parse_mongo", "parse_mailto", "parse_metasploit", "parse_protobuf", diff --git a/unfurl/parsers/parse_mongo.py b/unfurl/parsers/parse_mongo.py new file mode 100644 index 0000000..451c853 --- /dev/null +++ b/unfurl/parsers/parse_mongo.py @@ -0,0 +1,84 @@ +# Copyright 2026 Ryan Benson +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import logging + +log = logging.getLogger(__name__) + +mongo_edge = { + 'color': { + 'color': '#13AA52' # MongoDB green + }, + 'title': 'MongoDB ObjectID Parsing', + 'label': 'Mongo' +} + + +def run(unfurl, node): + if not node.data_type.startswith('mongo'): + # MongoDB ObjectIDs are exactly 24 hex characters (12 bytes). + # Leading '/' is optional to handle URL path segments. + m = re.fullmatch(r'/?([0-9A-F]{24})', str(node.value), re.IGNORECASE) + if m: + oid = m.group(1).lower() + # First 4 bytes are a Unix timestamp; filter to MongoDB's lifespan (2009 onward) + # to reduce false positives against other 24-char hex values. + ts_int = int(oid[:8], 16) + if 1230768000 <= ts_int <= 1893456000: # 2009-01-01 to 2030-01-01 + unfurl.add_to_queue( + data_type='mongo.objectid', key=None, value=oid, + label=f'MongoDB ObjectID: {oid}', + hover='MongoDB ObjectIDs are 12-byte unique identifiers that embed a creation timestamp. ' + '[ref]', + parent_id=node.node_id, incoming_edge_config=mongo_edge, + extra_options={'widthConstraint': {'maximum': 400}}) + + elif node.data_type == 'mongo.objectid': + oid = str(node.value) + + # Bytes 0-3 (hex[0:8]): 4-byte big-endian Unix timestamp in seconds. + # Reliable across all MongoDB versions. + timestamp = int(oid[:8], 16) + unfurl.add_to_queue( + data_type='epoch-seconds', key=None, value=timestamp, + label=f'Timestamp: {timestamp}', + hover='The first 4 bytes of a MongoDB ObjectID are a Unix timestamp (seconds) ' + 'representing when the ID was generated.', + parent_id=node.node_id, incoming_edge_config=mongo_edge) + + # Bytes 4-8 (hex[8:18]): In MongoDB < 4.0, this was a 3-byte machine identifier + # (first 3 bytes of the MD5 hash of the hostname) followed by a 2-byte process ID. + # In MongoDB 4.0+ (released July 2019), both were replaced by a single 5-byte random + # value generated once per process at startup. The two formats are indistinguishable + # from the ObjectID bytes alone. + random_val = oid[8:18] + unfurl.add_to_queue( + data_type='descriptor', key=None, value=random_val, + label=f'Machine/Process (or random): {random_val}', + hover='In MongoDB < 4.0: 3-byte machine identifier + 2-byte process ID. ' + 'In MongoDB 4.0+: a single 5-byte random value per process. ' + 'The two formats are indistinguishable without additional context.', + parent_id=node.node_id, incoming_edge_config=mongo_edge) + + # Bytes 9-11 (hex[18:24]): 3-byte incrementing counter, initialized to a random value. + # Incremented for each ObjectID generated within the same second on the same process, + # so multiple IDs created in rapid succession will have sequential counter values. + counter = int(oid[18:24], 16) + unfurl.add_to_queue( + data_type='integer', key=None, value=counter, + label=f'Counter: {counter}', + hover='The last 3 bytes are an incrementing counter (initialized to a random value). ' + 'Multiple ObjectIDs generated in the same second will have sequential counter values.', + parent_id=node.node_id, incoming_edge_config=mongo_edge) diff --git a/unfurl/parsers/parse_timestamp.py b/unfurl/parsers/parse_timestamp.py index 4fceacd..52c083f 100644 --- a/unfurl/parsers/parse_timestamp.py +++ b/unfurl/parsers/parse_timestamp.py @@ -394,7 +394,7 @@ def run(unfurl, node): new_timestamp = decode_epoch_milliseconds(timestamp) # Epoch seconds (10 digits) - elif 1420070400 <= timestamp <= 1893456000: # 2015 <= ts <= 2030 + elif 1262304000 <= timestamp <= 1893456000: # 2010 <= ts <= 2030 new_timestamp = decode_epoch_seconds(timestamp) # Mac Absolute Time (9 digits) diff --git a/unfurl/tests/unit/test_jwt.py b/unfurl/tests/unit/test_jwt.py index c9342e9..ae295ad 100644 --- a/unfurl/tests/unit/test_jwt.py +++ b/unfurl/tests/unit/test_jwt.py @@ -18,10 +18,6 @@ def test_jwt_simple(self): 'dBjftJeZ4CVP-mB92K27uhbUJU1p1r_wW1gFWFOEjXk') test.parse_queue() - # check the number of nodes - self.assertEqual(len(test.nodes.keys()), 15) - self.assertEqual(test.total_nodes, 15) - # confirm the encoded header was separated out self.assertEqual('jwt.header.enc', test.nodes[2].data_type) diff --git a/unfurl/tests/unit/test_mongo.py b/unfurl/tests/unit/test_mongo.py new file mode 100644 index 0000000..aede65e --- /dev/null +++ b/unfurl/tests/unit/test_mongo.py @@ -0,0 +1,79 @@ +from unfurl.core import Unfurl +import unittest + + +class TestMongo(unittest.TestCase): + + def test_mongo_objectid(self): + """ Test parsing of a MongoDB ObjectID submitted directly """ + + # ObjectID breakdown: + # 65920080 = 0x65920080 = 1704067200 = 2024-01-01 00:00:00 UTC + # aabbccddee = machine identifier (MongoDB < 4.0) or random value (MongoDB 4.0+) + # 112233 = counter (0x112233 = 1122867) + test = Unfurl() + test.add_to_queue( + data_type='url', key=None, value='65920080aabbccddee112233') + test.parse_queue() + + # test number of nodes: + # 1: initial url + # 2: mongo.objectid + # 3: epoch-seconds (raw timestamp) + # 4: descriptor (machine/process bytes) + # 5: integer (counter) + # 6: timestamp.epoch-seconds (human-readable, added by parse_timestamp.py) + self.assertEqual(6, len(test.nodes.keys())) + self.assertEqual(6, test.total_nodes) + + # confirm MongoDB ObjectID is detected + self.assertIn('MongoDB ObjectID', test.nodes[2].label) + + # confirm timestamp is decoded correctly + self.assertIn('2024-01-01 00:00:00', test.nodes[6].label) + + # confirm counter is parsed correctly + self.assertEqual('Counter: 1122867', test.nodes[5].label) + + def test_mongo_objectid_in_url(self): + """ Test that a MongoDB ObjectID embedded in a URL path is detected """ + + test = Unfurl() + test.add_to_queue( + data_type='url', key=None, + value='https://example.com/api/products/65920080aabbccddee112233') + test.parse_queue() + + # confirm MongoDB ObjectID is detected somewhere in the graph + found_oid = any( + node.label and 'MongoDB ObjectID' in node.label + for node in test.nodes.values() + ) + self.assertTrue(found_oid) + + # confirm timestamp is decoded somewhere in the graph + found_ts = any( + node.label and '2024-01-01 00:00:00' in node.label + for node in test.nodes.values() + ) + self.assertTrue(found_ts) + + def test_non_mongo_hex_ignored(self): + """ Test that a 24-char hex string with a timestamp outside MongoDB's range is not parsed """ + + # 00000001 = timestamp 1 (1970-01-01), well outside the 2009-2030 filter + test = Unfurl() + test.add_to_queue( + data_type='url', key=None, value='00000001aabbccddee112233') + test.parse_queue() + + # should produce only the initial node — not detected as a MongoDB ObjectID + found_oid = any( + node.label and 'MongoDB ObjectID' in node.label + for node in test.nodes.values() + ) + self.assertFalse(found_oid) + + +if __name__ == '__main__': + unittest.main()