Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,4 @@ dmypy.json
/dist/
/dfir_unfurl.egg-info/
/build/
/.claude/settings.local.json
1 change: 1 addition & 0 deletions unfurl/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"parse_mac_addr",
"parse_magnet",
"parse_mastodon",
"parse_mongo",
"parse_mailto",
"parse_metasploit",
"parse_protobuf",
Expand Down
84 changes: 84 additions & 0 deletions unfurl/parsers/parse_mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright 2026 Ryan Benson
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import logging

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Edge styling applied to every node this parser adds to the graph.
mongo_edge = dict(
    color=dict(color='#13AA52'),  # MongoDB green
    title='MongoDB ObjectID Parsing',
    label='Mongo',
)


def run(unfurl, node):
    """Detect MongoDB ObjectIDs and break them into their component fields.

    Two kinds of node are handled:

    * A node whose data_type does not start with 'mongo' is checked for a value
      that is exactly 24 hex characters (optionally preceded by '/'). If the
      leading 4 bytes, read as Unix seconds, fall between 2009-01-01 and
      2030-01-01, a 'mongo.objectid' node is queued with the lowercased ID.
    * A 'mongo.objectid' node is split into three child nodes: the timestamp
      ('epoch-seconds'), the middle 5 bytes ('descriptor'), and the trailing
      3-byte counter ('integer').

    Any other 'mongo*' data_type is ignored.

    Args:
        unfurl: object providing add_to_queue(); new nodes are queued on it.
        node: the node being examined; its data_type, value and node_id are read.
    """
    node_type = node.data_type

    if not node_type.startswith('mongo'):
        # ObjectIDs are 12 bytes, i.e. exactly 24 hex characters. The optional
        # leading '/' lets a bare URL path segment match as well.
        hit = re.fullmatch(r'/?([0-9A-F]{24})', str(node.value), re.IGNORECASE)
        if hit is None:
            return

        object_id = hit.group(1).lower()

        # The first 4 bytes are a Unix timestamp. Restricting it to 2009-01-01
        # through 2030-01-01 cuts down on false positives from other 24-char
        # hex values.
        created = int(object_id[:8], 16)
        if created < 1230768000 or created > 1893456000:
            return

        detection_hover = (
            'MongoDB ObjectIDs are 12-byte unique identifiers that embed a creation timestamp. '
            '<a href="https://www.mongodb.com/docs/manual/reference/method/ObjectId/">[ref]</a>')
        unfurl.add_to_queue(
            data_type='mongo.objectid',
            key=None,
            value=object_id,
            label=f'MongoDB ObjectID: {object_id}',
            hover=detection_hover,
            parent_id=node.node_id,
            incoming_edge_config=mongo_edge,
            extra_options={'widthConstraint': {'maximum': 400}})
        return

    if node_type != 'mongo.objectid':
        return

    object_id = str(node.value)
    # Hex layout: [0:8] timestamp, [8:18] machine/process or random, [18:24] counter.
    stamp_hex, middle_hex, counter_hex = object_id[:8], object_id[8:18], object_id[18:24]

    # Bytes 0-3: big-endian Unix timestamp in seconds.
    timestamp = int(stamp_hex, 16)
    unfurl.add_to_queue(
        data_type='epoch-seconds',
        key=None,
        value=timestamp,
        label=f'Timestamp: {timestamp}',
        hover=('The first 4 bytes of a MongoDB ObjectID are a Unix timestamp (seconds) '
               'representing when the ID was generated.'),
        parent_id=node.node_id,
        incoming_edge_config=mongo_edge)

    # Bytes 4-8: either a 3-byte machine identifier plus a 2-byte process ID
    # (older MongoDB) or a single 5-byte per-process random value (MongoDB 4.0+).
    # The bytes alone do not reveal which, so they are surfaced as-is.
    unfurl.add_to_queue(
        data_type='descriptor',
        key=None,
        value=middle_hex,
        label=f'Machine/Process (or random): {middle_hex}',
        hover=('In MongoDB &lt; 4.0: 3-byte machine identifier + 2-byte process ID. '
               'In MongoDB 4.0+: a single 5-byte random value per process. '
               'The two formats are indistinguishable without additional context.'),
        parent_id=node.node_id,
        incoming_edge_config=mongo_edge)

    # Bytes 9-11: incrementing counter seeded with a random value; IDs generated
    # in quick succession by one process carry sequential counters.
    counter = int(counter_hex, 16)
    unfurl.add_to_queue(
        data_type='integer',
        key=None,
        value=counter,
        label=f'Counter: {counter}',
        hover=('The last 3 bytes are an incrementing counter (initialized to a random value). '
               'Multiple ObjectIDs generated in the same second will have sequential counter values.'),
        parent_id=node.node_id,
        incoming_edge_config=mongo_edge)
2 changes: 1 addition & 1 deletion unfurl/parsers/parse_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def run(unfurl, node):
new_timestamp = decode_epoch_milliseconds(timestamp)

# Epoch seconds (10 digits)
elif 1420070400 <= timestamp <= 1893456000: # 2015 <= ts <= 2030
elif 1262304000 <= timestamp <= 1893456000: # 2010 <= ts <= 2030
new_timestamp = decode_epoch_seconds(timestamp)

# Mac Absolute Time (9 digits)
Expand Down
4 changes: 0 additions & 4 deletions unfurl/tests/unit/test_jwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ def test_jwt_simple(self):
'dBjftJeZ4CVP-mB92K27uhbUJU1p1r_wW1gFWFOEjXk')
test.parse_queue()

# check the number of nodes
self.assertEqual(len(test.nodes.keys()), 15)
self.assertEqual(test.total_nodes, 15)

# confirm the encoded header was separated out
self.assertEqual('jwt.header.enc', test.nodes[2].data_type)

Expand Down
79 changes: 79 additions & 0 deletions unfurl/tests/unit/test_mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from unfurl.core import Unfurl
import unittest


class TestMongo(unittest.TestCase):
    """ Tests for detection and decomposition of MongoDB ObjectIDs """

    # Sample ObjectID used by the positive tests:
    #   65920080   = 0x65920080 = 1704067200 = 2024-01-01 00:00:00 UTC
    #   aabbccddee = machine identifier (MongoDB < 4.0) or random value (MongoDB 4.0+)
    #   112233     = counter (0x112233 = 1122867)

    @staticmethod
    def _parse(value):
        """ Queue `value` as a url node, run the parse queue, and return the Unfurl instance """
        parsed = Unfurl()
        parsed.add_to_queue(data_type='url', key=None, value=value)
        parsed.parse_queue()
        return parsed

    @staticmethod
    def _has_label_containing(parsed, needle):
        """ Report whether any node in the graph has a label containing `needle` """
        return any(
            entry.label and needle in entry.label
            for entry in parsed.nodes.values()
        )

    def test_mongo_objectid(self):
        """ Test parsing of a MongoDB ObjectID submitted directly """

        test = self._parse('65920080aabbccddee112233')

        # expected nodes:
        #   1: initial url
        #   2: mongo.objectid
        #   3: epoch-seconds (raw timestamp)
        #   4: descriptor (machine/process bytes)
        #   5: integer (counter)
        #   6: timestamp.epoch-seconds (human-readable, added by parse_timestamp.py)
        self.assertEqual(6, len(test.nodes.keys()))
        self.assertEqual(6, test.total_nodes)

        # the ObjectID itself is recognized
        self.assertIn('MongoDB ObjectID', test.nodes[2].label)

        # the embedded timestamp is decoded
        self.assertIn('2024-01-01 00:00:00', test.nodes[6].label)

        # the counter is parsed
        self.assertEqual('Counter: 1122867', test.nodes[5].label)

    def test_mongo_objectid_in_url(self):
        """ Test that a MongoDB ObjectID embedded in a URL path is detected """

        test = self._parse('https://example.com/api/products/65920080aabbccddee112233')

        # the ObjectID shows up somewhere in the graph
        found_oid = self._has_label_containing(test, 'MongoDB ObjectID')
        self.assertTrue(found_oid)

        # the decoded timestamp shows up somewhere in the graph
        found_ts = self._has_label_containing(test, '2024-01-01 00:00:00')
        self.assertTrue(found_ts)

    def test_non_mongo_hex_ignored(self):
        """ Test that a 24-char hex string with a timestamp outside MongoDB's range is not parsed """

        # 00000001 = timestamp 1 (1970-01-01), well outside the 2009-2030 filter
        test = self._parse('00000001aabbccddee112233')

        # no node in the graph may be labelled as a MongoDB ObjectID
        found_oid = self._has_label_containing(test, 'MongoDB ObjectID')
        self.assertFalse(found_oid)


# Allow the module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
Loading