Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"bindings/thread-cpu-clock.cc",
"bindings/translate-heap-profile.cc",
"bindings/translate-time-profile.cc",
"bindings/binding.cc"
"bindings/binding.cc",
"bindings/map-get.cc"
],
"include_dirs": [
"bindings",
Expand Down
375 changes: 375 additions & 0 deletions bindings/map-get.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,375 @@
/**
* Copyright 2025 Datadog. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "map-get.hh"

// Find a value in a JavaScript Map by directly reading the underlying V8 hash
// map.
//
// V8 uses TWO internal hash map representations:
// 1. SmallOrderedHashMap: For small maps (capacity 4-254)
// - Metadata stored as uint8_t bytes
// - Entry size: 2 (key, value)
// - Chain table separate from entries
//
// 2. OrderedHashMap: For larger maps (capacity >254)
// - Metadata stored as Smis in FixedArray
// - Entry size: 3 (key, value, chain)
// - Chain stored inline with entries
//
// This code handles both types by detecting the table format at runtime.
// Practical testing shows that at least the AsyncContextFrame maps use the
// large map format even for small cardinality maps, but just in case we handle
// both.

#include <cstdint>

namespace dd {

// Pointer-sized unsigned word. Used in this file both for tagged V8 values
// and for raw (untagged) object addresses.
using Address = uintptr_t;

#ifndef _WIN32
// ============================================================================
// Constants from V8 internals
// ============================================================================

// Heap object tagging: UntagPointer() subtracts this value from a tagged
// pointer to obtain the raw address of the object.
constexpr int kHeapObjectTag = 1;

// OrderedHashMap/SmallOrderedHashMap shared constants
// Entry index reported by the lookup helpers when no entry matches.
constexpr int kNotFound = -1;
// Entries per bucket in a SmallOrderedHashMap (capacity = buckets * 2).
constexpr int kSmallLoadFactor = 2;

// ============================================================================
// Helper Functions (needed by struct methods)
// ============================================================================

// Removes the heap-object tag from a tagged pointer and returns the raw
// address of the object it refers to.
inline Address UntagPointer(Address tagged) {
  const Address raw_address = tagged - kHeapObjectTag;
  return raw_address;
}

// Reports whether a tagged word is a Smi, i.e. whether its lowest bit is
// clear.
inline bool IsSmi(Address value) {
  const Address tag_bit = value & 1;
  return tag_bit == 0;
}

// The SmiToInt conversion below is valid only on 64-bit platforms, which are
// the only ones we support.
static_assert(sizeof(void*) == 8, "Only 64-bit platforms supported");

// Extracts the integer payload of a Smi: the word is reinterpreted as signed
// and shifted right by 32 bits, so the payload is taken from the upper half of
// the word. The caller is expected to have checked IsSmi() first.
inline int SmiToInt(Address smi) {
  const auto signed_word = static_cast<intptr_t>(smi);
  const intptr_t payload = signed_word >> 32;
  return static_cast<int>(payload);
}

// ============================================================================
// V8 Hashtable Structure Definitions
// ============================================================================

// HeapObject layout - base for all V8 heap objects
// From v8/src/objects/heap-object.h
// Every other layout struct in this file starts with this header, so the
// offsets of their own fields are counted from the end of it.
struct HeapObjectLayout {
Address classMap_; // Tagged pointer to the class map (not read by this file)
};

// JavaScript Map object
// Mirrors the leading fields of a JS Map so that the pointer to its backing
// hash table can be read straight from memory. Field order and sizes must
// match V8's in-memory layout; only table_ is read by this file.
struct JSMapLayout {
HeapObjectLayout header_; // Map is a HeapObject
Address properties_or_hash_; // not used by us
Address elements_; // not used by us
// Tagged pointer to a [Small]OrderedHashMapLayout; must be untagged with
// UntagPointer() before it is dereferenced.
Address table_;
};

// V8 FixedArray: length_ is a Smi, followed by that many element slots
struct FixedArrayLayout {
HeapObjectLayout header_; // FixedArray is a HeapObject
Address length_; // Number of element slots, encoded as a Smi
// Zero-length array (compiler extension) marking where the element slots
// begin; structs embedding FixedArrayLayout declare their slots right after.
Address elements_[0];
};

// NOTE: both the OrderedHashMap and SmallOrderedHashMap layouts expose the
// same set of accessor methods, so FindEntryByHash and FindValueByHash can be
// defined as function templates that work on either of them.

// OrderedHashMap layout (for large maps, capacity >254)
// From v8/src/objects/ordered-hash-table.h
struct OrderedHashMapLayout {
FixedArrayLayout fixedArray_; // OrderedHashMap is a FixedArray
// The first 3 address slots in the FixedArray that is a Hashtable are the
// number of elements, deleted elements, and buckets. Each one is a Smi.
Address number_of_elements_;
Address number_of_deleted_elements_;
Address number_of_buckets_;
// First number_of_buckets_ entries in head_and_data_table_ is the head table:
// each entry is an index of the first entry (head of the linked list of
// entries) in the data table for that bucket. This is followed by the data
// table. Each data table entry uses three (kEntrySize == 3) tagged pointer
// slots:
// [0]: key (Tagged Object)
// [1]: value (Tagged Object)
// [2]: chain (Smi - next entry index or -1)
// All indices (both to the head of the list and to the next entry are
// expressed in number of entries from the start of the data table, so to
// convert it into a head_and_data_table_ you need to add number_of_buckets_
// (length of the head table) and then 3 * index.
Address head_and_data_table_[0]; // Variable: [head_table][data_table]

// Constants for entry structure
static constexpr int kEntrySize = 3;
static constexpr int kKeyOffset = 0;
static constexpr int kValueOffset = 1;
static constexpr int kChainOffset = 2;
static constexpr int kNotFoundValue = kNotFound;

// Get number of buckets (converts Smi to int)
int NumberOfBuckets() const {
return IsSmi(number_of_buckets_) ? SmiToInt(number_of_buckets_) : 0;
}

// Get an upper bound for number of element chain in a bucket. Used to prevent
// infinite lookup chain.
int GetMaxChainLength() const {
return IsSmi(number_of_elements_) && IsSmi(number_of_deleted_elements_)
? SmiToInt(number_of_elements_) +
SmiToInt(number_of_deleted_elements_)
: 0;
}

// Convert hash to bucket index
int HashToBucket(int hash) const {
int num_buckets = NumberOfBuckets();
return num_buckets > 0 ? (hash & (num_buckets - 1)) : 0;
}

// Get first entry index for a bucket
int GetFirstEntry(int bucket) const {
Address entry_smi = head_and_data_table_[bucket];
return IsSmi(entry_smi) ? SmiToInt(entry_smi) : kNotFound;
}

// Convert entry index to head_and_data_table_ index for the entry's key
int EntryToIndex(int entry) const {
return NumberOfBuckets() + (entry * kEntrySize);
}

// Get key at entry index
Address GetKey(int entry) const {
int index = EntryToIndex(entry);
return head_and_data_table_[index + kKeyOffset];
}

// Get value at entry index
Address GetValue(int entry) const {
int index = EntryToIndex(entry);
return head_and_data_table_[index + kValueOffset];
}

// Get next entry in chain
int GetNextChainEntry(int entry) const {
int index = EntryToIndex(entry);
Address chain_smi = head_and_data_table_[index + kChainOffset];
return IsSmi(chain_smi) ? SmiToInt(chain_smi) : kNotFound;
}
};

// SmallOrderedHashMap layout (for small maps, capacity 4-254)
// Memory layout (stores metadata as uint8_t, not Smis):
// [0]: map pointer (HeapObject)
// [kHeaderSize + 0]: number_of_elements (uint8)
// [kHeaderSize + 1]: number_of_deleted_elements (uint8)
// [kHeaderSize + 2]: number_of_buckets (uint8)
// [kHeaderSize + 3...]: padding (5 bytes on 64-bit, 1 byte on 32-bit)
// [DataTableStartOffset...]: data table (key-value pairs as Tagged)
// [...]: hash table (uint8 bucket indices)
// [...]: chain table (uint8 next entry indices)
//
// Each entry is 2 Tagged elements (kEntrySize = 2):
// [0]: key (Tagged Object)
// [1]: value (Tagged Object)
//
// From v8/src/objects/ordered-hash-table.h
//
// Bucket and entry indices are validated against the bucket count and the
// capacity derived from it before any table is read, so an inconsistent table
// yields "not found" instead of a read outside the tables.
struct SmallOrderedHashMapLayout {
  HeapObjectLayout header_;
  uint8_t number_of_elements_;
  uint8_t number_of_deleted_elements_;
  uint8_t number_of_buckets_;
  uint8_t padding_[5];  // 5 bytes on 64-bit
  // Variable length:
  // - Address data_table_[capacity * kEntrySize] // Keys and values
  // - uint8_t hash_table_[number_of_buckets_]    // Bucket -> first entry
  // - uint8_t chain_table_[capacity]             // Entry -> next entry
  Address data_table_[0];

  // Constants for entry structure
  static constexpr int kEntrySize = 2;
  static constexpr int kKeyOffset = 0;
  static constexpr int kValueOffset = 1;
  static constexpr int kNotFoundValue = 255;

  // Get capacity from number of buckets
  int Capacity() const { return number_of_buckets_ * kSmallLoadFactor; }

  int NumberOfBuckets() const { return number_of_buckets_; }

  // Upper bound for the length of an entry chain; used by the lookup to
  // guarantee termination.
  int GetMaxChainLength() const {
    return number_of_elements_ + number_of_deleted_elements_;
  }

  // Convert hash to bucket index. With zero buckets the mask would be -1 and
  // the raw hash would be used as an index, so that case maps to bucket 0
  // (same guard as in OrderedHashMapLayout::HashToBucket).
  int HashToBucket(int hash) const {
    const int num_buckets = NumberOfBuckets();
    return num_buckets > 0 ? (hash & (num_buckets - 1)) : 0;
  }

  // The hash table starts right after the last data table slot.
  const uint8_t* GetHashTable() const {
    return reinterpret_cast<const uint8_t*>(data_table_ +
                                            Capacity() * kEntrySize);
  }

  // The chain table starts right after the hash table.
  const uint8_t* GetChainTable() const {
    return GetHashTable() + number_of_buckets_;
  }

  // True when `entry` indexes a slot pair inside the data table.
  bool IsValidEntry(int entry) const {
    return entry >= 0 && entry < Capacity();
  }

  // Map an entry index read from the hash or chain table to itself when it is
  // inside the data table, and to kNotFoundValue otherwise.
  uint8_t SanitizeEntry(uint8_t entry) const {
    return IsValidEntry(entry) ? entry : static_cast<uint8_t>(kNotFoundValue);
  }

  // Get key at entry index; the zero address if the entry is out of bounds.
  Address GetKey(int entry) const {
    if (!IsValidEntry(entry)) {
      return 0;
    }
    return data_table_[entry * kEntrySize + kKeyOffset];
  }

  // Get value at entry index; the zero address if the entry is out of bounds.
  Address GetValue(int entry) const {
    if (!IsValidEntry(entry)) {
      return 0;
    }
    return data_table_[entry * kEntrySize + kValueOffset];
  }

  // Get first entry in bucket, or kNotFoundValue if the bucket is empty or
  // the bucket index is outside the hash table.
  uint8_t GetFirstEntry(int bucket) const {
    if (bucket < 0 || bucket >= NumberOfBuckets()) {
      return static_cast<uint8_t>(kNotFoundValue);
    }
    return SanitizeEntry(GetHashTable()[bucket]);
  }

  // Get next entry in chain, or kNotFoundValue at the end of the chain or
  // when either the current or the next entry is out of bounds.
  uint8_t GetNextChainEntry(int entry) const {
    if (!IsValidEntry(entry)) {
      return static_cast<uint8_t>(kNotFoundValue);
    }
    return SanitizeEntry(GetChainTable()[entry]);
  }
};

// ============================================================================
// Templated Hash Table Lookup
// ============================================================================

// Locates the entry holding `key_to_find` in a hash table of either layout.
// LayoutT is expected to be OrderedHashMapLayout or SmallOrderedHashMapLayout.
//
// The bucket is derived from `hash`, then that bucket's chain is walked while
// comparing keys by address. Returns the entry index, or kNotFound when the
// chain ends (or the step budget runs out) without a match.
template <typename LayoutT>
int FindEntryByHash(const LayoutT* layout, int hash, Address key_to_find) {
  // Paranoid step budget: a chain can never be longer than the number of live
  // plus deleted elements, so honoring this budget bounds the walk even if
  // the chain were unexpectedly cyclical.
  int steps_left = layout->GetMaxChainLength();
  int current = layout->GetFirstEntry(layout->HashToBucket(hash));

  while (current != LayoutT::kNotFoundValue && steps_left > 0) {
    if (layout->GetKey(current) == key_to_find) {
      return current;
    }
    current = layout->GetNextChainEntry(current);
    --steps_left;
  }

  return kNotFound;
}

// Looks up `key_to_find` (with its precomputed `hash`) in a hash table of
// either layout and returns the associated value, or the zero address when
// there is no such key.
// LayoutT is expected to be OrderedHashMapLayout or SmallOrderedHashMapLayout.
template <typename LayoutT>
Address FindValueByHash(const LayoutT* layout, int hash, Address key_to_find) {
  const int found_entry = FindEntryByHash(layout, hash, key_to_find);
  if (found_entry == kNotFound) {
    return 0;
  }
  return layout->GetValue(found_entry);
}

// Detect whether the table is an OrderedHashMap or a SmallOrderedHashMap (or
// that this cannot safely be determined) by inspecting the 8 bytes that follow
// the object header.
//
// In a SmallOrderedHashMap those bytes are three uint8 counters followed by
// five always-zero padding bytes. In an OrderedHashMap the same bytes hold the
// FixedArray length as a Smi, whose payload lives in the upper four bytes (see
// SmiToInt), so on a little-endian machine at least one of the padding
// positions is non-zero.
//
// Returns 0 for a large map, 1 for a small map and 2 when undecided.
static uint8_t GetOrderedHashMapType(Address table_untagged) {
  constexpr uint8_t kLargeMap = 0;
  constexpr uint8_t kSmallMap = 1;
  constexpr uint8_t kUndecided = 2;

  static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__,
                "Little-endian required");

  const auto* candidate =
      reinterpret_cast<const SmallOrderedHashMapLayout*>(table_untagged);

  // Inspect the header byte by byte. Reading it through a uint64_t pointer
  // would access uint8_t objects via an incompatible type.
  unsigned padding_bits = 0;
  for (const uint8_t pad : candidate->padding_) {
    padding_bits |= pad;
  }
  if (padding_bits != 0) {
    return kLargeMap;
  }

  const unsigned num_elements = candidate->number_of_elements_;
  const unsigned num_deleted = candidate->number_of_deleted_elements_;
  const unsigned num_buckets = candidate->number_of_buckets_;

  // SmallOrderedHashMap has constraints:
  // - num_buckets must be a power of 2 between 2 and 127
  // - num_elements + num_deleted <= capacity (buckets * 2)
  // An all-zero header fails the first constraint. It is not a valid large
  // map either (that would be a FixedArray of length zero, too short to hold
  // the three counters), so it falls through to "undecided" as well.
  const bool buckets_plausible = num_buckets >= 2 && num_buckets <= 127 &&
                                 (num_buckets & (num_buckets - 1)) == 0;
  if (buckets_plausible &&
      num_elements + num_deleted <= num_buckets * kSmallLoadFactor) {
    return kSmallMap;
  }
  return kUndecided;
}

// ============================================================================
// Main entry point
// ============================================================================

// Lookup value in a Map given the hash and key pointer. If the key is not found
// in the map (or the lookup cannot be performed) returns a zero Address (which
// is essentially a zero Smi value.)
//
// map_addr: tagged pointer to the JS Map object.
// hash:     hash of the key, used to select the bucket.
// key:      tagged key; compared with stored keys by address.
Address GetValueFromMap(Address map_addr, int hash, Address key) {
  // A word with a clear low bit is a Smi (same test as IsSmi) and therefore
  // not a pointer to a heap object. This also covers the zero address.
  // Untagging and dereferencing it would read unrelated memory.
  if ((map_addr & 1) == 0) {
    return 0;
  }
  const JSMapLayout* map_untagged =
      reinterpret_cast<const JSMapLayout*>(UntagPointer(map_addr));

  // Same reasoning for the backing table pointer.
  const Address table_tagged = map_untagged->table_;
  if ((table_tagged & 1) == 0) {
    return 0;
  }
  const Address table_untagged = UntagPointer(table_tagged);

  switch (GetOrderedHashMapType(table_untagged)) {
    case 0: {  // large map
      const OrderedHashMapLayout* layout =
          reinterpret_cast<const OrderedHashMapLayout*>(table_untagged);
      return FindValueByHash(layout, hash, key);
    }
    case 1: {  // small map
      const SmallOrderedHashMapLayout* layout =
          reinterpret_cast<const SmallOrderedHashMapLayout*>(table_untagged);
      return FindValueByHash(layout, hash, key);
    }
  }
  return 0;  // We couldn't determine the kind of the map, just return zero.
}

#else // _WIN32

// Windows build: no direct map lookup is implemented here, so every call
// reports "not found" by returning the zero address.
Address GetValueFromMap(Address map_addr, int hash, Address key) {
  constexpr Address kNoValue = 0;
  return kNoValue;
}

#endif // _WIN32
} // namespace dd
Loading
Loading