# Data Track Throughput Experiment

Coordinated producer and consumer for benchmarking `LocalDataTrack` /
`RemoteDataTrack` throughput across a sweep of payload sizes and publish rates.

## What It Does

- `producer.cpp`
  - Publishes a data track named `data-track-throughput`
  - Runs a default sweep of payload sizes and publish rates (see
    **Test Bounds** below)
  - Calls the consumer over RPC before and after each scenario

- `consumer.cpp`
  - Registers a room data-frame callback for the producer's data track
  - Receives every frame and records arrival timestamps
  - Logs validation warnings (size mismatches, header mismatches, etc.) to stderr
  - Tracks duplicates and missing messages
  - Appends raw data to scenario-level and per-message CSV files

## Design Principles

- **Raw data only in CSV.** The consumer writes only directly measured values
  (counts, byte totals, microsecond timestamps). All derived metrics (throughput,
  latency percentiles, delivery ratio, etc.) are computed at analysis time by
  `scripts/plot_throughput.py`.
- **Fixed packet size per scenario.** Each scenario uses a single
  `packet_size_bytes`. This ensures every message in a run is the same size,
  making aggregate measurements unambiguous.
- **Minimal measurement overhead.** The hot `onDataFrame` callback captures the
  arrival timestamp first, then appends to an in-memory vector under a brief
  mutex. File I/O happens only at finalization, after all data is collected.

## Test Bounds

All bounds are defined in `common.h`. A scenario is any combination of
(payload size, publish rate) that passes all three constraints below.

### Hard Limits

| Parameter | Min | Max |
|-----------|-----|-----|
| Packet size | 1 KiB | 256 MiB |
| Publish rate | 1 Hz | 50k Hz |

### Data-Rate Budget

Every scenario must satisfy:

```
packet_size_bytes * desired_rate_hz <= 10 Gbps (1.25 GB/s)
```

This naturally allows small messages at very high rates and large messages at
low rates, while preventing any single scenario from attempting an unreasonable
throughput that would destabilize the connection.
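
The three constraints can be sketched as a single predicate (hypothetical helper; the actual bounds and validation live in `common.h`):

```python
KIB = 1024
MIN_PACKET_BYTES = 1 * KIB
MAX_PACKET_BYTES = 256 * KIB * KIB   # 256 MiB
MIN_RATE_HZ = 1
MAX_RATE_HZ = 50_000
BUDGET_BYTES_PER_SEC = 1.25e9        # 10 Gbps

def scenario_allowed(packet_size_bytes: int, rate_hz: float) -> bool:
    """True if the (size, rate) pair passes all three constraints."""
    if not (MIN_PACKET_BYTES <= packet_size_bytes <= MAX_PACKET_BYTES):
        return False
    if not (MIN_RATE_HZ <= rate_hz <= MAX_RATE_HZ):
        return False
    # Data-rate budget: total bytes/sec must stay within 10 Gbps.
    return packet_size_bytes * rate_hz <= BUDGET_BYTES_PER_SEC
```

For example, 64 KiB at 10k Hz fits the budget, while 64 KiB at 20k Hz (about 10.5 Gbps) does not.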

### Default Sweep Grid

The default sweep iterates over 13 payload sizes and 13 publish rates, skipping
any combination that exceeds the data-rate budget:

**Payload sizes:** 1 KiB, 4 KiB, 16 KiB, 64 KiB, 128 KiB, 256 KiB, 512 KiB,
1 MiB, 2 MiB, 4 MiB, 16 MiB, 64 MiB, 256 MiB

**Publish rates:** 1, 5, 10, 25, 50, 100, 200, 500, 1k, 5k, 10k, 20k, 50k Hz

The budget clips larger payloads to lower rates. For example:

| Payload | Max rate allowed |
|---------|------------------|
| 1 KiB | 50k Hz (all rates) |
| 16 KiB | 50k Hz (all rates) |
| 64 KiB | 10k Hz |
| 256 KiB | 1k Hz |
| 1 MiB | 1k Hz |
| 4 MiB | 200 Hz |
| 64 MiB | 10 Hz |
| 256 MiB | 1 Hz |
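
The clipping follows directly from the budget. A sketch that reproduces the table above (grid values from the sweep; helper name hypothetical):

```python
KIB, MIB = 1024, 1024 * 1024
PAYLOADS = [1*KIB, 4*KIB, 16*KIB, 64*KIB, 128*KIB, 256*KIB, 512*KIB,
            1*MIB, 2*MIB, 4*MIB, 16*MIB, 64*MIB, 256*MIB]
RATES_HZ = [1, 5, 10, 25, 50, 100, 200, 500,
            1_000, 5_000, 10_000, 20_000, 50_000]
BUDGET_BYTES_PER_SEC = 1.25e9  # 10 Gbps

def max_allowed_rate(payload_bytes: int) -> int:
    """Largest grid rate whose data rate stays within the budget."""
    ok = [r for r in RATES_HZ if payload_bytes * r <= BUDGET_BYTES_PER_SEC]
    return max(ok)  # every grid payload passes at 1 Hz, so ok is non-empty

# e.g. max_allowed_rate(64 * KIB) -> 10_000
```
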

Single-scenario mode (`--rate-hz`, `--packet-size`, `--num-msgs`) bypasses the
default grid and only enforces the hard limits and data-rate budget, allowing
any valid combination to be tested explicitly.

## CSV Output

The consumer writes raw measurement data only. All derived metrics are computed
at analysis time by `scripts/plot_throughput.py`.

### `throughput_summary.csv`

One row per scenario. Contains only raw counts, byte totals, and microsecond
timestamps:

| Column | Description |
|--------|-------------|
| `run_id` | Unique scenario identifier |
| `scenario_name` | Human-readable scenario label |
| `desired_rate_hz` | Requested publish rate |
| `packet_size_bytes` | Fixed packet size for this scenario |
| `messages_requested` | Number of messages the producer was told to send |
| `messages_attempted` | Number of messages the producer tried to send |
| `messages_enqueued` | Number of messages successfully enqueued |
| `messages_enqueue_failed` | Number of enqueue failures |
| `messages_received` | Unique messages received by the consumer |
| `messages_missed` | `messages_requested - messages_received` |
| `duplicate_messages` | Number of duplicate frames received |
| `attempted_bytes` | Total bytes the producer attempted to send |
| `enqueued_bytes` | Total bytes successfully enqueued |
| `received_bytes` | Total bytes received by the consumer |
| `first_send_time_us` | Timestamp of first send (microseconds since epoch) |
| `last_send_time_us` | Timestamp of last send |
| `first_arrival_time_us` | Timestamp of first arrival at the consumer |
| `last_arrival_time_us` | Timestamp of last arrival at the consumer |

### `throughput_messages.csv`

One row per received frame. Raw observation data only:

| Column | Description |
|--------|-------------|
| `run_id` | Scenario identifier |
| `sequence` | Message sequence number |
| `payload_bytes` | Actual payload size received |
| `send_time_us` | Producer send timestamp (microseconds since epoch) |
| `arrival_time_us` | Consumer arrival timestamp (microseconds since epoch) |
| `is_duplicate` | 1 if this sequence was already seen, 0 otherwise |

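Per-message latencies and inter-arrival gaps derive from these raw timestamps. A sketch (helper name hypothetical; rows assumed sorted by `arrival_time_us` within a run):

```python
def per_message_metrics(rows):
    """Latency and inter-arrival gaps (ms) from throughput_messages.csv rows."""
    # Latency only for unique frames; duplicates would double-count a send.
    latencies_ms = [(r["arrival_time_us"] - r["send_time_us"]) / 1000
                    for r in rows if not r["is_duplicate"]]
    # Gaps between consecutive arrivals, duplicates included (they did arrive).
    arrivals = [r["arrival_time_us"] for r in rows]
    gaps_ms = [(b - a) / 1000 for a, b in zip(arrivals, arrivals[1:])]
    return latencies_ms, gaps_ms
```
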
## Prerequisites

- CMake 3.20+
- C++17 compiler
- The LiveKit C++ SDK, built and installed (see below)

## Building

All commands below assume you are in **this directory**
(`data_track_throughput/`).

### 1. Build and install the SDK

From the SDK repository root:

```bash
./build.sh  # builds the SDK (debug by default)
cmake --install build-debug --prefix local-install
```

### 2. Configure this experiment

```bash
cmake -S . -B build \
  -DCMAKE_PREFIX_PATH="$(cd ../../local-install && pwd)"
```

> Adjust `CMAKE_PREFIX_PATH` to wherever the SDK was installed. The path
> above assumes this directory lives two levels below the repository root; it
> works regardless of the parent directory's name.

### 3. Build

```bash
cmake --build build
```

The executables and required shared libraries are placed in `build/`.

## Build Targets

- `DataTrackThroughputConsumer`
- `DataTrackThroughputProducer`

## Running

### Generate Tokens

```bash
# producer
lk token create \
  --api-key devkey \
  --api-secret secret \
  -i producer \
  --join \
  --valid-for 99999h \
  --room robo_room \
  --grant '{"canPublish":true,"canSubscribe":true,"canPublishData":true}'

# consumer
lk token create \
  --api-key devkey \
  --api-secret secret \
  -i consumer \
  --join \
  --valid-for 99999h \
  --room robo_room \
  --grant '{"canPublish":true,"canSubscribe":true,"canPublishData":true}'
```

Start the local server:

```bash
LIVEKIT_CONFIG="enable_data_tracks: true" livekit-server --dev
```

Start the consumer first:

```bash
./build/DataTrackThroughputConsumer <ws-url> <consumer-token>
```

Then start the producer:

```bash
./build/DataTrackThroughputProducer <ws-url> <producer-token> --consumer consumer
```

If you omit `--consumer`, the producer expects exactly one remote participant
to already be in the room.

## Single Scenario

Instead of the full sweep, you can run one scenario:

```bash
./build/DataTrackThroughputProducer \
  <ws-url> <producer-token> \
  --consumer <consumer-identity> \
  --rate-hz 50 \
  --packet-size 1mb \
  --num-msgs 25
```
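
The `--packet-size` argument accepts suffixed sizes like `1mb` above. A sketch of how such a value might be parsed (hypothetical helper — the producer's actual parsing may differ; binary units assumed, matching the KiB/MiB sweep grid):

```python
def parse_size(text: str) -> int:
    """Parse a size argument like '512kb' or '1mb' into bytes (hypothetical)."""
    units = {"b": 1, "kb": 1024, "mb": 1024 ** 2, "gb": 1024 ** 3}
    t = text.strip().lower()
    if t.isdigit():  # bare number: raw bytes
        return int(t)
    # Try longer suffixes first so 'kb' is not misread as 'b'.
    for suffix in sorted(units, key=len, reverse=True):
        if t.endswith(suffix) and t[: -len(suffix)].isdigit():
            return int(t[: -len(suffix)]) * units[suffix]
    raise ValueError(f"unrecognized size: {text!r}")
```
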

## Plotting

Generate plots from a benchmark output directory:

```bash
python3 scripts/plot_throughput.py data_track_throughput_results
```

By default the script writes PNGs into `data_track_throughput_results/plots/`.
Pass `--output-dir <path>` to override the output location.

All derived metrics (throughput, latency percentiles, delivery ratio, receive
rate, interarrival times) are computed from the raw CSV timestamps and counts
at plot time.

### Generated Plots

From `throughput_summary.csv` + `throughput_messages.csv`:

| File | Description |
|------|-------------|
| `expected_vs_actual_throughput.png` | Scatter plot comparing expected vs actual receive throughput (Mbps). Points are colored by desired publish rate and sized by payload. An ideal y=x reference line is overlaid. |
| `dropped_messages_vs_expected_throughput.png` | Scatter plot of missed/dropped message count vs expected throughput, colored by payload size (log scale). |
| `actual_throughput_heatmap.png` | Heatmap of actual receive throughput (Mbps) with payload size on the y-axis and desired rate on the x-axis. |
| `delivery_ratio_heatmap.png` | Heatmap of delivery ratio (received / requested) over the same payload-size × rate grid. |
| `p50_latency_heatmap.png` | Heatmap of median (P50) send-to-receive latency (ms) over the same grid. |
| `p95_latency_heatmap.png` | Heatmap of P95 send-to-receive latency (ms) over the same grid. |
| `message_latency_histogram.png` | Histogram of per-message latency (ms) across all received frames. |
| `message_interarrival_series.png` | Time-series line plot of inter-arrival gaps (ms) for every received message, ordered by run then arrival time. |