From b266506db3a6ea0e146fc3cfac298321d6387696 Mon Sep 17 00:00:00 2001
From: infinityabundance <255699974+infinityabundance@users.noreply.github.com>
Date: Fri, 6 Feb 2026 19:11:59 +0000
Subject: [PATCH] Add A/V timestamps and basic sync

---
 include/rootstream.h | 15 ++++++++++++++-
 src/network.c        | 35 +++++++++++++++++++++++++++++------
 src/network_stub.c   | 16 ++++++++++++++++
 src/nvenc_encoder.c  |  7 +++++--
 src/service.c        | 33 +++++++++++++++++++++++++--------
 5 files changed, 89 insertions(+), 17 deletions(-)

diff --git a/include/rootstream.h b/include/rootstream.h
index 31162fe..f7e00e5 100644
--- a/include/rootstream.h
+++ b/include/rootstream.h
@@ -226,9 +226,19 @@ typedef PACKED_STRUCT {
     uint32_t offset;       /* Offset of this chunk */
     uint16_t chunk_size;   /* Size of this chunk */
     uint16_t flags;        /* Reserved for future use */
+    uint64_t timestamp_us; /* Capture timestamp */
 } video_chunk_header_t;
 PACKED_STRUCT_END
 
+/* Audio packet header: precedes the Opus data inside the encrypted payload */
+typedef PACKED_STRUCT {
+    uint64_t timestamp_us; /* get_timestamp_us() taken when host builds packet */
+    uint32_t sample_rate;  /* Samples per second (host sends 48000) */
+    uint16_t channels;     /* Channel count (host sends 2) */
+    uint16_t samples;      /* Samples per channel (num_samples cast to uint16_t) */
+} audio_packet_header_t;
+PACKED_STRUCT_END
+
 /* Encrypted input event payload */
 typedef PACKED_STRUCT {
     uint8_t type;              /* EV_KEY, EV_REL, etc */
@@ -394,6 +404,8 @@ typedef struct {
     uint64_t bytes_received;
     latency_stats_t latency;   /* Latency instrumentation */
     bool is_host;              /* Host mode (streamer) */
+    uint64_t last_video_ts_us; /* Last received video timestamp */
+    uint64_t last_audio_ts_us; /* Last received audio timestamp */
 } rootstream_ctx_t;
 
 /* ============================================================================
@@ -493,7 +505,8 @@ int rootstream_net_init(rootstream_ctx_t *ctx, uint16_t port);
 int rootstream_net_send_encrypted(rootstream_ctx_t *ctx, peer_t *peer,
                                   uint8_t type, const void *data, size_t size);
 int rootstream_net_send_video(rootstream_ctx_t *ctx, peer_t *peer,
-                              const uint8_t *data, size_t size);
+                              const uint8_t *data, size_t size,
+                              uint64_t timestamp_us);
 int rootstream_net_recv(rootstream_ctx_t *ctx, int timeout_ms);
 int rootstream_net_handshake(rootstream_ctx_t *ctx, peer_t *peer);
 void rootstream_net_tick(rootstream_ctx_t *ctx);
diff --git a/src/network.c b/src/network.c
index 336b16d..1289173 100644
--- a/src/network.c
+++ b/src/network.c
@@ -104,7 +104,8 @@ static size_t max_plain_payload_size(void) {
 }
 
 int rootstream_net_send_video(rootstream_ctx_t *ctx, peer_t *peer,
-                              const uint8_t *data, size_t size) {
+                              const uint8_t *data, size_t size,
+                              uint64_t timestamp_us) {
     if (!ctx || !peer || !data || size == 0) {
         fprintf(stderr, "ERROR: Invalid arguments to send_video\n");
         return -1;
@@ -138,7 +139,8 @@ int rootstream_net_send_video(rootstream_ctx_t *ctx, peer_t *peer,
             .total_size = (uint32_t)size,
             .offset = (uint32_t)offset,
             .chunk_size = (uint16_t)chunk_size,
-            .flags = 0
+            .flags = 0,
+            .timestamp_us = timestamp_us
         };
 
         memcpy(payload, &header, sizeof(header));
@@ -558,19 +560,40 @@ int rootstream_net_recv(rootstream_ctx_t *ctx, int timeout_ms) {
                     ctx->current_frame.data = peer->video_rx_buffer;
                     ctx->current_frame.size = peer->video_rx_expected;
                     ctx->current_frame.capacity = peer->video_rx_capacity;
-                    ctx->current_frame.timestamp = get_timestamp_us();
+                    ctx->current_frame.timestamp = header.timestamp_us;
+                    ctx->last_video_ts_us = header.timestamp_us;
                     ctx->frames_received++;
                 }
             }
             else if (hdr->type == PKT_AUDIO) {
                 /* Decode Opus audio and play immediately */
+                if (decrypted_len < sizeof(audio_packet_header_t)) {
+                    fprintf(stderr, "WARNING: Audio packet too small: %zu bytes\n", decrypted_len);
+                    break;
+                }
+
+                audio_packet_header_t header;
+                memcpy(&header, decrypted, sizeof(header));
+
+                size_t opus_len = decrypted_len - sizeof(audio_packet_header_t);
+                const uint8_t *opus_data = decrypted + sizeof(audio_packet_header_t);
                 int16_t pcm_buffer[5760 * 2];  /* Max frame size * stereo */
                 size_t pcm_samples = 0;
 
-                if (rootstream_opus_decode(ctx, decrypted, decrypted_len,
+                if (rootstream_opus_decode(ctx, opus_data, opus_len,
                                pcm_buffer, &pcm_samples) == 0) {
-                    /* Play audio immediately (low latency, no buffering) */
-                    audio_playback_write(ctx, pcm_buffer, pcm_samples);
+                    bool drop_audio = false;
+                    if (ctx->last_video_ts_us > 0) {
+                        int64_t delta = (int64_t)header.timestamp_us - (int64_t)ctx->last_video_ts_us;
+                        if (delta > 80000 || delta < -200000) {
+                            drop_audio = true;
+                        }
+                    }
+
+                    if (!drop_audio) {
+                        audio_playback_write(ctx, pcm_buffer, pcm_samples);
+                        ctx->last_audio_ts_us = header.timestamp_us;
+                    }
                 } else {
                     #ifdef DEBUG
                     fprintf(stderr, "DEBUG: Audio decode failed\n");
diff --git a/src/network_stub.c b/src/network_stub.c
index dbc868b..8a492c4 100644
--- a/src/network_stub.c
+++ b/src/network_stub.c
@@ -28,6 +28,18 @@ int rootstream_net_send_encrypted(rootstream_ctx_t *ctx, peer_t *peer,
     return -1;
 }
 
+int rootstream_net_send_video(rootstream_ctx_t *ctx, peer_t *peer,
+                              const uint8_t *data, size_t size,
+                              uint64_t timestamp_us) {
+    (void)ctx;           /* Stub: every parameter is intentionally unused */
+    (void)peer;
+    (void)data;
+    (void)size;
+    (void)timestamp_us;
+    fprintf(stderr, "ERROR: Cannot send video (NO_CRYPTO build)\n");
+    return -1;           /* Always reports failure; nothing is sent */
+}
+
 int rootstream_net_recv(rootstream_ctx_t *ctx, int timeout_ms) {
     (void)ctx;
     (void)timeout_ms;
@@ -42,6 +54,10 @@ int rootstream_net_handshake(rootstream_ctx_t *ctx, peer_t *peer) {
     return -1;
 }
 
+void rootstream_net_tick(rootstream_ctx_t *ctx) {
+    (void)ctx;  /* Stub: no-op, ctx is intentionally unused */
+}
+
 peer_t* rootstream_add_peer(rootstream_ctx_t *ctx, const char *rootstream_code) {
     (void)ctx;
     (void)rootstream_code;
diff --git a/src/nvenc_encoder.c b/src/nvenc_encoder.c
index 94bf84f..f3d6266 100644
--- a/src/nvenc_encoder.c
+++ b/src/nvenc_encoder.c
@@ -293,8 +293,11 @@ int rootstream_encoder_init_nvenc(rootstream_ctx_t *ctx, codec_type_t codec) {
     /* Set encoding parameters */
     nv->width = ctx->display.width;
     nv->height = ctx->display.height;
-    nv->fps = ctx->display.refresh_rate;
-    nv->bitrate = ctx->settings.video_bitrate;
+    nv->fps = ctx->display.refresh_rate ? ctx->display.refresh_rate : 60;
+    nv->bitrate = ctx->encoder.bitrate;
+    if (nv->bitrate == 0) {
+        nv->bitrate = ctx->settings.video_bitrate;
+    }
     if (nv->bitrate == 0) {
         nv->bitrate = 10000000;  /* 10 Mbps default */
     }
diff --git a/src/service.c b/src/service.c
index 0ea9ff8..ba09a8c 100644
--- a/src/service.c
+++ b/src/service.c
@@ -189,11 +189,15 @@ int service_run_host(rootstream_ctx_t *ctx) {
     }
 
     /* Initialize audio capture and Opus encoder */
-    if (audio_capture_init(ctx) < 0) {
-        fprintf(stderr, "WARNING: Audio capture init failed (continuing without audio)\n");
-    } else if (rootstream_opus_encoder_init(ctx) < 0) {
-        fprintf(stderr, "WARNING: Opus encoder init failed (continuing without audio)\n");
-        audio_capture_cleanup(ctx);
+    if (ctx->settings.audio_enabled) {
+        if (audio_capture_init(ctx) < 0) {
+            fprintf(stderr, "WARNING: Audio capture init failed (continuing without audio)\n");
+        } else if (rootstream_opus_encoder_init(ctx) < 0) {
+            fprintf(stderr, "WARNING: Opus encoder init failed (continuing without audio)\n");
+            audio_capture_cleanup(ctx);
+        }
+    } else {
+        printf("INFO: Audio disabled in settings\n");
     }
 
     /* Announce service */
@@ -256,7 +260,8 @@ int service_run_host(rootstream_ctx_t *ctx) {
         size_t audio_size = 0;
         size_t num_samples = 0;
 
-        if (audio_capture_frame(ctx, audio_samples, &num_samples) == 0) {
+        if (ctx->settings.audio_enabled &&
+            audio_capture_frame(ctx, audio_samples, &num_samples) == 0) {
             if (rootstream_opus_encode(ctx, audio_samples, audio_buf, &audio_size) < 0) {
                 /* Audio encode failed, continue with video only */
                 audio_size = 0;
@@ -270,14 +275,26 @@ int service_run_host(rootstream_ctx_t *ctx) {
             if (peer->state == PEER_CONNECTED && peer->is_streaming) {
                 /* Send video */
                 if (enc_size > 0 &&
-                    rootstream_net_send_video(ctx, peer, enc_buf, enc_size) < 0) {
+                    rootstream_net_send_video(ctx, peer, enc_buf, enc_size,
+                                              ctx->current_frame.timestamp) < 0) {
                     fprintf(stderr, "ERROR: Video send failed (peer=%s)\n", peer->hostname);
                 }
 
                 /* Send audio if available */
                 if (audio_size > 0) {
+                    audio_packet_header_t header = {
+                        .timestamp_us = get_timestamp_us(),
+                        .sample_rate = 48000,
+                        .channels = 2,
+                        .samples = (uint16_t)num_samples
+                    };
+
+                    uint8_t payload[sizeof(audio_packet_header_t) + 4000];
+                    memcpy(payload, &header, sizeof(header));
+                    memcpy(payload + sizeof(header), audio_buf, audio_size);
+
                     if (rootstream_net_send_encrypted(ctx, peer, PKT_AUDIO,
-                                                      audio_buf, audio_size) < 0) {
+                                                      payload, sizeof(header) + audio_size) < 0) {
                         fprintf(stderr, "ERROR: Audio send failed (peer=%s)\n", peer->hostname);
                     }
                 }