diff --git a/.gitignore b/.gitignore
index 84ad134..7b1ab87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -208,4 +208,5 @@ __marimo__/
 transcript.txt
 /transcripts
 polyglot.lock
+viewer_password.txt
 .claude/settings.local.json
diff --git a/app.py b/app.py
index 6a98f48..b314ca2 100644
--- a/app.py
+++ b/app.py
@@ -4,6 +4,7 @@
 """
 
 import argparse
+import collections
 import os
 import queue
 import random
@@ -11,6 +12,7 @@
 import threading
 import time
 import warnings
+import wave
 import webbrowser
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
@@ -21,9 +23,9 @@ def log(message, tag=None):
     """Print a log message with timestamp. Tag is optional prefix like [SUMMARY]"""
     ts = datetime.now().strftime('%H:%M:%S')
     if tag:
-        print(f"[{ts}] [{tag}] {message}")
+        print(f"[{ts}] [{tag}] {message}", flush=True)
     else:
-        print(f"[{ts}] {message}")
+        print(f"[{ts}] {message}", flush=True)
 
 
 # Word list for generating memorable passphrases
@@ -319,6 +321,30 @@ def cleanup_lock_file():
 meeting_start_time = None  # Track when meeting started (first transcription)
 summary_pending = False  # Track if a summary generation is waiting
 
+# ── Meet bot state (Phase 4) ──────────────────────────────────────────────────
+# NOTE(review): apart from the WAV writer (guarded by _bot_wav_lock), these globals
+# are written by the /meet_bot SocketIO handlers and read by process_audio() and
+# resolve_speaker_identity() without a lock — confirm that is safe for the async
+# mode the server runs in.
+#
+# Deque of closed speaker segments: (start_ms, end_ms, display_name).
+# Populated by the /meet_bot SocketIO namespace; consumed by resolve_speaker_identity().
+# Bounded at 500 entries: once full, appending a segment drops the oldest one.
+speaker_timeline = collections.deque(maxlen=500)
+# Known participants: display_name -> first_seen_ms
+meet_participants = {}
+# Open (unended) speaker intervals: display_name -> start_ms
+_active_speaker_starts = {}
+# Whether the Playwright bot is currently connected
+bot_connected = False
+# Accumulation buffer for rechunking 320-sample bot frames → CHUNK_SIZE frames
+_bot_pcm_buffer = np.array([], dtype=np.float32)
+# Wall-clock ms of the most recently received bot audio frame (for Phase 5 time-alignment).
+# Copied from the frame's meta["capture_ts_ms"]; process_audio() snapshots it as the
+# batch-end anchor when it hands a batch to the transcription thread.
+_last_capture_ts_ms = None
+# WAV file writer — captures the full raw 16 kHz mono meeting audio for retranscription
+_bot_wav_writer = None
+# Serialises every open / write / close of _bot_wav_writer.
+_bot_wav_lock = threading.Lock()
+# Speaker-switch batching: who the bot says is currently speaking, and whether the most recent
+# speaker_start differs from the previous one (triggers process_audio to flush the current batch).
+_current_bot_speaker = None
+_pending_speaker_switch = False
+# Maximum batch length when bot is connected (gives Whisper more context per speaker turn).
+BOT_MAX_BATCH_SEC = 60
+
 
 def load_transcript_segments(transcript_path):
     """Load existing transcript file into all_meeting_segments for summarization.
@@ -1236,8 +1262,57 @@ def perform_speaker_diarization(audio_data, sample_rate):
         return None
 
 
+def resolve_speaker_identity(speaker_segments, batch_end_ts_ms, audio_duration_secs):
+    """Match pyannote speaker IDs to real names via speaker_timeline overlap.
+
+    Returns {original_pyannote_id: real_name} for speakers with >= 30% time overlap.
+    Considers both closed (speaker_timeline) and still-open (_active_speaker_starts)
+    intervals, since a speaker who started talking during this batch may not have
+    emitted speaker_end yet when the transcription thread kicks off.
+    """
+    if batch_end_ts_ms is None or not speaker_segments:
+        return {}
+
+    # Build the full set of speaker intervals to consider.
+    intervals = list(speaker_timeline)  # closed: (start_ms, end_ms, name)
+    now_ms = int(time.time() * 1000)
+    for name, start_ms in _active_speaker_starts.items():
+        intervals.append((start_ms, now_ms, name))
+
+    if not intervals:
+        return {}
+
+    from collections import defaultdict
+    batch_start_ms = batch_end_ts_ms - audio_duration_secs * 1000
+
+    speaker_times = defaultdict(list)
+    for seg in speaker_segments:
+        seg_start_ms = batch_start_ms + seg["start"] * 1000
+        seg_end_ms = batch_start_ms + seg["end"] * 1000
+        speaker_times[seg["speaker"]].append((seg_start_ms, seg_end_ms))
+
+    resolved = {}
+    for original_id, time_ranges in speaker_times.items():
+        name_overlap = defaultdict(float)
+        for seg_start_ms, seg_end_ms in time_ranges:
+            for (tl_start, tl_end, name) in intervals:
+                overlap = max(0.0, min(seg_end_ms, tl_end) - max(seg_start_ms, tl_start))
+                if overlap > 0:
+                    name_overlap[name] += overlap
+
+        if not name_overlap:
+            continue
+
+        best_name = max(name_overlap, key=name_overlap.get)
+        total_ms = sum(end - start for start, end in time_ranges)
+        if total_ms > 0 and name_overlap[best_name] / total_ms >= 0.30:
+            resolved[original_id] = best_name
+
+    return resolved
+
+
 @torch.inference_mode()
-def transcribe_and_translate(audio_data, audio_duration):
+def transcribe_and_translate(audio_data, audio_duration, batch_end_ts_ms=None):
     """Background thread for transcription and translation with speaker diarization"""
     global is_processing, all_meeting_segments
 
@@ -1328,6 +1403,13 @@ def normalize_caps(text):
                     speaker_mapping[original_id] = f"SPEAKER_{speaker_counter:02d}"
                     speaker_counter += 1
 
+            # Phase 5: resolve pyannote IDs → real names from speaker_timeline.
+            resolved_names = resolve_speaker_identity(speaker_segments, batch_end_ts_ms, audio_duration)
+            if resolved_names:
+                for orig_id, real_name in resolved_names.items():
+                    speaker_xx = speaker_mapping.get(orig_id, orig_id)
+                    log(f"Resolved {speaker_xx} → {real_name}", "BOT")
+                    socketio.emit("rename_speaker", {"speaker_id": speaker_xx, "name": real_name})
 
             # Extract all words with timestamps from chunks
             all_words = []
@@ -1361,7 +1443,9 @@ def normalize_caps(text):
 
                     if overlap > max_overlap:
                         max_overlap = overlap
-                        best_speaker = speaker_mapping.get(seg["speaker"], seg["speaker"])
+                        orig = seg["speaker"]
+                        # Prefer resolved real name; fall back to renumbered SPEAKER_XX.
+                        best_speaker = resolved_names.get(orig, speaker_mapping.get(orig, orig))
                         best_segment_idx = idx
 
                 words_with_speakers.append({
@@ -1705,10 +1789,27 @@ def process_audio():
 
             buffer.append(chunk)
 
-            # Process when we detect end of sentence (silence after minimum audio) OR max buffer reached
-            silence_detected = len(buffer) >= min_chunks and silence_counter >= audio_thresholds["silence_chunks"]
+            # Bot mode: only flush on speaker switch or 60 s cap. No silence
+            # detection — a speaker's natural pauses emit speaker_end/start
+            # toggles and we explicitly do NOT flush on those. Mic-only mode
+            # falls back to the original level-based silence heuristic.
+            global _pending_speaker_switch
+            bot_mode = bot_connected
+            if bot_mode:
+                max_chunks = int(actual_sample_rate * BOT_MAX_BATCH_SEC / CHUNK_SIZE)
+
+            speaker_switched = bot_mode and _pending_speaker_switch and len(buffer) >= min_chunks
+            if speaker_switched:
+                _pending_speaker_switch = False
+
+            # Level-based silence detection — only used when bot isn't driving batching.
+            silence_detected = (
+                not bot_mode
+                and len(buffer) >= min_chunks
+                and silence_counter >= audio_thresholds["silence_chunks"]
+            )
             max_length_reached = len(buffer) >= max_chunks
-            should_process = silence_detected or max_length_reached
+            should_process = silence_detected or max_length_reached or speaker_switched
 
             if should_process:
                 is_processing = True  # Set lock
@@ -1753,10 +1854,13 @@ def process_audio():
                 # Calculate audio duration
                 audio_duration = len(audio_resampled) / SAMPLE_RATE
 
+                # Snapshot the wall-clock anchor for Phase 5 speaker resolution.
+                batch_end_ts = _last_capture_ts_ms
+
                 # Launch background thread for transcription and translation
                 # This keeps the main loop responsive for WebSocket updates
                 processing_thread = threading.Thread(
-                    target=transcribe_and_translate, args=(audio_resampled, audio_duration), daemon=True
+                    target=transcribe_and_translate, args=(audio_resampled, audio_duration, batch_end_ts), daemon=True
                 )
                 processing_thread.start()
 
@@ -2201,10 +2305,18 @@ def start_listening_internal():
     if not is_listening:
         is_listening = True
 
-        # Start audio stream
+        # Start the processing thread first (same for both audio sources).
         audio_thread = threading.Thread(target=process_audio, daemon=True)
         audio_thread.start()
 
+        # Meet-bot path: audio arrives over SocketIO at 16 kHz mono; skip PyAudio entirely.
+        if Config.AUDIO_SOURCE == "meet_bot":
+            actual_sample_rate = Config.SAMPLE_RATE  # 16000
+            num_channels = 1
+            print("[AUDIO] Source: Meet bot (waiting for bot to connect and stream audio)")
+            socketio.emit("status", {"listening": True})
+            return
+
         # Initialize PyAudio
         p_audio = pyaudio.PyAudio()
 
@@ -2491,6 +2603,217 @@ def handle_broadcast_manual_summary(data):
     emit('manual_summary_broadcast', {'success': True, 'languages_sent': list(translations_cache.keys())})
 
 
+# ── Meet bot SocketIO namespace (Phase 4) ────────────────────────────────────
+#
+# The Playwright bot connects here as a socket.io-client at /meet_bot.
+# It streams two event types:
+#   audio_frame  — binary PCM16 payload + JSON meta {capture_ts_ms, sample_rate, channels}
+#   speaker_event — JSON {type, name, wall_clock_ms} for speaker_start/end/roster_update
+
+def _open_bot_wav():
+    """Open a WAV file to record the full raw meeting audio from the bot."""
+    global _bot_wav_writer
+    with _bot_wav_lock:
+        if _bot_wav_writer is not None or not TRANSCRIPT_FILE:
+            return
+        wav_path = TRANSCRIPT_FILE.with_suffix(".wav")
+        try:
+            _bot_wav_writer = wave.open(str(wav_path), "wb")
+            _bot_wav_writer.setnchannels(1)
+            _bot_wav_writer.setsampwidth(2)  # int16
+            _bot_wav_writer.setframerate(Config.SAMPLE_RATE)
+            log(f"Recording meeting audio → {wav_path.name}", "BOT")
+        except Exception as e:
+            log(f"Failed to open WAV: {e}", "BOT")
+            _bot_wav_writer = None
+
+
+def _close_bot_wav():
+    global _bot_wav_writer
+    with _bot_wav_lock:
+        if _bot_wav_writer is None:
+            return
+        try:
+            _bot_wav_writer.close()
+            log("Meeting audio file closed", "BOT")
+        except Exception as e:
+            log(f"Error closing WAV: {e}", "BOT")
+        _bot_wav_writer = None
+
+
+if Config.MEET_BOT_ENABLED:
+
+    @socketio.on("connect", namespace="/meet_bot")
+    def bot_connect():
+        global bot_connected
+        bot_connected = True
+        log("Meet bot connected", "BOT")
+        _open_bot_wav()
+        socketio.emit("bot_status", {"connected": True}, room="admin")
+
+    @socketio.on("disconnect", namespace="/meet_bot")
+    def bot_disconnect():
+        global bot_connected, _active_speaker_starts
+        bot_connected = False
+        # Close any open speaker intervals so timeline stays consistent.
+        now_ms = int(time.time() * 1000)
+        for name, start in list(_active_speaker_starts.items()):
+            speaker_timeline.append((start, now_ms, name))
+        _active_speaker_starts.clear()
+        _close_bot_wav()
+        log("Meet bot disconnected", "BOT")
+        socketio.emit("bot_status", {"connected": False}, room="admin")
+
+    @socketio.on("audio_frame", namespace="/meet_bot")
+    def bot_audio_frame(meta, data):
+        global _bot_pcm_buffer, _last_capture_ts_ms
+        if not is_listening:
+            return
+        # Persist raw int16 PCM for offline retranscription.
+        # writeframes (not writeframesraw) patches the header on every write so
+        # the file is valid even if the server is killed without clean shutdown.
+        if _bot_wav_writer is not None:
+            try:
+                with _bot_wav_lock:
+                    if _bot_wav_writer is not None:
+                        _bot_wav_writer.writeframes(data)
+            except Exception:
+                pass
+        # data arrives as bytes (Int16 PCM, 16 kHz mono, 320 samples = 20 ms).
+        frame = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
+        _bot_pcm_buffer = np.concatenate([_bot_pcm_buffer, frame])
+        # Rechunk to CHUNK_SIZE (1024) so process_audio's duration math is correct.
+        while len(_bot_pcm_buffer) >= CHUNK_SIZE:
+            audio_queue.put(_bot_pcm_buffer[:CHUNK_SIZE].copy())
+            _bot_pcm_buffer = _bot_pcm_buffer[CHUNK_SIZE:]
+        _last_capture_ts_ms = meta.get("capture_ts_ms")
+
+    @socketio.on("speaker_event", namespace="/meet_bot")
+    def bot_speaker_event(ev):
+        global meet_participants, _active_speaker_starts, _current_bot_speaker, _pending_speaker_switch
+        ev_type = ev.get("type")
+        name = ev.get("name")
+        ts_ms = ev.get("wall_clock_ms", int(time.time() * 1000))
+
+        if ev_type == "roster_update":
+            for p in ev.get("participants", []):
+                if p and p not in meet_participants:
+                    meet_participants[p] = ts_ms
+            socketio.emit("meet_roster", {
+                "participants": list(meet_participants.keys()),
+                "bot_connected": True,
+            }, room="admin")
+            return
+
+        if not name:
+            return
+
+        if ev_type == "speaker_start":
+            _active_speaker_starts[name] = ts_ms
+            # Flag a pending switch if the speaker changed — process_audio uses this to flush.
+            if _current_bot_speaker is not None and _current_bot_speaker != name:
+                _pending_speaker_switch = True
+            _current_bot_speaker = name
+            log(f"Speaking: {name}", "BOT")
+            _broadcast_active_speakers()
+
+        elif ev_type == "speaker_end":
+            start = _active_speaker_starts.pop(name, ts_ms - 1000)
+            speaker_timeline.append((start, ts_ms, name))
+            log(f"Segment: {name} {ts_ms - start} ms", "BOT")
+            # Don't flush here — a single speaker's short pauses emit
+            # speaker_end/speaker_start toggles, so flushing on end would
+            # chop their turn into tiny fragments. We only flush when a
+            # DIFFERENT speaker starts (speaker_switched) or the 60 s cap
+            # is reached. _current_bot_speaker stays as the last name so
+            # the same speaker resuming does not trigger a switch.
+            _broadcast_active_speakers()
+
+
+def _broadcast_active_speakers():
+    """Push the current set of speaking participants to admin + all viewers."""
+    names = list(_active_speaker_starts.keys())
+    payload = {"speakers": names, "wall_clock_ms": int(time.time() * 1000)}
+    socketio.emit("active_speakers", payload, room="admin")
+    for lang_code, count in active_language_viewers.items():
+        if count > 0:
+            socketio.emit("active_speakers", payload, room=f"lang_{lang_code}")
+
+
+# ── Admin-triggered bot spawning ─────────────────────────────────────────────
+# Tracks the currently running Meet bot subprocess so we can start/stop it
+# from the admin panel. Only one bot instance is supported at a time.
+_meet_bot_process = None
+_meet_bot_lock = threading.Lock()
+
+
+def _bot_script_path():
+    return Path(__file__).parent / "meet-bot" / "index.js"
+
+
+@socketio.on("start_meet_bot")
+def handle_start_meet_bot(data):
+    """Spawn the Meet bot pointing at the given URL. Admin-only."""
+    global _meet_bot_process
+
+    url = (data or {}).get("url", "").strip()
+    if not url.startswith("http"):
+        emit("meet_bot_control_result", {"ok": False, "error": "URL must start with http(s)://"})
+        return
+
+    with _meet_bot_lock:
+        # If a bot is already running, refuse to start another.
+        if _meet_bot_process is not None and _meet_bot_process.poll() is None:
+            emit("meet_bot_control_result", {"ok": False, "error": "Bot already running — stop it first"})
+            return
+
+        script = _bot_script_path()
+        if not script.exists():
+            emit("meet_bot_control_result", {"ok": False, "error": f"Bot script not found at {script}"})
+            return
+
+        import subprocess
+        try:
+            _meet_bot_process = subprocess.Popen(
+                ["node", str(script),
+                 "--url", url,
+                 "--polyglot-url", "http://localhost:5000",
+                 "--headful"],
+                cwd=str(script.parent),
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                stdin=subprocess.DEVNULL,
+                creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == "nt" else 0,
+            )
+            log(f"Spawned Meet bot (pid={_meet_bot_process.pid}) → {url}", "BOT")
+            emit("meet_bot_control_result", {"ok": True, "pid": _meet_bot_process.pid})
+        except Exception as e:
+            log(f"Failed to spawn bot: {e}", "BOT")
+            emit("meet_bot_control_result", {"ok": False, "error": str(e)})
+
+
+@socketio.on("stop_meet_bot")
+def handle_stop_meet_bot():
+    """Terminate the running Meet bot subprocess."""
+    global _meet_bot_process
+    with _meet_bot_lock:
+        if _meet_bot_process is None or _meet_bot_process.poll() is not None:
+            emit("meet_bot_control_result", {"ok": False, "error": "No bot running"})
+            _meet_bot_process = None
+            return
+        try:
+            _meet_bot_process.terminate()
+            try:
+                _meet_bot_process.wait(timeout=5)
+            except Exception:
+                _meet_bot_process.kill()
+            log(f"Stopped Meet bot subprocess", "BOT")
+            _meet_bot_process = None
+            emit("meet_bot_control_result", {"ok": True})
+        except Exception as e:
+            emit("meet_bot_control_result", {"ok": False, "error": str(e)})
+
+
 if __name__ == "__main__":
     # Check for single instance before doing anything else
     check_single_instance()
diff --git a/config.py b/config.py
index 082569f..8c9485e 100644
--- a/config.py
+++ b/config.py
@@ -211,6 +211,14 @@ def get_translation_lang_code(cls, iso_code):
     SAMPLE_RATE = 16000  # Whisper expects 16kHz audio
     CHUNK_SIZE = 1024  # Audio buffer chunk size
 
+    # Audio source — "wasapi" uses the WASAPI loopback device (original path);
+    # "meet_bot" receives 16 kHz PCM16 from the Playwright bot over SocketIO.
+    AUDIO_SOURCE = os.getenv("AUDIO_SOURCE", "meet_bot")
+
+    # Meet bot SocketIO receiver.  The bot connects to /meet_bot on whatever
+    # port Polyglot is already running on — no separate port needed.
+    MEET_BOT_ENABLED = os.getenv("MEET_BOT_ENABLED", "True").lower() in ("true", "1", "yes")
+
     # Minimum audio level to process (prevents hallucinations during silence)
     # If average audio level is below this, skip transcription
     MIN_AUDIO_LEVEL = 0.01
diff --git a/meet-bot/.gitignore b/meet-bot/.gitignore
new file mode 100644
index 0000000..793052f
--- /dev/null
+++ b/meet-bot/.gitignore
@@ -0,0 +1,4 @@
+node_modules/
+package-lock.json
+*.log
+chrome-profile/
diff --git a/meet-bot/README.md b/meet-bot/README.md
new file mode 100644
index 0000000..12d6416
--- /dev/null
+++ b/meet-bot/README.md
@@ -0,0 +1,61 @@
+# Polyglot Meet Bot
+
+Headless Chromium bot that joins a Google Meet as an anonymous guest. Future phases will stream meeting audio and active-speaker names back to the Polyglot server; this initial phase validates only the join-and-get-admitted flow.
+
+## Setup
+
+```bash
+cd meet-bot
+npm install
+npx playwright install chromium
+```
+
+Node 20+ required.
+
+## Run
+
+```bash
+# Typical use — fully headless:
+node index.js --url "https://meet.google.com/xxx-yyyy-zzz"
+
+# Watch what the bot sees (debug Meet UI issues):
+node index.js --url "https://meet.google.com/xxx-yyyy-zzz" --headful
+
+# Override the displayed name (default "Polyglot Bot"):
+node index.js --url "..." --name "Transcription Bot"
+```
+
+## What it does (phase 1)
+
+1. Launches a fresh, cookieless Chromium — no Google sign-in.
+2. Opens the Meet URL, waits for the pre-join screen.
+3. Fills the "Your name" field, mutes mic + camera, clicks **Ask to join**.
+4. Waits up to 2 minutes for the host to admit it.
+5. Once admitted, stays connected until the meeting ends or it's removed.
+
+Exit codes:
+
+| Code | Meaning |
+|------|---------|
+| 0    | Joined successfully, then meeting ended / bot removed cleanly |
+| 1    | Crash / unexpected error (see stderr) |
+| 2    | Bad CLI arguments |
+| 3    | Host explicitly denied the join request |
+| 4    | Timed out in the lobby (host never admitted) |
+
+## Testing
+
+The easy test: open Meet in a normal browser tab, start a meeting as host, run the bot with `--headful --url <link>`, and admit it from the participants panel when it shows up as "Polyglot Bot". You should see the bot's Chromium window join the call.
+
+## What's NOT here yet
+
+- Audio capture (tab audio → 16 kHz PCM16 → Polyglot WebSocket)
+- DOM scraping of active-speaker name and participant roster
+- WebSocket connection to the Polyglot backend
+- Control channel (join/leave commands from Polyglot's admin UI)
+
+Those land in subsequent phases once we've validated the bot can reliably get into meetings.
+
+## Selectors
+
+All Meet DOM selectors live in `selectors.js`. When Meet ships a UI change and the bot breaks, that's the file to update — nothing else should need touching.
diff --git a/meet-bot/audio.js b/meet-bot/audio.js
new file mode 100644
index 0000000..508caed
--- /dev/null
+++ b/meet-bot/audio.js
@@ -0,0 +1,127 @@
+// Phase 2: in-browser audio capture for the Meet bot.
+//
+// Two-part design:
+//   1. RTC_INIT_SCRIPT — must be registered via context.addInitScript() BEFORE
+//      page.goto() so it runs before Meet initialises its RTCPeerConnections.
+//      It patches RTCPeerConnection to funnel every remote audio track into a
+//      single shared MediaStream (window.__pgStream).
+//
+//   2. setupAudioCapture(page, onChunk) — called after the bot has joined.
+//      Injects an AudioWorklet that downsamples all audio in __pgStream to
+//      16 kHz mono PCM16, buffers into 20 ms frames, and sends each frame
+//      back to Node via an exposed function.
+
+// ── 1. RTC patch (init script) ───────────────────────────────────────────────
+
+export const RTC_INIT_SCRIPT = `(function () {
+  window.__pgStream = new MediaStream();
+  const _Orig = window.RTCPeerConnection;
+  class _Patched extends _Orig {
+    constructor(...a) {
+      super(...a);
+      this.addEventListener('track', (ev) => {
+        if (ev.track.kind !== 'audio') return;
+        if (!window.__pgStream.getTrackById(ev.track.id))
+          window.__pgStream.addTrack(ev.track);
+      });
+    }
+  }
+  window.RTCPeerConnection = _Patched;
+})();`;
+
+// ── 2. AudioWorklet processor source ─────────────────────────────────────────
+//
+// Nearest-neighbour resampler: maintains a fractional index across process()
+// calls so downsampling is consistent across block boundaries.
+// Buffers output until 320 samples (20 ms @ 16 kHz) are ready, then posts
+// { pcm: ArrayBuffer, ts: number } to the main thread.
+
+const WORKLET_SRC = `
+class PgResampler extends AudioWorkletProcessor {
+  constructor() { super(); this._idx = 0; this._buf = []; }
+
+  process(inputs) {
+    const ch = inputs[0]?.[0];
+    if (!ch) return true;
+
+    const ratio = sampleRate / 16000; // e.g. 3.0 for 48 kHz input
+    while (this._idx < ch.length) {
+      const s = ch[Math.floor(this._idx)];
+      this._buf.push(Math.round(Math.max(-1, Math.min(1, s)) * 32767));
+      this._idx += ratio;
+    }
+    this._idx -= ch.length; // carry fractional offset to next block
+
+    while (this._buf.length >= 320) {
+      const arr = new Int16Array(this._buf.splice(0, 320));
+      this.port.postMessage({ pcm: arr.buffer, ts: Date.now() }, [arr.buffer]);
+    }
+    return true;
+  }
+}
+registerProcessor('pg-resampler', PgResampler);
+`;
+
+// ── 3. setupAudioCapture ──────────────────────────────────────────────────────
+//
+// onChunk(pcm: Buffer, captureTs: number) is called for each 20 ms PCM16 frame.
+// captureTs is wall-clock ms at the moment the worklet produced the frame —
+// used later by resolve_speaker_identity() for time-alignment.
+
+export async function setupAudioCapture(page, onChunk) {
+  // Bridge from browser → Node. exposeFunction is safe to call post-navigate.
+  await page.exposeFunction('__pgChunk', (b64, ts) => {
+    onChunk(Buffer.from(b64, 'base64'), ts);
+  });
+
+  await page.evaluate(async (src) => {
+    // Inject worklet via blob URL (no local server needed).
+    const url = URL.createObjectURL(new Blob([src], { type: 'application/javascript' }));
+    const ctx = new AudioContext();
+    await ctx.resume(); // bypass autoplay suspension — bot has no user gesture
+    await ctx.audioWorklet.addModule(url);
+    URL.revokeObjectURL(url);
+
+    const node = new AudioWorkletNode(ctx, 'pg-resampler');
+
+    // Worklet → Node bridge: encode PCM16 ArrayBuffer as base64 string so it
+    // can cross the Playwright IPC boundary (exposeFunction only handles JSON).
+    node.port.onmessage = ({ data: { pcm, ts } }) => {
+      const u8 = new Uint8Array(pcm);
+      let s = '';
+      for (let i = 0; i < u8.length; i++) s += String.fromCharCode(u8[i]);
+      window.__pgChunk(btoa(s), ts);
+    };
+
+    function connectTrack(track) {
+      // Each track gets its own MediaStreamSource; sharing a single source
+      // across tracks doesn't work — each source reads one stream.
+      ctx.createMediaStreamSource(new MediaStream([track])).connect(node);
+    }
+
+    // Connect tracks already in the shared stream (joined mid-call or after
+    // participants were already speaking).
+    window.__pgStream.getAudioTracks().forEach(connectTrack);
+
+    // Connect tracks added after this point (people join late, etc.).
+    window.__pgStream.addEventListener('addtrack', (e) => {
+      if (e.track.kind === 'audio') connectTrack(e.track);
+    });
+
+    // Fallback: some Meet versions route audio through <audio> elements instead
+    // of exposing it via RTCPeerConnection track events. Tap those too.
+    function connectEl(el) {
+      if (el._pg) return;
+      el._pg = true;
+      try {
+        const src = ctx.createMediaElementSource(el);
+        src.connect(node);
+        src.connect(ctx.destination); // keep the original playback alive
+      } catch (_) { /* element may already be claimed */ }
+    }
+    document.querySelectorAll('audio').forEach(connectEl);
+    new MutationObserver(() => document.querySelectorAll('audio').forEach(connectEl))
+      .observe(document.documentElement, { childList: true, subtree: true });
+
+  }, WORKLET_SRC);
+}
diff --git a/meet-bot/index.js b/meet-bot/index.js
new file mode 100644
index 0000000..7a02f63
--- /dev/null
+++ b/meet-bot/index.js
@@ -0,0 +1,310 @@
+// Polyglot Meet Bot — phases 1–3: join + audio capture + speaker detection.
+//
+// Usage:
+//   node index.js --url https://meet.google.com/xxx-yyyy-zzz \
+//                 [--name "Polyglot Bot"] \
+//                 [--headful] \
+//                 [--polyglot-url http://localhost:5001] \
+//                 [--profile-dir <path>]   (default: chrome-profile/ next to this file)
+//
+// Exit codes: 0 clean leave, 1 crash, 2 bad args, 3 denied by host, 4 lobby timeout,
+//             6 blocked (bot detected / meeting locked / link invalid).
+//
+// Persistent profile: the bot reuses a Chrome profile across runs so Google
+// sign-in cookies survive. On first run the user logs in manually in the
+// headful window; subsequent runs are already authenticated.
+
+import os from "os";
+import path from "path";
+import { fileURLToPath } from "url";
+import { chromium } from "playwright";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+import { SELECTORS, firstMatching } from "./selectors.js";
+import { RTC_INIT_SCRIPT, setupAudioCapture } from "./audio.js";
+import { setupSpeakerDetection } from "./speaker.js";
+
+// Dedicated bot profile — separate from the user's real Chrome so there's no
+// instance conflict. Stored inside meet-bot/ so it's self-contained.
+const DEFAULT_PROFILE = path.join(__dirname, "chrome-profile");
+
+function parseArgs(argv) {
+  const args = {
+    url: null,
+    name: "Polyglot Bot",
+    headful: false,
+    polyglotUrl: null,
+    profileDir: DEFAULT_PROFILE,
+  };
+  for (let i = 2; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === "--url") args.url = argv[++i];
+    else if (a === "--name") args.name = argv[++i];
+    else if (a === "--headful") args.headful = true;
+    else if (a === "--polyglot-url") args.polyglotUrl = argv[++i];
+    else if (a === "--profile-dir") args.profileDir = argv[++i];
+  }
+  if (!args.url) {
+    console.error(
+      "Usage: node index.js --url <meet-link> [--name <display>] [--headful] [--polyglot-url <url>] [--profile-dir <path>]"
+    );
+    process.exit(2);
+  }
+  return args;
+}
+
+function log(msg, ...rest) {
+  const ts = new Date().toISOString();
+  console.log(`[${ts}] ${msg}`, ...rest);
+}
+
+// Connect to Polyglot's /meet_bot SocketIO namespace.
+// Returns { sendAudio, sendEvent } or null if not configured / unavailable.
+async function connectPolyglot(polyglotUrl) {
+  if (!polyglotUrl) return null;
+  try {
+    const { io } = await import("socket.io-client");
+    // Socket.IO reconnects automatically; give the initial handshake 15 s so
+    // Polyglot has time to finish any lazy namespace registration.
+    const socket = io(`${polyglotUrl}/meet_bot`, {
+      transports: ["websocket"],
+      reconnection: true,
+      reconnectionDelay: 500,
+      reconnectionDelayMax: 2000,
+    });
+    await new Promise((resolve, reject) => {
+      socket.once("connect", resolve);
+      setTimeout(() => reject(new Error("connect timeout")), 15000);
+    });
+    log(`Connected to Polyglot at ${polyglotUrl}/meet_bot`);
+    return {
+      sendAudio: (pcm, captureTs) =>
+        socket.emit("audio_frame", { capture_ts_ms: captureTs, sample_rate: 16000, channels: 1 }, pcm),
+      sendEvent: (ev) =>
+        socket.emit("speaker_event", ev),
+    };
+  } catch (err) {
+    log(`WARN: Could not connect to Polyglot (${err.message}). Audio will not be forwarded.`);
+    return null;
+  }
+}
+
+async function joinMeeting({ url, name, headful, polyglotUrl, profileDir }) {
+  log(`Launching Chrome (headful=${headful}, profile=${profileDir})`);
+
+  // launchPersistentContext keeps cookies/localStorage across runs — the user
+  // signs in once and subsequent runs are already authenticated.
+  const context = await chromium.launchPersistentContext(profileDir, {
+    headless: !headful,
+    channel: "chrome",
+    args: [
+      "--use-fake-ui-for-media-stream",
+      "--disable-blink-features=AutomationControlled",
+      "--autoplay-policy=no-user-gesture-required",
+      "--no-sandbox",
+      "--disable-dev-shm-usage",
+    ],
+    permissions: ["microphone", "camera"],
+  });
+
+  // Stealth: unset navigator.webdriver before any page JS runs.
+  await context.addInitScript(() => {
+    Object.defineProperty(navigator, "webdriver", { get: () => undefined });
+  });
+
+  // Phase 2: patch RTCPeerConnection before Meet's JS initialises WebRTC.
+  await context.addInitScript(RTC_INIT_SCRIPT);
+
+  const page = await context.newPage();
+
+  log(`Navigating to ${url}`);
+  await page.goto(url, { waitUntil: "domcontentloaded" });
+
+  // Pre-join loop — handles:
+  //   • Anonymous guest flow (name field → join button)
+  //   • Signed-in flow (join button directly, no name field)
+  //   • Google sign-in redirect (wait indefinitely for user to log in)
+  let clicked = false;
+  while (!clicked) {
+    await page.waitForTimeout(1200); // let Meet's JS settle
+
+    const currentUrl = page.url();
+    log(`Page: ${currentUrl.slice(0, 80)}`);
+
+    // Block / bot-detection page — Google redirected us away from Meet entirely.
+    if (SELECTORS.blockedUrls.some((u) => currentUrl.includes(u))) {
+      log("BLOCKED — Google rejected access (bot detected, meeting locked, or link invalid).");
+      return 6;
+    }
+    // "You can't join this video call" text shown inline on meet.google.com.
+    const pageText = await page.evaluate(() => document.body.innerText).catch(() => "");
+    if (SELECTORS.blockedText.test(pageText)) {
+      log(`BLOCKED — page says: "${pageText.slice(0, 120)}"`);
+      return 6;
+    }
+
+    // Sign-in wall — wait for the user to complete login in the headful window.
+    if (currentUrl.includes("accounts.google.com")) {
+      log("Google sign-in required — log in in the browser window. Bot will resume automatically.");
+      await page.waitForURL((u) => !u.href.includes("accounts.google.com"), { timeout: 0 });
+      log("Back on Meet — retrying pre-join flow.");
+      await page.waitForLoadState("domcontentloaded");
+      continue;
+    }
+
+    // Name field — present for anonymous guests only.
+    const match = await firstMatching(page, SELECTORS.nameInput, 3000);
+    if (match) {
+      log(`Filling name field with "${name}"`);
+      try {
+        await page.locator(match.selector).first().fill(name, { timeout: 4000 });
+      } catch (_) {
+        continue; // navigation happened mid-fill — loop will detect it
+      }
+    } else {
+      log("No name field — signed-in account.");
+    }
+
+    // Mute mic + camera before entering.
+    for (const [label, sel] of [
+      ["microphone", SELECTORS.micToggle],
+      ["camera", SELECTORS.camToggle],
+    ]) {
+      try {
+        const btn = await page.$(sel);
+        if (btn) {
+          const aria = (await btn.getAttribute("aria-label")) || "";
+          if (/turn off/i.test(aria)) {
+            log(`Muting ${label}`);
+            await btn.click();
+          }
+        }
+      } catch (_) {}
+    }
+
+    // Click join button.
+    for (const label of SELECTORS.joinButtonNames) {
+      const btn = page.getByRole("button", { name: label });
+      if (await btn.count()) {
+        log(`Clicking "${label}"`);
+        try {
+          await btn.first().click({ timeout: 5000 });
+          clicked = true;
+        } catch (e) {
+          log(`  click failed: ${e.message.split("\n")[0]}`);
+          // Try forcing through any overlay.
+          try {
+            await btn.first().click({ force: true, timeout: 3000 });
+            clicked = true;
+            log(`  forced click succeeded`);
+          } catch (e2) {
+            log(`  forced click also failed: ${e2.message.split("\n")[0]}`);
+          }
+        }
+        break;
+      }
+    }
+
+    if (!clicked) {
+      log("Join button not visible yet — retrying in 2 s…");
+      await page.waitForTimeout(2000);
+    }
+  }
+
+  log("Waiting for host to admit from lobby (up to 2 min)…");
+  const inMeeting = await Promise.race([
+    page
+      .waitForSelector(SELECTORS.leaveCallButton, { timeout: 120000 })
+      .then(() => "joined")
+      .catch(() => null),
+    page
+      .waitForFunction(
+        (pattern) => new RegExp(pattern.source, pattern.flags).test(document.body.innerText),
+        { source: SELECTORS.deniedText.source, flags: SELECTORS.deniedText.flags },
+        { timeout: 120000 }
+      )
+      .then(() => "denied")
+      .catch(() => null),
+  ]);
+
+  if (inMeeting === "joined") {
+    log("JOINED — bot is in the meeting.");
+
+    // Enable Meet's live captions — primary source of reliable speaker-identity.
+    // Try in order: click the toolbar button, then the keyboard shortcut.
+    try {
+      await page.waitForTimeout(2000);
+      // First, try to find the captions button by aria-label and click it.
+      const captionsBtn = page.locator(
+        'button[aria-label*="caption" i], button[aria-label*="subtitle" i]'
+      ).first();
+      if (await captionsBtn.count()) {
+        const label = await captionsBtn.getAttribute("aria-label");
+        // Only click if label indicates captions are OFF (turn on...).
+        if (/turn on|show/i.test(label || "")) {
+          await captionsBtn.click({ timeout: 3000 });
+          log(`Enabled captions via button: "${label}"`);
+        } else {
+          log(`Captions already on: "${label}"`);
+        }
+      } else {
+        // Fallback: keyboard shortcut. Click main area first to ensure focus.
+        await page.click("body").catch(() => {});
+        await page.keyboard.press("c");
+        log("Enabled captions via keyboard shortcut.");
+      }
+    } catch (e) {
+      log(`WARN: Could not enable captions: ${e.message}`);
+    }
+
+    // Phase 2: start audio capture.
+    const polyglot = await connectPolyglot(polyglotUrl);
+    let chunkCount = 0;
+    log("Setting up audio capture…");
+    await setupAudioCapture(page, (pcm, captureTs) => {
+      chunkCount++;
+      if (chunkCount % 50 === 0) {
+        log(`Audio: ${chunkCount * 20} ms captured, last ts=${captureTs}`);
+      }
+      polyglot?.sendAudio(pcm, captureTs);
+    });
+    log("Audio capture active (16 kHz mono PCM16, 20 ms frames).");
+
+    // Phase 3: speaker events and roster.
+    await setupSpeakerDetection(page, (ev) => {
+      if (ev.type === "roster_update") {
+        log(`Roster: ${ev.participants.join(", ") || "(empty)"}`);
+      } else {
+        log(`${ev.type === "speaker_start" ? "  Speaking" : "Silent   "}  ${ev.name}`);
+      }
+      polyglot?.sendEvent(ev);
+    });
+    log("Speaker detection active.");
+
+    await page
+      .waitForSelector(SELECTORS.leaveCallButton, { state: "detached", timeout: 0 })
+      .catch(() => {});
+    log(`Meeting ended. Total audio captured: ${chunkCount * 20} ms`);
+    return 0;
+  }
+
+  if (inMeeting === "denied") {
+    log("DENIED — host rejected the join request.");
+    return 3;
+  }
+
+  log("TIMED OUT in lobby (2 min) — host did not admit.");
+  return 4;
+}
+
+(async () => {
+  const args = parseArgs(process.argv);
+  let code = 1;
+  try {
+    code = await joinMeeting(args);
+  } catch (err) {
+    log("ERROR:", err.message);
+    code = 1;
+  }
+  process.exit(code);
+})();
diff --git a/meet-bot/package.json b/meet-bot/package.json
new file mode 100644
index 0000000..ac6d5f1
--- /dev/null
+++ b/meet-bot/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "polyglot-meet-bot",
+  "version": "0.1.0",
+  "description": "Headless Chromium bot that joins a Google Meet as an anonymous guest and streams audio + speaker identity back to Polyglot.",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "start": "node index.js",
+    "install-browsers": "playwright install chromium"
+  },
+  "engines": {
+    "node": ">=20"
+  },
+  "dependencies": {
+    "playwright": "^1.47.0",
+    "socket.io-client": "^4.8.0"
+  }
+}
diff --git a/meet-bot/selectors.js b/meet-bot/selectors.js
new file mode 100644
index 0000000..c9720e4
--- /dev/null
+++ b/meet-bot/selectors.js
@@ -0,0 +1,88 @@
+// Google Meet DOM selectors.
+//
+// Meet's CSS classes are obfuscated and rotate. This file centralizes every
+// selector the bot relies on so a UI change is a one-file fix. Prefer stable
+// anchors (aria-label, role, visible text) over class names.
+
+export const SELECTORS = {
+  // ── Active-speaker / roster detection (Phase 3) ──────────────────────
+  // NOTE(review): speaker.js runs inside page.evaluate and hard-codes its own selector strings; keep in sync.
+  // Meet's classes are obfuscated; everything here anchors on aria-label,
+  // role, or data-* attributes that have been stable across rollouts.
+  //
+  // Tile container — wraps one participant's video + name + mic ring.
+  // data-participant-id is the most stable anchor we have.
+  participantTile: '[data-participant-id]',
+
+  // Speaking indicators — Meet has used several over time; we try all.
+  // Strategy 1: explicit boolean attribute (newer Meet)
+  speakingAttr: '[data-is-speaking="true"]',
+  // Strategy 2: aria-label on the tile or mic button says "… is speaking"
+  speakingAriaLabel: '[aria-label*="is speaking" i]',
+  // Strategy 3: the audio-level bars inside a tile animate when speaking.
+  // Class is obfuscated, but the element always carries [data-is-muted="false"]
+  // and its closest tile ancestor is the active speaker. Fallback only.
+  audioLevelBar: '[data-is-muted="false"]',
+
+  // Name extraction — checked in order inside a tile element.
+  tileNameSelectors: [
+    '[data-self-name]',             // newer Meet
+    '[jsname="r8qRAd"]',            // one known jsname for name label
+    'div[class][data-tooltip]',     // tooltip often holds display name
+  ],
+
+  // People panel — open it to get full roster.
+  peopleButton:
+    'button[aria-label*="people" i], button[aria-label*="everyone" i], button[aria-label*="participants" i]',
+  // Each row in the People panel roster.
+  rosterItem: '[data-participant-id] span[jsname], [role="listitem"] span',
+
+  // Pre-join screen (anonymous guest path) -------------------------------
+  // The "Your name" input shown to signed-out users on the Meet landing page
+  // before joining. Meet has used multiple implementations; try in order.
+  nameInput: [
+    'input[aria-label="Your name"]',
+    'input[placeholder="Your name"]',
+    'input[jsname][type="text"]',
+  ],
+
+  // Accessible names of the button that submits the pre-join form, tried in
+  // list order. The label depends on meeting config: "Ask to join" when the
+  // host must admit you, "Join now" when you're the host or pre-admitted.
+  // These are button names, not CSS selectors: index.js passes each one to
+  // Playwright's getByRole('button', { name: ... }).
+  joinButtonNames: ["Ask to join", "Join now", "Join"],
+
+  // Pre-join sometimes prompts to turn off mic/cam — these buttons toggle
+  // them. Anchored on aria-label which Meet has kept stable for years.
+  micToggle: 'div[role="button"][aria-label*="microphone" i]',
+  camToggle: 'div[role="button"][aria-label*="camera" i]',
+
+  // In-call indicators ---------------------------------------------------
+  // Presence of the leave-call button is the most reliable "we are in the
+  // meeting" signal. Its aria-label is "Leave call".
+  leaveCallButton: 'button[aria-label="Leave call"]',
+
+  // Lobby / denial detection. index.js tests this pattern against the page's
+  // visible text while waiting in the lobby; a match is reported as "denied".
+  deniedText: /You can't join this call|no one responded|denied/i,
+
+  // Bot / access blocked detection. blockedUrls are substrings matched against
+  // the current page URL; blockedText is matched against the page's visible text.
+  blockedUrls: ["workspace.google.com/products/meet", "accounts.google.com/v3/signin/rejected"],
+  blockedText: /you can't join this video call|this meeting is locked|you're not allowed|not available/i,
+};
+
+// Helper: return the first selector from a list that matches something on
+// the page. Used for resilient element lookup when Meet ships A/B variants.
+export async function firstMatching(page, selectorList, timeoutMs = 15000) {
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    for (const sel of selectorList) {
+      const el = await page.$(sel);
+      if (el) return { selector: sel, element: el };
+    }
+    await page.waitForTimeout(250);
+  }
+  return null;
+}
diff --git a/meet-bot/speaker.js b/meet-bot/speaker.js
new file mode 100644
index 0000000..11c8640
--- /dev/null
+++ b/meet-bot/speaker.js
@@ -0,0 +1,272 @@
+// Phase 3: active-speaker detection + roster scraping.
+//
+// Primary signal: Meet's live captions. When captions are enabled, each
+// caption block carries the speaker's name as the first text node and the
+// spoken words as the rest. This is far more reliable than DOM-class
+// heuristics (which change every Meet rollout and rotate as ambient pulse).
+//
+// Fallback signals: the legacy data-is-speaking attribute and aria-label
+// text — still present on rare pre-join / breakout-style UIs.
+
+export async function setupSpeakerDetection(page, onEvent) {
+  await page.exposeFunction("__pgSpeakerEvent", (json) => onEvent(JSON.parse(json)));
+
+  await page.evaluate(() => {
+
+    // ── Helpers ────────────────────────────────────────────────────────────
+
+    function isUIAction(s) {
+      return /^(?:pin|unpin|mute|unmute|remove|reframe|spotlight|present|share|more options|turn on|turn off|stop|start)/i.test(s.trim());
+    }
+
+    function nameFromTile(el) { // best-effort display name for the tile containing el; null when nothing usable is found
+      const root = el.closest("[data-participant-id]") || el; // enclosing tile, or el itself when there is none
+
+      const nameEl = root.querySelector("[data-self-name]");
+      if (nameEl?.dataset.selfName) { // 1st choice: the data-self-name attribute value
+        const n = nameEl.dataset.selfName.trim();
+        if (n && !isUIAction(n)) return n;
+      }
+      if (nameEl?.textContent) { // 2nd choice: text content of that same element
+        const n = nameEl.textContent.trim();
+        if (n && !isUIAction(n)) return n;
+      }
+
+      const rootLabel = root.getAttribute("aria-label") || "";
+      if (rootLabel) { // 3rd choice: the tile aria-label
+        const m = rootLabel.match(/^(.+?)(?:'s\s+(?:video|screen|camera|tile)|(?:\s*\(you\)))/i);
+        if (m?.[1] && !isUIAction(m[1])) return m[1].trim(); // text before a possessive suffix or a (you) marker
+        if (rootLabel.length < 60 && !isUIAction(rootLabel) && !/\b(?:from|your|main|screen)\b/i.test(rootLabel))
+          return rootLabel.trim(); // otherwise accept a short label that has none of the excluded words
+      }
+
+      for (const sel of ["span[jsname='r8qRAd']", "div[jsname='Cpqoke']", "div[data-tooltip]"]) { // last resort: known label nodes
+        const candidate = root.querySelector(sel);
+        const text = (candidate?.textContent || candidate?.dataset?.tooltip || "").trim(); // text first, tooltip second
+        if (text && text.length < 60 && !isUIAction(text)) return text;
+      }
+
+      return null;
+    }
+
+    function emit(ev) { // hand one event object to the Node side
+      window.__pgSpeakerEvent(JSON.stringify(ev)); // serialised here; the exposed binding JSON.parses it again
+    }
+
+    // ── Captions-based speaker detection ───────────────────────────────────
+    //
+    // We look for a container whose aria-label mentions "caption" /
+    // "transcription" / "untertitel" etc. Meet renders each caption block as
+    // a group with the speaker name as the first text child and the spoken
+    // words as siblings. When a caption block updates (new words appended),
+    // that speaker is currently active.
+
+    const CAPTION_REGION_RE = /caption|transcript|untertitel|sous-titre|subtitulo|subtitle/i;
+
+    function findCaptionContainer() { // returns the live-captions container element, or null if none matches
+      // 1. Strong preference: a role=region whose aria-label contains a captions
+      //    word in any listed language (CAPTION_REGION_RE is a substring test).
+      const regions = [...document.querySelectorAll('[role="region"]')];
+      for (const el of regions) {
+        const lbl = el.getAttribute("aria-label") || "";
+        if (CAPTION_REGION_RE.test(lbl)) return el;
+      }
+      // 2. Fallback: ANY element whose trimmed aria-label is, in full, one of the words below.
+      for (const el of document.querySelectorAll('[aria-label]')) {
+        const lbl = (el.getAttribute("aria-label") || "").trim();
+        // Exact match on the WORD captions (not combobox "Caption type").
+        if (/^(captions?|transcript|untertitel|sous-titres)$/i.test(lbl)) return el;
+      }
+      // 3. Last resort: known jsname values; the first one present in the document wins.
+      for (const sel of ['div[jsname="dsyhDe"]', 'div[jsname="YSxPC"]', 'div[jsname="r5nxDd"]']) {
+        const el = document.querySelector(sel);
+        if (el) return el;
+      }
+      return null;
+    }
+
+    // Track active speakers: name → { lastUpdateMs, startMs }
+    // A speaker is "active" if their caption block updated in the last 1.5 s.
+    const activeSpeakers = new Map();
+    const SPEAKER_TIMEOUT_MS = 1500;
+
+    // Find the enclosing caption block for any DOM node. A caption block is
+    // the wrapper that contains one speaker's current utterance. Meet uses a
+    // class like `nMcdL` on this wrapper; if that rotates we fall back to the
+    // ancestor that is a direct child of the captions container (its children
+    // are not inspected). Returns null when neither lookup succeeds.
+    function findCaptionBlock(node) {
+      let cur = node.nodeType === 1 ? node : node.parentElement; // text nodes start from their parent element
+      const container = findCaptionContainer(); // NOTE(review): looked up again on every call
+      while (cur && cur !== container && cur !== document.body) {
+        if (cur.matches && cur.matches('[class*="nMcdL"]')) return cur;
+        cur = cur.parentElement;
+      }
+      // Structural fallback: walk up to the ancestor sitting directly under the
+      // container; it is assumed, not checked, to hold a (name, text) pair.
+      if (container) {
+        let p = node.nodeType === 1 ? node : node.parentElement;
+        while (p && p.parentElement !== container) p = p.parentElement;
+        if (p && p.parentElement === container) return p;
+      }
+      return null;
+    }
+
+    function extractBlockSpeaker(block) { // speaker name shown in one caption block, or null
+      // Try the known name span first (class NWpY1d); its text must be short and start with a letter.
+      const nameEl = block.querySelector('span.NWpY1d, [class*="NWpY1d"]');
+      if (nameEl) {
+        const name = (nameEl.textContent || "").trim();
+        if (name && name.length < 60 && !isUIAction(name) && /^[\p{L}]/u.test(name))
+          return name;
+      }
+      // Structural fallback: the first descendant, in document order, whose whole text is at most
+      // 60 chars and consists only of letters, whitespace, dots, apostrophes or hyphens.
+      const texts = [];
+      for (const el of block.querySelectorAll('*')) {
+        const t = (el.textContent || "").trim();
+        if (!t || t.length > 60) continue;
+        if (!/^[\p{L}][\p{L}\s.'-]+$/u.test(t)) continue;
+        if (isUIAction(t)) continue;
+        texts.push(t);
+      }
+      return texts[0] || null; // NOTE(review): a short caption made only of letters would also qualify here
+    }
+
+    function onCaptionMutation(mutations) {
+      const now = Date.now();
+      const processedBlocks = new Set();
+
+      // Per-mutation block lookup — catches characterData updates.
+      for (const m of mutations || []) {
+        const block = findCaptionBlock(m.target);
+        if (!block || processedBlocks.has(block)) continue;
+        processedBlocks.add(block);
+        markSpeakerActive(extractBlockSpeaker(block), now);
+      }
+
+      // Whole-container re-scan — covers cases where Meet replaces entire
+      // caption blocks rather than appending characterData to existing ones.
+      // We use a data attribute to track each block's last-seen text length
+      // and treat any growth as active speech.
+      const container = findCaptionContainer();
+      if (!container) return;
+      for (const block of container.querySelectorAll('[class*="nMcdL"]')) {
+        if (processedBlocks.has(block)) continue;
+        const textEl = block.querySelector('[class*="ygicle"], [class*="VbkSUe"]');
+        const len = textEl ? (textEl.textContent || "").length : 0;
+        const prev = parseInt(block.getAttribute("data-pg-len") || "-1", 10);
+        if (len !== prev) {
+          block.setAttribute("data-pg-len", String(len));
+          markSpeakerActive(extractBlockSpeaker(block), now);
+        }
+      }
+    }
+
+    function markSpeakerActive(name, now) {
+      if (!name) return;
+      const existing = activeSpeakers.get(name);
+      if (!existing) {
+        activeSpeakers.set(name, { lastUpdateMs: now, startMs: now });
+        emit({ type: "speaker_start", name, wall_clock_ms: now });
+      } else {
+        existing.lastUpdateMs = now;
+      }
+    }
+
+    // Sweeper: close intervals for speakers whose captions haven't updated.
+    function sweepInactive() {
+      const now = Date.now();
+      for (const [name, info] of activeSpeakers) {
+        if (now - info.lastUpdateMs > SPEAKER_TIMEOUT_MS) {
+          emit({ type: "speaker_end", name, wall_clock_ms: now });
+          activeSpeakers.delete(name);
+        }
+      }
+    }
+
+    // ── Legacy DOM signals (fallback) ──────────────────────────────────────
+
+    let legacyLastSpeaker = null; // name most recently reported by the legacy signals, or null
+    function checkLegacySpeaker() { // emits speaker_end / speaker_start when the legacy speaker changes
+      // Strategy 1: data-is-speaking="true" (only the first matching element is considered)
+      const s1 = document.querySelector('[data-is-speaking="true"]');
+      let speaker = s1 ? nameFromTile(s1) : null;
+
+      if (!speaker) {
+        // Strategy 2: aria-label "X is speaking"; first label that matches wins
+        for (const el of document.querySelectorAll("[aria-label]")) {
+          const lbl = el.getAttribute("aria-label");
+          const m = lbl.match(/^(.+?)\s+is speaking/i);
+          if (m?.[1] && !isUIAction(m[1])) { speaker = m[1].trim(); break; }
+        }
+      }
+
+      if (speaker === legacyLastSpeaker) return; // no change, nothing to emit
+      const now = Date.now();
+      if (legacyLastSpeaker) emit({ type: "speaker_end", name: legacyLastSpeaker, wall_clock_ms: now }); // close the previous speaker first
+      if (speaker) emit({ type: "speaker_start", name: speaker, wall_clock_ms: now });
+      legacyLastSpeaker = speaker; // NOTE(review): kept separately from activeSpeakers, so both paths can report the same name
+    }
+
+    // ── Roster scraping ────────────────────────────────────────────────────
+
+    function scrapeRoster() {
+      const names = new Set();
+      for (const tile of document.querySelectorAll("[data-participant-id]")) {
+        const n = nameFromTile(tile);
+        if (n && n.length < 60 && !/^\(you\)$|^you$/i.test(n) && !isUIAction(n))
+          names.add(n);
+      }
+      return [...names];
+    }
+
+    let lastRosterKey = "";
+    function checkRoster() {
+      const roster = scrapeRoster();
+      const key = roster.slice().sort().join("|");
+      if (key === lastRosterKey) return;
+      lastRosterKey = key;
+      emit({ type: "roster_update", participants: roster, wall_clock_ms: Date.now() });
+    }
+
+    // ── Observers / scheduler ──────────────────────────────────────────────
+
+    // Caption observer: attach once the container appears; re-attach if Meet
+    // rerenders it (detected as findCaptionContainer returning a different element).
+    let captionObserver = null; // MutationObserver currently watching the captions container
+    let boundContainer = null; // element that observer is attached to
+    function ensureCaptionObserver() {
+      const container = findCaptionContainer();
+      if (!container || container === boundContainer) return; // nothing to attach to, or already attached
+      if (captionObserver) captionObserver.disconnect(); // drop the observer bound to the old element
+      captionObserver = new MutationObserver(onCaptionMutation);
+      captionObserver.observe(container, { childList: true, subtree: true, characterData: true }); // attribute changes are not observed
+      boundContainer = container;
+      // Immediate scan in case captions already exist.
+      onCaptionMutation();
+    }
+
+    // Roster observer — tiles come and go with gallery pagination.
+    const rosterObserver = new MutationObserver(() => checkRoster()); // NOTE(review): runs on every matching mutation under body; consider debouncing
+    rosterObserver.observe(document.body, {
+      childList: true, subtree: true, attributes: true,
+      attributeFilter: ["data-self-name", "aria-label"], // only these two attributes trigger the callback
+    });
+
+    // Periodic work, every 500 ms: ensure the caption observer is attached, sweep
+    // inactive speakers, run the legacy fallback, refresh the roster.
+    setInterval(() => {
+      ensureCaptionObserver();
+      sweepInactive();
+      checkLegacySpeaker();
+      checkRoster();
+    }, 500);
+
+    setTimeout(() => { // one-off pass 2 s after installation
+      ensureCaptionObserver();
+      checkRoster();
+    }, 2000);
+
+  });
+}
diff --git a/templates/admin.html b/templates/admin.html
index 6a44972..ada7950 100644
--- a/templates/admin.html
+++ b/templates/admin.html
@@ -580,16 +580,6 @@ <h1>🌍 Polyglot</h1>
                     <div class="status-dot listening"></div>
                     <span>🔴 LIVE</span>
                 </div>
-                <div class="audio-visualizer" id="audioVisualizer" style="display: none;">
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                    <div class="audio-bar"></div>
-                </div>
                 <button id="startBtn">▶️ Start Listening</button>
                 <button id="stopBtn" disabled>⏹️ Stop</button>
                 <button id="settingsBtn">⚙️ Settings</button>
@@ -597,8 +587,8 @@ <h1>🌍 Polyglot</h1>
             </div>
         </header>
 
-        <!-- Audio Visualization, Thresholds, and Viewer Stats - 3 Column Layout -->
-        <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; margin-bottom: 8px;">
+        <!-- Audio Waveform + Thresholds + Viewer Stats - 3 Column Layout -->
+        <div style="display: grid; grid-template-columns: minmax(0, 1fr) minmax(0, 1fr) minmax(0, 1fr); gap: 10px; margin-bottom: 8px;">
             <!-- Column 1: Audio Waveform -->
             <div style="background: rgba(0, 0, 0, 0.95); color: #00ff41; padding: 12px; border-radius: 10px; border: 1px solid rgba(0, 255, 65, 0.2);">
                 <h3 style="color: #00ff41; margin-bottom: 10px; font-size: 12px; font-family: 'Courier New', monospace; letter-spacing: 2px; opacity: 0.8;">AUDIO SIGNAL</h3>
@@ -643,8 +633,8 @@ <h3 style="color: #667eea; margin-bottom: 10px; font-size: 14px;">📊 Threshold
                     </div>
                 </div>
 
-                <!-- Silence Counter Bar -->
-                <div style="margin-bottom: 10px;">
+                <!-- Silence Counter Bar (hidden in bot mode — only drives mic-mode flush) -->
+                <div id="silenceRow" style="margin-bottom: 10px;">
                     <div style="display: flex; justify-content: space-between; font-size: 10px; color: #999; margin-bottom: 4px;">
                         <span>SILENCE</span>
                         <span><span id="debugSilenceCounter">0</span> / <span id="debugSilenceChunksReq">-</span></span>
@@ -658,7 +648,7 @@ <h3 style="color: #667eea; margin-bottom: 10px; font-size: 14px;">📊 Threshold
                 <div>
                     <div style="display: flex; justify-content: space-between; font-size: 10px; color: #999; margin-bottom: 4px;">
                         <span>BUFFER</span>
-                        <span><span id="debugBufferChunks">0</span> (<span id="debugMinChunks">-</span>-<span id="debugMaxChunks">-</span>)</span>
+                        <span><span id="debugBufferSeconds">0.0</span>s / 60s</span>
                     </div>
                     <div style="height: 20px; background: #1a1a1a; border-radius: 10px; overflow: hidden; position: relative;">
                         <div id="bufferBar" style="height: 100%; background: linear-gradient(90deg, #8b5cf6 0%, #a78bfa 100%); width: 0%; transition: width 0.2s ease; box-shadow: 0 0 10px rgba(139, 92, 246, 0.5);"></div>
@@ -681,7 +671,7 @@ <h3 style="margin-bottom: 10px; color: #06b6d4; font-size: 14px; display: flex;
         </div>
 
         <!-- System Stats Row -->
-        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-bottom: 8px;">
+        <div style="display: grid; grid-template-columns: minmax(0, 1fr) minmax(0, 1fr); gap: 10px; margin-bottom: 8px;">
             <!-- VRAM Usage Chart -->
             <div style="background: rgba(0, 0, 0, 0.9); color: white; padding: 12px; border-radius: 10px;">
                 <h3 style="margin-bottom: 10px; color: #f59e0b; font-size: 14px; display: flex; justify-content: space-between; align-items: center;">
@@ -692,7 +682,7 @@ <h3 style="margin-bottom: 10px; color: #f59e0b; font-size: 14px; display: flex;
                     <div style="flex: 1; height: 140px;">
                         <canvas id="vramChart"></canvas>
                     </div>
-                    <div id="vramLegend" style="font-size: 10px; min-width: 180px;">
+                    <div id="vramLegend" style="font-size: 10px; min-width: 140px; flex-shrink: 0;">
                         <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 4px;" title="Whisper model">
                             <span style="width: 10px; height: 10px; background: #3b82f6; border-radius: 2px; flex-shrink: 0;"></span>
                             <span style="flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;" id="modelWhisperName">Whisper</span>
@@ -771,10 +761,33 @@ <h3 style="margin-bottom: 10px; color: #8b5cf6; font-size: 14px; display: flex;
             <div class="panel" style="display: flex; flex-direction: column; max-height: none; height: 100%;">
                 <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; flex-shrink: 0;">
                     <h2 style="margin: 0;">📝 Transcript</h2>
-                    <div id="adminLagTimer" style="font-size: 12px; color: rgba(255, 255, 255, 0.6); padding: 6px 12px; background: rgba(0, 0, 0, 0.3); border-radius: 8px; display: none;">
-                        Lag: <span id="adminLagSeconds">0</span>s
+                    <div style="display: flex; gap: 8px; align-items: center;">
+                        <span id="botStatusBadge" style="font-size: 11px; padding: 3px 10px; border-radius: 12px; background: #374151; color: #9ca3af; display: none;">Bot disconnected</span>
+                        <div id="adminLagTimer" style="font-size: 12px; color: rgba(255, 255, 255, 0.6); padding: 6px 12px; background: rgba(0, 0, 0, 0.3); border-radius: 8px; display: none;">
+                            Lag: <span id="adminLagSeconds">0</span>s
+                        </div>
                     </div>
                 </div>
+                <!-- Live "currently speaking" banner -->
+                <div id="activeSpeakerBanner" style="display: none; margin-bottom: 8px; padding: 8px 12px; background: linear-gradient(90deg, rgba(34,197,94,0.15), rgba(34,197,94,0.05)); border-radius: 8px; border: 1px solid rgba(34,197,94,0.4); flex-shrink: 0; font-size: 13px; color: #22c55e; font-weight: 600;">
+                    <span style="display: inline-block; width: 8px; height: 8px; background: #22c55e; border-radius: 50%; margin-right: 8px; animation: pulse 1.5s infinite;"></span>
+                    <span id="activeSpeakerText">—</span>
+                </div>
+
+                <!-- Meet bot control: enter a Meet URL/ID and start/stop the bot -->
+                <div id="meetBotControls" style="margin-bottom: 8px; padding: 10px 12px; background: rgba(255,255,255,0.04); border-radius: 8px; border: 1px solid rgba(255,255,255,0.1); flex-shrink: 0; display: flex; gap: 8px; align-items: center;">
+                    <input id="meetUrlInput" type="text" placeholder="meet.google.com/xxx-yyyy-zzz or just xxx-yyyy-zzz"
+                           style="flex: 1; padding: 6px 10px; font-size: 12px; background: rgba(0,0,0,0.4); color: #fff; border: 1px solid rgba(255,255,255,0.15); border-radius: 6px; outline: none;">
+                    <button id="startBotBtn" onclick="startBot()"
+                            style="padding: 6px 14px; font-size: 12px; background: linear-gradient(135deg, #22c55e 0%, #16a34a 100%); border: none; color: white; cursor: pointer; border-radius: 6px; font-weight: 600;">🤖 Start bot</button>
+                    <button id="stopBotBtn" onclick="stopBot()" style="display: none; padding: 6px 14px; font-size: 12px; background: linear-gradient(135deg, #ef4444 0%, #dc2626 100%); border: none; color: white; cursor: pointer; border-radius: 6px; font-weight: 600;">⏹ Stop bot</button>
+                </div>
+
+                <!-- Meet participant roster (shown when bot is connected) -->
+                <div id="meetRosterPanel" style="display: none; margin-bottom: 8px; padding: 8px 12px; background: rgba(255,255,255,0.05); border-radius: 8px; border: 1px solid rgba(255,255,255,0.1); flex-shrink: 0;">
+                    <div style="font-size: 11px; color: #9ca3af; margin-bottom: 4px;">PARTICIPANTS</div>
+                    <ul id="meetRosterList" style="margin: 0; padding: 0; list-style: none; max-height: 80px; overflow-y: auto; font-size: 12px;"></ul>
+                </div>
                 <div id="transcriptContent" class="translation-content" style="font-size: 16px; line-height: 1.7; flex: 1; overflow-y: auto;">
                     <div class="empty-state">
                         <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
@@ -1801,26 +1814,25 @@ <h2>Settings</h2>
                     }
                 }
 
-                // Update buffer stats and bar
-                const debugBufferChunks = document.getElementById('debugBufferChunks');
-                const debugMinChunks = document.getElementById('debugMinChunks');
-                const debugMaxChunks = document.getElementById('debugMaxChunks');
-                if (debugBufferChunks) debugBufferChunks.textContent = data.buffer_chunks;
-                if (debugMinChunks) debugMinChunks.textContent = data.min_chunks;
-                if (debugMaxChunks) debugMaxChunks.textContent = data.max_chunks;
-
-                // Update buffer bar (scale between min and max)
-                const bufferPercent = data.max_chunks > 0
-                    ? Math.min(100, (data.buffer_chunks / data.max_chunks) * 100)
-                    : 0;
+                // Buffer: display in seconds, scale bar against a fixed 60 s cap
+                // (matches BOT_MAX_BATCH_SEC on the server). 1 chunk = 1024/16000 s.
+                const CHUNK_SEC = 1024 / 16000;
+                const BUFFER_MAX_SEC = 60;
+                const bufferSeconds = (data.buffer_chunks || 0) * CHUNK_SEC;
+                const minSeconds = (data.min_chunks || 0) * CHUNK_SEC;
+                const maxSeconds = Math.min(BUFFER_MAX_SEC, (data.max_chunks || 0) * CHUNK_SEC);
+
+                const debugBufferSeconds = document.getElementById('debugBufferSeconds');
+                if (debugBufferSeconds) debugBufferSeconds.textContent = bufferSeconds.toFixed(1);
+
+                const bufferPercent = Math.min(100, (bufferSeconds / BUFFER_MAX_SEC) * 100);
                 const bufferBar = document.getElementById('bufferBar');
                 if (bufferBar) {
                     bufferBar.style.width = bufferPercent + '%';
 
-                    // Change color based on buffer state
-                    if (data.buffer_chunks >= data.max_chunks) {
+                    if (bufferSeconds >= maxSeconds) {
                         bufferBar.style.background = 'linear-gradient(90deg, #ef4444 0%, #f87171 100%)'; // Red when full
-                    } else if (data.buffer_chunks >= data.min_chunks) {
+                    } else if (bufferSeconds >= minSeconds) {
                         bufferBar.style.background = 'linear-gradient(90deg, #f59e0b 0%, #fbbf24 100%)'; // Yellow when at min
                     } else {
                         bufferBar.style.background = 'linear-gradient(90deg, #8b5cf6 0%, #a78bfa 100%)'; // Purple otherwise
@@ -1840,6 +1852,85 @@ <h2>Settings</h2>
             }
         });
 
+        // ── Meet bot events ─────────────────────────────────────────────────
+
+        socket.on('bot_status', (data) => {
+            // Mirror the bot's connection flag onto the badge, roster panel, start/stop buttons and silence bar.
+            const online = data.connected;
+            const accent = online ? '#22c55e' : '#ef4444';
+            const toggle = (id, shownAs, visible) => { const node = document.getElementById(id); if (node) node.style.display = visible ? shownAs : 'none'; };
+            const badge = document.getElementById('botStatusBadge');
+            if (badge) {
+                badge.style.display = 'inline-block';
+                badge.textContent = online ? '🤖 Bot connected' : '🤖 Bot disconnected';
+                badge.style.background = online ? 'rgba(34,197,94,0.2)' : 'rgba(239,68,68,0.2)';
+                badge.style.color = accent;
+                badge.style.border = `1px solid ${accent}`;
+            }
+            toggle('meetRosterPanel', 'block', online);
+            toggle('startBotBtn', 'inline-block', !online);
+            toggle('stopBotBtn', 'inline-block', online);
+            // Silence detection is only used in mic-mode — hide its bar when the bot is driving batching.
+            toggle('silenceRow', 'block', !online);
+        });
+
+        // Start / stop the Meet bot via backend subprocess.
+        function normalizeMeetUrl(raw) {
+            // Returns '' for blank input, a full https URL when one can be derived, else the trimmed text as-is.
+            const s = (raw || '').trim();
+            if (!s || /^https?:\/\//i.test(s)) return s;  // blank, or already absolute (URL schemes are case-insensitive)
+            if (s.includes('meet.google.com')) return 'https://' + s.replace(/^\/+/, '');
+            // Just a meeting ID like "stu-tyen-aed"
+            if (/^[a-z]{3}-[a-z]{4}-[a-z]{3}$/i.test(s)) return 'https://meet.google.com/' + s;
+            return s;
+        }
+        function startBot() {
+            // Normalise the text in #meetUrlInput and emit 'start_meet_bot'; alert instead when nothing usable was typed.
+            const url = normalizeMeetUrl(document.getElementById('meetUrlInput')?.value || '');
+            if (url) socket.emit('start_meet_bot', { url });
+            else alert('Enter a Meet URL or meeting ID (e.g. xxx-yyyy-zzz)');
+        }
+        function stopBot() { socket.emit('stop_meet_bot'); }  // Emits 'stop_meet_bot' with no payload on `socket`.
+
+        socket.on('meet_bot_control_result', ({ ok, error }) => {  // surface failed start/stop requests to the operator
+            if (!ok) alert(`Bot control failed: ${error || 'unknown'}`);
+        });
+
+        socket.on('active_speakers', (data) => {
+            // Drives the live "currently speaking" banner: visible with a comma-separated
+            // name list while data.speakers is non-empty, hidden otherwise.
+            const banner = document.getElementById('activeSpeakerBanner');
+            const text = document.getElementById('activeSpeakerText');
+            if (!(banner && text)) return;
+            const names = (data && data.speakers) || [];
+            const someoneTalking = names.length !== 0;
+            banner.style.display = someoneTalking ? 'block' : 'none';
+            // The label text is only rewritten when there is someone to show.
+            if (someoneTalking) text.textContent = `Speaking: ${names.join(', ')}`;
+        });
+
+        socket.on('meet_roster', (data) => {
+            const el = document.getElementById('meetRosterList');
+            const rosterPanel = document.getElementById('meetRosterPanel');
+            if (!el) return;
+            const names = (data && data.participants) || [];
+            // Names originate from meeting participants: insert them as text, never as HTML.
+            el.textContent = '';
+            names.forEach(n => { const li = document.createElement('li'); li.style.cssText = 'padding:2px 0;color:#ccc;'; li.textContent = n; el.appendChild(li); });
+            if (rosterPanel && names.length > 0) rosterPanel.style.display = 'block';
+        });
+
+        socket.on('rename_speaker', (data) => {
+            // Retroactively replace SPEAKER_XX labels in the transcript panel.
+            const tc = document.getElementById('transcriptContent');
+            if (!tc || !data || !data.speaker_id || !data.name) return;
+            // Rewrite text nodes only: replacing over innerHTML would parse the name as markup, could
+            // hit tag/attribute text, and treated speaker_id as a regex. split/join matches it literally.
+            const walker = document.createTreeWalker(tc, NodeFilter.SHOW_TEXT);
+            for (let node = walker.nextNode(); node; node = walker.nextNode())
+                if (node.nodeValue.includes(data.speaker_id)) node.nodeValue = node.nodeValue.split(data.speaker_id).join(data.name);
+        });
+
         function updateStatus() {
             const statusEl = document.getElementById('statusIndicator');
             const startBtn = document.getElementById('startBtn');
@@ -1859,7 +1950,7 @@ <h2>Settings</h2>
                 statusEl.innerHTML = '<span class="status-dot"></span><span>Listening</span>';
                 startBtn.disabled = true;
                 stopBtn.disabled = false;
-                visualizer.style.display = 'inline-flex';
+                visualizer.style.display = 'flex';
                 console.log('[updateStatus] Set stopBtn.disabled = false');
             } else {
                 statusEl.className = 'status idle';
diff --git a/templates/viewer.html b/templates/viewer.html
index aa01b97..a299793 100644
--- a/templates/viewer.html
+++ b/templates/viewer.html
@@ -624,6 +624,11 @@ <h1>Select Your Language</h1>
                     <button class="change-language-btn" onclick="changeLanguage()">Change</button>
                 </div>
             </div>
+            <!-- Live "currently speaking" banner -->
+            <div id="activeSpeakerBanner" style="display: none; margin: 8px 12px 0; padding: 10px 14px; background: linear-gradient(90deg, rgba(139,92,246,0.18), rgba(139,92,246,0.05)); border-radius: 10px; border: 1px solid rgba(139,92,246,0.45); font-size: 14px; color: #a78bfa; font-weight: 600;">
+                <span style="display: inline-block; width: 9px; height: 9px; background: #a78bfa; border-radius: 50%; margin-right: 10px; animation: pulse 1.5s infinite;"></span>
+                <span id="activeSpeakerText">—</span>
+            </div>
             <div class="panel-content" id="translationsContainer">
                 <div class="no-data">
                     <div class="icon">...</div>
@@ -825,6 +830,7 @@ <h1>Select Your Language</h1>
                 // Apply new-message class to the newest items (at the start after reverse)
                 const isNew = index < newCount;
                 itemDiv.className = isNew ? 'translation-item new-message' : 'translation-item';
+                if (item.speaker) itemDiv.dataset.speaker = item.speaker;
 
                 const timeDiv = document.createElement('div');
                 timeDiv.className = 'time';
@@ -835,6 +841,15 @@ <h1>Select Your Language</h1>
                     hour12: false
                 });
 
+                // Speaker name line (prominent, colored)
+                if (item.speaker) {
+                    const speakerDiv = document.createElement('div');
+                    speakerDiv.className = 'speaker';
+                    speakerDiv.style.cssText = 'font-weight:600; color:#8b5cf6; font-size:13px; margin-bottom:2px;';
+                    speakerDiv.textContent = item.speaker;
+                    itemDiv.appendChild(speakerDiv);
+                }
+
                 const textDiv = document.createElement('div');
                 textDiv.className = 'text';
                 textDiv.textContent = item.text;
@@ -896,6 +911,7 @@ <h1>Select Your Language</h1>
                 newSegments.forEach(segment => {
                     displayedTranslations.push({
                         text: segment.text,
+                        speaker: segment.speaker || null,
                         timestamp: Date.now()
                     });
                 });
@@ -1046,6 +1062,30 @@ <h1>Select Your Language</h1>
             document.getElementById('connectionStatus').textContent = 'Disconnected';
         });
 
+        // Retroactively rename SPEAKER_XX labels to real names for already-displayed segments.
+        socket.on('rename_speaker', (data) => {
+            const { speaker_id: oldLabel, name: newName } = data || {};
+            if (!oldLabel || !newName) return;
+            // Relabel the cached entries, then re-render only when at least one of them changed.
+            const stale = displayedTranslations.filter(entry => entry.speaker === oldLabel);
+            stale.forEach(entry => { entry.speaker = newName; });
+            if (stale.length > 0) renderTranslations(0);
+        });
+
+        // Live "who is currently speaking" banner.
+        socket.on('active_speakers', (data) => {
+            // Banner is visible with a comma-separated name list while data.speakers
+            // is non-empty, and hidden otherwise.
+            const banner = document.getElementById('activeSpeakerBanner');
+            const text = document.getElementById('activeSpeakerText');
+            if (!(banner && text)) return;
+            const names = (data && data.speakers) || [];
+            const someoneTalking = names.length !== 0;
+            banner.style.display = someoneTalking ? 'block' : 'none';
+            // The label text is only rewritten when there is someone to show.
+            if (someoneTalking) text.textContent = `Speaking: ${names.join(', ')}`;
+        });
+
         socket.io.on('reconnect_attempt', (attemptNumber) => {
             reconnectAttempts = attemptNumber;
             document.getElementById('connectionDot').className = 'status-dot reconnecting';