etiennechabert · etiennechabert · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/.gitignore b/.gitignore
@@ -208,4 +208,5 @@ __marimo__/
 transcript.txt
 /transcripts
 polyglot.lock
+viewer_password.txt
 .claude/settings.local.json
diff --git a/app.py b/app.py
diff --git a/config.py b/config.py
@@ -211,6 +211,14 @@ def get_translation_lang_code(cls, iso_code):
     SAMPLE_RATE = 16000  # Whisper expects 16kHz audio
     CHUNK_SIZE = 1024  # Audio buffer chunk size
 
+    # Audio source — "wasapi" uses the WASAPI loopback device (original path);
+    # "meet_bot" receives 16 kHz PCM16 from the Playwright bot over SocketIO.
+    AUDIO_SOURCE = os.getenv("AUDIO_SOURCE", "meet_bot")
+
+    # Meet bot SocketIO receiver.  The bot connects to /meet_bot on whatever
+    # port Polyglot is already running on — no separate port needed.
+    MEET_BOT_ENABLED = os.getenv("MEET_BOT_ENABLED", "True").lower() in ("true", "1", "yes")
+
     # Minimum audio level to process (prevents hallucinations during silence)
     # If average audio level is below this, skip transcription
     MIN_AUDIO_LEVEL = 0.01

diff --git a/meet-bot/.gitignore b/meet-bot/.gitignore
@@ -0,0 +1,4 @@
+node_modules/
+package-lock.json
+*.log
+chrome-profile/
diff --git a/meet-bot/README.md b/meet-bot/README.md
@@ -0,0 +1,61 @@
+# Polyglot Meet Bot
+
+Headless Chromium bot that joins a Google Meet as an anonymous guest. Future phases will stream meeting audio and active-speaker names back to the Polyglot server; this initial phase validates only the join-and-get-admitted flow.
+
+## Setup
+
+```bash
+cd meet-bot
+npm install
+npx playwright install chromium
+```
+
+Node 20+ required.
+
+## Run
+
+```bash
+# Typical use — fully headless:
+node index.js --url "https://meet.google.com/xxx-yyyy-zzz"
+
+# Watch what the bot sees (debug Meet UI issues):
+node index.js --url "https://meet.google.com/xxx-yyyy-zzz" --headful
+
+# Override the displayed name (default "Polyglot Bot"):
+node index.js --url "..." --name "Transcription Bot"
+```
+
+## What it does (phase 1)
+
+1. Launches a fresh, cookieless Chromium — no Google sign-in.
+2. Opens the Meet URL, waits for the pre-join screen.
+3. Fills the "Your name" field, mutes mic + camera, clicks **Ask to join**.
+4. Waits up to 2 minutes for the host to admit it.
+5. Once admitted, stays connected until the meeting ends or it's removed.
+
+Exit codes:
+
+| Code | Meaning |
+|------|---------|
+| 0    | Joined successfully, then meeting ended / bot removed cleanly |
+| 1    | Crash / unexpected error (see stderr) |
+| 2    | Bad CLI arguments |
+| 3    | Host explicitly denied the join request |
+| 4    | Timed out in the lobby (host never admitted) |
+
+## Testing
+
+The easy test: open Meet in a normal browser tab, start a meeting as host, run the bot with `--headful --url <link>`, and admit it from the participants panel when it shows up as "Polyglot Bot". You should see the bot's Chromium window join the call.
+
+## What's NOT here yet
+
+- Audio capture (tab audio → 16 kHz PCM16 → Polyglot WebSocket)
+- DOM scraping of active-speaker name and participant roster
+- WebSocket connection to the Polyglot backend
+- Control channel (join/leave commands from Polyglot's admin UI)
+
+Those land in subsequent phases once we've validated the bot can reliably get into meetings.
+
+## Selectors
+
+All Meet DOM selectors live in `selectors.js`. When Meet ships a UI change and the bot breaks, that's the file to update — nothing else should need touching.
diff --git a/meet-bot/audio.js b/meet-bot/audio.js
@@ -0,0 +1,127 @@
+// Phase 2: in-browser audio capture for the Meet bot.
+//
+// Two-part design:
+//   1. RTC_INIT_SCRIPT — must be registered via context.addInitScript() BEFORE
+//      page.goto() so it runs before Meet initialises its RTCPeerConnections.
+//      It patches RTCPeerConnection to funnel every remote audio track into a
+//      single shared MediaStream (window.__pgStream).
+//
+//   2. setupAudioCapture(page, onChunk) — called after the bot has joined.
+//      Injects an AudioWorklet that downsamples all audio in __pgStream to
+//      16 kHz mono PCM16, buffers into 20 ms frames, and sends each frame
+//      back to Node via an exposed function.
+
+// ── 1. RTC patch (init script) ───────────────────────────────────────────────
+//
+// Source of the script that replaces window.RTCPeerConnection with a subclass
+// whose 'track' listener copies every audio track into window.__pgStream
+// (each track id at most once).
+//
+// MediaStream fires 'addtrack' only for tracks added by the user agent, never
+// for script calls to addTrack(). The patch therefore dispatches the event
+// itself, so 'addtrack' listeners on __pgStream are told about every track
+// that arrives after they subscribed.
+
+export const RTC_INIT_SCRIPT = `(function () {
+  const shared = (window.__pgStream = new MediaStream());
+  const Native = window.RTCPeerConnection;
+  if (!Native) return; // nothing to patch; the (empty) shared stream still exists
+  class Patched extends Native {
+    constructor(...args) {
+      super(...args);
+      this.addEventListener('track', ({ track }) => {
+        if (track.kind !== 'audio' || shared.getTrackById(track.id)) return;
+        shared.addTrack(track);
+        // Script-added tracks raise no event on their own: announce it.
+        shared.dispatchEvent(new MediaStreamTrackEvent('addtrack', { track }));
+      });
+    }
+  }
+  window.RTCPeerConnection = Patched;
+})();`;
+
+// ── 2. AudioWorklet processor source ─────────────────────────────────────────
+//
+// Nearest-neighbour resampler: keeps a fractional read position across
+// process() calls so downsampling stays consistent across block boundaries.
+// Only the first channel of the first input is read.
+//
+// Output samples are written straight into a fixed 320-sample Int16Array
+// (20 ms @ 16 kHz). Each time it fills, the frame is posted to the main thread
+// as { pcm: ArrayBuffer, ts: number } (buffer transferred, ts = Date.now() at
+// the moment the frame completed) and a fresh frame is started. Using a
+// fixed-size typed array avoids growing and splicing a plain array once per
+// sample inside the audio callback.
+
+const WORKLET_SRC = `
+const TARGET_RATE = 16000;  // output sample rate in Hz
+const FRAME_SAMPLES = 320;  // 20 ms @ 16 kHz
+
+class PgResampler extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this._ratio = sampleRate / TARGET_RATE; // e.g. 3.0 for 48 kHz input
+    this._idx = 0;                          // fractional read position in the block
+    this._frame = new Int16Array(FRAME_SAMPLES);
+    this._fill = 0;                         // samples written to _frame so far
+  }
+
+  process(inputs) {
+    const ch = inputs[0]?.[0];
+    if (!ch) return true; // nothing connected yet; stay alive
+
+    while (this._idx < ch.length) {
+      const s = Math.max(-1, Math.min(1, ch[Math.floor(this._idx)]));
+      this._frame[this._fill++] = Math.round(s * 32767);
+      this._idx += this._ratio;
+
+      if (this._fill === FRAME_SAMPLES) {
+        const pcm = this._frame.buffer;
+        this.port.postMessage({ pcm, ts: Date.now() }, [pcm]);
+        // The posted buffer was transferred away; start a new frame.
+        this._frame = new Int16Array(FRAME_SAMPLES);
+        this._fill = 0;
+      }
+    }
+    this._idx -= ch.length; // carry fractional offset to next block
+    return true;
+  }
+}
+registerProcessor('pg-resampler', PgResampler);
+`;
+
+// ── 3. setupAudioCapture ──────────────────────────────────────────────────────
+//
+// Wires the page's audio into Node.
+//
+//   page     Playwright page the bot is running in; window.__pgStream must
+//            already exist there.
+//   onChunk  onChunk(pcm: Buffer, captureTs: number), invoked once per message
+//            posted by the 'pg-resampler' worklet. captureTs is the timestamp
+//            the worklet attached to that message.
+//
+// Resolves once the in-page audio graph has been set up.
+
+export async function setupAudioCapture(page, onChunk) {
+  // Node side of the bridge: decode the base64 payload and hand it on.
+  const deliverChunk = (b64, ts) => {
+    onChunk(Buffer.from(b64, 'base64'), ts);
+  };
+  await page.exposeFunction('__pgChunk', deliverChunk);
+
+  // Everything below runs inside the page, so it must be self-contained.
+  const captureInPage = async (workletSource) => {
+    // Load the processor from a blob URL so no file needs to be served.
+    const blob = new Blob([workletSource], { type: 'application/javascript' });
+    const moduleUrl = URL.createObjectURL(blob);
+    const audioCtx = new AudioContext();
+    await audioCtx.resume(); // the bot never produces a user gesture
+    await audioCtx.audioWorklet.addModule(moduleUrl);
+    URL.revokeObjectURL(moduleUrl);
+
+    const resampler = new AudioWorkletNode(audioCtx, 'pg-resampler');
+
+    // Page side of the bridge: the PCM bytes travel as a base64 string.
+    resampler.port.onmessage = (msg) => {
+      const { pcm, ts } = msg.data;
+      const binary = Array.from(new Uint8Array(pcm), (byte) => String.fromCharCode(byte)).join('');
+      window.__pgChunk(btoa(binary), ts);
+    };
+
+    // Feed one track into the resampler through its own single-track stream.
+    const tapTrack = (track) => {
+      const solo = new MediaStream([track]);
+      audioCtx.createMediaStreamSource(solo).connect(resampler);
+    };
+
+    // Tracks collected before capture started.
+    for (const track of window.__pgStream.getAudioTracks()) {
+      tapTrack(track);
+    }
+
+    // Tracks announced later through the shared stream's 'addtrack' event.
+    window.__pgStream.addEventListener('addtrack', ({ track }) => {
+      if (track.kind === 'audio') tapTrack(track);
+    });
+
+    // Fallback path: also tap <audio> elements, marking each with _pg so it
+    // is only attempted once.
+    const tapElement = (el) => {
+      if (el._pg) return;
+      el._pg = true;
+      try {
+        const elementSource = audioCtx.createMediaElementSource(el);
+        elementSource.connect(resampler);
+        elementSource.connect(audioCtx.destination); // keep normal playback going
+      } catch (_) { /* best effort: the element may already be claimed */ }
+    };
+    const scanAudioElements = () => {
+      for (const el of document.querySelectorAll('audio')) {
+        tapElement(el);
+      }
+    };
+    scanAudioElements();
+    const watcher = new MutationObserver(scanAudioElements);
+    watcher.observe(document.documentElement, { childList: true, subtree: true });
+  };
+
+  await page.evaluate(captureInPage, WORKLET_SRC);
+}