diff --git a/.gitignore b/.gitignore index 84ad134..7b1ab87 100644 --- a/.gitignore +++ b/.gitignore @@ -208,4 +208,5 @@ __marimo__/ transcript.txt /transcripts polyglot.lock +viewer_password.txt .claude/settings.local.json diff --git a/app.py b/app.py index 6a98f48..b314ca2 100644 --- a/app.py +++ b/app.py @@ -4,6 +4,7 @@ """ import argparse +import collections import os import queue import random @@ -11,6 +12,7 @@ import threading import time import warnings +import wave import webbrowser from concurrent.futures import ThreadPoolExecutor from datetime import datetime @@ -21,9 +23,9 @@ def log(message, tag=None): """Print a log message with timestamp. Tag is optional prefix like [SUMMARY]""" ts = datetime.now().strftime('%H:%M:%S') if tag: - print(f"[{ts}] [{tag}] {message}") + print(f"[{ts}] [{tag}] {message}", flush=True) else: - print(f"[{ts}] {message}") + print(f"[{ts}] {message}", flush=True) # Word list for generating memorable passphrases @@ -319,6 +321,30 @@ def cleanup_lock_file(): meeting_start_time = None # Track when meeting started (first transcription) summary_pending = False # Track if a summary generation is waiting +# ── Meet bot state (Phase 4) ────────────────────────────────────────────────── +# Deque of closed speaker segments: (start_ms, end_ms, display_name). +# Populated by the /meet_bot SocketIO namespace; consumed by resolve_speaker_identity(). 
+speaker_timeline = collections.deque(maxlen=500) +# Known participants: display_name -> first_seen_ms +meet_participants = {} +# Open (unended) speaker intervals: display_name -> start_ms +_active_speaker_starts = {} +# Whether the Playwright bot is currently connected +bot_connected = False +# Accumulation buffer for rechunking 320-sample bot frames → CHUNK_SIZE frames +_bot_pcm_buffer = np.array([], dtype=np.float32) +# Wall-clock ms of the most recently received bot audio frame (for Phase 5 time-alignment) +_last_capture_ts_ms = None +# WAV file writer — captures the full raw 16 kHz mono meeting audio for retranscription +_bot_wav_writer = None +_bot_wav_lock = threading.Lock() +# Speaker-switch batching: who the bot says is currently speaking, and whether the most recent +# speaker_start differs from the previous one (triggers process_audio to flush the current batch). +_current_bot_speaker = None +_pending_speaker_switch = False +# Maximum batch length when bot is connected (gives Whisper more context per speaker turn). +BOT_MAX_BATCH_SEC = 60 + def load_transcript_segments(transcript_path): """Load existing transcript file into all_meeting_segments for summarization. @@ -1236,8 +1262,57 @@ def perform_speaker_diarization(audio_data, sample_rate): return None +def resolve_speaker_identity(speaker_segments, batch_end_ts_ms, audio_duration_secs): + """Match pyannote speaker IDs to real names via speaker_timeline overlap. + + Returns {original_pyannote_id: real_name} for speakers with >= 30% time overlap. + Considers both closed (speaker_timeline) and still-open (_active_speaker_starts) + intervals, since a speaker who started talking during this batch may not have + emitted speaker_end yet when the transcription thread kicks off. + """ + if batch_end_ts_ms is None or not speaker_segments: + return {} + + # Build the full set of speaker intervals to consider. 
+ intervals = list(speaker_timeline) # closed: (start_ms, end_ms, name) + now_ms = int(time.time() * 1000) + for name, start_ms in _active_speaker_starts.items(): + intervals.append((start_ms, now_ms, name)) + + if not intervals: + return {} + + from collections import defaultdict + batch_start_ms = batch_end_ts_ms - audio_duration_secs * 1000 + + speaker_times = defaultdict(list) + for seg in speaker_segments: + seg_start_ms = batch_start_ms + seg["start"] * 1000 + seg_end_ms = batch_start_ms + seg["end"] * 1000 + speaker_times[seg["speaker"]].append((seg_start_ms, seg_end_ms)) + + resolved = {} + for original_id, time_ranges in speaker_times.items(): + name_overlap = defaultdict(float) + for seg_start_ms, seg_end_ms in time_ranges: + for (tl_start, tl_end, name) in intervals: + overlap = max(0.0, min(seg_end_ms, tl_end) - max(seg_start_ms, tl_start)) + if overlap > 0: + name_overlap[name] += overlap + + if not name_overlap: + continue + + best_name = max(name_overlap, key=name_overlap.get) + total_ms = sum(end - start for start, end in time_ranges) + if total_ms > 0 and name_overlap[best_name] / total_ms >= 0.30: + resolved[original_id] = best_name + + return resolved + + @torch.inference_mode() -def transcribe_and_translate(audio_data, audio_duration): +def transcribe_and_translate(audio_data, audio_duration, batch_end_ts_ms=None): """Background thread for transcription and translation with speaker diarization""" global is_processing, all_meeting_segments @@ -1328,6 +1403,13 @@ def normalize_caps(text): speaker_mapping[original_id] = f"SPEAKER_{speaker_counter:02d}" speaker_counter += 1 + # Phase 5: resolve pyannote IDs → real names from speaker_timeline. 
+ resolved_names = resolve_speaker_identity(speaker_segments, batch_end_ts_ms, audio_duration) + if resolved_names: + for orig_id, real_name in resolved_names.items(): + speaker_xx = speaker_mapping.get(orig_id, orig_id) + log(f"Resolved {speaker_xx} → {real_name}", "BOT") + socketio.emit("rename_speaker", {"speaker_id": speaker_xx, "name": real_name}) # Extract all words with timestamps from chunks all_words = [] @@ -1361,7 +1443,9 @@ def normalize_caps(text): if overlap > max_overlap: max_overlap = overlap - best_speaker = speaker_mapping.get(seg["speaker"], seg["speaker"]) + orig = seg["speaker"] + # Prefer resolved real name; fall back to renumbered SPEAKER_XX. + best_speaker = resolved_names.get(orig, speaker_mapping.get(orig, orig)) best_segment_idx = idx words_with_speakers.append({ @@ -1705,10 +1789,27 @@ def process_audio(): buffer.append(chunk) - # Process when we detect end of sentence (silence after minimum audio) OR max buffer reached - silence_detected = len(buffer) >= min_chunks and silence_counter >= audio_thresholds["silence_chunks"] + # Bot mode: only flush on speaker switch or 60 s cap. No silence + # detection — a speaker's natural pauses emit speaker_end/start + # toggles and we explicitly do NOT flush on those. Mic-only mode + # falls back to the original level-based silence heuristic. + global _pending_speaker_switch + bot_mode = bot_connected + if bot_mode: + max_chunks = int(actual_sample_rate * BOT_MAX_BATCH_SEC / CHUNK_SIZE) + + speaker_switched = bot_mode and _pending_speaker_switch and len(buffer) >= min_chunks + if speaker_switched: + _pending_speaker_switch = False + + # Level-based silence detection — only used when bot isn't driving batching. 
+ silence_detected = ( + not bot_mode + and len(buffer) >= min_chunks + and silence_counter >= audio_thresholds["silence_chunks"] + ) max_length_reached = len(buffer) >= max_chunks - should_process = silence_detected or max_length_reached + should_process = silence_detected or max_length_reached or speaker_switched if should_process: is_processing = True # Set lock @@ -1753,10 +1854,13 @@ def process_audio(): # Calculate audio duration audio_duration = len(audio_resampled) / SAMPLE_RATE + # Snapshot the wall-clock anchor for Phase 5 speaker resolution. + batch_end_ts = _last_capture_ts_ms + # Launch background thread for transcription and translation # This keeps the main loop responsive for WebSocket updates processing_thread = threading.Thread( - target=transcribe_and_translate, args=(audio_resampled, audio_duration), daemon=True + target=transcribe_and_translate, args=(audio_resampled, audio_duration, batch_end_ts), daemon=True ) processing_thread.start() @@ -2201,10 +2305,18 @@ def start_listening_internal(): if not is_listening: is_listening = True - # Start audio stream + # Start the processing thread first (same for both audio sources). audio_thread = threading.Thread(target=process_audio, daemon=True) audio_thread.start() + # Meet-bot path: audio arrives over SocketIO at 16 kHz mono; skip PyAudio entirely. + if Config.AUDIO_SOURCE == "meet_bot": + actual_sample_rate = Config.SAMPLE_RATE # 16000 + num_channels = 1 + print("[AUDIO] Source: Meet bot (waiting for bot to connect and stream audio)") + socketio.emit("status", {"listening": True}) + return + # Initialize PyAudio p_audio = pyaudio.PyAudio() @@ -2491,6 +2603,217 @@ def handle_broadcast_manual_summary(data): emit('manual_summary_broadcast', {'success': True, 'languages_sent': list(translations_cache.keys())}) +# ── Meet bot SocketIO namespace (Phase 4) ──────────────────────────────────── +# +# The Playwright bot connects here as a socket.io-client at /meet_bot. 
+# It streams two event types: +# audio_frame — binary PCM16 payload + JSON meta {capture_ts_ms, sample_rate, channels} +# speaker_event — JSON {type, name, wall_clock_ms} for speaker_start/end/roster_update + +def _open_bot_wav(): + """Open a WAV file to record the full raw meeting audio from the bot.""" + global _bot_wav_writer + with _bot_wav_lock: + if _bot_wav_writer is not None or not TRANSCRIPT_FILE: + return + wav_path = TRANSCRIPT_FILE.with_suffix(".wav") + try: + _bot_wav_writer = wave.open(str(wav_path), "wb") + _bot_wav_writer.setnchannels(1) + _bot_wav_writer.setsampwidth(2) # int16 + _bot_wav_writer.setframerate(Config.SAMPLE_RATE) + log(f"Recording meeting audio → {wav_path.name}", "BOT") + except Exception as e: + log(f"Failed to open WAV: {e}", "BOT") + _bot_wav_writer = None + + +def _close_bot_wav(): + global _bot_wav_writer + with _bot_wav_lock: + if _bot_wav_writer is None: + return + try: + _bot_wav_writer.close() + log("Meeting audio file closed", "BOT") + except Exception as e: + log(f"Error closing WAV: {e}", "BOT") + _bot_wav_writer = None + + +if Config.MEET_BOT_ENABLED: + + @socketio.on("connect", namespace="/meet_bot") + def bot_connect(): + global bot_connected + bot_connected = True + log("Meet bot connected", "BOT") + _open_bot_wav() + socketio.emit("bot_status", {"connected": True}, room="admin") + + @socketio.on("disconnect", namespace="/meet_bot") + def bot_disconnect(): + global bot_connected, _active_speaker_starts + bot_connected = False + # Close any open speaker intervals so timeline stays consistent. 
+ now_ms = int(time.time() * 1000) + for name, start in list(_active_speaker_starts.items()): + speaker_timeline.append((start, now_ms, name)) + _active_speaker_starts.clear() + _close_bot_wav() + log("Meet bot disconnected", "BOT") + socketio.emit("bot_status", {"connected": False}, room="admin") + + @socketio.on("audio_frame", namespace="/meet_bot") + def bot_audio_frame(meta, data): + global _bot_pcm_buffer, _last_capture_ts_ms + if not is_listening: + return + # Persist raw int16 PCM for offline retranscription. + # writeframes (not writeframesraw) patches the header on every write so + # the file is valid even if the server is killed without clean shutdown. + if _bot_wav_writer is not None: + try: + with _bot_wav_lock: + if _bot_wav_writer is not None: + _bot_wav_writer.writeframes(data) + except Exception: + pass + # data arrives as bytes (Int16 PCM, 16 kHz mono, 320 samples = 20 ms). + frame = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0 + _bot_pcm_buffer = np.concatenate([_bot_pcm_buffer, frame]) + # Rechunk to CHUNK_SIZE (1024) so process_audio's duration math is correct. 
+ while len(_bot_pcm_buffer) >= CHUNK_SIZE: + audio_queue.put(_bot_pcm_buffer[:CHUNK_SIZE].copy()) + _bot_pcm_buffer = _bot_pcm_buffer[CHUNK_SIZE:] + _last_capture_ts_ms = meta.get("capture_ts_ms") + + @socketio.on("speaker_event", namespace="/meet_bot") + def bot_speaker_event(ev): + global meet_participants, _active_speaker_starts, _current_bot_speaker, _pending_speaker_switch + ev_type = ev.get("type") + name = ev.get("name") + ts_ms = ev.get("wall_clock_ms", int(time.time() * 1000)) + + if ev_type == "roster_update": + for p in ev.get("participants", []): + if p and p not in meet_participants: + meet_participants[p] = ts_ms + socketio.emit("meet_roster", { + "participants": list(meet_participants.keys()), + "bot_connected": True, + }, room="admin") + return + + if not name: + return + + if ev_type == "speaker_start": + _active_speaker_starts[name] = ts_ms + # Flag a pending switch if the speaker changed — process_audio uses this to flush. + if _current_bot_speaker is not None and _current_bot_speaker != name: + _pending_speaker_switch = True + _current_bot_speaker = name + log(f"Speaking: {name}", "BOT") + _broadcast_active_speakers() + + elif ev_type == "speaker_end": + start = _active_speaker_starts.pop(name, ts_ms - 1000) + speaker_timeline.append((start, ts_ms, name)) + log(f"Segment: {name} {ts_ms - start} ms", "BOT") + # Don't flush here — a single speaker's short pauses emit + # speaker_end/speaker_start toggles, so flushing on end would + # chop their turn into tiny fragments. We only flush when a + # DIFFERENT speaker starts (speaker_switched) or the 60 s cap + # is reached. _current_bot_speaker stays as the last name so + # the same speaker resuming does not trigger a switch. 
+ _broadcast_active_speakers() + + +def _broadcast_active_speakers(): + """Push the current set of speaking participants to admin + all viewers.""" + names = list(_active_speaker_starts.keys()) + payload = {"speakers": names, "wall_clock_ms": int(time.time() * 1000)} + socketio.emit("active_speakers", payload, room="admin") + for lang_code, count in active_language_viewers.items(): + if count > 0: + socketio.emit("active_speakers", payload, room=f"lang_{lang_code}") + + +# ── Admin-triggered bot spawning ───────────────────────────────────────────── +# Tracks the currently running Meet bot subprocess so we can start/stop it +# from the admin panel. Only one bot instance is supported at a time. +_meet_bot_process = None +_meet_bot_lock = threading.Lock() + + +def _bot_script_path(): + return Path(__file__).parent / "meet-bot" / "index.js" + + +@socketio.on("start_meet_bot") +def handle_start_meet_bot(data): + """Spawn the Meet bot pointing at the given URL. Admin-only.""" + global _meet_bot_process + + url = (data or {}).get("url", "").strip() + if not url.startswith("http"): + emit("meet_bot_control_result", {"ok": False, "error": "URL must start with http(s)://"}) + return + + with _meet_bot_lock: + # If a bot is already running, refuse to start another. 
+ if _meet_bot_process is not None and _meet_bot_process.poll() is None: + emit("meet_bot_control_result", {"ok": False, "error": "Bot already running — stop it first"}) + return + + script = _bot_script_path() + if not script.exists(): + emit("meet_bot_control_result", {"ok": False, "error": f"Bot script not found at {script}"}) + return + + import subprocess + try: + _meet_bot_process = subprocess.Popen( + ["node", str(script), + "--url", url, + "--polyglot-url", "http://localhost:5000", + "--headful"], + cwd=str(script.parent), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + stdin=subprocess.DEVNULL, + creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == "nt" else 0, + ) + log(f"Spawned Meet bot (pid={_meet_bot_process.pid}) → {url}", "BOT") + emit("meet_bot_control_result", {"ok": True, "pid": _meet_bot_process.pid}) + except Exception as e: + log(f"Failed to spawn bot: {e}", "BOT") + emit("meet_bot_control_result", {"ok": False, "error": str(e)}) + + +@socketio.on("stop_meet_bot") +def handle_stop_meet_bot(): + """Terminate the running Meet bot subprocess.""" + global _meet_bot_process + with _meet_bot_lock: + if _meet_bot_process is None or _meet_bot_process.poll() is not None: + emit("meet_bot_control_result", {"ok": False, "error": "No bot running"}) + _meet_bot_process = None + return + try: + _meet_bot_process.terminate() + try: + _meet_bot_process.wait(timeout=5) + except Exception: + _meet_bot_process.kill() + log(f"Stopped Meet bot subprocess", "BOT") + _meet_bot_process = None + emit("meet_bot_control_result", {"ok": True}) + except Exception as e: + emit("meet_bot_control_result", {"ok": False, "error": str(e)}) + + if __name__ == "__main__": # Check for single instance before doing anything else check_single_instance() diff --git a/config.py b/config.py index 082569f..8c9485e 100644 --- a/config.py +++ b/config.py @@ -211,6 +211,14 @@ def get_translation_lang_code(cls, iso_code): SAMPLE_RATE = 16000 # Whisper expects 16kHz 
audio CHUNK_SIZE = 1024 # Audio buffer chunk size + # Audio source — "wasapi" uses the WASAPI loopback device (original path); + # "meet_bot" receives 16 kHz PCM16 from the Playwright bot over SocketIO. + AUDIO_SOURCE = os.getenv("AUDIO_SOURCE", "meet_bot") + + # Meet bot SocketIO receiver. The bot connects to /meet_bot on whatever + # port Polyglot is already running on — no separate port needed. + MEET_BOT_ENABLED = os.getenv("MEET_BOT_ENABLED", "True").lower() in ("true", "1", "yes") + # Minimum audio level to process (prevents hallucinations during silence) # If average audio level is below this, skip transcription MIN_AUDIO_LEVEL = 0.01 diff --git a/meet-bot/.gitignore b/meet-bot/.gitignore new file mode 100644 index 0000000..793052f --- /dev/null +++ b/meet-bot/.gitignore @@ -0,0 +1,4 @@ +node_modules/ +package-lock.json +*.log +chrome-profile/ diff --git a/meet-bot/README.md b/meet-bot/README.md new file mode 100644 index 0000000..12d6416 --- /dev/null +++ b/meet-bot/README.md @@ -0,0 +1,61 @@ +# Polyglot Meet Bot + +Headless Chromium bot that joins a Google Meet as an anonymous guest. Future phases will stream meeting audio and active-speaker names back to the Polyglot server; this initial phase validates only the join-and-get-admitted flow. + +## Setup + +```bash +cd meet-bot +npm install +npx playwright install chromium +``` + +Node 20+ required. + +## Run + +```bash +# Typical use — fully headless: +node index.js --url "https://meet.google.com/xxx-yyyy-zzz" + +# Watch what the bot sees (debug Meet UI issues): +node index.js --url "https://meet.google.com/xxx-yyyy-zzz" --headful + +# Override the displayed name (default "Polyglot Bot"): +node index.js --url "..." --name "Transcription Bot" +``` + +## What it does (phase 1) + +1. Launches a fresh, cookieless Chromium — no Google sign-in. +2. Opens the Meet URL, waits for the pre-join screen. +3. Fills the "Your name" field, mutes mic + camera, clicks **Ask to join**. +4. 
Waits up to 2 minutes for the host to admit it. +5. Once admitted, stays connected until the meeting ends or it's removed. + +Exit codes: + +| Code | Meaning | +|------|---------| +| 0 | Joined successfully, then meeting ended / bot removed cleanly | +| 1 | Crash / unexpected error (see stderr) | +| 2 | Bad CLI arguments | +| 3 | Host explicitly denied the join request | +| 4 | Timed out in the lobby (host never admitted) | + +## Testing + +The easy test: open Meet in a normal browser tab, start a meeting as host, run the bot with `--headful --url <meet-url>`, and admit it from the participants panel when it shows up as "Polyglot Bot". You should see the bot's Chromium window join the call. + +## What's NOT here yet + +- Audio capture (tab audio → 16 kHz PCM16 → Polyglot WebSocket) +- DOM scraping of active-speaker name and participant roster +- WebSocket connection to the Polyglot backend +- Control channel (join/leave commands from Polyglot's admin UI) + +Those land in subsequent phases once we've validated the bot can reliably get into meetings. + +## Selectors + +All Meet DOM selectors live in `selectors.js`. When Meet ships a UI change and the bot breaks, that's the file to update — nothing else should need touching. diff --git a/meet-bot/audio.js b/meet-bot/audio.js new file mode 100644 index 0000000..508caed --- /dev/null +++ b/meet-bot/audio.js @@ -0,0 +1,127 @@ +// Phase 2: in-browser audio capture for the Meet bot. +// +// Two-part design: +// 1. RTC_INIT_SCRIPT — must be registered via context.addInitScript() BEFORE +// page.goto() so it runs before Meet initialises its RTCPeerConnections. +// It patches RTCPeerConnection to funnel every remote audio track into a +// single shared MediaStream (window.__pgStream). +// +// 2. setupAudioCapture(page, onChunk) — called after the bot has joined. 
+// Injects an AudioWorklet that downsamples all audio in __pgStream to +// 16 kHz mono PCM16, buffers into 20 ms frames, and sends each frame +// back to Node via an exposed function. + +// ── 1. RTC patch (init script) ─────────────────────────────────────────────── + +export const RTC_INIT_SCRIPT = `(function () { + window.__pgStream = new MediaStream(); + const _Orig = window.RTCPeerConnection; + class _Patched extends _Orig { + constructor(...a) { + super(...a); + this.addEventListener('track', (ev) => { + if (ev.track.kind !== 'audio') return; + if (!window.__pgStream.getTrackById(ev.track.id)) + window.__pgStream.addTrack(ev.track); + }); + } + } + window.RTCPeerConnection = _Patched; +})();`; + +// ── 2. AudioWorklet processor source ───────────────────────────────────────── +// +// Nearest-neighbour resampler: maintains a fractional index across process() +// calls so downsampling is consistent across block boundaries. +// Buffers output until 320 samples (20 ms @ 16 kHz) are ready, then posts +// { pcm: ArrayBuffer, ts: number } to the main thread. + +const WORKLET_SRC = ` +class PgResampler extends AudioWorkletProcessor { + constructor() { super(); this._idx = 0; this._buf = []; } + + process(inputs) { + const ch = inputs[0]?.[0]; + if (!ch) return true; + + const ratio = sampleRate / 16000; // e.g. 3.0 for 48 kHz input + while (this._idx < ch.length) { + const s = ch[Math.floor(this._idx)]; + this._buf.push(Math.round(Math.max(-1, Math.min(1, s)) * 32767)); + this._idx += ratio; + } + this._idx -= ch.length; // carry fractional offset to next block + + while (this._buf.length >= 320) { + const arr = new Int16Array(this._buf.splice(0, 320)); + this.port.postMessage({ pcm: arr.buffer, ts: Date.now() }, [arr.buffer]); + } + return true; + } +} +registerProcessor('pg-resampler', PgResampler); +`; + +// ── 3. 
setupAudioCapture ────────────────────────────────────────────────────── +// +// onChunk(pcm: Buffer, captureTs: number) is called for each 20 ms PCM16 frame. +// captureTs is wall-clock ms at the moment the worklet produced the frame — +// used later by resolve_speaker_identity() for time-alignment. + +export async function setupAudioCapture(page, onChunk) { + // Bridge from browser → Node. exposeFunction is safe to call post-navigate. + await page.exposeFunction('__pgChunk', (b64, ts) => { + onChunk(Buffer.from(b64, 'base64'), ts); + }); + + await page.evaluate(async (src) => { + // Inject worklet via blob URL (no local server needed). + const url = URL.createObjectURL(new Blob([src], { type: 'application/javascript' })); + const ctx = new AudioContext(); + await ctx.resume(); // bypass autoplay suspension — bot has no user gesture + await ctx.audioWorklet.addModule(url); + URL.revokeObjectURL(url); + + const node = new AudioWorkletNode(ctx, 'pg-resampler'); + + // Worklet → Node bridge: encode PCM16 ArrayBuffer as base64 string so it + // can cross the Playwright IPC boundary (exposeFunction only handles JSON). + node.port.onmessage = ({ data: { pcm, ts } }) => { + const u8 = new Uint8Array(pcm); + let s = ''; + for (let i = 0; i < u8.length; i++) s += String.fromCharCode(u8[i]); + window.__pgChunk(btoa(s), ts); + }; + + function connectTrack(track) { + // Each track gets its own MediaStreamSource; sharing a single source + // across tracks doesn't work — each source reads one stream. + ctx.createMediaStreamSource(new MediaStream([track])).connect(node); + } + + // Connect tracks already in the shared stream (joined mid-call or after + // participants were already speaking). + window.__pgStream.getAudioTracks().forEach(connectTrack); + + // Connect tracks added after this point (people join late, etc.). 
+ window.__pgStream.addEventListener('addtrack', (e) => { + if (e.track.kind === 'audio') connectTrack(e.track); + }); + + // Fallback: some Meet versions route audio through