From d8c1bd4ea26816ce7976299c7ac73611c8fe1fb6 Mon Sep 17 00:00:00 2001 From: Adriel Guerrero Date: Sun, 12 Apr 2026 20:13:52 -0700 Subject: [PATCH] #485 Implement voice chat capture, playback and spatialization - Add VoiceChat class with PortAudio capture (20ms Opus frames), stereo playback with equal-power panning, jitter buffer, distance attenuation, mic gain, music injection, and device hot-swap. - Handle 'F' subcommands in Core.cpp and route incoming voice packets to VoiceChat in GlobalHandler.cpp. --- CMakeLists.txt | 8 +- include/Audio/VoiceChat.h | 150 ++++++++ src/Audio/VoiceChat.cpp | 663 ++++++++++++++++++++++++++++++++++ src/Network/Core.cpp | 62 +++- src/Network/GlobalHandler.cpp | 15 +- vcpkg.json | 4 +- 6 files changed, 896 insertions(+), 6 deletions(-) create mode 100644 include/Audio/VoiceChat.h create mode 100644 src/Audio/VoiceChat.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 67e666ef..f9f2b292 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,8 @@ file(GLOB source_files "src/*.cpp" "src/*/*.cpp" "src/*/*.hpp" "include/*.h" "i find_package(httplib CONFIG REQUIRED) find_package(nlohmann_json CONFIG REQUIRED) find_package(CURL REQUIRED) +find_package(Opus CONFIG REQUIRED) +find_package(portaudio CONFIG REQUIRED) add_executable(${PROJECT_NAME} ${source_files}) set_target_properties(${PROJECT_NAME} PROPERTIES OUTPUT_NAME "BeamMP-Launcher") @@ -26,15 +28,15 @@ if (WIN32) find_package(ZLIB REQUIRED) find_package(OpenSSL REQUIRED) target_link_libraries(${PROJECT_NAME} PRIVATE - ZLIB::ZLIB OpenSSL::SSL OpenSSL::Crypto ws2_32 httplib::httplib nlohmann_json::nlohmann_json CURL::libcurl) + ZLIB::ZLIB OpenSSL::SSL OpenSSL::Crypto ws2_32 httplib::httplib nlohmann_json::nlohmann_json CURL::libcurl Opus::opus portaudio_static) elseif (UNIX) find_package(ZLIB REQUIRED) find_package(OpenSSL REQUIRED) target_link_libraries(${PROJECT_NAME} PRIVATE - ZLIB::ZLIB OpenSSL::SSL OpenSSL::Crypto CURL::libcurl) + ZLIB::ZLIB OpenSSL::SSL OpenSSL::Crypto CURL::libcurl Opus::opus portaudio) else(WIN32) #MINGW add_definitions("-D_WIN32_WINNT=0x0600") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Os -s --static") - target_link_libraries(${PROJECT_NAME} ssl crypto ws2_32 ssp crypt32 z CURL::libcurl) + target_link_libraries(${PROJECT_NAME} ssl crypto ws2_32 ssp crypt32 z CURL::libcurl Opus::opus portaudio) endif(WIN32) target_include_directories(${PROJECT_NAME} PRIVATE "include") diff --git a/include/Audio/VoiceChat.h b/include/Audio/VoiceChat.h new file mode 100644 index 00000000..5a021678 --- /dev/null +++ b/include/Audio/VoiceChat.h @@ -0,0 +1,150 @@ +/* + Copyright (C) 2024 BeamMP Ltd., BeamMP team and contributors. + Licensed under AGPL-3.0 (or later), see . + SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct PaStreamParameters; +struct PaStreamCallbackTimeInfo; +typedef void PaStream; +struct OpusEncoder; +struct OpusDecoder; + +class VoiceChat { +public: + using SendCallback = std::function; + using GameSendCallback = std::function; + + static VoiceChat& Instance(); + + void Init(); + void Shutdown(); + void SetSendCallback(SendCallback cb); + void SetGameSendCallback(GameSendCallback cb); + + void StartRecording(); + void StopRecording(); + void SetMuted(bool muted); + void SetVolume(int vol); + void SetMusicVolume(int vol); // 0-100, applies only to injected (music) audio + void SetMicGain(int pct); // 0-200, 100=default (4x). Scales software mic gain. + void UpdateListenerPosition(float x, float y, float z); + void UpdateListenerOrientation(float fx, float fy, float fz); + + void SetInputDevice(int deviceId); + void SetOutputDevice(int deviceId); + std::string EnumerateDevicesJson(); + + void ProcessIncomingVoice(const char* data, size_t len); + + bool IsInitialized() const { return mInitialized.load(); } + bool IsRecording() const { return mRecording.load(); } + bool IsMuted() const { return mMuted.load(); } + + static constexpr int SAMPLE_RATE = 48000; + static constexpr int CAPTURE_CHANNELS = 1; // mic: mono + static constexpr int PLAYBACK_CHANNELS = 2; // output: stereo + static constexpr int CHANNELS = CAPTURE_CHANNELS; // legacy alias for encoder + // 20ms is the standard Opus frame size for low-latency voice chat and matches + // the music-server injection cadence. Both capture and playback use the same + // size so the encoder/decoder pair is symmetric. + // If PortAudio callback scheduling causes glitches at 20ms, raise to 40ms. + static constexpr int FRAME_DURATION_MS = 20; // mic capture: 20ms frames + static constexpr int FRAME_SIZE = SAMPLE_RATE * FRAME_DURATION_MS / 1000; // 960 — capture/encoder + static constexpr int PLAYBACK_FRAME_DURATION_MS = 20; // playback callback: 20ms + static constexpr int PLAYBACK_FRAME_SIZE = SAMPLE_RATE * PLAYBACK_FRAME_DURATION_MS / 1000; // 960 + static constexpr int MAX_OPUS_PACKET = 512; + static constexpr int OPUS_BITRATE = 24000; // bps — balance of quality and bandwidth + static constexpr int JITTER_BUFFER_FRAMES = 3; // buffer 3x20ms = 60ms before playback starts (absorbs network jitter) + // Packet v2: 'F' + uint8(version) + uint8(flags) + uint16(source_id) + float[3](pos) + static constexpr size_t VOICE_HEADER_SIZE = 1 + 1 + 1 + 2 + 12 + 4 + 4; // 25 bytes: F+ver+flags+id+pos+maxDist+gain + static constexpr uint8_t VOICE_PROTOCOL_VERSION = 2; + static constexpr uint8_t VOICE_FLAG_PROXIMITY = 0x01; + static constexpr uint8_t VOICE_FLAG_INJECTED = 0x02; + +private: + VoiceChat() = default; + ~VoiceChat(); + VoiceChat(const VoiceChat&) = delete; + VoiceChat& operator=(const VoiceChat&) = delete; + + static int CaptureCallback(const void* input, void* output, + unsigned long frameCount, const PaStreamCallbackTimeInfo* timeInfo, + unsigned long statusFlags, void* userData); + + void EncodeCapturedAudio(const int16_t* samples, size_t count); + void MixAndPlay(float* output, unsigned long frameCount); + + static int PlaybackCallback(const void* input, void* output, + unsigned long frameCount, const PaStreamCallbackTimeInfo* timeInfo, + unsigned long statusFlags, void* userData); + + void OpenCaptureStream(int deviceId); + void OpenPlaybackStream(int deviceId); + void MicLevelSenderLoop(); + + std::atomic mInitialized { false }; + std::atomic mRecording { false }; + std::atomic mMuted { false }; + std::atomic mVolume { 80 }; + std::atomic mMusicVolume { 100 }; // music (injected) volume 0-100 + std::atomic mMicGainPct { 100 }; // mic gain: (pct/100)*4 = actual multiplier. 100=4x default, 800=32x max + std::atomic mMicLevel { 0.0f }; + std::atomic mLevelThreadRunning { false }; + std::thread mLevelThread; + + SendCallback mSendCallback; + GameSendCallback mGameSendCallback; + std::mutex mCallbackMutex; + + PaStream* mCaptureStream = nullptr; + int mCaptureDeviceId = -1; + OpusEncoder* mEncoder = nullptr; + std::deque mCaptureBuffer; + std::mutex mCaptureMutex; + + PaStream* mPlaybackStream = nullptr; + int mPlaybackDeviceId = -1; + int mPlaybackChannels = PLAYBACK_CHANNELS; // actual opened channel count (may fall back to 1) + + struct ClientVoice { + OpusDecoder* decoder = nullptr; + // Sample ring: written by ProcessIncomingVoice (network thread), + // read by MixAndPlay (audio callback thread) — both hold mPlaybackMutex. + // sampleReadPos is the read head; the vector is compacted back to zero in + // ProcessIncomingVoice (network thread) so the audio callback never calls + // erase(), keeping the hot path O(1). + std::vector sampleQueue; + size_t sampleReadPos = 0; + float position[3] = { 0.0f, 0.0f, 0.0f }; + float maxDistance = 0.0f; + float broadcastGain = 1.0f; // gain set by sender (car stereo volume), 0.0-1.0 + float smoothedGain = 1.0f; // interpolated gain to avoid discontinuities + float smoothedPan = 0.0f; // interpolated pan (-1..+1) for smooth rotation + uint8_t flags = 0; // last received flags (proximity, injected) + std::chrono::steady_clock::time_point lastReceived; + bool buffering = true; // jitter buffer: true until enough frames accumulated + }; + std::unordered_map mClients; + // Throttle map for "speaking" notifications — protected by mPlaybackMutex. + // Declared as a member (not static local) so it is cleared on Shutdown(). + std::unordered_map mLastVoiceNotified; + std::mutex mPlaybackMutex; + + float mListenerPos[3] = { 0.0f, 0.0f, 0.0f }; + float mListenerFwd[3] = { 0.0f, 1.0f, 0.0f }; // forward direction (normalized) + std::mutex mListenerMutex; +}; diff --git a/src/Audio/VoiceChat.cpp b/src/Audio/VoiceChat.cpp new file mode 100644 index 00000000..9bbab64b --- /dev/null +++ b/src/Audio/VoiceChat.cpp @@ -0,0 +1,663 @@ +/* + Copyright (C) 2024 BeamMP Ltd., BeamMP team and contributors. + Licensed under AGPL-3.0 (or later), see . + SPDX-License-Identifier: AGPL-3.0-or-later +*/ + +#include "Audio/VoiceChat.h" +#include "Logger.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace { + // Explicit little-endian float read — portable to any architecture. + // Wire format: little-endian IEEE 754 floats (matches server BuildPacket). + inline float readLEFloat(const uint8_t* p) { + uint32_t bits = static_cast(p[0]) + | (static_cast(p[1]) << 8) + | (static_cast(p[2]) << 16) + | (static_cast(p[3]) << 24); + float f; std::memcpy(&f, &bits, sizeof(float)); + return f; + } +} // namespace + +VoiceChat& VoiceChat::Instance() { + static VoiceChat instance; + return instance; +} + +VoiceChat::~VoiceChat() { + Shutdown(); +} + +// ── stream helpers ────────────────────────────────────── + +void VoiceChat::OpenCaptureStream(int deviceId) { + if (mCaptureStream) { + Pa_StopStream(mCaptureStream); + Pa_CloseStream(mCaptureStream); + mCaptureStream = nullptr; + } + + PaDeviceIndex dev = (deviceId < 0) ? Pa_GetDefaultInputDevice() : static_cast(deviceId); + if (dev == paNoDevice) { + warn("VoiceChat: No input device available."); + return; + } + const PaDeviceInfo* devInfo = Pa_GetDeviceInfo(dev); + if (!devInfo || devInfo->maxInputChannels < 1) { + warn("VoiceChat: Selected input device has no input channels."); + return; + } + + PaStreamParameters inputParams {}; + inputParams.device = dev; + inputParams.channelCount = CHANNELS; + inputParams.sampleFormat = paInt16; + inputParams.suggestedLatency = devInfo->defaultLowInputLatency; + + PaError err = Pa_OpenStream(&mCaptureStream, &inputParams, nullptr, + SAMPLE_RATE, FRAME_SIZE, paClipOff, CaptureCallback, this); + if (err != paNoError) { + error(std::string("VoiceChat: Failed to open capture stream: ") + Pa_GetErrorText(err)); + mCaptureStream = nullptr; + } else { + // Start stream immediately so mic level monitoring is always active + PaError startErr = Pa_StartStream(mCaptureStream); + if (startErr != paNoError) { + error(std::string("VoiceChat: Failed to start capture stream: ") + Pa_GetErrorText(startErr)); + Pa_CloseStream(mCaptureStream); + mCaptureStream = nullptr; + } else { + mCaptureDeviceId = dev; + info(std::string("VoiceChat: Capture device: ") + devInfo->name); + } + } +} + +void VoiceChat::OpenPlaybackStream(int deviceId) { + if (mPlaybackStream) { + Pa_StopStream(mPlaybackStream); + Pa_CloseStream(mPlaybackStream); + mPlaybackStream = nullptr; + } + + PaDeviceIndex dev = (deviceId < 0) ? Pa_GetDefaultOutputDevice() : static_cast(deviceId); + if (dev == paNoDevice) { + warn("VoiceChat: No output device available."); + return; + } + const PaDeviceInfo* devInfo = Pa_GetDeviceInfo(dev); + if (!devInfo || devInfo->maxOutputChannels < 1) { + warn("VoiceChat: Selected output device has no output channels."); + return; + } + + int chCount = (devInfo->maxOutputChannels >= PLAYBACK_CHANNELS) ? PLAYBACK_CHANNELS : 1; + + PaStreamParameters outputParams {}; + outputParams.device = dev; + outputParams.channelCount = chCount; + outputParams.sampleFormat = paFloat32; + outputParams.suggestedLatency = devInfo->defaultLowOutputLatency; + + PaError err = Pa_OpenStream(&mPlaybackStream, nullptr, &outputParams, + SAMPLE_RATE, PLAYBACK_FRAME_SIZE, paClipOff, PlaybackCallback, this); + if (err != paNoError && chCount > 1) { + // Fallback to mono + warn("VoiceChat: Stereo playback failed, falling back to mono."); + outputParams.channelCount = 1; + err = Pa_OpenStream(&mPlaybackStream, nullptr, &outputParams, + SAMPLE_RATE, PLAYBACK_FRAME_SIZE, paClipOff, PlaybackCallback, this); + } + if (err != paNoError) { + error(std::string("VoiceChat: Failed to open playback stream: ") + Pa_GetErrorText(err)); + mPlaybackStream = nullptr; + } else { + mPlaybackChannels = outputParams.channelCount; + Pa_StartStream(mPlaybackStream); + mPlaybackDeviceId = dev; + info(std::string("VoiceChat: Playback device: ") + devInfo->name + + " (" + std::to_string(mPlaybackChannels) + "ch)"); + } +} + +// ── init / shutdown ───────────────────────────────────── + +void VoiceChat::Init() { + if (mInitialized.load()) return; + + PaError err = Pa_Initialize(); + if (err != paNoError) { + error(std::string("VoiceChat: PortAudio init failed: ") + Pa_GetErrorText(err)); + return; + } + + int opusErr = 0; + mEncoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, OPUS_APPLICATION_VOIP, &opusErr); + if (opusErr != OPUS_OK || !mEncoder) { + error(std::string("VoiceChat: Opus encoder creation failed: ") + opus_strerror(opusErr)); + Pa_Terminate(); + return; + } + opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(OPUS_BITRATE)); + opus_encoder_ctl(mEncoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); + + OpenCaptureStream(-1); + OpenPlaybackStream(-1); + + mInitialized.store(true); + + // Start background thread that sends mic level to the game + // (must NOT be done inside the real-time PortAudio callback) + mLevelThreadRunning.store(true); + mLevelThread = std::thread(&VoiceChat::MicLevelSenderLoop, this); + + info("VoiceChat: Initialized successfully."); +} + +void VoiceChat::Shutdown() { + if (!mInitialized.load()) return; + mInitialized.store(false); + mRecording.store(false); + mMuted.store(false); + + // Stop mic level sender thread + mLevelThreadRunning.store(false); + if (mLevelThread.joinable()) mLevelThread.join(); + + if (mCaptureStream) { + Pa_StopStream(mCaptureStream); + Pa_CloseStream(mCaptureStream); + mCaptureStream = nullptr; + } + if (mPlaybackStream) { + Pa_StopStream(mPlaybackStream); + Pa_CloseStream(mPlaybackStream); + mPlaybackStream = nullptr; + } + if (mEncoder) { + opus_encoder_destroy(mEncoder); + mEncoder = nullptr; + } + { + std::lock_guard lock(mPlaybackMutex); + for (auto& [id, cv] : mClients) { + if (cv.decoder) opus_decoder_destroy(cv.decoder); + } + mClients.clear(); + mLastVoiceNotified.clear(); + } + { + std::lock_guard lock(mCallbackMutex); + mSendCallback = nullptr; + mGameSendCallback = nullptr; + } + + Pa_Terminate(); + info("VoiceChat: Shutdown."); +} + +// ── callbacks / setters ───────────────────────────────── + +void VoiceChat::SetSendCallback(SendCallback cb) { + std::lock_guard lock(mCallbackMutex); + mSendCallback = std::move(cb); +} + +void VoiceChat::SetGameSendCallback(GameSendCallback cb) { + std::lock_guard lock(mCallbackMutex); + mGameSendCallback = std::move(cb); +} + +void VoiceChat::SetMuted(bool muted) { + mMuted.store(muted); + debug(std::string("VoiceChat: Muted = ") + (muted ? "true" : "false")); +} + +void VoiceChat::SetVolume(int vol) { + mVolume.store(std::max(0, std::min(100, vol))); + debug("VoiceChat: Volume = " + std::to_string(mVolume.load())); +} + +// ── device management ─────────────────────────────────── + +void VoiceChat::SetInputDevice(int deviceId) { + if (!mInitialized.load()) return; + OpenCaptureStream(deviceId); +} + +void VoiceChat::SetOutputDevice(int deviceId) { + if (!mInitialized.load()) return; + OpenPlaybackStream(deviceId); +} + +std::string VoiceChat::EnumerateDevicesJson() { + if (!mInitialized.load()) return "{}"; + int numDevices = Pa_GetDeviceCount(); + if (numDevices < 0) return "{}"; + + // Prefer platform-native API to avoid duplicate devices across host APIs +#ifdef _WIN32 + PaHostApiIndex preferredApi = Pa_HostApiTypeIdToHostApiIndex(paWASAPI); +#elif defined(__linux__) + PaHostApiIndex preferredApi = Pa_HostApiTypeIdToHostApiIndex(paPulseAudio); + if (preferredApi < 0) preferredApi = Pa_HostApiTypeIdToHostApiIndex(paALSA); +#else + PaHostApiIndex preferredApi = -1; +#endif + if (preferredApi < 0) preferredApi = Pa_GetDefaultHostApi(); + + std::ostringstream ss; + ss << "{\"input\":["; + bool firstIn = true; + for (int i = 0; i < numDevices; ++i) { + const PaDeviceInfo* info = Pa_GetDeviceInfo(i); + if (!info || info->maxInputChannels < 1) continue; + if (info->hostApi != preferredApi) continue; + if (!firstIn) ss << ","; + firstIn = false; + std::string name(info->name); + // Escape quotes in device name + for (size_t p = 0; (p = name.find('"', p)) != std::string::npos; p += 2) + name.replace(p, 1, "\\\""); + ss << "{\"id\":" << i << ",\"name\":\"" << name << "\"}"; + } + ss << "],\"output\":["; + bool firstOut = true; + for (int i = 0; i < numDevices; ++i) { + const PaDeviceInfo* info = Pa_GetDeviceInfo(i); + if (!info || info->maxOutputChannels < 1) continue; + if (info->hostApi != preferredApi) continue; + if (!firstOut) ss << ","; + firstOut = false; + std::string name(info->name); + for (size_t p = 0; (p = name.find('"', p)) != std::string::npos; p += 2) + name.replace(p, 1, "\\\""); + ss << "{\"id\":" << i << ",\"name\":\"" << name << "\"}"; + } + ss << "]}"; + return ss.str(); +} + +// ── recording ─────────────────────────────────────────── + +void VoiceChat::StartRecording() { + if (!mInitialized.load() || !mCaptureStream) { + warn("VoiceChat: StartRecording failed - not initialized or no capture stream"); + return; + } + if (mRecording.load()) return; + { + std::lock_guard lock(mCaptureMutex); + mCaptureBuffer.clear(); + } + mRecording.store(true); + info("VoiceChat: Recording started (encoding active)."); +} + +void VoiceChat::StopRecording() { + if (!mRecording.load()) return; + mRecording.store(false); + info("VoiceChat: Recording stopped (encoding paused)."); +} + +void VoiceChat::UpdateListenerPosition(float x, float y, float z) { + std::lock_guard lock(mListenerMutex); + mListenerPos[0] = x; + mListenerPos[1] = y; + mListenerPos[2] = z; +} + +void VoiceChat::UpdateListenerOrientation(float fx, float fy, float fz) { + // Normalize the forward vector + float len = std::sqrt(fx*fx + fy*fy + fz*fz); + if (len < 1e-6f) return; + std::lock_guard lock(mListenerMutex); + mListenerFwd[0] = fx / len; + mListenerFwd[1] = fy / len; + mListenerFwd[2] = fz / len; +} + +// ── capture callback ──────────────────────────────────── + +int VoiceChat::CaptureCallback(const void* input, void* /*output*/, + unsigned long frameCount, const PaStreamCallbackTimeInfo* /*timeInfo*/, + unsigned long /*statusFlags*/, void* userData) { + auto* self = static_cast(userData); + if (!input) return paContinue; + + const auto* samples = static_cast(input); + + // Always calculate RMS for mic level indicator (atomic store only — no blocking I/O) + float rms = 0.0f; + for (unsigned long i = 0; i < frameCount; ++i) { + float s = samples[i] / 32768.0f; + rms += s * s; + } + rms = std::sqrt(rms / frameCount); + // Apply same gain factor so the meter reflects the actual transmitted level + float MIC_GAIN = (self->mMicGainPct.load() / 100.0f) * 4.0f; // 100=4x default, 500=20x, 800=32x max + rms = std::min(1.0f, rms * MIC_GAIN); + self->mMicLevel.store(rms); + + // Only encode and transmit when actively recording and not muted + if (!self->mRecording.load() || self->mMuted.load()) return paContinue; + + self->EncodeCapturedAudio(samples, frameCount); + return paContinue; +} + +void VoiceChat::SetMusicVolume(int vol) { + mMusicVolume.store(std::max(0, std::min(100, vol))); + debug("VoiceChat: MusicVolume = " + std::to_string(mMusicVolume.load())); +} + +void VoiceChat::SetMicGain(int pct) { + mMicGainPct.store(std::max(0, std::min(800, pct))); + debug("VoiceChat: MicGainPct = " + std::to_string(mMicGainPct.load())); +} + +void VoiceChat::MicLevelSenderLoop() { + info("VoiceChat: Mic level sender thread started."); + int lastLevel = -1; + while (mLevelThreadRunning.load()) { + GameSendCallback cbCopy; + { + std::lock_guard lock(mCallbackMutex); + cbCopy = mGameSendCallback; + } + if (mCaptureStream && cbCopy) { + float rms = mMicLevel.load(); + int levelPercent = std::min(100, static_cast(rms * 100.0f)); + if (levelPercent != lastLevel) { + lastLevel = levelPercent; + cbCopy("Fl" + std::to_string(levelPercent)); + } + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + info("VoiceChat: Mic level sender thread stopped."); +} + +void VoiceChat::EncodeCapturedAudio(const int16_t* samples, size_t count) { + if (!mEncoder) return; + + SendCallback cbCopy; + { + std::lock_guard lock(mCallbackMutex); + cbCopy = mSendCallback; + } + + std::lock_guard lock(mCaptureMutex); + mCaptureBuffer.insert(mCaptureBuffer.end(), samples, samples + count); + + // Temp contiguous buffer for opus_encode (deque is not contiguous) + std::vector frameBuffer(FRAME_SIZE); + + while (mCaptureBuffer.size() >= static_cast(FRAME_SIZE)) { + std::copy(mCaptureBuffer.begin(), mCaptureBuffer.begin() + FRAME_SIZE, frameBuffer.begin()); + + // Software mic gain: dynamic, controlled via SetMicGain (Fg command). + // Default mMicGainPct=100 -> 4.0x (~+12dB). Range 0-200%. + float MIC_GAIN = (mMicGainPct.load() / 100.0f) * 4.0f; // 100=4x default, 500=20x, 800=32x max + for (auto& s : frameBuffer) { + float boosted = s * MIC_GAIN; + s = static_cast(std::max(-32768.0f, std::min(32767.0f, boosted))); + } + + unsigned char opusOut[MAX_OPUS_PACKET]; + int encoded = opus_encode(mEncoder, frameBuffer.data(), FRAME_SIZE, opusOut, MAX_OPUS_PACKET); + + if (encoded > 0 && cbCopy) { + std::string packet(1 + encoded, '\0'); + packet[0] = 'F'; + std::memcpy(&packet[1], opusOut, encoded); + cbCopy(packet, false); + } + + mCaptureBuffer.erase(mCaptureBuffer.begin(), mCaptureBuffer.begin() + FRAME_SIZE); + } +} + +// ── incoming voice ────────────────────────────────────── + +void VoiceChat::ProcessIncomingVoice(const char* data, size_t len) { + if (len < VOICE_HEADER_SIZE + 1) return; + + // Packet v2: F + version(1) + flags(1) + source_id(2) + pos(3x4B) + maxDist(4B) + gain(4B) + opus + const auto* p = reinterpret_cast(data); + uint8_t version = p[1]; + if (version != VOICE_PROTOCOL_VERSION) return; + + uint8_t flags = p[2]; + // Explicit LE reads — matches server BuildPacket wire format. + uint16_t sourceId = static_cast(p[3]) | (static_cast(p[4]) << 8); + float pos[3] = { readLEFloat(p + 5), readLEFloat(p + 9), readLEFloat(p + 13) }; + float maxDistance = readLEFloat(p + 17); + float broadcastGain = readLEFloat(p + 21); + if (broadcastGain < 0.0f) broadcastGain = 0.0f; + if (broadcastGain > 1.0f) broadcastGain = 1.0f; // server sends [0.0, 1.0] + + const unsigned char* opusData = reinterpret_cast(data + VOICE_HEADER_SIZE); + int opusLen = static_cast(len - VOICE_HEADER_SIZE); + + // Snapshot callback before taking the heavy lock + GameSendCallback cbCopy; + { + std::lock_guard lockCb(mCallbackMutex); + cbCopy = mGameSendCallback; + } + + std::lock_guard lock(mPlaybackMutex); + + // Throttled "speaking" notification — mLastVoiceNotified is protected by mPlaybackMutex. + if (cbCopy) { + auto now = std::chrono::steady_clock::now(); + auto& last = mLastVoiceNotified[sourceId]; + if (std::chrono::duration_cast(now - last).count() >= 300) { + last = now; + bool isInjected = (flags & VOICE_FLAG_INJECTED) != 0; + // Fk: — speaking notification + cbCopy("Fk" + std::to_string(sourceId) + ":" + (isInjected ? "1" : "0")); + } + } + + auto& client = mClients[sourceId]; + if (!client.decoder) { + int err = 0; + client.decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err); + if (err != OPUS_OK || !client.decoder) { + error("VoiceChat: Failed to create Opus decoder for source " + std::to_string(sourceId)); + mClients.erase(sourceId); + return; + } + } + + client.position[0] = pos[0]; + client.position[1] = pos[1]; + client.position[2] = pos[2]; + client.maxDistance = maxDistance; + client.broadcastGain = broadcastGain; + client.flags = flags; + client.lastReceived = std::chrono::steady_clock::now(); + + // Decode raw — attenuation is applied in MixAndPlay using current listener pos + std::vector decoded(FRAME_SIZE); + int samples = opus_decode_float(client.decoder, opusData, opusLen, decoded.data(), FRAME_SIZE, 0); + if (samples > 0) { + decoded.resize(samples); + + // Compact consumed samples back to the front (O(n) but in the network + // thread, never in the audio callback — see sampleReadPos in header). + if (client.sampleReadPos > 0) { + client.sampleQueue.erase(client.sampleQueue.begin(), + client.sampleQueue.begin() + client.sampleReadPos); + client.sampleReadPos = 0; + } + + // Cap unread queue to ~500ms to prevent unbounded growth. + constexpr size_t MAX_QUEUE = PLAYBACK_FRAME_SIZE * 25; // 25x20ms = 500ms + if (client.sampleQueue.size() + decoded.size() > MAX_QUEUE) { + size_t drop = (client.sampleQueue.size() + decoded.size()) - MAX_QUEUE; + client.sampleQueue.erase(client.sampleQueue.begin(), + client.sampleQueue.begin() + std::min(drop, client.sampleQueue.size())); + } + client.sampleQueue.insert(client.sampleQueue.end(), decoded.begin(), decoded.end()); + + if (flags & VOICE_FLAG_INJECTED) { + // Injected audio: server controls pacing, start immediately + client.buffering = false; + } else if (client.buffering + && client.sampleQueue.size() >= static_cast(PLAYBACK_FRAME_SIZE * JITTER_BUFFER_FRAMES)) { + // Mic audio: wait for jitter buffer to fill + client.buffering = false; + } + } +} + +// ── playback ──────────────────────────────────────────── + +int VoiceChat::PlaybackCallback(const void* /*input*/, void* output, + unsigned long frameCount, const PaStreamCallbackTimeInfo* /*timeInfo*/, + unsigned long /*statusFlags*/, void* userData) { + auto* self = static_cast(userData); + auto* out = static_cast(output); + std::memset(out, 0, frameCount * self->mPlaybackChannels * sizeof(float)); + self->MixAndPlay(out, frameCount); + return paContinue; +} + +void VoiceChat::MixAndPlay(float* output, unsigned long frameCount) { + // Regular lock: ProcessIncomingVoice holds this for only ~microseconds (one Opus + // decode), so blocking here is far cheaper than the glitch from try_lock dropping + // an entire 20ms frame every time a network packet arrives simultaneously. + std::unique_lock lock(mPlaybackMutex); + + // Snapshot listener position and orientation under its mutex + float listenerPos[3], listenerFwd[3]; + { + std::unique_lock lpos(mListenerMutex, std::try_to_lock); + if (lpos.owns_lock()) { + listenerPos[0] = mListenerPos[0]; + listenerPos[1] = mListenerPos[1]; + listenerPos[2] = mListenerPos[2]; + listenerFwd[0] = mListenerFwd[0]; + listenerFwd[1] = mListenerFwd[1]; + listenerFwd[2] = mListenerFwd[2]; + } else { + listenerPos[0] = listenerPos[1] = listenerPos[2] = 0.0f; + listenerFwd[0] = 0.0f; listenerFwd[1] = 1.0f; listenerFwd[2] = 0.0f; + } + } + + // Voice volume: slider 0-100 -> gain 0.0-3.0 (+9.5dB headroom for quiet mics) + float volScale = (mVolume.load() / 100.0f) * 3.0f; + // Music volume: separate slider 0-100 -> gain 0.0-3.0 (injected sources only) + float musicVolScale = (mMusicVolume.load() / 100.0f) * 3.0f; + auto now = std::chrono::steady_clock::now(); + std::vector expired; + + for (auto& [id, client] : mClients) { + if (std::chrono::duration_cast(now - client.lastReceived).count() > 2) { + expired.push_back(id); + continue; + } + + if (client.buffering) continue; + + // ── Compute target gain from current listener position ────────────── + float targetGain = 1.0f; + if (client.flags & VOICE_FLAG_PROXIMITY) { + // maxDistance==0 means the server set proxDist=0 = unlimited range. + // In that case skip falloff entirely so players always hear each other + // regardless of whatever position the server embedded in the packet. + if (client.maxDistance > 0.0f) { + float dx = client.position[0] - listenerPos[0]; + float dy = client.position[1] - listenerPos[1]; + float dz = client.position[2] - listenerPos[2]; + float dist = std::sqrt(dx * dx + dy * dy + dz * dz); + + float maxDist = client.maxDistance; + if (dist >= maxDist) { + targetGain = 0.0f; + } else { + float innerRadius = maxDist * 0.15f; + if (dist <= innerRadius) { + targetGain = 1.0f; + } else { + float t = (dist - innerRadius) / (maxDist - innerRadius); + targetGain = 1.0f - std::pow(t, 0.5f); + targetGain = std::max(0.0f, targetGain); + } + } + } + // else: maxDistance==0 -> unlimited, keep targetGain=1.0f + } + // ── Compute target stereo pan from source angle relative to listener forward ── + float dx = client.position[0] - listenerPos[0]; + float dy = client.position[1] - listenerPos[1]; + // Right vector perpendicular to forward in XY plane (BeamNG coords) + float rightX = listenerFwd[1]; + float rightY = -listenerFwd[0]; + float srcLen = std::sqrt(dx*dx + dy*dy); + float targetPan = 0.0f; // -1=full left, 0=center, +1=full right + if (srcLen > 0.5f) { + targetPan = (dx * rightX + dy * rightY) / srcLen; + targetPan = std::max(-1.0f, std::min(1.0f, targetPan)); + } + + // ── Per-sample smoothing: slow for gain (avoids clicks), fast for pan (responsive) ── + // gainAlpha ~0.05 -> ~20 samples to settle (smooth volume transitions) + // panAlpha ~0.30 -> ~3 samples to settle (near-instant pan on camera rotation) + const float gainAlpha = 0.05f; + const float panAlpha = 0.30f; + // Use music volume for injected sources, voice volume for mic sources + float sourceVolScale = (client.flags & VOICE_FLAG_INJECTED) ? musicVolScale : volScale; + // Apply broadcast gain (sender's car stereo volume) for injected sources + float bGain = (client.flags & VOICE_FLAG_INJECTED) ? client.broadcastGain : 1.0f; + targetGain *= sourceVolScale * bGain; + + // O(1) read: advance the read head, compaction is deferred to ProcessIncomingVoice. + const size_t available = client.sampleQueue.size() - client.sampleReadPos; + size_t toMix = std::min(static_cast(frameCount), available); + if (mPlaybackChannels >= 2) { + for (size_t i = 0; i < toMix; ++i) { + client.smoothedGain += gainAlpha * (targetGain - client.smoothedGain); + client.smoothedPan += panAlpha * (targetPan - client.smoothedPan); + // Equal-power panning from smoothed pan value + float angle = (client.smoothedPan + 1.0f) * 0.7854f; // (pan+1)*pi/4 + float gL = std::cos(angle) * client.smoothedGain; + float gR = std::sin(angle) * client.smoothedGain; + float s = client.sampleQueue[client.sampleReadPos + i]; + output[i * 2 + 0] += s * gL; + output[i * 2 + 1] += s * gR; + } + } else { + for (size_t i = 0; i < toMix; ++i) { + client.smoothedGain += gainAlpha * (targetGain - client.smoothedGain); + output[i] += client.sampleQueue[client.sampleReadPos + i] * client.smoothedGain; + } + } + client.sampleReadPos += toMix; // O(1) — no erase in audio callback + + // Do not re-enable buffering mid-stream — only on fresh decoder creation + } + + for (auto id : expired) { + if (mClients[id].decoder) opus_decoder_destroy(mClients[id].decoder); + mClients.erase(id); + } + + // Clip output (stereo or mono) + for (unsigned long i = 0; i < frameCount * static_cast(mPlaybackChannels); ++i) { + output[i] = std::max(-1.0f, std::min(1.0f, output[i])); + } +} diff --git a/src/Network/Core.cpp b/src/Network/Core.cpp index bcb80509..25bdd0e0 100644 --- a/src/Network/Core.cpp +++ b/src/Network/Core.cpp @@ -25,6 +25,7 @@ #include #endif +#include "Audio/VoiceChat.h" #include "Logger.h" #include "Startup.h" #include @@ -36,6 +37,23 @@ #include +namespace { + // Parse a "x,y,z" float triple produced by the game engine. + // Returns true and fills x/y/z on success; false otherwise. + bool parseVec3(const std::string& s, float& x, float& y, float& z) { + try { + size_t p1 = s.find(','); + if (p1 == std::string::npos) return false; + size_t p2 = s.find(',', p1 + 1); + if (p2 == std::string::npos) return false; + x = std::stof(s.substr(0, p1)); + y = std::stof(s.substr(p1 + 1, p2 - p1 - 1)); + z = std::stof(s.substr(p2 + 1)); + return true; + } catch (...) { return false; } + } +} // namespace + extern int TraceBack; std::set* ConfList = nullptr; bool TCPTerminate = false; @@ -346,6 +364,47 @@ void Parse(std::string Data, SOCKET CSocket) { }); break; } + case 'F': { // Voice chat commands from game + if (SubCode == 's') { + VoiceChat::Instance().StartRecording(); + } else if (SubCode == 'e') { + VoiceChat::Instance().StopRecording(); + } else if (SubCode == 'p' && Data.size() > 2) { + float x = 0, y = 0, z = 0; + if (parseVec3(Data.substr(2), x, y, z)) { + VoiceChat::Instance().UpdateListenerPosition(x, y, z); + } + } else if (SubCode == 'v' && Data.size() > 2) { + int vol = std::atoi(Data.substr(2).c_str()); + VoiceChat::Instance().SetVolume(vol); + } else if (SubCode == 'm' && Data.size() > 2) { + VoiceChat::Instance().SetMuted(Data[2] == '1'); + } else if (SubCode == 'd') { + std::string json = VoiceChat::Instance().EnumerateDevicesJson(); + CoreSend("Fd" + json); + } else if (SubCode == 'i' && Data.size() > 2) { + auto devStr = Data.substr(2); + int devId = (devStr == "default") ? -1 : std::atoi(devStr.c_str()); + VoiceChat::Instance().SetInputDevice(devId); + } else if (SubCode == 'o' && Data.size() > 2) { + auto devStr = Data.substr(2); + int devId = (devStr == "default") ? -1 : std::atoi(devStr.c_str()); + VoiceChat::Instance().SetOutputDevice(devId); + } else if (SubCode == 'f' && Data.size() > 2) { + float fx = 0, fy = 0, fz = 0; + if (parseVec3(Data.substr(2), fx, fy, fz)) { + VoiceChat::Instance().UpdateListenerOrientation(fx, fy, fz); + } + } else if (SubCode == 'n' && Data.size() > 2) { + int vol = std::atoi(Data.substr(2).c_str()); + VoiceChat::Instance().SetMusicVolume(vol); + } else if (SubCode == 'g' && Data.size() > 2) { + int pct = std::atoi(Data.substr(2).c_str()); + VoiceChat::Instance().SetMicGain(pct); + } + Data.clear(); + break; + } // end case 'F' default: Data.clear(); break; @@ -366,7 +425,8 @@ void GameHandler(SOCKET Client) { do { try { Utils::ReceiveFromGame(Client, data); - Parse(std::string(data.data(), data.size()), Client); + std::string raw(data.data(), data.size()); + Parse(raw, Client); } catch (const std::exception& e) { error(std::string("Error while receiving from game on core: ") + e.what()); break; diff --git a/src/Network/GlobalHandler.cpp b/src/Network/GlobalHandler.cpp index da4b6f28..c786c6f6 100644 --- a/src/Network/GlobalHandler.cpp +++ b/src/Network/GlobalHandler.cpp @@ -21,13 +21,13 @@ #include #endif +#include "Audio/VoiceChat.h" #include "Logger.h" #include "Options.h" #include #include #include #include -#include "Options.h" #include std::chrono::time_point PingStart, PingEnd; @@ -106,6 +106,7 @@ void ServerSend(std::string Data, bool Rel) { } void NetReset() { + VoiceChat::Instance().Shutdown(); TCPTerminate = false; GConnected = false; Terminate = false; @@ -204,6 +205,9 @@ void ParserAsync(std::string_view Data) { MStatus = Data; UlStatus = "Uldone"; return; + case 'F': // Voice chat packet from server + VoiceChat::Instance().ProcessIncomingVoice(Data.data(), Data.size()); + return; case 'U': magic = Data.substr(1); default: @@ -223,6 +227,15 @@ void NetMain(const std::string& IP, int Port) { info("Connection Terminated!"); } void TCPGameServer(const std::string& IP, int Port) { + // Initialize voice chat and wire send callback to ServerSend + VoiceChat::Instance().Init(); + VoiceChat::Instance().SetSendCallback([](const std::string& data, bool rel) { + ServerSend(data, rel); + }); + VoiceChat::Instance().SetGameSendCallback([](const std::string& data) { + CoreSend(data); + }); + GSocket = SetupListener(); std::unique_ptr ClientThread {}; std::unique_ptr NetMainThread {}; diff --git a/vcpkg.json b/vcpkg.json index e718f977..29c10ae9 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -4,6 +4,8 @@ "nlohmann-json", "zlib", "openssl", - "curl" + "curl", + "opus", + "portaudio" ] }