diff --git a/scripts/chromium-autostart.sh b/scripts/chromium-autostart.sh index 7e699a51..626472e1 100644 --- a/scripts/chromium-autostart.sh +++ b/scripts/chromium-autostart.sh @@ -79,6 +79,14 @@ START_SOUND=$(/usr/bin/jq -r .mupibox.startSound ${CONFIG}) START_VOLUME=$(/usr/bin/jq -r .mupibox.startVolume ${CONFIG}) AUDIO_DEVICE=$(/usr/bin/jq -r .mupibox.audioDevice ${CONFIG}) /usr/bin/pactl set-sink-volume @DEFAULT_SINK@ ${START_VOLUME}% +# Kill any in-flight startup-sound playback before launching a fresh one. +# chromium-autostart.sh runs from two paths that can fire in quick +# succession: (1) restart_kiosk.sh after the admin "Restart services" +# click, and (2) dietpi-login auto-respawn on tty2 once chromium dies. +# Without this pkill both invocations spawn their own mplayer & overlay +# the welcome wav. Match by the wav path so the regex never collides +# with mplayer's slave-mode instance held by the backend-player. +pkill -f "mplayer.*${START_SOUND}" 2>/dev/null /usr/bin/mplayer -volume 100 ${START_SOUND} & pgrep -f "chromium-browser" | while read -r pid; do # Setze die Priorität für jeden Prozess neu diff --git a/scripts/mupihat/mupihat.py b/scripts/mupihat/mupihat.py index 07d6e568..69c4a985 100644 --- a/scripts/mupihat/mupihat.py +++ b/scripts/mupihat/mupihat.py @@ -35,13 +35,19 @@ import argparse from datetime import datetime from flask import Flask, render_template, jsonify -from threading import Thread +from threading import Thread, Lock from mupihat_bq25792 import bq25792 app = Flask(__name__) # Global variables hat = None +# AR5-5: Flask is threaded by default — without serialising access to the +# shared BQ25792 driver, the periodic_json_dump background thread and the +# Flask request workers can interleave their I2C transactions on the same +# smbus2.SMBus handle, producing "Remote I/O error" (errno 121) on the +# Pi's i2c bus. All hat.* calls below run under this lock. +i2c_lock = Lock() log_flag = False json_flag = False json_file = "/tmp/mupihat.json" @@ -84,7 +90,9 @@ def log_register_values(): def index(): """Flask route to display register values.""" try: - return render_template("index.html", registers=hat.to_json_registers()) + with i2c_lock: + registers = hat.to_json_registers() + return render_template("index.html", registers=registers) except Exception as e: return f"Error reading registers: {str(e)}", 500 @@ -92,8 +100,10 @@ def index(): @app.route("/api/registers") def api_registers(): """Flask API endpoint to return register values as JSON.""" - try: - return jsonify(hat.to_json_registers()) + try: + with i2c_lock: + registers = hat.to_json_registers() + return jsonify(registers) except Exception as e: return jsonify({"error": str(e)}), 500 @@ -102,19 +112,31 @@ def periodic_json_dump(): """Periodically writes the register values to a JSON file.""" global json_flag, json_file while True: - hat.watchdog_reset() - time.sleep(0.1) # Allow time for the watchdog reset - hat.read_all_register() - time.sleep(1) # Allow time for the registers to be updated - if json_flag: + # AR5-5: serialise I2C work via i2c_lock so Flask request workers + # don't interleave bus transactions with this thread. Sleeps stay + # OUTSIDE the lock so request workers can fit between phases. + snapshot = None + try: + with i2c_lock: + hat.watchdog_reset() + time.sleep(0.1) # Allow time for the watchdog reset + with i2c_lock: + hat.read_all_register() + time.sleep(1) # Allow time for the registers to be updated + with i2c_lock: + if json_flag: + snapshot = hat.to_json() + if log_flag: + log_register_values() + except Exception as e: + logging.error("periodic_json_dump I2C cycle failed: %s", str(e)) + if snapshot is not None: try: with open(json_file, "w") as outfile: - json.dump(hat.to_json(), outfile, indent=4) + json.dump(snapshot, outfile, indent=4) except Exception as e: logging.error("Failed to write JSON dump: %s", str(e)) - if log_flag: - log_register_values() - time.sleep(3.9) # Run every 4 seconds + time.sleep(3.9) # Run every ~4 seconds def parse_arguments(): diff --git a/scripts/mupihat/mupihat_bq25792.py b/scripts/mupihat/mupihat_bq25792.py index ab1182f2..df5fa898 100644 --- a/scripts/mupihat/mupihat_bq25792.py +++ b/scripts/mupihat/mupihat_bq25792.py @@ -5307,15 +5307,43 @@ def get_PART_REVISION_string(self): # class methods + def _reopen_bus(self): + """ + Close and re-open the SMBus handle. Used after a Remote I/O error + to clear a hung bus state on the BQ25792 (errno 121 leaves the bus + in an indeterminate state on the Pi's i2c-bcm2835 driver). + """ + try: + self.bq.close() + except Exception: + pass + self.bq = smbus2.SMBus(self.i2c_device) + def safe_execute(self, func, *args, **kwargs): """ Executes a function safely, catching exceptions and handling errors. + On the first OSError (typically errno 121 "Remote I/O error" caused + by an interleaved I2C transaction or transient bus glitch), reopen + the SMBus handle and retry once. A second failure escalates to + I2CError so the caller can choose whether to skip the cycle. If `_exit_on_error` is True, the program will exit on error. """ try: return func(*args, **kwargs) + except OSError as e: + logging.warning(f"OSError in {func.__name__}: {str(e)} — reopening I2C bus and retrying once") + self._reopen_bus() + try: + # Re-bind the bound method to the new bus handle if needed + if hasattr(func, '__self__') and func.__self__ is not None: + func = getattr(self.bq, func.__name__) + return func(*args, **kwargs) + except Exception as e2: + logging.error(f"Error in {func.__name__} after bus reopen: {str(e2)}") + if self._exit_on_error: + sys.exit(1) + raise I2CError(f"Failed to execute {func.__name__}") from e2 except Exception as e: - #sys.stderr.write(f"Error in {func.__name__}: {str(e)}\n") logging.error(f"Error in {func.__name__}: {str(e)}") if self._exit_on_error: sys.exit(1) diff --git a/src/backend-player/src/mplayer-wrapper.js b/src/backend-player/src/mplayer-wrapper.js index 1ea0dcb9..6b436f7b 100644 --- a/src/backend-player/src/mplayer-wrapper.js +++ b/src/backend-player/src/mplayer-wrapper.js @@ -6,24 +6,104 @@ const debug = require('debug')('mplayer-wrapper') const parsers = require('./parsers') +const MPLAYER_RESPAWN_MAX_BACKOFF_MS = 30_000 +const MPLAYER_HEALTHY_RUN_MS = 30_000 + const createPlayer = () => { const out = new EventEmitter() - const proc = spawn( - 'mplayer', - [ - '-slave', // 😔 - '-idle', - '-novideo', - '-quiet', - '-msglevel', - 'all=1:global=4:cplayer=4', - ], - { - env: process.env, - stdio: ['pipe', 'pipe', 'ignore'], - }, - ) + // mplayer -> wrapper. Defined before spawnMplayer so the spawn closure + // can reference it without a forward declaration. + const onLine = (line) => { + debug(`line: ${line}`) + if (line === 'Starting playback...') return out.emit('track-change') + + //Callback when playlist finishes + if (line === 'ANS_ERROR=PROPERTY_UNAVAILABLE') return out.emit('playlist-finish') + // todo: `ANS_ERROR=PROPERTY_UNAVAILABLE` + + const parts = /^ANS_([\w]+)=/g.exec(line) + if (!parts || !parts[1]) return null + const prop = parts[1] + + const parser = parsers[prop] + if (!parser) return null + const val = parser(line.slice(parts[0].length)) + out.emit('prop', prop, val) + out.emit(prop, val) + } + + // Auto-respawn: previously a single mplayer crash (SIGSEGV, OOM, mp3 + // decoder bug) bricked the player until pm2 restarted backend-player. + // Now we re-spawn with exponential backoff (1, 2, 4, 8, 16, 30, 30…s), + // and reset the counter once a fresh process has run cleanly for 30s. + let proc = null + let shutdown = false + let respawnAttempts = 0 + let healthyTimer = null + + const spawnMplayer = () => { + if (shutdown) return + + proc = spawn( + 'mplayer', + [ + '-slave', // 😔 + '-idle', + '-novideo', + '-quiet', + '-msglevel', + 'all=1:global=4:cplayer=4', + ], + { + env: process.env, + stdio: ['pipe', 'pipe', 'ignore'], + }, + ) + + // Spawn-itself errors (binary missing, ENOMEM during fork). Without + // this listener Node would re-throw and kill the entire backend-player. + proc.on('error', (err) => { + debug(`mplayer process error: ${err.message}`) + out.emit('mplayer-error', err) + }) + + // EPIPE on stdin when mplayer dies mid-write. The 'close' handler + // below owns the respawn; here we just absorb the error so it doesn't + // become an uncaught exception. + if (proc.stdin) { + proc.stdin.on('error', (err) => { + debug(`mplayer stdin error: ${err.message}`) + }) + } + + proc.stdout.pipe(byLine.createStream()).on('data', (line) => { + onLine(Buffer.isBuffer(line) ? line.toString() : line) + }) + + proc.on('close', (code) => { + if (healthyTimer) { + clearTimeout(healthyTimer) + healthyTimer = null + } + if (shutdown) { + out.emit('close', code) + return + } + const backoff = Math.min(1000 * 2 ** respawnAttempts, MPLAYER_RESPAWN_MAX_BACKOFF_MS) + respawnAttempts++ + debug(`mplayer exited unexpectedly (code ${code}), respawn in ${backoff}ms (attempt ${respawnAttempts})`) + out.emit('mplayer-crash', { code, attempt: respawnAttempts, backoffMs: backoff }) + setTimeout(spawnMplayer, backoff) + }) + + healthyTimer = setTimeout(() => { + respawnAttempts = 0 + healthyTimer = null + }, MPLAYER_HEALTHY_RUN_MS) + } + + spawnMplayer() // wrapper -> mplayer const exec = (cmd, args = []) => { @@ -31,14 +111,35 @@ const createPlayer = () => { for (const arg of args) { str += ' ' if ('string' === typeof arg) { - if (arg.includes(' ')) str += `"` - str += jsStringEscape(arg) - if (arg.includes(' ')) str += `"` + // Decode percent-encoded paths/URLs FIRST, then escape. The + // previous code decoded AFTER jsStringEscape — that undid the + // quote/newline protection and let `%22%0Astop%0A` become a + // literal `"\nstop\n` injection (HIGH-7). Decoding before + // escape keeps the legitimate use case (RSS/playlist track + // names with %20 etc. that callers pass through) while + // jsStringEscape now sees and escapes any quote/newline that + // came out of the decode. + let decoded = arg + try { + decoded = decodeURIComponent(arg) + } catch { + // Malformed percent sequence (e.g. literal `%FF` that isn't + // valid UTF-8). Fall through with the original string — + // jsStringEscape will still neutralise quotes/newlines. + } + if (decoded.includes(' ')) str += `"` + str += jsStringEscape(decoded) + if (decoded.includes(' ')) str += `"` } else str += arg } - str = decodeURIComponent(str) debug(`exec: ${str}`) - proc.stdin.write(`${str}\n`) + if (proc?.stdin?.writable) { + proc.stdin.write(`${str}\n`) + } else { + // mplayer is between crash and respawn — drop the command rather + // than hard-error. Callers that care can react to mplayer-crash. + debug(`exec dropped (mplayer not ready): ${str}`) + } } const getProps = (props) => { for (const prop of props) exec('pausing_keep_force get_property', [prop]) @@ -55,42 +156,11 @@ const createPlayer = () => { const setVolume = (amount) => exec('pausing_keep volume', [amount, '1']) const stop = () => exec('stop') - let closed = false - proc.on('close', (code) => { - closed = true - out.emit('close', code) - if (code > 0) { - // todo: emit err from proc.stderr - } - }) const close = () => { - if (!closed) exec('quit') - } - - // mplayer -> wrapper - const onLine = (line) => { - debug(`line: ${line}`) - if (line === 'Starting playback...') return out.emit('track-change') - - //Callback when playlist finishes - if (line === 'ANS_ERROR=PROPERTY_UNAVAILABLE') return out.emit('playlist-finish') - // todo: `ANS_ERROR=PROPERTY_UNAVAILABLE` - - const parts = /^ANS_([\w]+)=/g.exec(line) - if (!parts || !parts[1]) return null - const prop = parts[1] - - const parser = parsers[prop] - if (!parser) return null - const val = parser(line.slice(parts[0].length)) - out.emit('prop', prop, val) - out.emit(prop, val) + shutdown = true + if (proc?.stdin?.writable) exec('quit') } - proc.stdout.pipe(byLine.createStream()).on('data', (line) => { - onLine(Buffer.isBuffer(line) ? line.toString() : line) - }) - out.exec = exec out.getProps = getProps out.seek = seek diff --git a/src/backend-player/src/spotify-control.js b/src/backend-player/src/spotify-control.js index 732da894..9f459b4b 100644 --- a/src/backend-player/src/spotify-control.js +++ b/src/backend-player/src/spotify-control.js @@ -261,7 +261,7 @@ function setAccessToken(token) { /*called in all error cases*/ /*token expired and no_device error are handled explicitly*/ function handleSpotifyError(err, from) { - if (err.body.error?.status === 401) { + if (err?.body?.error?.status === 401) { log.debug(`${nowDate.toLocaleString()}: access token expired, refreshing...`) log.debug(`${nowDate.toLocaleString()}: Error from: ${from}`) counter.counterrorAccessToken++ @@ -271,7 +271,7 @@ function handleSpotifyError(err, from) { if (currentMeta.activeSpotifyId !== '0') { refreshToken() } - } else if (err.body.error?.status === 400) { + } else if (err?.body?.error?.status === 400) { log.debug(`${nowDate.toLocaleString()}: invalid id`) log.debug(`${nowDate.toLocaleString()}: Error from: ${from}`) log.debug(`${nowDate.toLocaleString()}: ${err}`) @@ -282,7 +282,7 @@ function handleSpotifyError(err, from) { if (currentMeta.activeSpotifyId !== '0') { setActiveDevice() } - } else if (err.body.error?.status === 429) { + } else if (err?.body?.error?.status === 429) { log.debug(`${nowDate.toLocaleString()}: To many requests on th spotify web api`) log.debug(`${nowDate.toLocaleString()}: Error from: ${from}`) log.debug(`${nowDate.toLocaleString()}: ${err}`) @@ -837,6 +837,22 @@ function cmdCall(cmd) { }) } +// Serialise setVolume calls so two rapid taps from the touchscreen +// can't both read the same stale currentMeta.volume and double-increment +// past maxVolume. The previous code fired exec(cmdVolume) WITHOUT +// awaiting the callback and then immediately compared against the +// not-yet-updated currentMeta.volume — for fast taps the comparison +// always saw the value from before any of the in-flight operations, +// so the maxVolume cap (Hörschutz) was bypassable. Bug class: TOCTOU. +let _volumeOpQueue = Promise.resolve() +const _execAsync = (cmd) => + new Promise((resolve, reject) => { + require('node:child_process').exec(cmd, (e, stdout, stderr) => { + if (e) reject(e) + else resolve({ stdout, stderr }) + }) + }) + /*gets available devices, searches for the active one and returns its volume*/ async function setVolume(volume) { const volumeUp = '/usr/bin/amixer sset Master 5%+' @@ -844,33 +860,42 @@ async function setVolume(volume) { const volumeMax = `/usr/bin/amixer sset Master ${muPiBoxConfig.mupibox.maxVolume}%` const cmdVolume = "/usr/bin/amixer sget Master | grep 'Right:'" - const exec = require('node:child_process').exec - exec(cmdVolume, (e, stdout, _stderr) => { - if (e instanceof Error) { - console.error(nowDate.toLocaleString() + e) - throw e + // Chain onto the queue so concurrent invocations run strictly serially. + // Each invocation reads the ACTUAL current volume from amixer first, + // checks the cap, then writes — no stale-comparison window. + _volumeOpQueue = _volumeOpQueue.then(async () => { + let actualVolume + try { + const { stdout } = await _execAsync(cmdVolume) + actualVolume = Number.parseInt(stdout.split('[')[1].split('%')[0], 10) + } catch (e) { + log.warn(`${nowDate.toLocaleString()}: [setVolume] amixer read failed, skipping op:`, e?.message || e) + return } - currentMeta.volume = Number.parseInt(stdout.split('[')[1].split('%')[0], 10) - //console.log('stdout', stdout); - //console.log('stderr', stderr); - }) - - if (volume) { - if (currentMeta.volume < muPiBoxConfig.mupibox.maxVolume) { - await cmdCall(volumeUp) - currentMeta.volume = Number.parseInt(currentMeta.volume, 10) + 5 - } else { - currentMeta.volume = muPiBoxConfig.mupibox.maxVolume - await cmdCall(volumeMax) + if (Number.isNaN(actualVolume)) { + log.warn(`${nowDate.toLocaleString()}: [setVolume] amixer returned unparseable volume, skipping op`) + return } - } else { - await cmdCall(volumeDown) - if (currentMeta.volume > 0) { - currentMeta.volume = Number.parseInt(currentMeta.volume, 10) - 5 + currentMeta.volume = actualVolume + + if (volume) { + if (actualVolume < muPiBoxConfig.mupibox.maxVolume) { + await cmdCall(volumeUp) + currentMeta.volume = Math.min(actualVolume + 5, muPiBoxConfig.mupibox.maxVolume) + } else { + currentMeta.volume = muPiBoxConfig.mupibox.maxVolume + await cmdCall(volumeMax) + } } else { - currentMeta.volume = 0 + await cmdCall(volumeDown) + currentMeta.volume = Math.max(actualVolume - 5, 0) } - } + }).catch((err) => { + // Don't let one failed op poison the queue for subsequent ops. + log.warn(`${nowDate.toLocaleString()}: [setVolume] op failed:`, err?.message || err) + }) + + return _volumeOpQueue } async function transferPlayback(id) {