Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 60 additions & 3 deletions music_assistant/helpers/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import itertools
import os
import random
import re
import urllib.parse
from base64 import b64decode
from collections.abc import Iterable
from io import BytesIO
Expand All @@ -28,8 +30,52 @@
from music_assistant.mass import MusicAssistant


async def get_image_data(mass: MusicAssistant, path_or_url: str, provider: str) -> bytes:
"""Create thumbnail from image url."""
def _extract_imageproxy_params(url: str) -> tuple[str, str] | None:
"""
Extract path and provider from an imageproxy URL.

:param url: The URL to check for imageproxy format.
:return: Tuple of (path, provider) if this is an imageproxy URL, None otherwise.
"""
if "/imageproxy?" not in url:
return None

try:
parsed = urllib.parse.urlparse(url)
query_params = urllib.parse.parse_qs(parsed.query)

path = query_params.get("path", [None])[0]
provider = query_params.get("provider", ["builtin"])[0]

if path:
# The path may be double URL-encoded, decode it
decoded_path = urllib.parse.unquote_plus(path)
if re.search(r"%[0-9A-Fa-f]{2}", decoded_path):
decoded_path = urllib.parse.unquote_plus(decoded_path)
Comment on lines +51 to +54
Copy link

Copilot AI Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The double URL decoding logic at lines 52-54 uses a regex pattern to detect if the path is still URL-encoded after the first decoding, then decodes it again. However, this approach may fail to handle edge cases where the regex pattern appears naturally in the decoded string (e.g., a filename containing "%20"). A more reliable approach would be to track whether double encoding was applied when the URL was created and decode accordingly. Alternatively, you could use a loop to decode until no more encoded characters are found, with a safety limit to prevent infinite loops.

Suggested change
# The path may be double URL-encoded, decode it
decoded_path = urllib.parse.unquote_plus(path)
if re.search(r"%[0-9A-Fa-f]{2}", decoded_path):
decoded_path = urllib.parse.unquote_plus(decoded_path)
# The path may be URL-encoded multiple times; decode until stable
decoded_path = path
for _ in range(3):
new_decoded = urllib.parse.unquote_plus(decoded_path)
if new_decoded == decoded_path:
break
decoded_path = new_decoded

Copilot uses AI. Check for mistakes.
return (decoded_path, provider)
except (KeyError, ValueError, IndexError):
# URL parsing failed, not an imageproxy URL
pass
Comment on lines +43 to +58
Copy link

Copilot AI Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exception handler catches KeyError, ValueError, and IndexError, but these exceptions are unlikely to be raised by the operations in the try block. urllib.parse.urlparse() doesn't raise these exceptions for invalid URLs - it just parses them as best it can. urllib.parse.parse_qs() also doesn't raise these exceptions. The only place where IndexError could potentially occur is line 47-48 when accessing index [0], but this is already guarded by the .get() method which returns a default value. Consider removing this overly broad exception handling, or at minimum document what specific error conditions you're trying to handle.

Suggested change
try:
parsed = urllib.parse.urlparse(url)
query_params = urllib.parse.parse_qs(parsed.query)
path = query_params.get("path", [None])[0]
provider = query_params.get("provider", ["builtin"])[0]
if path:
# The path may be double URL-encoded, decode it
decoded_path = urllib.parse.unquote_plus(path)
if re.search(r"%[0-9A-Fa-f]{2}", decoded_path):
decoded_path = urllib.parse.unquote_plus(decoded_path)
return (decoded_path, provider)
except (KeyError, ValueError, IndexError):
# URL parsing failed, not an imageproxy URL
pass
parsed = urllib.parse.urlparse(url)
query_params = urllib.parse.parse_qs(parsed.query)
path = query_params.get("path", [None])[0]
provider = query_params.get("provider", ["builtin"])[0]
if path:
# The path may be double URL-encoded, decode it
decoded_path = urllib.parse.unquote_plus(path)
if re.search(r"%[0-9A-Fa-f]{2}", decoded_path):
decoded_path = urllib.parse.unquote_plus(decoded_path)
return (decoded_path, provider)

Copilot uses AI. Check for mistakes.

return None


async def get_image_data(
mass: MusicAssistant, path_or_url: str, provider: str, *, _depth: int = 0
) -> bytes:
"""
Retrieve image data from a path or URL.

:param mass: The MusicAssistant instance.
:param path_or_url: The image path, URL, or base64 data URI.
:param provider: The provider ID that can resolve the image.
:param _depth: Internal recursion depth counter (do not set manually).
"""
max_recursion_depth = 5
if _depth > max_recursion_depth:
Copy link

Copilot AI Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The recursion depth check uses > (greater than) instead of >= (greater than or equal), which allows one extra level of recursion beyond the intended maximum. With max_recursion_depth = 5 and the check _depth > max_recursion_depth, the function allows calls at depths 0 through 5 (six levels) and only raises once _depth reaches 6. The check should be if _depth >= max_recursion_depth: to limit execution to depths 0 through 4, i.e. the intended maximum of 5 levels.

Suggested change
if _depth > max_recursion_depth:
if _depth >= max_recursion_depth:

Copilot uses AI. Check for mistakes.
msg = f"Maximum recursion depth exceeded when fetching image: {path_or_url}"
raise FileNotFoundError(msg)

# TODO: add local cache here !
if prov := mass.get_provider(provider):
assert isinstance(prov, MusicProvider | MetadataProvider | PluginProvider)
Expand All @@ -40,11 +86,22 @@ async def get_image_data(mass: MusicAssistant, path_or_url: str, provider: str)
path_or_url = resolved_image
# handle HTTP location
if path_or_url.startswith("http"):
# Handle imageproxy URLs pointing to our own server
parsed_url = urllib.parse.urlparse(path_or_url)
url_host = f"{parsed_url.scheme}://{parsed_url.netloc}"
server_base_urls = {mass.webserver.base_url, mass.streams.base_url}
if url_host in server_base_urls:
if imageproxy_params := _extract_imageproxy_params(path_or_url):
extracted_path, extracted_provider = imageproxy_params
Copy link

Copilot AI Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The self-reference detection at line 93 checks if the URL host matches the server base URLs, but this check may not handle all edge cases. If the imageproxy URL includes query parameters that reference another imageproxy URL (creating a chain), and those query parameters differ (e.g., different size or format parameters), the recursion will continue until hitting the depth limit. Consider also checking if the extracted path from the imageproxy URL is itself an imageproxy URL pointing to the same server, to prevent unnecessary recursion through chains of imageproxy URLs with varying parameters.

Suggested change
extracted_path, extracted_provider = imageproxy_params
extracted_path, extracted_provider = imageproxy_params
# Iteratively unwrap nested imageproxy URLs that also point to our own server
seen_urls: set[str] = set()
while True:
if extracted_path in seen_urls:
# Detected a loop in proxy URLs; stop unwrapping
break
seen_urls.add(extracted_path)
inner_params = _extract_imageproxy_params(extracted_path)
if not inner_params:
break
inner_parsed = urllib.parse.urlparse(extracted_path)
inner_host = f"{inner_parsed.scheme}://{inner_parsed.netloc}" if inner_parsed.scheme and inner_parsed.netloc else ""
# Only unwrap further if the inner proxy URL also points to this server
if inner_host and inner_host not in server_base_urls:
break
extracted_path, extracted_provider = inner_params

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When extracting and decoding the imageproxy path parameter, there's no validation that the decoded path is safe before recursively calling get_image_data. While the recursive call will eventually validate the path through existing checks (e.g., is_safe_path for file paths), an attacker could potentially craft a malicious imageproxy URL that decodes to a path attempting directory traversal or other security issues. Consider adding an early validation check on the extracted_path before the recursive call, especially since this is handling user-controlled URL input.

Suggested change
extracted_path, extracted_provider = imageproxy_params
extracted_path, extracted_provider = imageproxy_params
# Early validation of extracted path before recursive call
if not extracted_path.startswith(("http://", "https://", "data:image")):
if not is_safe_path(extracted_path):
msg = f"Unsafe image path: {extracted_path}"
raise FileNotFoundError(msg)

Copilot uses AI. Check for mistakes.
return await get_image_data(
mass, extracted_path, extracted_provider, _depth=_depth + 1
)
try:
async with mass.http_session_no_ssl.get(path_or_url, raise_for_status=True) as resp:
return await resp.read()
except ClientError as err:
raise FileNotFoundError from err
msg = f"Failed to fetch image from {path_or_url}: {err}"
raise FileNotFoundError(msg) from err
# handle base64 embedded images
if path_or_url.startswith("data:image"):
return b64decode(path_or_url.split(",")[-1])
Expand Down
Loading