Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 70 additions & 1 deletion src/exportmi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Upload Assistant © 2025 Audionut & wastaken7 — Licensed under UAPL v1.0
import asyncio
import glob
import json
import os
import platform
Expand Down Expand Up @@ -489,7 +490,17 @@ def filter_mediainfo(data: dict[str, Any]) -> dict[str, Any]:
# Use standard MediaInfo library for non-DVD or when specialized CLI not available
media_info_json = MediaInfo.parse(video, output="JSON")
media_info_dict = json.loads(media_info_json)


# The .VOB must be parsed for the scan type on DVDs, because the .IFO output from the
# older DVD-compatible version of MediaInfo does not contain that field
if is_dvd:
scan_from_vob = await _probe_vob_scantype(video, mediainfo_cmd, debug=debug)
if scan_from_vob:
for track in media_info_dict.get("media", {}).get("track", []):
if track.get("@type") == "Video" and not track.get("ScanType"):
track["ScanType"] = scan_from_vob
break

filtered_info = filter_mediainfo(media_info_dict)

async with aiofiles.open(f"{base_dir}/tmp/{folder_id}/MediaInfo.json", "w", encoding="utf-8") as export:
Expand All @@ -511,6 +522,64 @@ def filter_mediainfo(data: dict[str, Any]) -> dict[str, Any]:
return mi


def _scantype_from_mediainfo(mi: Any) -> str:
    """Return the ScanType of the first Video track in parsed MediaInfo JSON, or ""."""
    if not isinstance(mi, dict):
        return ""
    # MediaInfo emits `"media": null` for files it cannot read, so do not
    # assume the key maps to a dict.
    media = mi.get("media")
    tracks = media.get("track") if isinstance(media, dict) else None
    for track in tracks or []:
        if not isinstance(track, dict) or track.get("@type") != "Video":
            continue
        val = track.get("ScanType") or track.get("scanType") or ""
        # handle nested dicts
        if isinstance(val, dict):
            val = next(
                (val[key] for key in ("#text", "text", "value", "@value", "Name", "String") if val.get(key)),
                "",
            )
        # Only the first Video track is consulted, even when its value is empty.
        return str(val) if val else ""
    return ""


async def _probe_vob_scantype(video: str, mediainfo_cmd: str | None, debug: bool = False) -> str:
    """Return the ScanType reported for the main .VOB next to `video`.

    Candidate selection:
      1. Sibling VOBs derived from the IFO name (`X_0.IFO` -> `X_1.VOB`,
         `X.IFO` -> `X.VOB`), matched case-insensitively against the
         directory listing.
      2. If none of those exist, every `.vob` file in the directory.
    Only the single largest candidate is probed; smaller VOBs are never read.

    Args:
        video: Path to the file being described (normally a DVD .IFO). Its
            directory, or "." when the path has no directory part, is searched.
        mediainfo_cmd: Path to a MediaInfo CLI executable to try first, or
            None to go straight to `MediaInfo.parse`.
        debug: When true, print a notice if the probe fails unexpectedly.

    Returns:
        The ScanType string of the first Video track, or "" when no VOB is
        found, nothing could be parsed, or the field is absent. Never raises.
    """
    try:
        folder = os.path.dirname(video) or "."
        try:
            # lower-cased name -> real name, so lookups work on case-sensitive filesystems
            entries = {name.lower(): name for name in os.listdir(folder)}
        except OSError:
            return ""

        # try heuristic sibling first, then whole-directory fallback
        base = os.path.basename(video).lower()
        wanted: list[str] = []
        if base.endswith(".ifo"):
            stem = base[: -len(".ifo")]
            if stem.endswith("_0"):
                wanted.append(f"{stem[:-2]}_1.vob")
            wanted.append(f"{stem}.vob")
        # Only real .vob names are ever looked up, so the IFO itself can never
        # end up as a candidate.
        candidates = [os.path.join(folder, entries[name]) for name in wanted if name in entries]
        candidates = [path for path in candidates if os.path.isfile(path)]
        if not candidates:
            candidates = [
                os.path.join(folder, real)
                for lowered, real in entries.items()
                if lowered.endswith(".vob") and os.path.isfile(os.path.join(folder, real))
            ]
        if not candidates:
            return ""

        def _size(path: str) -> int:
            # A file may vanish between listing and stat; rank it last instead of failing.
            try:
                return os.path.getsize(path)
            except OSError:
                return 0

        # choose largest VOB (most likely main)
        chosen = max(candidates, key=_size)

        # try CLI first (faster/more consistent), else pymediainfo
        json_text = None
        if mediainfo_cmd:
            try:
                proc = await asyncio.to_thread(
                    subprocess.run,
                    [mediainfo_cmd, "--Output=JSON", chosen],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if proc.returncode == 0 and proc.stdout:
                    json_text = proc.stdout
            except Exception:
                # Best-effort: any CLI failure falls through to pymediainfo.
                json_text = None

        if not json_text:
            try:
                json_text = MediaInfo.parse(chosen, output="JSON")
            except Exception:
                json_text = None

        if not json_text:
            return ""

        mi = json.loads(json_text) if isinstance(json_text, str) else json_text
        return _scantype_from_mediainfo(mi)
    except Exception:
        # The probe is optional enrichment; the caller treats "" as "unknown".
        if debug:
            console.print("[yellow]Probe VOB scantype failed[/yellow]")
        return ""
Comment on lines +531 to +576
Copy link

@coderabbitai coderabbitai bot Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Fallback probing can still miss ScanType on some DVD layouts.

At Line 536, glob fallback only runs when heuristic candidates are empty. If a heuristic file exists but has no usable video ScanType, Line 576 returns empty without trying other .VOB files, so the ScanType can still remain unset.

💡 Proposed fix (probe multiple candidates, largest-first, until ScanType is found)
-        candidates = []
+        candidates: list[str] = []
         for candidate in (base.replace('_0.IFO', '_1.VOB'), base.replace('.IFO', '.VOB')):
             p = os.path.join(folder, candidate)
             if os.path.exists(p):
                 candidates.append(p)
-        if not candidates:
-            candidates = glob.glob(os.path.join(folder, "*.VOB"))
+        # Always include full folder candidates as fallback pool
+        candidates.extend(glob.glob(os.path.join(folder, "*.VOB")))
+        candidates.extend(glob.glob(os.path.join(folder, "*.vob")))
+        # de-dupe while preserving order
+        candidates = list(dict.fromkeys(candidates))
         if not candidates:
             return ""

-        # choose largest VOB (most likely main)
-        chosen = max(candidates, key=lambda p: os.path.getsize(p) if os.path.exists(p) else 0)
-
-        # try CLI first (faster/more consistent), else pymediainfo
-        json_text = None
-        if mediainfo_cmd:
-            try:
-                proc = await asyncio.to_thread(subprocess.run, [mediainfo_cmd, "--Output=JSON", chosen],
-                                               capture_output=True, text=True, timeout=30)
-                if proc.returncode == 0 and proc.stdout:
-                    json_text = proc.stdout
-            except Exception:
-                json_text = None
-
-        if not json_text:
-            try:
-                json_text = MediaInfo.parse(chosen, output="JSON")
-            except Exception:
-                json_text = None
-
-        if not json_text:
-            return ""
-
-        mi = json.loads(json_text) if isinstance(json_text, str) else json_text
-        for track in mi.get("media", {}).get("track", []):
-            if track.get("@type") == "Video":
-                val = track.get("ScanType") or track.get("scanType") or ""
-                # handle nested dicts
-                if isinstance(val, dict):
-                    for key in ("#text", "text", "value", "@value", "Name", "String"):
-                        if key in val and val[key]:
-                            val = val[key]
-                            break
-                    else:
-                        val = ""
-                return str(val) if val else ""
+        # Probe largest-first until we find a usable ScanType
+        for chosen in sorted(candidates, key=lambda p: os.path.getsize(p) if os.path.exists(p) else 0, reverse=True):
+            json_text = None
+            if mediainfo_cmd:
+                try:
+                    proc = await asyncio.to_thread(
+                        subprocess.run,
+                        [mediainfo_cmd, "--Output=JSON", chosen],
+                        capture_output=True,
+                        text=True,
+                        timeout=30,
+                    )
+                    if proc.returncode == 0 and proc.stdout:
+                        json_text = proc.stdout
+                except Exception:
+                    json_text = None
+
+            if not json_text:
+                try:
+                    json_text = MediaInfo.parse(chosen, output="JSON")
+                except Exception:
+                    json_text = None
+
+            if not json_text:
+                continue
+
+            mi = json.loads(json_text) if isinstance(json_text, str) else json_text
+            for track in mi.get("media", {}).get("track", []):
+                if track.get("@type") == "Video":
+                    val = track.get("ScanType") or track.get("scanType") or ""
+                    if isinstance(val, dict):
+                        for key in ("#text", "text", "value", "@value", "Name", "String"):
+                            extracted = val.get(key)
+                            if extracted:
+                                val = extracted
+                                break
+                        else:
+                            val = ""
+                    if val:
+                        return str(val)
     except Exception:
🧰 Tools
🪛 Ruff (0.15.2)

[warning] 552-552: Do not catch blind exception: Exception

(BLE001)


[warning] 558-558: Do not catch blind exception: Exception

(BLE001)


[warning] 571-571: Unnecessary key check before dictionary access

Replace with dict.get

(RUF019)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/exportmi.py` around lines 531 - 576, The current logic picks a single
'chosen' VOB and returns immediately if its tracks lack ScanType; instead,
iterate through all candidate VOBs sorted by descending file size (so
largest-first) and probe each with the existing mediainfo_cmd fallback
(subprocess.run) then MediaInfo.parse until you find a Video track with a
non-empty ScanType/scanType (handling nested dicts as you already do). Update
the use of 'chosen' to a loop over 'candidates' (sorted by os.path.getsize) and
only return when a valid scan type is found; if none yield a value, return "" as
before. Ensure you still respect the mediainfo_cmd/MediaInfo.parse fallback flow
for each candidate.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bad suggestion as the proposed change will undoubtedly cause an incorrect scan type to be written to MediaInfo.json at some point. We do not ever want to read a .VOB file that is not the largest one as the largest .VOB file in a DVD full disc will always be the main movie. If you read any of the smaller .VOB files, they may contain a different scan type than the main movie .VOB files. It is better to fall back to returning an empty string and allowing video.py to handle the case where the Scan type field is not present in the largest .VOB MediaInfo dict.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@9Oc, you're absolutely right—thank you for the correction! I understand now that the largest VOB file always contains the main movie on a DVD full disc, and probing smaller VOBs could indeed lead to an incorrect scan type being written. Your current implementation correctly identifies and uses only the largest VOB, with a fallback to an empty string that video.py can handle appropriately. I appreciate the explanation of the DVD structure!


✏️ Learnings added
Learnt from: 9Oc
Repo: Audionut/Upload-Assistant PR: 1273
File: src/exportmi.py:531-576
Timestamp: 2026-03-01T05:35:38.825Z
Learning: In DVD full disc structures, the largest .VOB file always contains the main movie. Smaller .VOB files may contain different scan types (e.g., special features, menus) and should never be probed for the main video's ScanType. When determining ScanType for DVDs in src/exportmi.py, only the largest VOB should be read, and if it lacks ScanType, return an empty string to let video.py handle the missing field.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: Lusephur
Repo: Audionut/Upload-Assistant PR: 1261
File: src/trackers/TVC.py:106-125
Timestamp: 2026-02-21T21:40:40.803Z
Learning: TVC (src/trackers/TVC.py) does not currently accept BDMV/Blu-ray disc releases or remuxes—only HDTV and WEB-DL. The `_build_disc_info` method exists for potential future compatibility but is not called during normal TVC uploads due to disc blocking in `search_existing()`.

Learnt from: Audionut
Repo: Audionut/Upload-Assistant PR: 1025
File: src/trackers/TVC.py:32-86
Timestamp: 2025-12-14T00:20:21.219Z
Learning: In src/trackers/TVC.py, the genres argument passed to get_cat_id() is a string (comma-separated) rather than a list or other type. Remove or avoid code paths that handle list inputs for this parameter; assume and validate string input, adjust type hints if present, and, if possible, add a unit test to ensure a string is passed.

Learnt from: Audionut
Repo: Audionut/Upload-Assistant PR: 1081
File: src/trackers/CBR.py:94-94
Timestamp: 2025-12-27T13:04:22.071Z
Learning: In Python codebases where a dict (e.g., meta) is guaranteed to always include a specific key (e.g., 'tag'), direct dictionary access meta['tag'] is safe and preferred over meta.get('tag') for performance and readability. Ensure the key is indeed always initialized in all code paths; if there's any doubt, consider defensive checks or documentation to avoid KeyError.

Learnt from: Audionut
Repo: Audionut/Upload-Assistant PR: 1114
File: src/manualpackage.py:16-23
Timestamp: 2026-01-11T05:07:09.469Z
Learning: In Python type hints for read-only mappings (e.g., DEFAULT_CONFIG, TRACKER_CONFIG), prefer Mapping[str, Any] over Dict[str, Any] to signal immutability intent. Use from typing import Mapping; annotate with Mapping[str, Any] while allowing concrete dicts at runtime. Do not rely on isinstance(value, dict) for immutability checks; this is a type-hint convention, not a runtime guarantee.

Learnt from: Audionut
Repo: Audionut/Upload-Assistant PR: 1114
File: src/manualpackage.py:16-23
Timestamp: 2026-01-11T05:07:09.469Z
Learning: In the Upload-Assistant repository, avoid flagging minor or stylistic issues (e.g., ValueError vs TypeError for type validation) unless they represent actual bugs or significant problems. Follow Audionut's preference to focus on substantive issues rather than minor concerns across Python files.

except Exception:
if debug:
console.print("[yellow]Probe VOB scantype failed[/yellow]")
return ""


def validate_mediainfo(meta: dict[str, Any], debug: bool, settings: bool = False) -> bool:
if not any(str(f).lower().endswith(".mkv") for f in meta.get("filelist", [])):
if debug:
Expand Down