From 32d6ec78b46e57fa8eff5fd9b61470721ca3d86b Mon Sep 17 00:00:00 2001 From: Ryan <10638626+ryancee@users.noreply.github.com> Date: Sat, 18 Apr 2026 12:49:55 -0400 Subject: [PATCH] fix(discovery): fall back to directory name for audio model type detection Models whose config.json omits a top-level model_type field (e.g. parakeet-tdt models, which use NeMo-format config files) were being classified as 'llm' because detect_model_type() only inspects the architectures[] and model_type fields from config.json. This caused a KeyError('model_type') at inference time: the wrong engine (BatchedEngine/LLM) was loaded for the model, and that engine's LLM loading path called config['model_type'] without a .get() fallback. Fix: after all config-based checks, extract the first hyphen-separated segment of the model directory name and match it against the same mlx-audio AUDIO_STT/TTS/STS_MODEL_TYPES sets used for config-based detection. Stems that appear in _LLM_TYPE_COLLISIONS are skipped to avoid false positives (e.g. a directory named 'llama-...' should not be detected as audio). This matches how mlx_audio.utils.base_load_model already resolves model type from path when config is missing the field. --- omlx/model_discovery.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/omlx/model_discovery.py b/omlx/model_discovery.py index ffa69e1f..48db97d7 100644 --- a/omlx/model_discovery.py +++ b/omlx/model_discovery.py @@ -490,6 +490,20 @@ def detect_model_type(model_path: Path) -> ModelType: if normalized_type.startswith("lfm") and normalized_type not in EMBEDDING_MODEL_TYPES: return "audio_sts" + # Directory-name fallback for audio models whose config.json omits model_type. + # Some model families (e.g. NeMo-format parakeet) do not include a top-level + # model_type field, so the checks above find nothing. As a last resort, match + # the first hyphen-separated segment of the model directory name against the + # same mlx-audio model-type sets used above. 
+ dir_stem = model_path.name.lower().split("-")[0] + if dir_stem and dir_stem not in _LLM_TYPE_COLLISIONS: + if dir_stem in AUDIO_TTS_MODEL_TYPES: + return "audio_tts" + if dir_stem in AUDIO_STT_MODEL_TYPES: + return "audio_stt" + if dir_stem in AUDIO_STS_MODEL_TYPES: + return "audio_sts" + return "llm"