pytorch · metascroy · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/.Package.swift/backend_mlx/dummy.swift b/.Package.swift/backend_mlx/dummy.swift
diff --git a/.Package.swift/backend_mlx_debug/dummy.swift b/.Package.swift/backend_mlx_debug/dummy.swift
diff --git a/.github/workflows/mlx.yml b/.github/workflows/mlx.yml
diff --git a/.gitignore b/.gitignore
@@ -74,5 +74,8 @@ xcuserdata/
 *.dll
 *.pyd
 
 # Agents
 .claude/*.local.*
+
+# MLX metallib copied next to the pybindings library for editable installs
+extension/pybindings/mlx.metallib
diff --git a/.gitmodules b/.gitmodules
@@ -67,3 +67,7 @@
 [submodule "third-party/json"]
 	path = third-party/json
 	url = https://github.com/nlohmann/json.git
+[submodule "backends/mlx/third-party/mlx"]
+	path = backends/mlx/third-party/mlx
+	url = https://github.com/ml-explore/mlx.git
+	shallow = true
@@ -659,6 +659,11 @@ if(EXECUTORCH_BUILD_MPS)
   list(APPEND _executorch_backends mpsdelegate)
 endif()
 
+if(EXECUTORCH_BUILD_MLX)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/mlx)
+  list(APPEND _executorch_backends mlxdelegate)
+endif()
+
 if(EXECUTORCH_BUILD_NEURON)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/mediatek)
   list(APPEND _executorch_backends neuron_backend)
@@ -956,6 +961,10 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs mpsdelegate)
   endif()
 
+  if(EXECUTORCH_BUILD_MLX)
+    list(APPEND _dep_libs mlxdelegate)
+  endif()
+
   if(EXECUTORCH_BUILD_OPENVINO)
     list(APPEND _dep_libs openvino_backend)
   endif()
@@ -1056,6 +1065,14 @@ if(EXECUTORCH_BUILD_PYBIND)
   install(TARGETS data_loader
           LIBRARY DESTINATION executorch/extension/pybindings
   )
+
+  # Copy MLX metallib next to _portable_lib.so for editable installs. MLX uses
+  # dladdr() to find the directory containing the library with MLX code, then
+  # looks for mlx.metallib in that directory. When MLX is statically linked into
+  # _portable_lib.so, we need the metallib colocated with it. Gated on
+  # EXECUTORCH_BUILD_MLX, like the other MLX hooks in this file, so pybind
+  # builds without MLX never invoke the MLX helper.
+  if(EXECUTORCH_BUILD_MLX)
+    executorch_target_copy_mlx_metallib(portable_lib)
+  endif()
 endif()
 
 if(EXECUTORCH_BUILD_WASM)

@@ -45,7 +45,8 @@
         "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
         "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos.cmake",
         "PLATFORM": "MAC_ARM64",
-        "DEPLOYMENT_TARGET": "12.0",
+        "DEPLOYMENT_TARGET": "14.0",
+        "CMAKE_OSX_DEPLOYMENT_TARGET": "14.0",
         "CMAKE_MACOSX_BUNDLE": "OFF"
       },
       "condition": {
@@ -110,7 +111,7 @@
       "inherits": ["common"],
       "cacheVariables": {
         "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/pybind.cmake",
-        "CMAKE_OSX_DEPLOYMENT_TARGET": "12.0"
+        "CMAKE_OSX_DEPLOYMENT_TARGET": "14.0"
       },
       "condition": {
         "type": "inList",
@@ -294,6 +295,43 @@
         "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/arm_ethosu_linux.cmake",
         "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/arm/ethos-u-setup/aarch64-linux-musl-toolchain.cmake"
       }
+    },
+    {
+      "name": "mlx",
+      "displayName": "Build MLX delegate",
+      "inherits": ["common"],
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/mlx.cmake",
+        "EXECUTORCH_ENABLE_LOGGING": "ON",
+        "CMAKE_OSX_DEPLOYMENT_TARGET": "14.0"
+      },
+      "condition": {
+        "lhs": "${hostSystemName}",
+        "type": "equals",
+        "rhs": "Darwin"
+      }
+    },
+    {
+      "name": "mlx-release",
+      "displayName": "MLX delegate release build",
+      "inherits": ["mlx"],
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "Release",
+        "CMAKE_INSTALL_PREFIX": "${sourceDir}/cmake-out",
+        "ET_MLX_ENABLE_OP_LOGGING": "OFF",
+        "ET_MIN_LOG_LEVEL": "Error"
+      }
+    },
+    {
+      "name": "mlx-debug",
+      "displayName": "MLX delegate debug build with op logging",
+      "inherits": ["mlx"],
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "Debug",
+        "CMAKE_INSTALL_PREFIX": "${sourceDir}/cmake-out",
+        "ET_MLX_ENABLE_OP_LOGGING": "ON",
+        "ET_MIN_LOG_LEVEL": "Debug"
+      }
     }
   ],
   "buildPresets": [
@@ -362,6 +400,24 @@
         "install"
       ],
       "jobs": 0
+    },
+    {
+      "name": "mlx-release-install",
+      "displayName": "Build and install MLX delegate release artifacts",
+      "configurePreset": "mlx-release",
+      "targets": [
+        "install"
+      ],
+      "jobs": 0
+    },
+    {
+      "name": "mlx-debug-install",
+      "displayName": "Build and install MLX delegate debug artifacts",
+      "configurePreset": "mlx-debug",
+      "targets": [
+        "install"
+      ],
+      "jobs": 0
     }
   ],
   "workflowPresets": [
@@ -462,6 +518,34 @@
           "name": "llm-metal-stats-install"
         }
       ]
+    },
+    {
+      "name": "mlx-release",
+      "displayName": "Configure, build and install ExecuTorch MLX delegate",
+      "steps": [
+        {
+          "type": "configure",
+          "name": "mlx-release"
+        },
+        {
+          "type": "build",
+          "name": "mlx-release-install"
+        }
+      ]
+    },
+    {
+      "name": "mlx-debug",
+      "displayName": "Configure, build and install ExecuTorch MLX delegate with op logging (Debug)",
+      "steps": [
+        {
+          "type": "configure",
+          "name": "mlx-debug"
+        },
+        {
+          "type": "build",
+          "name": "mlx-debug-install"
+        }
+      ]
     }
   ]
 }
diff --git a/Makefile b/Makefile
@@ -14,10 +14,10 @@
 #
 # SUPPORTED MODELS:
 # -----------------
-# - voxtral:  Multimodal voice + text model (CPU, CUDA, Metal)
-# - voxtral_realtime: Realtime speech-to-text model (CPU, CUDA, Metal)
+# - voxtral:  Multimodal voice + text model (CPU, CUDA, Metal, MLX)
+# - voxtral_realtime: Realtime speech-to-text model (CPU, CUDA, Metal, MLX)
 # - whisper:  Speech recognition model (CPU, CUDA, Metal)
-# - parakeet: Speech recognition model (CPU, CUDA, Metal)
+# - parakeet: Speech recognition model (CPU, CUDA, Metal, MLX)
 # - sortformer: Speaker diarization model (CPU)
 # - silero_vad: Voice activity detection model (CPU)
 # - llama:    Text generation model (CPU)
@@ -91,16 +91,18 @@
 #
 # ==============================================================================
 
-.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
+.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral-mlx voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal voxtral_realtime-mlx whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-mlx sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
 
 help:
 	@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
 	@echo "  voxtral-cuda        - Build Voxtral runner with CUDA backend"
 	@echo "  voxtral-cpu         - Build Voxtral runner with CPU backend"
 	@echo "  voxtral-metal       - Build Voxtral runner with Metal backend (macOS only)"
+	@echo "  voxtral-mlx         - Build Voxtral runner with MLX backend (macOS only)"
 	@echo "  voxtral_realtime-cuda - Build Voxtral Realtime runner with CUDA backend"
 	@echo "  voxtral_realtime-cpu - Build Voxtral Realtime runner with CPU backend"
 	@echo "  voxtral_realtime-metal - Build Voxtral Realtime runner with Metal backend (macOS only)"
+	@echo "  voxtral_realtime-mlx - Build Voxtral Realtime runner with MLX backend (macOS only)"
 	@echo "  whisper-cuda        - Build Whisper runner with CUDA backend"
 	@echo "  whisper-cuda-debug  - Build Whisper runner with CUDA backend (debug mode)"
 	@echo "  whisper-cpu         - Build Whisper runner with CPU backend"
@@ -109,6 +111,7 @@ help:
 	@echo "  parakeet-cuda-debug - Build Parakeet runner with CUDA backend (debug mode)"
 	@echo "  parakeet-cpu        - Build Parakeet runner with CPU backend"
 	@echo "  parakeet-metal      - Build Parakeet runner with Metal backend (macOS only)"
+	@echo "  parakeet-mlx        - Build Parakeet runner with MLX backend (macOS only)"
 	@echo "  sortformer-cpu      - Build Sortformer runner with CPU backend"
 	@echo "  silero-vad-cpu      - Build Silero VAD runner with CPU backend"
 	@echo "  llama-cuda          - Build Llama runner with CUDA backend"
@@ -146,6 +149,15 @@ voxtral-metal:
 	@echo "✓ Build complete!"
 	@echo "  Binary: cmake-out/examples/models/voxtral/voxtral_runner"
 
+voxtral-mlx:
+	@echo "==> Building and installing ExecuTorch with MLX..."
+	cmake --workflow --preset mlx-release
+	@echo "==> Building Voxtral runner with MLX..."
+	cd examples/models/voxtral && cmake --workflow --preset voxtral-mlx
+	@echo ""
+	@echo "✓ Build complete!"
+	@echo "  Binary: cmake-out/examples/models/voxtral/voxtral_runner"
+
 whisper-cuda:
 	@echo "==> Building and installing ExecuTorch with CUDA..."
 	cmake --workflow --preset llm-release-cuda
@@ -218,6 +230,15 @@ parakeet-metal:
 	@echo "✓ Build complete!"
 	@echo "  Binary: cmake-out/examples/models/parakeet/parakeet_runner"
 
+parakeet-mlx:
+	@echo "==> Building and installing ExecuTorch with MLX..."
+	cmake --workflow --preset mlx-release
+	@echo "==> Building Parakeet runner with MLX..."
+	cd examples/models/parakeet && cmake --workflow --preset parakeet-mlx
+	@echo ""
+	@echo "✓ Build complete!"
+	@echo "  Binary: cmake-out/examples/models/parakeet/parakeet_runner"
+
 sortformer-cpu:
 	@echo "==> Building and installing ExecuTorch..."
 	cmake --workflow --preset llm-release
@@ -254,6 +275,15 @@ voxtral_realtime-cuda:
 	@echo "✓ Build complete!"
 	@echo "  Binary: cmake-out/examples/models/voxtral_realtime/voxtral_realtime_runner"
 
+voxtral_realtime-mlx:
+	@echo "==> Building and installing ExecuTorch with MLX..."
+	cmake --workflow --preset mlx-release
+	@echo "==> Building Voxtral Realtime runner with MLX..."
+	cd examples/models/voxtral_realtime && cmake --workflow --preset voxtral-realtime-mlx
+	@echo ""
+	@echo "✓ Build complete!"
+	@echo "  Binary: cmake-out/examples/models/voxtral_realtime/voxtral_realtime_runner"
+
 silero-vad-cpu:
 	@echo "==> Building and installing ExecuTorch..."
 	cmake --workflow --preset llm-release

diff --git a/Package.swift b/Package.swift
@@ -52,6 +52,13 @@ let products = deliverables([
       "sqlite3",
     ],
   ],
+  "backend_mlx": [
+    "frameworks": [
+      "Metal",
+      "MetalPerformanceShaders",
+    ],
+    "forceLoad": true,
+  ],
   "backend_mps": [
     "frameworks": [
       "Metal",
@@ -113,15 +120,20 @@ for (key, value) in products {
     name: key,
     path: "cmake-out/\(key).xcframework"
   ))
+  // "forceLoad" targets link with -all_load so the linker keeps every static-archive member.
+  // NOTE(review): SwiftPM restricts packages that use unsafeFlags as dependencies — confirm.
+  let frameworkSettings: [LinkerSetting] = (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) }
+  let librarySettings: [LinkerSetting] = (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
+  let wantsForceLoad = value["forceLoad"] as? Bool ?? false
+  let forceLoadSettings: [LinkerSetting] = wantsForceLoad ? [.unsafeFlags(["-all_load"])] : []
+  let linkerSettings = frameworkSettings + librarySettings + forceLoadSettings
   let target: Target = .target(
     name: "\(key)\(dependencies_suffix)",
     dependencies: ([key] + (value["targets"] as? [String] ?? []).map {
       key.hasSuffix(debug_suffix) ? $0 + debug_suffix : $0
     }).map { .target(name: $0) },
     path: ".Package.swift/\(key)",
-    linkerSettings:
-      (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } +
-      (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
+    linkerSettings: linkerSettings
   )
   packageTargets.append(target)
 }