From dc5dbef3fcdccff6de28e92fb4f26b76b6feb4ad Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 19:34:02 +0800
Subject: [PATCH 1/8] feat: add Docker Compose YAML editor and native process
 recovery

---
 backend/app/services/deployment_sync.py |   67 ++
 backend/pyproject.toml                  |    5 +
 backend/requirements.txt                |    1 +
 frontend/package-lock.json              |   90 +-
 frontend/package.json                   |    4 +-
 frontend/src/components/logos/index.tsx |   15 +
 frontend/src/pages/Deployments.tsx      | 1251 ++++++++++++++---------
 frontend/src/utils/dockerCompose.ts     |  336 ++++++
 worker/agent.py                         |   12 +
 worker/native_ops/process_manager.py    |  121 +++
 10 files changed, 1432 insertions(+), 470 deletions(-)
 create mode 100644 frontend/src/utils/dockerCompose.ts

diff --git a/backend/app/services/deployment_sync.py b/backend/app/services/deployment_sync.py
index 3e9e55d..bf38b3f 100644
--- a/backend/app/services/deployment_sync.py
+++ b/backend/app/services/deployment_sync.py
@@ -122,6 +122,69 @@ async def check_with_semaphore(deployment: Deployment):
 
         return stats
 
+    def _is_native_deployment(self, deployment: Deployment) -> bool:
+        """Return True when the deployment runs as a native process, not in Docker.
+
+        Three signals are checked in order: a "native-" container_id prefix,
+        a backend the code treats as always native ("mlx", "llama_cpp"), and
+        Ollama placed on a worker whose ``is_mac`` flag is set.
+        """
+        container_id = deployment.container_id
+        if container_id and container_id.startswith("native-"):
+            return True
+
+        if deployment.backend in {"mlx", "llama_cpp"}:
+            return True
+
+        if deployment.backend != BackendType.OLLAMA.value:
+            return False
+        return bool(deployment.worker and deployment.worker.is_mac)
+
+    async def _check_native_deployment(self, deployment: Deployment) -> str:
+        """Probe a native (non-Docker) deployment through its API endpoint.
+
+        Returns "skipped" when the worker is not online (status untouched),
+        "running_verified" when the API responds, and "api_not_ready" when it
+        does not respond or the check itself raises.
+        """
+        try:
+            # An offline worker may just be reconnecting: leave the stored
+            # status alone and let a later health-check pass retry.
+            if deployment.worker.status != "online":
+                logger.info(
+                    f"Native deployment {deployment.name}: worker offline, "
+                    "keeping current status (may be reconnecting)"
+                )
+                return "skipped"
+
+            api_healthy = await self._check_api_health(
+                deployment.worker.address,
+                deployment.port,
+                deployment.backend,
+                None,  # No container_name for native
+            )
+
+            if api_healthy:
+                if deployment.status != DeploymentStatus.RUNNING.value:
+                    deployment.status = DeploymentStatus.RUNNING.value
+                    deployment.status_message = "Model ready (native process verified)"
+                logger.info(f"Native deployment {deployment.name}: healthy")
+                return "running_verified"
+
+            # The process may have died or may not be up yet; mark it STARTING
+            # instead of ERROR to allow a retry.
+            deployment.status = DeploymentStatus.STARTING.value
+            deployment.status_message = "Native process not responding. Waiting for recovery..."
+            logger.info(f"Native deployment {deployment.name}: API not responding, waiting...")
+            return "api_not_ready"
+
+        except Exception as e:
+            # logger.exception records the traceback alongside the message.
+            logger.exception(f"Error checking native deployment {deployment.name}: {e}")
+            deployment.status = DeploymentStatus.STARTING.value
+            deployment.status_message = f"Checking status: {e}"
+            return "api_not_ready"
+
     async def _check_and_update_deployment(self, deployment: Deployment, db) -> str:
         """Check a single deployment and update its status.
 
@@ -134,6 +197,10 @@ async def _check_and_update_deployment(self, deployment: Deployment, db) -> str:
             logger.warning(f"Deployment {deployment.id} has no worker, skipping")
             return "skipped"
 
+        # Check if this is a native deployment (Mac without Docker)
+        if self._is_native_deployment(deployment):
+            return await self._check_native_deployment(deployment)
+
         if not deployment.container_id:
             # If deployment is still starting, skip it
             if deployment.status == DeploymentStatus.STARTING.value:
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index b11ee91..6f3156e 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -19,6 +19,11 @@ dependencies = [
     "httpx>=0.26.0",
     "docker>=7.0.0",
     "python-multipart>=0.0.6",
+    "python-jose[cryptography]>=3.3.0",
+    "email-validator>=2.0.0",
+    "psutil>=5.9.0",
+    "optuna>=3.5.0",
+    "openai>=1.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 0729e40..001cf1e 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -11,3 +11,4 @@ python-jose[cryptography]>=3.3.0
 email-validator>=2.0.0
 psutil>=5.9.0
 optuna>=3.5.0
+openai>=1.0.0
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index c2b2f48..ad6c288 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -9,6 +9,7 @@
       "version": "0.1.0",
       "dependencies": {
         "@ant-design/icons": "^5.2.6",
+        "@monaco-editor/react": "^4.7.0",
         "antd": "^5.12.0",
         "axios": "^1.6.0",
         "dayjs": "^1.11.10",
@@ -20,7 +21,8 @@
         "react-syntax-highlighter": "^16.1.0",
         "recharts": "^3.6.0",
         "rehype-raw": "^7.0.0",
-        "remark-gfm": "^4.0.1"
+        "remark-gfm": "^4.0.1",
+        "yaml": "^2.8.2"
       },
       "devDependencies": {
         "@testing-library/jest-dom": "^6.1.0",
@@ -1252,6 +1254,29 @@
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
+    "node_modules/@monaco-editor/loader": {
+      "version": "1.7.0",
+      "resolved": "https://registry.npmjs.org/@monaco-editor/loader/-/loader-1.7.0.tgz",
+      "integrity": "sha512-gIwR1HrJrrx+vfyOhYmCZ0/JcWqG5kbfG7+d3f/C1LXk2EvzAbHSg3MQ5lO2sMlo9izoAZ04shohfKLVT6crVA==",
+      "license": "MIT",
+      "dependencies": {
+        "state-local": "^1.0.6"
+      }
+    },
+    "node_modules/@monaco-editor/react": {
+      "version": "4.7.0",
+      "resolved": "https://registry.npmjs.org/@monaco-editor/react/-/react-4.7.0.tgz",
+      "integrity": "sha512-cyzXQCtO47ydzxpQtCGSQGOC8Gk3ZUeBXFAxD+CWXYFo5OqZyZUonFl0DwUlTyAfRHntBfw2p3w4s9R6oe1eCA==",
+      "license": "MIT",
+      "dependencies": {
+        "@monaco-editor/loader": "^1.5.0"
+      },
+      "peerDependencies": {
+        "monaco-editor": ">= 0.25.0 < 1",
+        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
+        "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+      }
+    },
     "node_modules/@nodelib/fs.scandir": {
       "version": "2.1.5",
       "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -2195,6 +2220,14 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/@types/trusted-types": {
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz",
+      "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==",
+      "license": "MIT",
+      "optional": true,
+      "peer": true
+    },
     "node_modules/@types/unist": {
       "version": "3.0.3",
       "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
@@ -3668,6 +3701,16 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/dompurify": {
+      "version": "3.2.7",
+      "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.7.tgz",
+      "integrity": "sha512-WhL/YuveyGXJaerVlMYGWhvQswa7myDG17P7Vu65EWC05o8vfeNbvNf4d/BOvH99+ZW+LlQsc1GDKMa1vNK6dw==",
+      "license": "(MPL-2.0 OR Apache-2.0)",
+      "peer": true,
+      "optionalDependencies": {
+        "@types/trusted-types": "^2.0.7"
+      }
+    },
     "node_modules/dunder-proto": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@@ -5674,6 +5717,19 @@
         "url": "https://github.com/sponsors/wooorm"
       }
     },
+    "node_modules/marked": {
+      "version": "14.0.0",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-14.0.0.tgz",
+      "integrity": "sha512-uIj4+faQ+MgHgwUW1l2PsPglZLOLOT1uErt06dAPtx2kjteLAkbsd/0FiYg/MGS+i7ZKLb7w2WClxHkzOOuryQ==",
+      "license": "MIT",
+      "peer": true,
+      "bin": {
+        "marked": "bin/marked.js"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
     "node_modules/math-intrinsics": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
@@ -6646,6 +6702,17 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/monaco-editor": {
+      "version": "0.55.1",
+      "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.55.1.tgz",
+      "integrity": "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A==",
+      "license": "MIT",
+      "peer": true,
+      "dependencies": {
+        "dompurify": "3.2.7",
+        "marked": "14.0.0"
+      }
+    },
     "node_modules/mrmime": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-2.0.1.tgz",
@@ -8510,6 +8577,12 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/state-local": {
+      "version": "1.0.7",
+      "resolved": "https://registry.npmjs.org/state-local/-/state-local-1.0.7.tgz",
+      "integrity": "sha512-HTEHMNieakEnoe33shBYcZ7NX83ACUjCu8c40iOGEZsngj9zRnkqS9j1pqQPXwobB0ZcVTk27REb7COQ0UR59w==",
+      "license": "MIT"
+    },
     "node_modules/std-env": {
       "version": "3.10.0",
       "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz",
@@ -9502,6 +9575,21 @@
       "dev": true,
       "license": "ISC"
     },
+    "node_modules/yaml": {
+      "version": "2.8.2",
+      "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz",
+      "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==",
+      "license": "ISC",
+      "bin": {
+        "yaml": "bin.mjs"
+      },
+      "engines": {
+        "node": ">= 14.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/eemeli"
+      }
+    },
     "node_modules/yocto-queue": {
       "version": "0.1.0",
       "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz",
diff --git a/frontend/package.json b/frontend/package.json
index 964a24d..308a97b 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -14,6 +14,7 @@
   },
   "dependencies": {
     "@ant-design/icons": "^5.2.6",
+    "@monaco-editor/react": "^4.7.0",
     "antd": "^5.12.0",
     "axios": "^1.6.0",
     "dayjs": "^1.11.10",
@@ -25,7 +26,8 @@
     "react-syntax-highlighter": "^16.1.0",
     "recharts": "^3.6.0",
     "rehype-raw": "^7.0.0",
-    "remark-gfm": "^4.0.1"
+    "remark-gfm": "^4.0.1",
+    "yaml": "^2.8.2"
   },
   "devDependencies": {
     "@testing-library/jest-dom": "^6.1.0",
diff --git a/frontend/src/components/logos/index.tsx b/frontend/src/components/logos/index.tsx
index 029b8b7..bf955ba 100644
--- a/frontend/src/components/logos/index.tsx
+++ b/frontend/src/components/logos/index.tsx
@@ -132,6 +132,21 @@ interface IconProps {
   className?: string;
 }
 
+/**
+ * Apple logo as an inline SVG; the glyph is filled with `currentColor`.
+ */
+export function AppleIcon({ size = 14, style, className }: IconProps) {
+  // One `size` value drives both width and height of the square icon
+  const box = { width: size, height: size, viewBox: "0 0 24 24" };
+  const glyph =
+    "M18.71 19.5c-.83 1.24-1.71 2.45-3.05 2.47-1.34.03-1.77-.79-3.29-.79-1.53 0-2 .77-3.27.82-1.31.05-2.3-1.32-3.14-2.53C4.25 17 2.94 12.45 4.7 9.39c.87-1.52 2.43-2.48 4.12-2.51 1.28-.02 2.5.87 3.29.87.78 0 2.26-1.07 3.81-.91.65.03 2.47.26 3.64 1.98-.09.06-2.17 1.28-2.15 3.81.03 3.02 2.65 4.03 2.68 4.04-.03.07-.42 1.44-1.38 2.83M13 3.5c.73-.83 1.94-1.46 2.94-1.5.13 1.17-.34 2.35-1.04 3.19-.69.85-1.83 1.51-2.95 1.42-.15-1.15.41-2.35 1.05-3.11z";
+  return (
+    <svg {...box} fill="currentColor" style={style} className={className}>
+      <path d={glyph} />
+    </svg>
+  );
+}
+
 export function DockerIcon({ size = 14, style, className }: IconProps) {
   return (
     <svg
diff --git a/frontend/src/pages/Deployments.tsx b/frontend/src/pages/Deployments.tsx
index e77be8f..57b369b 100644
--- a/frontend/src/pages/Deployments.tsx
+++ b/frontend/src/pages/Deployments.tsx
@@ -28,12 +28,14 @@ import {
   VerticalAlignBottomOutlined,
   ExclamationCircleOutlined,
   SettingOutlined,
+  CodeOutlined,
 } from "@ant-design/icons";
 import { useAppTheme } from "../hooks/useTheme";
 import {
   OllamaLogo,
   HuggingFaceLogo,
   DockerIcon,
+  AppleIcon,
   getBackendConfig,
 } from "../components/logos";
 import { getDeploymentStatusColor } from "../utils";
@@ -45,6 +47,13 @@ import DeploymentAdvancedForm from "../components/DeploymentAdvancedForm";
 import ModelCompatibilityCheck from "../components/ModelCompatibilityCheck";
 import ModelFormatCompatibility from "../components/ModelFormatCompatibility";
 import backendVersionsData from "../constants/backendVersions.json";
+import {
+  generateDockerCompose,
+  parseDockerCompose,
+  validateDockerCompose,
+  type DeploymentConfig,
+} from "../utils/dockerCompose";
+import Editor from "@monaco-editor/react";
 import dayjs from "dayjs";
 import utc from "dayjs/plugin/utc";
 
@@ -92,6 +101,12 @@ export default function Deployments() {
   const [editingDeployment, setEditingDeployment] = useState<Deployment | null>(
     null,
   );
+  // YAML editor state (side panel of the New Deployment modal)
+  const [showYamlPanel, setShowYamlPanel] = useState(true); // Panel starts open; its toggle is only rendered when not on mobile
+  const [yamlContent, setYamlContent] = useState<string>(""); // Text currently held for the YAML editor
+  const [yamlError, setYamlError] = useState<string | null>(null); // Validation error for yamlContent, or null when valid
+  const [isYamlUserEditing, setIsYamlUserEditing] = useState(false); // True while the user's YAML edits must not be overwritten by the form
+  const yamlSyncTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null); // Handle of the pending debounced YAML-to-form sync
   const { isMobile } = useResponsive();
   const { isDark } = useAppTheme();
   const { canEdit } = useAuth();
@@ -151,6 +166,141 @@ export default function Deployments() {
 
   const BACKEND_CONFIG = getBackendConfig(isDark);
 
+  // True when the backend/worker pair is deployed through Docker, not natively
+  const isDockerBackend = (backend: string, worker?: Worker | null) => {
+    // MLX and llama.cpp are treated as native regardless of the worker
+    if (backend === "mlx" || backend === "llama_cpp") return false;
+    // On a darwin worker, Ollama and vLLM (vLLM-Metal) are native as well
+    const onMac = worker?.os_type === "darwin";
+    const nativeOnMac = backend === "ollama" || backend === "vllm";
+    if (onMac && nativeOnMac) return false;
+    return true;
+  };
+
+  // Build the docker-compose YAML text for whatever the form currently holds
+  const generateYamlFromForm = useCallback(() => {
+    const fields = form.getFieldsValue();
+    // The form may not carry model_id yet; fall back to the selected model
+    const wantedId = fields.model_id || selectedModelId;
+    const chosen = models.find((candidate) => candidate.id === wantedId);
+
+    if (!chosen) return "";
+
+    const gpuIndexes =
+      selectedGpuIndexes.length > 0 ? selectedGpuIndexes : undefined;
+    const composeInput: DeploymentConfig = {
+      name: fields.name || "deployment",
+      model_id: chosen.model_id,
+      model_name: chosen.name,
+      backend: selectedBackend,
+      worker_name: selectedWorker?.name,
+      gpu_indexes: gpuIndexes,
+      extra_params: fields.extra_params || {},
+    };
+    return generateDockerCompose(composeInput);
+  }, [
+    form,
+    models,
+    selectedModelId,
+    selectedBackend,
+    selectedWorker,
+    selectedGpuIndexes,
+  ]);
+
+  // Subscribe to the form fields that feed the generated YAML
+  const formName = Form.useWatch("name", form);
+  const formModelId = Form.useWatch("model_id", form);
+  const formExtraParams = Form.useWatch("extra_params", form);
+
+  // Keep the YAML pane in step with the form unless the user is typing in it
+  useEffect(() => {
+    const panelIdle = showYamlPanel && !isYamlUserEditing;
+    const hasModel = Boolean(selectedModelId || formModelId);
+    if (!panelIdle || !hasModel) return;
+
+    const regenerated = generateYamlFromForm();
+    // Skip empty output and no-op updates so the effect does not loop
+    if (!regenerated || regenerated === yamlContent) return;
+    setYamlContent(regenerated);
+  }, [
+    showYamlPanel,
+    isYamlUserEditing,
+    formName,
+    formModelId,
+    formExtraParams,
+    selectedModelId,
+    selectedBackend,
+    selectedWorker,
+    selectedGpuIndexes,
+    generateYamlFromForm,
+    yamlContent,
+  ]);
+
+  // YAML pane edits: validate at once, then sync valid text back to the form
+  const handleYamlChange = useCallback(
+    (newYaml: string) => {
+      // Mark the pane as user-edited so the form-driven effect leaves it alone
+      setYamlContent(newYaml);
+      setIsYamlUserEditing(true);
+
+      const { valid, error } = validateDockerCompose(newYaml);
+      setYamlError(valid ? null : error || null);
+
+      // Each change cancels the sync scheduled by the previous one
+      if (yamlSyncTimeoutRef.current) {
+        clearTimeout(yamlSyncTimeoutRef.current);
+      }
+      // Invalid text is never pushed to the form; the editing flag stays set
+      if (!valid) return;
+
+      const pushToForm = () => {
+        const parsed = parseDockerCompose(newYaml);
+        if (parsed) {
+          // Update the form fields that correspond to YAML values
+          if (parsed.name && parsed.name !== form.getFieldValue("name")) {
+            form.setFieldValue("name", parsed.name);
+          }
+
+          const extras = parsed.extra_params;
+          // docker_image is copied only when it is non-empty
+          if (extras?.docker_image) {
+            form.setFieldValue(
+              ["extra_params", "docker_image"],
+              extras.docker_image,
+            );
+          }
+
+          // The remaining parameters are copied whenever they are defined
+          const optionalKeys = [
+            "tensor_parallel_size",
+            "max_model_len",
+            "gpu_memory_utilization",
+          ] as const;
+          for (const key of optionalKeys) {
+            const value = extras?.[key];
+            if (value !== undefined) {
+              form.setFieldValue(["extra_params", key], value);
+            }
+          }
+        }
+        // Sync finished: let form changes regenerate the YAML again
+        setIsYamlUserEditing(false);
+      };
+
+      yamlSyncTimeoutRef.current = setTimeout(pushToForm, 500); // 500ms debounce
+    },
+    [form],
+  );
+
+  // Cancel any pending YAML-to-form sync when the component unmounts
+  useEffect(() => {
+    const cancelPendingSync = () => {
+      const pending = yamlSyncTimeoutRef.current;
+      if (pending) clearTimeout(pending);
+    };
+    return cancelPendingSync;
+  }, []);
+
   const fetchDeployments = useCallback(async () => {
     try {
       const response = await deploymentsApi.list();
@@ -390,12 +540,22 @@ export default function Deployments() {
                     display: "inline-flex",
                     alignItems: "center",
                     gap: 2,
-                    background: "rgba(13, 148, 227, 0.1)",
-                    border: "1px solid rgba(13, 148, 227, 0.3)",
-                    color: "#0d94e3",
+                    background: record.container_id.startsWith("native-")
+                      ? "rgba(147, 147, 147, 0.1)"
+                      : "rgba(13, 148, 227, 0.1)",
+                    border: record.container_id.startsWith("native-")
+                      ? "1px solid rgba(147, 147, 147, 0.3)"
+                      : "1px solid rgba(13, 148, 227, 0.3)",
+                    color: record.container_id.startsWith("native-")
+                      ? "#666"
+                      : "#0d94e3",
                   }}
                 >
-                  <DockerIcon size={10} />
+                  {record.container_id.startsWith("native-") ? (
+                    <AppleIcon size={10} />
+                  ) : (
+                    <DockerIcon size={10} />
+                  )}
                 </Tag>
               )}
             </Space>
@@ -535,13 +695,28 @@ export default function Deployments() {
                   display: "inline-flex",
                   alignItems: "center",
                   gap: 3,
-                  background: "rgba(13, 148, 227, 0.1)",
-                  border: "1px solid rgba(13, 148, 227, 0.3)",
-                  color: "#0d94e3",
+                  background: record.container_id.startsWith("native-")
+                    ? "rgba(147, 147, 147, 0.1)"
+                    : "rgba(13, 148, 227, 0.1)",
+                  border: record.container_id.startsWith("native-")
+                    ? "1px solid rgba(147, 147, 147, 0.3)"
+                    : "1px solid rgba(13, 148, 227, 0.3)",
+                  color: record.container_id.startsWith("native-")
+                    ? "#666"
+                    : "#0d94e3",
                 }}
               >
-                <DockerIcon size={10} />
-                <span>Docker</span>
+                {record.container_id.startsWith("native-") ? (
+                  <>
+                    <AppleIcon size={10} />
+                    <span>Native</span>
+                  </>
+                ) : (
+                  <>
+                    <DockerIcon size={10} />
+                    <span>Docker</span>
+                  </>
+                )}
               </Tag>
             </div>
           )}
@@ -769,7 +944,36 @@ export default function Deployments() {
       </Card>
 
       <Modal
-        title="New Deployment"
+        title={
+          <div
+            style={{
+              display: "flex",
+              alignItems: "center",
+              justifyContent: "space-between",
+              paddingRight: 32,
+            }}
+          >
+            <span>New Deployment</span>
+            {isDockerBackend(selectedBackend, selectedWorker) && !isMobile && (
+              <Button
+                size="small"
+                type={showYamlPanel ? "primary" : "default"}
+                icon={<CodeOutlined />}
+                onClick={() => {
+                  if (!showYamlPanel) {
+                    // Generate YAML when opening panel
+                    const yaml = generateYamlFromForm();
+                    setYamlContent(yaml);
+                    setYamlError(null);
+                  }
+                  setShowYamlPanel(!showYamlPanel);
+                }}
+              >
+                YAML
+              </Button>
+            )}
+          </div>
+        }
         open={modalOpen}
         onCancel={() => {
           setModalOpen(false);
@@ -777,509 +981,620 @@ export default function Deployments() {
           setSelectedWorkerId(null);
           setSelectedGpuIndexes([]);
           setSelectedBackend("vllm");
+          setShowYamlPanel(true);
+          setYamlContent("");
+          setYamlError(null);
           form.resetFields();
         }}
         footer={null}
-        width={isMobile ? "100%" : 600}
+        width={
+          isMobile
+            ? "100%"
+            : showYamlPanel && isDockerBackend(selectedBackend, selectedWorker)
+              ? 1100
+              : 600
+        }
         style={
           isMobile ? { top: 20, maxWidth: "100%", margin: "0 8px" } : undefined
         }
       >
         <Form form={form} layout="vertical" onFinish={handleCreate}>
-          <Form.Item
-            name="name"
-            label="Deployment Name"
-            rules={[
-              { required: true, message: "Please enter deployment name" },
-            ]}
-          >
-            <Input placeholder="e.g., qwen3-0.6b-prod" />
-          </Form.Item>
-
-          <Form.Item
-            name="model_id"
-            label="Model"
-            rules={[{ required: true, message: "Please select a model" }]}
-          >
-            <Select
-              placeholder="Select a model"
-              optionLabelProp="label"
-              onChange={(value) => {
-                setSelectedModelId(value);
-                // Auto-select backend based on model source
-                const model = models.find((m) => m.id === value);
-                if (model?.source === "ollama") {
-                  setSelectedBackend("ollama");
-                  form.setFieldValue("backend", "ollama");
-                } else {
-                  setSelectedBackend("vllm");
-                  form.setFieldValue("backend", "vllm");
-                }
+          <div style={{ display: "flex", gap: 24 }}>
+            {/* Left side: Form fields */}
+            <div
+              style={{
+                flex:
+                  showYamlPanel &&
+                  !isMobile &&
+                  isDockerBackend(selectedBackend, selectedWorker)
+                    ? "0 0 500px"
+                    : 1,
               }}
-              options={models.map((m) => {
-                const sourceIcon =
-                  m.source === "ollama" ? (
-                    <OllamaLogo height={10} isDark={isDark} />
-                  ) : (
-                    <HuggingFaceLogo height={10} />
-                  );
-                const sourceLabel =
-                  m.source === "ollama" ? "Ollama" : "HuggingFace";
-                const mlxReady =
-                  m.source !== "ollama" && isMLXReady(m.model_id);
-                const ggufReady =
-                  m.source !== "ollama" && isGGUFReady(m.model_id);
-                return {
-                  label: (
-                    <span
-                      style={{ display: "flex", alignItems: "center", gap: 6 }}
-                    >
-                      <Tag
-                        style={{
-                          ...getTagStyle("small"),
-                          margin: 0,
-                          display: "inline-flex",
-                          alignItems: "center",
-                          gap: 4,
-                          width: 90,
-                        }}
-                      >
+            >
+              <Form.Item
+                name="name"
+                label="Deployment Name"
+                rules={[
+                  { required: true, message: "Please enter deployment name" },
+                ]}
+              >
+                <Input placeholder="e.g., qwen3-0.6b-prod" />
+              </Form.Item>
+
+              <Form.Item
+                name="model_id"
+                label="Model"
+                rules={[{ required: true, message: "Please select a model" }]}
+              >
+                <Select
+                  placeholder="Select a model"
+                  optionLabelProp="label"
+                  onChange={(value) => {
+                    setSelectedModelId(value);
+                    // Auto-select backend based on model source
+                    const model = models.find((m) => m.id === value);
+                    if (model?.source === "ollama") {
+                      setSelectedBackend("ollama");
+                      form.setFieldValue("backend", "ollama");
+                    } else {
+                      setSelectedBackend("vllm");
+                      form.setFieldValue("backend", "vllm");
+                    }
+                  }}
+                  options={models.map((m) => {
+                    const sourceIcon =
+                      m.source === "ollama" ? (
+                        <OllamaLogo height={10} isDark={isDark} />
+                      ) : (
+                        <HuggingFaceLogo height={10} />
+                      );
+                    const sourceLabel =
+                      m.source === "ollama" ? "Ollama" : "HuggingFace";
+                    const mlxReady =
+                      m.source !== "ollama" && isMLXReady(m.model_id);
+                    const ggufReady =
+                      m.source !== "ollama" && isGGUFReady(m.model_id);
+                    return {
+                      label: (
                         <span
                           style={{
-                            width: 16,
-                            display: "inline-flex",
+                            display: "flex",
                             alignItems: "center",
-                            justifyContent: "center",
+                            gap: 6,
                           }}
                         >
-                          {sourceIcon}
+                          <Tag
+                            style={{
+                              ...getTagStyle("small"),
+                              margin: 0,
+                              display: "inline-flex",
+                              alignItems: "center",
+                              gap: 4,
+                              width: 90,
+                            }}
+                          >
+                            <span
+                              style={{
+                                width: 16,
+                                display: "inline-flex",
+                                alignItems: "center",
+                                justifyContent: "center",
+                              }}
+                            >
+                              {sourceIcon}
+                            </span>
+                            {sourceLabel}
+                          </Tag>
+                          {m.name}
+                          {mlxReady && (
+                            <Tag
+                              color="green"
+                              style={{
+                                fontSize: 10,
+                                margin: 0,
+                                padding: "0 4px",
+                              }}
+                            >
+                              MLX
+                            </Tag>
+                          )}
+                          {ggufReady && (
+                            <Tag
+                              color="blue"
+                              style={{
+                                fontSize: 10,
+                                margin: 0,
+                                padding: "0 4px",
+                              }}
+                            >
+                              GGUF
+                            </Tag>
+                          )}
                         </span>
-                        {sourceLabel}
-                      </Tag>
-                      {m.name}
-                      {mlxReady && (
-                        <Tag
-                          color="green"
-                          style={{ fontSize: 10, margin: 0, padding: "0 4px" }}
-                        >
-                          MLX
-                        </Tag>
-                      )}
-                      {ggufReady && (
-                        <Tag
-                          color="blue"
-                          style={{ fontSize: 10, margin: 0, padding: "0 4px" }}
-                        >
-                          GGUF
-                        </Tag>
-                      )}
-                    </span>
-                  ),
-                  value: m.id,
-                };
-              })}
-            />
-          </Form.Item>
-
-          <Form.Item
-            name="worker_id"
-            label="Worker"
-            rules={[{ required: true, message: "Please select a worker" }]}
-          >
-            <Select
-              placeholder="Select a worker"
-              onChange={(value) => {
-                setSelectedWorkerId(value);
-                // Reset GPU selection when worker changes
-                setSelectedGpuIndexes([]);
-                form.setFieldValue("gpu_indexes", undefined);
-                // Check if current backend is available on the new worker
-                const newWorker = workers.find((w) => w.id === value);
-                const isMac = newWorker?.os_type === "darwin";
-                const macBackends = ["ollama", "mlx", "llama_cpp", "vllm"];
-                const linuxBackends = ["vllm", "sglang", "ollama"];
-                const newAvailable = isMac ? macBackends : linuxBackends;
-                // Reset to first available backend if current is not available
-                if (!newAvailable.includes(selectedBackend)) {
-                  const defaultBackend = isMac ? "vllm" : "vllm";
-                  setSelectedBackend(
-                    defaultBackend as
-                      | "vllm"
-                      | "sglang"
-                      | "ollama"
-                      | "mlx"
-                      | "llama_cpp",
-                  );
-                  form.setFieldValue("backend", defaultBackend);
-                }
-              }}
-              options={workers.map((w) => ({
-                label: (
-                  <span
-                    style={{
-                      display: "flex",
-                      justifyContent: "space-between",
-                      alignItems: "center",
-                    }}
-                  >
-                    <span>
-                      {w.name} ({w.address})
-                    </span>
-                    <span style={{ display: "flex", gap: 4 }}>
-                      {w.os_type === "darwin" && (
-                        <Tag color="purple">macOS</Tag>
-                      )}
-                      {w.gpu_info && w.gpu_info.length > 0 && (
-                        <Tag color="blue">
-                          {w.gpu_info.length} GPU
-                          {w.gpu_info.length > 1 ? "s" : ""}
-                        </Tag>
-                      )}
-                    </span>
-                  </span>
-                ),
-                value: w.id,
-              }))}
-            />
-          </Form.Item>
+                      ),
+                      value: m.id,
+                    };
+                  })}
+                />
+              </Form.Item>
 
-          <Form.Item
-            name="backend"
-            label="Inference Backend"
-            rules={[{ required: true, message: "Please select a backend" }]}
-            extra={
-              !selectedWorker
-                ? "Select a worker first"
-                : selectedModel?.source === "ollama"
-                  ? "Ollama models can only use Ollama backend"
-                  : selectedWorker?.os_type === "darwin"
-                    ? "macOS workers support vLLM-Metal, Ollama, MLX, and llama.cpp with Apple Silicon acceleration"
-                    : "HuggingFace models can use vLLM or SGLang"
-            }
-          >
-            <Select
-              placeholder={
-                selectedWorker ? "Select a backend" : "Select a worker first"
-              }
-              disabled={!selectedModelId || !selectedWorkerId}
-              value={selectedBackend}
-              onChange={(value) => setSelectedBackend(value)}
-              options={availableBackends.map((b) => {
-                const config = BACKEND_CONFIG[b];
-                // Show "vLLM-Metal" for vllm on Mac workers
-                const label =
-                  b === "vllm" && selectedWorker?.os_type === "darwin"
-                    ? "vLLM-Metal"
-                    : config.label;
-                return {
-                  label: (
-                    <span
-                      style={{ display: "flex", alignItems: "center", gap: 8 }}
-                    >
+              <Form.Item
+                name="worker_id"
+                label="Worker"
+                rules={[{ required: true, message: "Please select a worker" }]}
+              >
+                <Select
+                  placeholder="Select a worker"
+                  onChange={(value) => {
+                    setSelectedWorkerId(value);
+                    // Reset GPU selection when worker changes
+                    setSelectedGpuIndexes([]);
+                    form.setFieldValue("gpu_indexes", undefined);
+                    // Check if current backend is available on the new worker
+                    const newWorker = workers.find((w) => w.id === value);
+                    const isMac = newWorker?.os_type === "darwin";
+                    const macBackends = ["ollama", "mlx", "llama_cpp", "vllm"];
+                    const linuxBackends = ["vllm", "sglang", "ollama"];
+                    const newAvailable = isMac ? macBackends : linuxBackends;
+                    // Reset to the default backend ("vllm") if the current one is not available
+                    if (!newAvailable.includes(selectedBackend)) {
+                      const defaultBackend = isMac ? "vllm" : "vllm";
+                      setSelectedBackend(
+                        defaultBackend as
+                          | "vllm"
+                          | "sglang"
+                          | "ollama"
+                          | "mlx"
+                          | "llama_cpp",
+                      );
+                      form.setFieldValue("backend", defaultBackend);
+                    }
+                  }}
+                  options={workers.map((w) => ({
+                    label: (
                       <span
                         style={{
-                          width: 50,
                           display: "flex",
+                          justifyContent: "space-between",
                           alignItems: "center",
-                          justifyContent: "center",
                         }}
                       >
-                        {config.icon}
+                        <span>
+                          {w.name} ({w.address})
+                        </span>
+                        <span style={{ display: "flex", gap: 4 }}>
+                          {w.os_type === "darwin" && (
+                            <Tag color="purple">macOS</Tag>
+                          )}
+                          {w.gpu_info && w.gpu_info.length > 0 && (
+                            <Tag color="blue">
+                              {w.gpu_info.length} GPU
+                              {w.gpu_info.length > 1 ? "s" : ""}
+                            </Tag>
+                          )}
+                        </span>
                       </span>
-                      {label}
-                    </span>
-                  ),
-                  value: b,
-                };
-              })}
-            />
-          </Form.Item>
-
-          {/* macOS Ollama Warning - only show when Ollama backend is selected */}
-          {selectedWorker &&
-            selectedWorker.os_type === "darwin" &&
-            selectedBackend === "ollama" &&
-            !selectedWorker.capabilities?.ollama && (
-              <Alert
-                message="Ollama Not Installed"
-                description={
-                  <div>
-                    <p style={{ margin: "4px 0" }}>
-                      This Mac worker does not have Ollama installed. Please
-                      install it first:
-                    </p>
-                    <pre
-                      style={{
-                        background: "#f5f5f5",
-                        padding: 8,
-                        borderRadius: 4,
-                        fontSize: 12,
-                        margin: "8px 0",
-                      }}
-                    >
-                      brew install ollama{"\n"}
-                      brew services start ollama
-                    </pre>
-                    <p style={{ margin: "4px 0", fontSize: 12, color: "#666" }}>
-                      After installation, the worker will detect Ollama on the
-                      next heartbeat.
-                    </p>
-                  </div>
-                }
-                type="error"
-                showIcon
-                style={{ marginBottom: 16 }}
-              />
-            )}
-
-          {/* macOS Ollama Not Running Warning - only show when Ollama backend is selected */}
-          {selectedWorker &&
-            selectedWorker.os_type === "darwin" &&
-            selectedBackend === "ollama" &&
-            selectedWorker.capabilities?.ollama &&
-            !selectedWorker.capabilities?.ollama_running && (
-              <Alert
-                message="Ollama Not Running"
-                description={
-                  <div>
-                    <p style={{ margin: "4px 0" }}>
-                      Ollama is installed but not running. Please start it:
-                    </p>
-                    <pre
-                      style={{
-                        background: "#f5f5f5",
-                        padding: 8,
-                        borderRadius: 4,
-                        fontSize: 12,
-                        margin: "8px 0",
-                      }}
-                    >
-                      brew services start ollama
-                    </pre>
-                  </div>
-                }
-                type="warning"
-                showIcon
-                style={{ marginBottom: 16 }}
-              />
-            )}
-
-          {/* macOS Backend Info - show auto-install message */}
-          {selectedWorker &&
-            selectedWorker.os_type === "darwin" &&
-            selectedBackend === "vllm" && (
-              <Alert
-                message="vLLM-Metal"
-                description={
-                  <span style={{ fontSize: 12 }}>
-                    Uses Apple Silicon GPU acceleration. Will be automatically
-                    installed on first deployment.
-                  </span>
-                }
-                type="info"
-                showIcon
-                style={{ marginBottom: 16 }}
-              />
-            )}
-          {selectedWorker &&
-            selectedWorker.os_type === "darwin" &&
-            selectedBackend === "mlx" && (
-              <Alert
-                message="MLX-LM"
-                description={
-                  <span style={{ fontSize: 12 }}>
-                    Native Apple Silicon ML framework. Will be automatically
-                    installed on first deployment.
-                  </span>
-                }
-                type="info"
-                showIcon
-                style={{ marginBottom: 16 }}
-              />
-            )}
-          {selectedWorker &&
-            selectedWorker.os_type === "darwin" &&
-            selectedBackend === "llama_cpp" && (
-              <Alert
-                message="llama.cpp"
-                description={
-                  <span style={{ fontSize: 12 }}>
-                    High-performance inference with Metal acceleration. Will be
-                    automatically installed via Homebrew on first deployment.
-                  </span>
-                }
-                type="info"
-                showIcon
-                style={{ marginBottom: 16 }}
-              />
-            )}
+                    ),
+                    value: w.id,
+                  }))}
+                />
+              </Form.Item>
 
-          {/* macOS Info */}
-          {selectedWorker &&
-            selectedWorker.os_type === "darwin" &&
-            selectedWorker.capabilities?.ollama_running && (
-              <Alert
-                message="macOS Worker with Apple Silicon"
-                description={
-                  <div>
-                    <p style={{ margin: "4px 0" }}>
-                      This worker supports native Apple Silicon backends:
-                    </p>
-                    <ul style={{ margin: "4px 0", paddingLeft: 20 }}>
-                      <li>
-                        <strong>Ollama</strong> - Easiest, pull and run models
-                        directly
-                      </li>
-                      <li>
-                        <strong>MLX</strong> - Apple's ML framework, optimized
-                        for Apple Silicon
-                      </li>
-                      <li>
-                        <strong>llama.cpp</strong> - Cross-platform with Metal
-                        acceleration
-                      </li>
-                    </ul>
-                    <p style={{ margin: "4px 0", fontSize: 12, color: "#666" }}>
-                      For MLX/llama.cpp, HuggingFace models will be
-                      automatically converted if needed.
-                    </p>
-                  </div>
+              <Form.Item
+                name="backend"
+                label="Inference Backend"
+                rules={[{ required: true, message: "Please select a backend" }]}
+                extra={
+                  !selectedWorker
+                    ? "Select a worker first"
+                    : selectedModel?.source === "ollama"
+                      ? "Ollama models can only use Ollama backend"
+                      : selectedWorker?.os_type === "darwin"
+                        ? "macOS workers support vLLM-Metal, Ollama, MLX, and llama.cpp with Apple Silicon acceleration"
+                        : "HuggingFace models can use vLLM or SGLang"
                 }
-                type="info"
-                showIcon
-                style={{ marginBottom: 16 }}
-              />
-            )}
-
-          <Form.Item
-            name="gpu_indexes"
-            label="GPU Indexes"
-            extra={
-              selectedWorkerId
-                ? workerGpus.length > 0
-                  ? "Leave empty to use GPU 0"
-                  : "No GPUs detected on this worker"
-                : "Select a worker first"
-            }
-          >
-            <Select
-              mode="multiple"
-              placeholder={
-                selectedWorkerId
-                  ? "Select GPUs (default: 0)"
-                  : "Select a worker first"
-              }
-              disabled={!selectedWorkerId}
-              onChange={(values: number[]) => setSelectedGpuIndexes(values)}
-              options={
-                workerGpus.length > 0
-                  ? workerGpus.map((gpu) => ({
+              >
+                <Select
+                  placeholder={
+                    selectedWorker
+                      ? "Select a backend"
+                      : "Select a worker first"
+                  }
+                  disabled={!selectedModelId || !selectedWorkerId}
+                  value={selectedBackend}
+                  onChange={(value) => setSelectedBackend(value)}
+                  options={availableBackends.map((b) => {
+                    const config = BACKEND_CONFIG[b];
+                    // Show "vLLM-Metal" for vllm on Mac workers
+                    const label =
+                      b === "vllm" && selectedWorker?.os_type === "darwin"
+                        ? "vLLM-Metal"
+                        : config.label;
+                    return {
                       label: (
                         <span
                           style={{
                             display: "flex",
-                            justifyContent: "space-between",
                             alignItems: "center",
+                            gap: 8,
                           }}
                         >
-                          <span>
-                            GPU {gpu.index}: {gpu.name}
-                          </span>
-                          <Tag
-                            color={
-                              gpu.memory_free / gpu.memory_total > 0.5
-                                ? "green"
-                                : "orange"
-                            }
-                            style={{ marginLeft: 8, fontSize: 11 }}
+                          <span
+                            style={{
+                              width: 50,
+                              display: "flex",
+                              alignItems: "center",
+                              justifyContent: "center",
+                            }}
                           >
-                            {Math.round(gpu.memory_free / 1024 / 1024 / 1024)}GB
-                            free
-                          </Tag>
+                            {config.icon}
+                          </span>
+                          {label}
                         </span>
                       ),
-                      value: gpu.index,
-                    }))
-                  : [{ label: <span>GPU 0</span>, value: 0 }]
-              }
-            />
-          </Form.Item>
+                      value: b,
+                    };
+                  })}
+                />
+              </Form.Item>
 
-          {/* Model Compatibility Check - Show when model is selected for vLLM/SGLang */}
-          {selectedModel &&
-            selectedModel.source !== "ollama" &&
-            !["mlx", "llama_cpp"].includes(selectedBackend) && (
-              <ModelCompatibilityCheck
-                modelId={selectedModel.model_id}
-                backend={selectedBackend}
-                gpuMemoryGb={selectedGpuMemoryGb}
-                precision="fp16"
-              />
-            )}
+              {/* macOS Ollama Warning - only show when Ollama backend is selected */}
+              {selectedWorker &&
+                selectedWorker.os_type === "darwin" &&
+                selectedBackend === "ollama" &&
+                !selectedWorker.capabilities?.ollama && (
+                  <Alert
+                    message="Ollama Not Installed"
+                    description={
+                      <div>
+                        <p style={{ margin: "4px 0" }}>
+                          This Mac worker does not have Ollama installed. Please
+                          install it first:
+                        </p>
+                        <pre
+                          style={{
+                            background: "#f5f5f5",
+                            padding: 8,
+                            borderRadius: 4,
+                            fontSize: 12,
+                            margin: "8px 0",
+                          }}
+                        >
+                          brew install ollama{"\n"}
+                          brew services start ollama
+                        </pre>
+                        <p
+                          style={{
+                            margin: "4px 0",
+                            fontSize: 12,
+                            color: "#666",
+                          }}
+                        >
+                          After installation, the worker will detect Ollama on
+                          the next heartbeat.
+                        </p>
+                      </div>
+                    }
+                    type="error"
+                    showIcon
+                    style={{ marginBottom: 16 }}
+                  />
+                )}
 
-          {/* Model Format Compatibility - Show for MLX/llama.cpp backends */}
-          {selectedModel &&
-            selectedModel.source !== "ollama" &&
-            ["mlx", "llama_cpp"].includes(selectedBackend) && (
-              <ModelFormatCompatibility
-                modelId={selectedModel.model_id}
-                backend={selectedBackend as "mlx" | "llama_cpp"}
-                showDetails={true}
-              />
-            )}
+              {/* macOS Ollama Not Running Warning - only show when Ollama backend is selected */}
+              {selectedWorker &&
+                selectedWorker.os_type === "darwin" &&
+                selectedBackend === "ollama" &&
+                selectedWorker.capabilities?.ollama &&
+                !selectedWorker.capabilities?.ollama_running && (
+                  <Alert
+                    message="Ollama Not Running"
+                    description={
+                      <div>
+                        <p style={{ margin: "4px 0" }}>
+                          Ollama is installed but not running. Please start it:
+                        </p>
+                        <pre
+                          style={{
+                            background: "#f5f5f5",
+                            padding: 8,
+                            borderRadius: 4,
+                            fontSize: 12,
+                            margin: "8px 0",
+                          }}
+                        >
+                          brew services start ollama
+                        </pre>
+                      </div>
+                    }
+                    type="warning"
+                    showIcon
+                    style={{ marginBottom: 16 }}
+                  />
+                )}
+
+              {/* macOS Backend Info - show auto-install message */}
+              {selectedWorker &&
+                selectedWorker.os_type === "darwin" &&
+                selectedBackend === "vllm" && (
+                  <Alert
+                    message="vLLM-Metal"
+                    description={
+                      <span style={{ fontSize: 12 }}>
+                        Uses Apple Silicon GPU acceleration. Will be
+                        automatically installed on first deployment.
+                      </span>
+                    }
+                    type="info"
+                    showIcon
+                    style={{ marginBottom: 16 }}
+                  />
+                )}
+              {selectedWorker &&
+                selectedWorker.os_type === "darwin" &&
+                selectedBackend === "mlx" && (
+                  <Alert
+                    message="MLX-LM"
+                    description={
+                      <span style={{ fontSize: 12 }}>
+                        Native Apple Silicon ML framework. Will be automatically
+                        installed on first deployment.
+                      </span>
+                    }
+                    type="info"
+                    showIcon
+                    style={{ marginBottom: 16 }}
+                  />
+                )}
+              {selectedWorker &&
+                selectedWorker.os_type === "darwin" &&
+                selectedBackend === "llama_cpp" && (
+                  <Alert
+                    message="llama.cpp"
+                    description={
+                      <span style={{ fontSize: 12 }}>
+                        High-performance inference with Metal acceleration. Will
+                        be automatically installed via Homebrew on first
+                        deployment.
+                      </span>
+                    }
+                    type="info"
+                    showIcon
+                    style={{ marginBottom: 16 }}
+                  />
+                )}
+
+              {/* macOS Info */}
+              {selectedWorker &&
+                selectedWorker.os_type === "darwin" &&
+                selectedWorker.capabilities?.ollama_running && (
+                  <Alert
+                    message="macOS Worker with Apple Silicon"
+                    description={
+                      <div>
+                        <p style={{ margin: "4px 0" }}>
+                          This worker supports native Apple Silicon backends:
+                        </p>
+                        <ul style={{ margin: "4px 0", paddingLeft: 20 }}>
+                          <li>
+                            <strong>Ollama</strong> - Easiest, pull and run
+                            models directly
+                          </li>
+                          <li>
+                            <strong>MLX</strong> - Apple's ML framework,
+                            optimized for Apple Silicon
+                          </li>
+                          <li>
+                            <strong>llama.cpp</strong> - Cross-platform with
+                            Metal acceleration
+                          </li>
+                        </ul>
+                        <p
+                          style={{
+                            margin: "4px 0",
+                            fontSize: 12,
+                            color: "#666",
+                          }}
+                        >
+                          For MLX/llama.cpp, HuggingFace models will be
+                          automatically converted if needed.
+                        </p>
+                      </div>
+                    }
+                    type="info"
+                    showIcon
+                    style={{ marginBottom: 16 }}
+                  />
+                )}
 
-          {/* Version Override - Show when model is selected (not for MLX/llama.cpp) */}
-          {selectedModelId &&
-            !["mlx", "llama_cpp"].includes(selectedBackend) && (
               <Form.Item
-                name={["extra_params", "docker_image"]}
-                label={`${BACKEND_CONFIG[selectedBackend]?.label || "Backend"} Version`}
-                extra="Override the model's default backend version for this deployment"
+                name="gpu_indexes"
+                label="GPU Indexes"
+                extra={
+                  selectedWorkerId
+                    ? workerGpus.length > 0
+                      ? "Leave empty to use GPU 0"
+                      : "No GPUs detected on this worker"
+                    : "Select a worker first"
+                }
               >
                 <Select
-                  placeholder="Use model default"
-                  allowClear
-                  showSearch
-                  options={(
-                    (
-                      backendVersionsData as Record<
-                        string,
-                        {
-                          versions: Array<{
-                            version: string;
-                            image: string;
-                            recommended?: boolean;
-                          }>;
-                        }
-                      >
-                    )[selectedBackend]?.versions || []
-                  ).map((v) => ({
-                    label: (
-                      <span>
-                        {v.version}
-                        {v.recommended && (
-                          <Tag
-                            color="green"
-                            style={{ marginLeft: 8, fontSize: 10 }}
-                          >
-                            Recommended
-                          </Tag>
-                        )}
-                      </span>
-                    ),
-                    value: v.image,
-                  }))}
+                  mode="multiple"
+                  placeholder={
+                    selectedWorkerId
+                      ? "Select GPUs (default: 0)"
+                      : "Select a worker first"
+                  }
+                  disabled={!selectedWorkerId}
+                  onChange={(values: number[]) => setSelectedGpuIndexes(values)}
+                  options={
+                    workerGpus.length > 0
+                      ? workerGpus.map((gpu) => ({
+                          label: (
+                            <span
+                              style={{
+                                display: "flex",
+                                justifyContent: "space-between",
+                                alignItems: "center",
+                              }}
+                            >
+                              <span>
+                                GPU {gpu.index}: {gpu.name}
+                              </span>
+                              <Tag
+                                color={
+                                  gpu.memory_free / gpu.memory_total > 0.5
+                                    ? "green"
+                                    : "orange"
+                                }
+                                style={{ marginLeft: 8, fontSize: 11 }}
+                              >
+                                {Math.round(
+                                  gpu.memory_free / 1024 / 1024 / 1024,
+                                )}
+                                GB free
+                              </Tag>
+                            </span>
+                          ),
+                          value: gpu.index,
+                        }))
+                      : [{ label: <span>GPU 0</span>, value: 0 }]
+                  }
                 />
               </Form.Item>
-            )}
 
-          {/* Advanced Parameters - Show when model is selected (not for MLX/llama.cpp) */}
-          {selectedModelId &&
-            !["mlx", "llama_cpp"].includes(selectedBackend) && (
-              <DeploymentAdvancedForm backend={selectedBackend} form={form} />
-            )}
+              {/* Model Compatibility Check - Show when model is selected for vLLM/SGLang */}
+              {selectedModel &&
+                selectedModel.source !== "ollama" &&
+                !["mlx", "llama_cpp"].includes(selectedBackend) && (
+                  <ModelCompatibilityCheck
+                    modelId={selectedModel.model_id}
+                    backend={selectedBackend}
+                    gpuMemoryGb={selectedGpuMemoryGb}
+                    precision="fp16"
+                  />
+                )}
+
+              {/* Model Format Compatibility - Show for MLX/llama.cpp backends */}
+              {selectedModel &&
+                selectedModel.source !== "ollama" &&
+                ["mlx", "llama_cpp"].includes(selectedBackend) && (
+                  <ModelFormatCompatibility
+                    modelId={selectedModel.model_id}
+                    backend={selectedBackend as "mlx" | "llama_cpp"}
+                    showDetails={true}
+                  />
+                )}
+
+              {/* Version Override - Show when model is selected (not for MLX/llama.cpp) */}
+              {selectedModelId &&
+                !["mlx", "llama_cpp"].includes(selectedBackend) && (
+                  <Form.Item
+                    name={["extra_params", "docker_image"]}
+                    label={`${BACKEND_CONFIG[selectedBackend]?.label || "Backend"} Version`}
+                    extra="Override the model's default backend version for this deployment"
+                  >
+                    <Select
+                      placeholder="Use model default"
+                      allowClear
+                      showSearch
+                      options={(
+                        (
+                          backendVersionsData as Record<
+                            string,
+                            {
+                              versions: Array<{
+                                version: string;
+                                image: string;
+                                recommended?: boolean;
+                              }>;
+                            }
+                          >
+                        )[selectedBackend]?.versions || []
+                      ).map((v) => ({
+                        label: (
+                          <span>
+                            {v.version}
+                            {v.recommended && (
+                              <Tag
+                                color="green"
+                                style={{ marginLeft: 8, fontSize: 10 }}
+                              >
+                                Recommended
+                              </Tag>
+                            )}
+                          </span>
+                        ),
+                        value: v.image,
+                      }))}
+                    />
+                  </Form.Item>
+                )}
+
+              {/* Advanced Parameters - Show when model is selected (not for MLX/llama.cpp) */}
+              {selectedModelId &&
+                !["mlx", "llama_cpp"].includes(selectedBackend) && (
+                  <DeploymentAdvancedForm
+                    backend={selectedBackend}
+                    form={form}
+                  />
+                )}
+            </div>
+
+            {/* Right side: YAML Editor (desktop only, Docker backends only) */}
+            {showYamlPanel &&
+              !isMobile &&
+              isDockerBackend(selectedBackend, selectedWorker) && (
+                <div style={{ flex: "1 1 500px", minWidth: 400 }}>
+                  <div
+                    style={{
+                      marginBottom: 8,
+                      display: "flex",
+                      alignItems: "center",
+                      gap: 8,
+                    }}
+                  >
+                    <Text strong>Docker Compose</Text>
+                    {yamlError && (
+                      <Tag color="error" style={{ fontSize: 11 }}>
+                        {yamlError}
+                      </Tag>
+                    )}
+                  </div>
+                  <div
+                    style={{
+                      border: `1px solid ${isDark ? "#424242" : "#d9d9d9"}`,
+                      borderRadius: 6,
+                      overflow: "hidden",
+                    }}
+                  >
+                    <Editor
+                      height="450px"
+                      language="yaml"
+                      theme={isDark ? "vs-dark" : "light"}
+                      value={yamlContent}
+                      onChange={(value) => handleYamlChange(value || "")}
+                      onMount={() => setIsYamlUserEditing(false)}
+                      options={{
+                        minimap: { enabled: false },
+                        fontSize: 13,
+                        lineNumbers: "on",
+                        scrollBeyondLastLine: false,
+                        wordWrap: "on",
+                        tabSize: 2,
+                        automaticLayout: true,
+                        padding: { top: 8, bottom: 8 },
+                      }}
+                    />
+                  </div>
+                  <div style={{ marginTop: 6, fontSize: 11, color: "#888" }}>
+                    Form ↔ YAML auto-sync. Edit either side.
+                  </div>
+                </div>
+              )}
+          </div>
 
-          <Form.Item>
+          <Form.Item style={{ marginTop: 16 }}>
             <Space>
               <Button type="primary" htmlType="submit">
                 Deploy
diff --git a/frontend/src/utils/dockerCompose.ts b/frontend/src/utils/dockerCompose.ts
new file mode 100644
index 0000000..e5adf18
--- /dev/null
+++ b/frontend/src/utils/dockerCompose.ts
@@ -0,0 +1,336 @@
+/**
+ * Docker Compose YAML generator and parser for LMStack deployments
+ */
+
+import YAML from "yaml";
+
+export interface DeploymentConfig { // input to generateDockerCompose; parseDockerCompose returns a Partial of it
+  name: string; // sanitized and lower-cased into the Compose service name
+  model_id: string; // passed as --model (vLLM) or --model-path (SGLang)
+  model_name?: string; // not read by the generator or parser in this file
+  backend: string; // "vllm", "sglang" and "ollama" get backend-specific settings
+  worker_name?: string; // not read by the generator or parser in this file
+  gpu_indexes?: number[]; // emitted as nvidia device_ids; empty/absent reserves all GPUs
+  extra_params?: {
+    docker_image?: string; // overrides the per-backend default image
+    tensor_parallel_size?: number; // --tensor-parallel-size (vLLM) / --tp (SGLang)
+    max_model_len?: number; // --max-model-len (vLLM) / --context-length (SGLang)
+    gpu_memory_utilization?: number; // --gpu-memory-utilization (vLLM only)
+    quantization?: string; // --quantization (vLLM and SGLang)
+    dtype?: string; // --dtype (vLLM only)
+    enforce_eager?: boolean; // --enforce-eager flag (vLLM only)
+    trust_remote_code?: boolean; // --trust-remote-code flag (vLLM and SGLang)
+    [key: string]: unknown;
+  };
+}
+
+interface DockerComposeService { // the Compose service keys this module reads or writes
+  image: string;
+  container_name?: string; // generator writes `lmstack-<service name>`
+  ports?: string[];
+  environment?: Record<string, string>;
+  volumes?: string[];
+  deploy?: {
+    resources?: {
+      reservations?: {
+        devices?: Array<{
+          driver: string;
+          count?: number | string; // generator writes "all" when no GPU indexes are given
+          device_ids?: string[]; // GPU indexes as strings; the parser converts them back with Number
+          capabilities: string[][];
+        }>;
+      };
+    };
+  };
+  command?: string; // generator joins arguments with backslash + newline continuations
+  restart?: string;
+  shm_size?: string; // generator sets "32g" for SGLang
+  ipc?: string; // generator sets "host" for vLLM
+}
+
+interface DockerCompose { // top-level document shape used by the generator and parser
+  version: string; // generator always writes "3.8"
+  services: Record<string, DockerComposeService>; // the parser only reads the first entry
+}
+
+// Default image per backend; generateDockerCompose falls back to the vLLM image for other backends
+const DEFAULT_IMAGES: Record<string, string> = {
+  vllm: "vllm/vllm-openai:latest",
+  sglang: "lmsysorg/sglang:latest",
+  ollama: "ollama/ollama:latest",
+};
+
+/**
+ * Generate a single-service Docker Compose YAML document (as a string) from a deployment config.
+ */
+export function generateDockerCompose(config: DeploymentConfig): string {
+  const backend = config.backend;
+  const serviceName = config.name.replace(/[^a-zA-Z0-9_-]/g, "-").toLowerCase();
+
+  const service: DockerComposeService = {
+    image:
+      config.extra_params?.docker_image ||
+      DEFAULT_IMAGES[backend] ||
+      "vllm/vllm-openai:latest",
+    container_name: `lmstack-${serviceName}`,
+    restart: "unless-stopped",
+  };
+
+  // GPU configuration: pin the selected GPU indexes, otherwise reserve all NVIDIA GPUs
+  if (config.gpu_indexes && config.gpu_indexes.length > 0) {
+    service.deploy = {
+      resources: {
+        reservations: {
+          devices: [
+            {
+              driver: "nvidia",
+              device_ids: config.gpu_indexes.map(String),
+              capabilities: [["gpu"]],
+            },
+          ],
+        },
+      },
+    };
+  } else {
+    service.deploy = {
+      resources: {
+        reservations: {
+          devices: [
+            {
+              driver: "nvidia",
+              count: "all",
+              capabilities: [["gpu"]],
+            },
+          ],
+        },
+      },
+    };
+  }
+
+  // Backend-specific configuration (ports, command line, volumes); other backends get none
+  if (backend === "vllm") {
+    service.ports = ["8000:8000"];
+    service.ipc = "host";
+
+    const cmdParts = [`--model ${config.model_id}`]; // optional flags below are added only when truthy
+
+    if (config.extra_params?.tensor_parallel_size) {
+      cmdParts.push(
+        `--tensor-parallel-size ${config.extra_params.tensor_parallel_size}`,
+      );
+    }
+    if (config.extra_params?.max_model_len) {
+      cmdParts.push(`--max-model-len ${config.extra_params.max_model_len}`);
+    }
+    if (config.extra_params?.gpu_memory_utilization) {
+      cmdParts.push(
+        `--gpu-memory-utilization ${config.extra_params.gpu_memory_utilization}`,
+      );
+    }
+    if (config.extra_params?.quantization) {
+      cmdParts.push(`--quantization ${config.extra_params.quantization}`);
+    }
+    if (config.extra_params?.dtype) {
+      cmdParts.push(`--dtype ${config.extra_params.dtype}`);
+    }
+    if (config.extra_params?.enforce_eager) {
+      cmdParts.push("--enforce-eager");
+    }
+    if (config.extra_params?.trust_remote_code) {
+      cmdParts.push("--trust-remote-code");
+    }
+
+    service.command = cmdParts.join(" \\\n      "); // one argument per line, joined by a backslash continuation
+  } else if (backend === "sglang") {
+    service.ports = ["30000:30000"];
+    service.shm_size = "32g";
+
+    const cmdParts = [
+      "python3 -m sglang.launch_server",
+      `--model-path ${config.model_id}`,
+      "--host 0.0.0.0",
+      "--port 30000",
+    ];
+
+    if (config.extra_params?.tensor_parallel_size) {
+      cmdParts.push(`--tp ${config.extra_params.tensor_parallel_size}`);
+    }
+    if (config.extra_params?.max_model_len) {
+      cmdParts.push(`--context-length ${config.extra_params.max_model_len}`);
+    }
+    if (config.extra_params?.quantization) {
+      cmdParts.push(`--quantization ${config.extra_params.quantization}`);
+    }
+    if (config.extra_params?.trust_remote_code) {
+      cmdParts.push("--trust-remote-code");
+    }
+
+    service.command = cmdParts.join(" \\\n      "); // one argument per line, joined by a backslash continuation
+  } else if (backend === "ollama") {
+    service.ports = ["11434:11434"];
+    service.volumes = ["ollama_data:/root/.ollama"];
+    service.environment = {
+      OLLAMA_HOST: "0.0.0.0:11434",
+    };
+  }
+
+  const compose: DockerCompose = {
+    version: "3.8",
+    services: {
+      [serviceName]: service,
+    },
+  };
+
+  // Ollama also needs the top-level named volume that its service mounts
+  if (backend === "ollama") {
+    return YAML.stringify({
+      ...compose,
+      volumes: {
+        ollama_data: {},
+      },
+    });
+  }
+
+  return YAML.stringify(compose);
+}
+
+/**
+ * Parse Docker Compose YAML into a partial deployment config (first service only); null if unusable.
+ */
+export function parseDockerCompose(
+  yamlContent: string,
+): Partial<DeploymentConfig> | null {
+  try {
+    const compose = YAML.parse(yamlContent) as DockerCompose | null;
+
+    if (!compose?.services) { // an empty document parses to null
+      return null;
+    }
+
+    const serviceName = Object.keys(compose.services)[0];
+    const service = compose.services[serviceName];
+
+    if (!service) {
+      return null;
+    }
+
+    const config: Partial<DeploymentConfig> = {
+      name: serviceName,
+      extra_params: {},
+    };
+
+    // Extract image
+    if (service.image) {
+      config.extra_params!.docker_image = service.image;
+
+      // Detect backend from image
+      if (service.image.includes("vllm")) {
+        config.backend = "vllm";
+      } else if (service.image.includes("sglang")) {
+        config.backend = "sglang";
+      } else if (service.image.includes("ollama")) {
+        config.backend = "ollama";
+      }
+    }
+
+    // Extract GPU indexes (ignored unless device_ids really is a list)
+    const deviceIds = service.deploy?.resources?.reservations?.devices?.[0]?.device_ids;
+    if (Array.isArray(deviceIds)) {
+      config.gpu_indexes = deviceIds.map(Number);
+    }
+
+    // Parse command to extract params; a list-form command is flattened to one string first
+    if (service.command) {
+      const cmd = Array.isArray(service.command) ? service.command.join(" ") : String(service.command);
+
+      // Extract model
+      const modelMatch = cmd.match(/--model(?:-path)?\s+(\S+)/);
+      if (modelMatch) {
+        config.model_id = modelMatch[1];
+      }
+
+      // Extract tensor_parallel_size
+      const tpMatch = cmd.match(/--tensor-parallel-size\s+(\d+)|--tp\s+(\d+)/);
+      if (tpMatch) {
+        config.extra_params!.tensor_parallel_size = parseInt(
+          tpMatch[1] || tpMatch[2],
+        );
+      }
+
+      // Extract max_model_len
+      const maxLenMatch = cmd.match(
+        /--max-model-len\s+(\d+)|--context-length\s+(\d+)/,
+      );
+      if (maxLenMatch) {
+        config.extra_params!.max_model_len = parseInt(
+          maxLenMatch[1] || maxLenMatch[2],
+        );
+      }
+
+      // Extract gpu_memory_utilization
+      const gpuMemMatch = cmd.match(/--gpu-memory-utilization\s+([\d.]+)/);
+      if (gpuMemMatch) {
+        config.extra_params!.gpu_memory_utilization = parseFloat(
+          gpuMemMatch[1],
+        );
+      }
+
+      // Extract quantization
+      const quantMatch = cmd.match(/--quantization\s+(\S+)/);
+      if (quantMatch) {
+        config.extra_params!.quantization = quantMatch[1];
+      }
+
+      // Extract dtype
+      const dtypeMatch = cmd.match(/--dtype\s+(\S+)/);
+      if (dtypeMatch) {
+        config.extra_params!.dtype = dtypeMatch[1];
+      }
+
+      // Check for flags
+      if (cmd.includes("--enforce-eager")) {
+        config.extra_params!.enforce_eager = true;
+      }
+      if (cmd.includes("--trust-remote-code")) {
+        config.extra_params!.trust_remote_code = true;
+      }
+    }
+
+    return config;
+  } catch (e) {
+    console.error("Failed to parse Docker Compose YAML:", e);
+    return null;
+  }
+}
+
+/**
+ * Validate that the YAML parses and declares a non-empty `services` mapping.
+ */
+export function validateDockerCompose(yamlContent: string): {
+  valid: boolean;
+  error?: string;
+} {
+  try {
+    const parsed = YAML.parse(yamlContent);
+
+    if (!parsed) {
+      return { valid: false, error: "Empty YAML" };
+    }
+    const services = parsed.services;
+    if (!services) {
+      return { valid: false, error: "Missing 'services' section" };
+    }
+    if (typeof services !== "object" || Array.isArray(services)) {
+      return { valid: false, error: "'services' must be a mapping" };
+    }
+    if (Object.keys(services).length === 0) {
+      return { valid: false, error: "No services defined" };
+    }
+    return { valid: true };
+  } catch (e) {
+    return {
+      valid: false,
+      error: `YAML syntax error: ${(e as Error).message}`,
+    };
+  }
+}
diff --git a/worker/agent.py b/worker/agent.py
index 2b9b5b7..79e47a8 100644
--- a/worker/agent.py
+++ b/worker/agent.py
@@ -318,6 +318,18 @@ async def lifespan(app: FastAPI):
         # Set agent references for routes
         _set_agent_references(agent)
 
+        # Recover native processes from previous run (Mac only)
+        if agent.native_manager:
+            logger.info("Attempting to recover native processes from previous run...")
+            try:
+                results = await agent.native_manager.recover_processes()
+                if results:
+                    recovered = sum(1 for v in results.values() if v == "recovered")
+                    if recovered > 0:
+                        logger.info(f"Recovered {recovered} native process(es)")
+            except Exception as e:
+                logger.warning(f"Failed to recover native processes: {e}")
+
         # Register with server
         registered = await agent.register()
         if not registered:
diff --git a/worker/native_ops/process_manager.py b/worker/native_ops/process_manager.py
index 7997378..4f2606f 100644
--- a/worker/native_ops/process_manager.py
+++ b/worker/native_ops/process_manager.py
@@ -42,6 +42,125 @@ def __init__(self):
         self._converter = ModelConverter()
         self._log_dir = Path.home() / ".lmstack" / "logs"
         self._log_dir.mkdir(parents=True, exist_ok=True)
+        self._state_file = Path.home() / ".lmstack" / "native_processes.json"
+
+    def _save_state(self) -> None:
+        """Write the tracked processes' metadata to the state file (failures are only logged)."""
+        snapshot = {
+            key: {
+                "process_id": entry.process_id,
+                "pid": entry.pid,
+                "backend": entry.backend,
+                "model_id": entry.model_id,
+                "port": entry.port,
+            }
+            for key, entry in self._processes.items()
+        }
+        try:
+            import json
+            self._state_file.parent.mkdir(parents=True, exist_ok=True)
+            with open(self._state_file, "w") as f:
+                json.dump(snapshot, f, indent=2)
+            logger.debug(f"Saved {len(snapshot)} process(es) to state file")
+        except Exception as e:
+            logger.warning(f"Failed to save process state: {e}")
+
+    def _load_state(self) -> dict:
+        """Load persisted process state; return {} if missing, unreadable, or not a JSON object."""
+        if not self._state_file.exists():
+            return {}
+        try:
+            import json
+            with open(self._state_file) as f:
+                state = json.load(f)
+            return state if isinstance(state, dict) else {}
+        except Exception as e:
+            logger.warning(f"Failed to load process state: {e}")
+            return {}
+
+    async def recover_processes(self) -> dict[str, str]:
+        """Recover processes from previous run.
+
+        Checks if previously tracked processes are still running by testing
+        their API endpoints. If running, re-registers them in the manager.
+
+        Returns:
+            Dictionary mapping process_id to recovery status:
+            - "recovered": Process still running and re-registered
+            - "stopped": Process no longer running, cleaned up
+            - "error": Saved entry was malformed and was skipped
+        """
+        import httpx
+
+        state = self._load_state()
+        if not state:
+            logger.info("No previous process state to recover")
+            return {}
+
+        results = {}
+        for process_id, proc_info in state.items():
+            if not isinstance(proc_info, dict):
+                results[process_id] = "error"
+                logger.warning(f"Skipping malformed state entry for {process_id}")
+                continue
+            port = proc_info.get("port")
+            backend = proc_info.get("backend")
+            model_id = proc_info.get("model_id")
+            pid = proc_info.get("pid", 0)
+            logger.info(f"Attempting to recover process {process_id} ({backend}) on port {port}")
+
+            # Ollama is a system service: only check that its API answers, then re-register it
+            if backend == "ollama":
+                try:
+                    async with httpx.AsyncClient(timeout=2.0) as client:
+                        response = await client.get(f"http://localhost:{port}/api/tags")
+                        if response.status_code == 200:
+                            self._processes[process_id] = NativeProcess(
+                                process_id=process_id,
+                                pid=0,
+                                backend=backend,
+                                model_id=model_id,
+                                port=port,
+                            )
+                            results[process_id] = "recovered"
+                            logger.info(f"Recovered Ollama process {process_id}")
+                            continue
+                except Exception as e:
+                    logger.debug(f"Ollama probe for {process_id} failed: {e}")
+                results[process_id] = "stopped"
+                logger.info(f"Ollama not running for {process_id}")
+                continue
+
+            # For MLX, llama.cpp, vLLM - check if port is responding
+            try:
+                async with httpx.AsyncClient(timeout=5.0) as client:
+                    response = await client.get(f"http://localhost:{port}/v1/models")
+                    if response.status_code == 200:
+                        # Process is still running! Re-register it
+                        self._processes[process_id] = NativeProcess(
+                            process_id=process_id,
+                            pid=pid,
+                            backend=backend,
+                            model_id=model_id,
+                            port=port,
+                            log_file=self._log_dir / f"{process_id}.log",
+                        )
+                        results[process_id] = "recovered"
+                        logger.info(f"Recovered process {process_id} ({backend}) on port {port}")
+                        continue
+            except Exception as e:
+                logger.debug(f"Probe for {process_id} on port {port} failed: {e}")
+            # Process not running
+            results[process_id] = "stopped"
+            logger.info(f"Process {process_id} no longer running on port {port}")
+
+        # Update state file to remove stopped processes
+        self._save_state()
+        recovered = sum(1 for v in results.values() if v == "recovered")
+        stopped = sum(1 for v in results.values() if v == "stopped")
+        logger.info(f"Process recovery complete: {recovered} recovered, {stopped} stopped")
+
+        return results
 
     def _write_log(self, process_id: str, message: str) -> None:
         """Write a message to a process's log file."""
@@ -170,6 +289,7 @@ async def start_process(
             raise ValueError(f"Unknown backend: {backend}")
 
         self._processes[process_id] = process
+        self._save_state()  # Persist for recovery after restart
         logger.info(f"Started {backend} process {process_id} on port {port}")
         return process
 
@@ -201,6 +321,7 @@ async def stop_process(self, process_id: str) -> bool:
                         process.process.kill()
 
             del self._processes[process_id]
+            self._save_state()  # Update persisted state
             logger.info(f"Stopped process {process_id}")
             return True
 

From 0c758e36c141ebb310a4ff61aabfc4c891c699d0 Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 19:41:11 +0800
Subject: [PATCH 2/8] fix: add worker os_type/gpu_type/capabilities
 auto-migration on startup

---
 backend/app/database.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/backend/app/database.py b/backend/app/database.py
index 2f7ea8f..0557d00 100644
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -113,6 +113,28 @@ async def column_exists(table_name: str, column_name: str) -> bool:
         await conn.execute(text("ALTER TABLE tuning_jobs ADD COLUMN conversation_id INTEGER"))
         logger.info("'conversation_id' column added!")
 
+    # Migration: Add os_type to workers (for Mac native deployment support)
+    if not await column_exists("workers", "os_type"):
+        logger.info("Adding 'os_type' column to workers table...")
+        await conn.execute(
+            text("ALTER TABLE workers ADD COLUMN os_type VARCHAR(50) DEFAULT 'linux'")
+        )
+        logger.info("'os_type' column added!")
+
+    # Migration: Add gpu_type to workers (for Mac Apple Silicon detection)
+    if not await column_exists("workers", "gpu_type"):
+        logger.info("Adding 'gpu_type' column to workers table...")
+        await conn.execute(
+            text("ALTER TABLE workers ADD COLUMN gpu_type VARCHAR(50) DEFAULT 'nvidia'")
+        )
+        logger.info("'gpu_type' column added!")
+
+    # Migration: Add capabilities to workers (for backend availability tracking)
+    if not await column_exists("workers", "capabilities"):
+        logger.info("Adding 'capabilities' column to workers table...")
+        await conn.execute(text("ALTER TABLE workers ADD COLUMN capabilities JSON"))
+        logger.info("'capabilities' column added!")
+
 
 async def init_db():
     """Initialize database tables and run migrations"""

From e5d9a87596cccea7f32e01bf82e981cf1806a657 Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 19:43:34 +0800
Subject: [PATCH 3/8] fix: add apps.parent_app_id auto-migration on startup

---
 backend/app/database.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/backend/app/database.py b/backend/app/database.py
index 0557d00..e9b422c 100644
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -135,6 +135,14 @@ async def column_exists(table_name: str, column_name: str) -> bool:
         await conn.execute(text("ALTER TABLE workers ADD COLUMN capabilities JSON"))
         logger.info("'capabilities' column added!")
 
+    # Migration: Add parent_app_id to apps (for monitoring services like Prometheus)
+    if not await column_exists("apps", "parent_app_id"):
+        logger.info("Adding 'parent_app_id' column to apps table...")
+        await conn.execute(
+            text("ALTER TABLE apps ADD COLUMN parent_app_id INTEGER REFERENCES apps(id)")
+        )
+        logger.info("'parent_app_id' column added!")
+
 
 async def init_db():
     """Initialize database tables and run migrations"""

From 872b1da59c479be6f680f6bba166fb1eecc1bd37 Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 19:45:12 +0800
Subject: [PATCH 4/8] feat: auto-detect and add missing database columns on
 startup

---
 backend/app/database.py | 181 ++++++++++++++++++----------------------
 1 file changed, 80 insertions(+), 101 deletions(-)

diff --git a/backend/app/database.py b/backend/app/database.py
index e9b422c..544d248 100644
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -37,111 +37,90 @@ async def get_db() -> AsyncSession:
             await session.close()
 
 
+def _get_column_type_sql(column) -> str:
+    """Map a SQLAlchemy column's type to the SQL type name used when adding the column."""
+    from sqlalchemy import JSON, Boolean, DateTime, Float, Integer, String, Text
+
+    type_cls = type(column.type)
+    type_repr = str(type_cls)
+
+    # (SQLAlchemy type, class-name marker, SQL type); the first match wins, so order matters.
+    # A SQL type of None marks String, whose VARCHAR length is taken from the column.
+    candidates = (
+        (Integer, "Integer", "INTEGER"),
+        (String, "String", None),
+        (Text, "Text", "TEXT"),
+        (Boolean, "Boolean", "BOOLEAN"),
+        (Float, "Float", "FLOAT"),
+        (DateTime, "DateTime", "DATETIME"),
+        (JSON, "JSON", "JSON"),
+    )
+    for sa_type, marker, sql_type in candidates:
+        if type_cls == sa_type or marker in type_repr:
+            if sql_type is not None:
+                return sql_type
+            return f"VARCHAR({getattr(column.type, 'length', None) or 255})"
+    return "TEXT"  # fallback for any type not listed above
+
+
 async def _run_migrations(conn):
-    """Run schema migrations for new columns (SQLite compatible)."""
+    """Auto-detect and add missing columns by comparing models with database schema."""
     from sqlalchemy import text
 
-    async def column_exists(table_name: str, column_name: str) -> bool:
-        """Check if a column exists in a table."""
-        result = await conn.execute(text(f"PRAGMA table_info({table_name})"))
-        columns = [row[1] for row in result.fetchall()]
-        return column_name in columns
-
-    # Migration: Add container_name to deployments (for Windows Docker compatibility)
-    if not await column_exists("deployments", "container_name"):
-        logger.info("Adding 'container_name' column to deployments table...")
-        await conn.execute(text("ALTER TABLE deployments ADD COLUMN container_name VARCHAR(255)"))
-        logger.info("'container_name' column added!")
-
-    # Migration: Add is_local to registration_tokens (for local worker detection)
-    if not await column_exists("registration_tokens", "is_local"):
-        logger.info("Adding 'is_local' column to registration_tokens table...")
-        await conn.execute(
-            text("ALTER TABLE registration_tokens ADD COLUMN is_local BOOLEAN DEFAULT 0")
-        )
-        logger.info("'is_local' column added!")
-
-    # Migration: Add conversation_type to conversations (for Agent chat support)
-    if not await column_exists("conversations", "conversation_type"):
-        logger.info("Adding 'conversation_type' column to conversations table...")
-        await conn.execute(
-            text(
-                "ALTER TABLE conversations ADD COLUMN conversation_type VARCHAR(20) DEFAULT 'chat' NOT NULL"
-            )
-        )
-        logger.info("'conversation_type' column added!")
-
-    # Migration: Add agent_config to conversations (for Agent configuration)
-    if not await column_exists("conversations", "agent_config"):
-        logger.info("Adding 'agent_config' column to conversations table...")
-        await conn.execute(text("ALTER TABLE conversations ADD COLUMN agent_config JSON"))
-        logger.info("'agent_config' column added!")
-
-    # Migration: Add tool_calls to messages (for Agent tool calls)
-    if not await column_exists("messages", "tool_calls"):
-        logger.info("Adding 'tool_calls' column to messages table...")
-        await conn.execute(text("ALTER TABLE messages ADD COLUMN tool_calls JSON"))
-        logger.info("'tool_calls' column added!")
-
-    # Migration: Add tool_call_id to messages (for Agent tool results)
-    if not await column_exists("messages", "tool_call_id"):
-        logger.info("Adding 'tool_call_id' column to messages table...")
-        await conn.execute(text("ALTER TABLE messages ADD COLUMN tool_call_id VARCHAR(100)"))
-        logger.info("'tool_call_id' column added!")
-
-    # Migration: Add step_type to messages (for Agent execution steps)
-    if not await column_exists("messages", "step_type"):
-        logger.info("Adding 'step_type' column to messages table...")
-        await conn.execute(text("ALTER TABLE messages ADD COLUMN step_type VARCHAR(50)"))
-        logger.info("'step_type' column added!")
-
-    # Migration: Add execution_time_ms to messages (for tool execution timing)
-    if not await column_exists("messages", "execution_time_ms"):
-        logger.info("Adding 'execution_time_ms' column to messages table...")
-        await conn.execute(text("ALTER TABLE messages ADD COLUMN execution_time_ms FLOAT"))
-        logger.info("'execution_time_ms' column added!")
-
-    # Migration: Add tuning_config to tuning_jobs (for multi-framework testing)
-    if not await column_exists("tuning_jobs", "tuning_config"):
-        logger.info("Adding 'tuning_config' column to tuning_jobs table...")
-        await conn.execute(text("ALTER TABLE tuning_jobs ADD COLUMN tuning_config JSON"))
-        logger.info("'tuning_config' column added!")
-
-    # Migration: Add conversation_id to tuning_jobs (for Agent Chat integration)
-    if not await column_exists("tuning_jobs", "conversation_id"):
-        logger.info("Adding 'conversation_id' column to tuning_jobs table...")
-        await conn.execute(text("ALTER TABLE tuning_jobs ADD COLUMN conversation_id INTEGER"))
-        logger.info("'conversation_id' column added!")
-
-    # Migration: Add os_type to workers (for Mac native deployment support)
-    if not await column_exists("workers", "os_type"):
-        logger.info("Adding 'os_type' column to workers table...")
-        await conn.execute(
-            text("ALTER TABLE workers ADD COLUMN os_type VARCHAR(50) DEFAULT 'linux'")
-        )
-        logger.info("'os_type' column added!")
-
-    # Migration: Add gpu_type to workers (for Mac Apple Silicon detection)
-    if not await column_exists("workers", "gpu_type"):
-        logger.info("Adding 'gpu_type' column to workers table...")
-        await conn.execute(
-            text("ALTER TABLE workers ADD COLUMN gpu_type VARCHAR(50) DEFAULT 'nvidia'")
-        )
-        logger.info("'gpu_type' column added!")
-
-    # Migration: Add capabilities to workers (for backend availability tracking)
-    if not await column_exists("workers", "capabilities"):
-        logger.info("Adding 'capabilities' column to workers table...")
-        await conn.execute(text("ALTER TABLE workers ADD COLUMN capabilities JSON"))
-        logger.info("'capabilities' column added!")
-
-    # Migration: Add parent_app_id to apps (for monitoring services like Prometheus)
-    if not await column_exists("apps", "parent_app_id"):
-        logger.info("Adding 'parent_app_id' column to apps table...")
-        await conn.execute(
-            text("ALTER TABLE apps ADD COLUMN parent_app_id INTEGER REFERENCES apps(id)")
+    async def get_table_columns(table_name: str) -> set[str]:
+        """Get all column names from a database table."""
+        try:
+            result = await conn.execute(text(f"PRAGMA table_info({table_name})"))
+            return {row[1] for row in result.fetchall()}
+        except Exception:
+            return set()
+
+    async def table_exists(table_name: str) -> bool:
+        """Check if a table exists in the database."""
+        result = await conn.execute(
+            text("SELECT name FROM sqlite_master WHERE type='table' AND name=:name"),
+            {"name": table_name},
         )
-        logger.info("'parent_app_id' column added!")
+        return result.fetchone() is not None
+
+    # Iterate through all tables defined in models
+    for table_name, table in Base.metadata.tables.items():
+        # Skip if table doesn't exist yet (will be created by create_all)
+        if not await table_exists(table_name):
+            continue
+
+        # Get existing columns in database
+        existing_columns = await get_table_columns(table_name)
+
+        # Check each column in the model
+        for column in table.columns:
+            if column.name not in existing_columns:
+                # Build ALTER TABLE statement
+                col_type = _get_column_type_sql(column)
+
+                # Emit DEFAULT only for scalar literals. Callable (per-insert) defaults
+                # and non-scalar values cannot be rendered as a SQL literal, and a
+                # malformed clause would make the whole ALTER TABLE fail.
+                default_clause = ""
+                default_val = getattr(column.default, "arg", None)
+                if isinstance(default_val, str):
+                    escaped = default_val.replace("'", "''")
+                    default_clause = f" DEFAULT '{escaped}'"
+                elif isinstance(default_val, bool):
+                    default_clause = f" DEFAULT {int(default_val)}"
+                elif isinstance(default_val, (int, float)):
+                    default_clause = f" DEFAULT {default_val}"
+
+                sql = (
+                    f"ALTER TABLE {table_name} ADD COLUMN {column.name} {col_type}{default_clause}"
+                )
+
+                logger.info(f"Auto-migration: Adding '{column.name}' column to {table_name}...")
+                try:
+                    await conn.execute(text(sql))
+                    logger.info(f"Column '{column.name}' added to {table_name}!")
+                except Exception as e:
+                    logger.warning(f"Failed to add column {column.name} to {table_name}: {e}")
 
 
 async def init_db():

From a37f3baf42a5e4bdbd36f16094015f309026dff6 Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 19:48:14 +0800
Subject: [PATCH 5/8] fix: add healthcheck and proper depends_on to
 docker-compose.local.yml

---
 docker-compose.local.yml | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/docker-compose.local.yml b/docker-compose.local.yml
index 4423911..b44a7fe 100644
--- a/docker-compose.local.yml
+++ b/docker-compose.local.yml
@@ -5,13 +5,14 @@
 # Run:   docker compose -f docker-compose.local.yml up -d
 
 services:
-  server:
+  backend:
     image: infinirc/lmstack-backend:local
     container_name: lmstack-backend
     user: root
-    # Use bridge network for Windows compatibility (network_mode: host doesn't work on Windows)
     ports:
-      - "0.0.0.0:52000:52000"
+      - "52000:52000"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
     volumes:
       - lmstack-data:/app/data
       - /var/run/docker.sock:/var/run/docker.sock
@@ -19,6 +20,12 @@ services:
     environment:
       - LMSTACK_SECRET_KEY=${SECRET_KEY:-dev-secret-key}
       - LMSTACK_EXTERNAL_URL=${EXTERNAL_URL:-}
+    healthcheck:
+      test: ["CMD", "python", "-c", "import httpx; r=httpx.get('http://127.0.0.1:52000/health'); exit(0 if r.status_code==200 else 1)"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 10s
     restart: unless-stopped
     networks:
       - lmstack
@@ -27,12 +34,21 @@ services:
     image: infinirc/lmstack-frontend:local
     container_name: lmstack-frontend
     ports:
-      - "0.0.0.0:3000:80"
+      - "3000:80"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
     environment:
-      - BACKEND_HOST=server
+      - BACKEND_HOST=backend
       - NGINX_ENVSUBST_FILTER=BACKEND_HOST
     depends_on:
-      - server
+      backend:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:80/"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+      start_period: 5s
     restart: unless-stopped
     networks:
       - lmstack

From 13d24159258b80c1458e15918237d660b790740b Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 19:57:57 +0800
Subject: [PATCH 6/8] fix: import models before creating tables in init_db

---
 backend/app/database.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/backend/app/database.py b/backend/app/database.py
index 544d248..e21c7ed 100644
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -125,6 +125,10 @@ async def table_exists(table_name: str) -> bool:
 
 async def init_db():
     """Initialize database tables and run migrations"""
+    # Import all models to register them with Base.metadata
+    # This ensures all tables are created by create_all()
+    import app.models  # noqa: F401
+
     try:
         async with engine.begin() as conn:
             await conn.run_sync(Base.metadata.create_all)

From 3becc0fb0198c7d310f765ccd48d28e9e0d86117 Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 20:02:08 +0800
Subject: [PATCH 7/8] fix: auto-detect architecture for Docker CLI in backend
 Dockerfile

---
 backend/Dockerfile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 323cd1e..9f995e0 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -21,9 +21,11 @@ WORKDIR /app
 
 # Install docker CLI for local worker spawn feature
 # Using Docker 27.x for API version 1.47 compatibility
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl \
-    && curl -fsSL https://download.docker.com/linux/static/stable/x86_64/docker-27.4.1.tgz | tar xz --strip-components=1 -C /usr/local/bin docker/docker \
+# Auto-detect architecture: aarch64/arm64 use the aarch64 build; anything else falls back to x86_64
+RUN apt-get update && apt-get install -y --no-install-recommends curl \
+    && ARCH=$(uname -m) \
+    && if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then DOCKER_ARCH="aarch64"; else DOCKER_ARCH="x86_64"; fi \
+    && curl -fsSL "https://download.docker.com/linux/static/stable/${DOCKER_ARCH}/docker-27.4.1.tgz" | tar xz --strip-components=1 -C /usr/local/bin docker/docker \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy installed packages from builder

From 5164dab852bcf358a1964c87ae8943af590b8357 Mon Sep 17 00:00:00 2001
From: rickychen-infinirc <ricky.chen@infinirc.com>
Date: Mon, 2 Feb 2026 20:06:01 +0800
Subject: [PATCH 8/8] feat: add ARM64 support for Docker images and
 configurable worker image

---
 .github/workflows/docker.yml         |  5 ++++-
 backend/app/api/workers.py           |  5 ++++-
 backend/app/config.py                |  1 +
 backend/app/services/local_worker.py | 16 ++++++++++++++--
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index facd18f..497b496 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -44,6 +44,9 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -79,7 +82,7 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
-          platforms: linux/amd64
+          platforms: linux/amd64,linux/arm64
 
   # Notify on completion
   notify:
diff --git a/backend/app/api/workers.py b/backend/app/api/workers.py
index ac448c4..8d61a2d 100644
--- a/backend/app/api/workers.py
+++ b/backend/app/api/workers.py
@@ -531,6 +531,9 @@ def _generate_docker_command(token: str, name: str, backend_url: str) -> str:
 
     Command is single-line for cross-platform compatibility (Linux/Mac/Windows).
     """
+    from app.config import get_settings
+
+    settings = get_settings()
     return (
         f"docker run -d --name lmstack-worker --restart unless-stopped "
         f"--network host --gpus all --privileged "
@@ -540,7 +543,7 @@ def _generate_docker_command(token: str, name: str, backend_url: str) -> str:
         f"-e BACKEND_URL={backend_url} "
         f"-e WORKER_NAME={name} "
         f"-e REGISTRATION_TOKEN={token} "
-        f"infinirc/lmstack-worker:latest"
+        f"{settings.worker_image}"
     )
 
 
diff --git a/backend/app/config.py b/backend/app/config.py
index d573104..673898c 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -32,6 +32,7 @@ class Settings(BaseSettings):
     # Worker settings
     worker_heartbeat_interval: int = 10  # seconds between status checks
     worker_timeout: int = 30  # seconds to consider worker offline
+    worker_image: str = "infinirc/lmstack-worker:latest"  # Docker image for worker containers (local spawn and generated docker run command)
 
     # vLLM defaults
     vllm_default_image: str = "vllm/vllm-openai:latest"
diff --git a/backend/app/services/local_worker.py b/backend/app/services/local_worker.py
index c6be866..75a2c63 100644
--- a/backend/app/services/local_worker.py
+++ b/backend/app/services/local_worker.py
@@ -205,12 +205,24 @@ def spawn_docker_worker(
     backend_url: str,
     registration_token: str,
     container_name: str = "lmstack-worker",
+    worker_image: str | None = None,
 ) -> dict:
     """Spawn a Docker worker container on the local machine.
 
+    Args:
+        worker_name: Name for the worker
+        backend_url: URL of the backend server
+        registration_token: Token for worker registration
+        container_name: Name for the Docker container
+        worker_image: Docker image to use (falls back to settings.worker_image when None or empty)
+
     Returns:
         dict with keys: success, message, container_id (if success)
     """
+    from app.config import get_settings
+
+    settings = get_settings()
+    image = worker_image or settings.worker_image
     # On Mac, ensure Ollama is running with external access before starting Docker
     if platform.system() == "Darwin":
         logger.info("Mac detected, ensuring Ollama is running with external access...")
@@ -270,11 +282,11 @@ def spawn_docker_worker(
         f"WORKER_NAME={worker_name}",
         "-e",
         f"REGISTRATION_TOKEN={registration_token}",
-        "infinirc/lmstack-worker:latest",
+        image,
     ]
 
     try:
-        logger.info(f"Spawning Docker worker: {worker_name}")
+        logger.info(f"Spawning Docker worker: {worker_name} with image {image}")
         result = subprocess.run(
             cmd,
             capture_output=True,