jundot · deepsweet · Apr 21, 2026
diff --git a/docs/experimental/dflash_mlx_integration.md b/docs/experimental/dflash_mlx_integration.md
@@ -90,7 +90,7 @@ Other model families (Llama, Gemma, etc.) are not supported — they require bot
 |---------|------|-------------|
 | `dflash_enabled` | bool | Enable/disable DFlash for this model |
 | `dflash_draft_model` | str | Path or HuggingFace repo for draft checkpoint |
-| `dflash_draft_quant_bits` | int\|None | Draft model quantization (None=bf16, 4=int4) |
+| `dflash_draft_quant_bits` | int\|None | Draft model quantization (None=bf16/fp16, 4=int4) |
 
 Configured via web admin UI → Model Settings → Experimental Features → DFlash.
 
@@ -240,7 +240,7 @@ DFlash check runs **before** engine type routing in `_load_engine()`. If `dflash
 Located in Model Settings → Advanced Settings → Experimental Features → DFlash:
 - **Toggle**: enable/disable DFlash
 - **Draft Model**: dropdown of available models
-- **Draft Quantization**: bf16 (default) / 4-bit
+- **Draft Quantization**: None (default; draft stays in bf16/fp16) / 4-bit
 
 ### Logging
 

diff --git a/omlx/admin/templates/dashboard/_modal_model_settings.html b/omlx/admin/templates/dashboard/_modal_model_settings.html
@@ -633,9 +633,8 @@ <h4 class="text-xs font-bold uppercase tracking-widest text-neutral-400 mb-3">{{
                                             <label class="block text-xs font-bold uppercase tracking-wider text-neutral-500 mb-2">Draft Quantization</label>
                                             <select x-model="modelSettings.dflash_draft_quant_bits"
                                                     class="w-full px-4 py-2.5 border border-neutral-200 rounded-xl text-sm focus:ring-2 focus:ring-neutral-900 focus:border-transparent transition-all bg-white">
-                                                <option value="">bf16 (default)</option>
+                                                <option value="">None (default)</option>
                                                 <option value="4">4-bit</option>
-                                                <option value="8">8-bit</option>
                                             </select>
                                         </div>
                                     </div>

diff --git a/omlx/model_settings.py b/omlx/model_settings.py
@@ -107,7 +107,7 @@ class ModelSettings:
     # DFlash (block diffusion speculative decoding)
     dflash_enabled: bool = False
     dflash_draft_model: Optional[str] = None  # Path/repo for DFlash draft checkpoint
-    dflash_draft_quant_bits: Optional[int] = None  # Draft model quantization (None=bf16, 4)
+    dflash_draft_quant_bits: Optional[int] = None  # Draft model quantization (None=bf16/fp16, 4=int4)
 
     # Model management flags
     is_pinned: bool = False