diff --git a/docs/experimental/dflash_mlx_integration.md b/docs/experimental/dflash_mlx_integration.md index 70e29415..a9dc17c4 100644 --- a/docs/experimental/dflash_mlx_integration.md +++ b/docs/experimental/dflash_mlx_integration.md @@ -90,7 +90,7 @@ Other model families (Llama, Gemma, etc.) are not supported — they require bot |---------|------|-------------| | `dflash_enabled` | bool | Enable/disable DFlash for this model | | `dflash_draft_model` | str | Path or HuggingFace repo for draft checkpoint | -| `dflash_draft_quant_bits` | int\|None | Draft model quantization (None=bf16, 4=int4) | +| `dflash_draft_quant_bits` | int\|None | Draft model quantization (None=bf16/fp16, 4=int4) | Configured via web admin UI → Model Settings → Experimental Features → DFlash. @@ -240,7 +240,7 @@ DFlash check runs **before** engine type routing in `_load_engine()`. If `dflash Located in Model Settings → Advanced Settings → Experimental Features → DFlash: - **Toggle**: enable/disable DFlash - **Draft Model**: dropdown of available models -- **Draft Quantization**: bf16 (default) / 4-bit +- **Draft Quantization**: bf16/fp16 (default) / 4-bit ### Logging diff --git a/omlx/admin/templates/dashboard/_modal_model_settings.html b/omlx/admin/templates/dashboard/_modal_model_settings.html index 2d2142b9..6e898a1b 100644 --- a/omlx/admin/templates/dashboard/_modal_model_settings.html +++ b/omlx/admin/templates/dashboard/_modal_model_settings.html @@ -633,9 +633,8 @@

{{ diff --git a/omlx/model_settings.py b/omlx/model_settings.py index bcf2ad71..d31be1f6 100644 --- a/omlx/model_settings.py +++ b/omlx/model_settings.py @@ -107,7 +107,7 @@ class ModelSettings: # DFlash (block diffusion speculative decoding) dflash_enabled: bool = False dflash_draft_model: Optional[str] = None # Path/repo for DFlash draft checkpoint - dflash_draft_quant_bits: Optional[int] = None # Draft model quantization (None=bf16, 4) + dflash_draft_quant_bits: Optional[int] = None # Draft model quantization (None=bf16/fp16, 4) # Model management flags is_pinned: bool = False