ondeinference · setoelkahfi · Jun 22, 2026
diff --git a/.agents/AGENTS.md b/.agents/AGENTS.md
@@ -147,10 +147,15 @@ All model constants live in `src/inference/models.rs`. When adding a new model:
 | Qwen 2.5 Coder 1.5B Instruct (GGUF Q4_K_M) | `bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF` | `Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf` | ~941 MB | All platforms (mobile default) |
 | Qwen 2.5 Coder 3B Instruct (GGUF Q4_K_M) | `bartowski/Qwen2.5-Coder-3B-Instruct-GGUF` | `Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf` | ~1.93 GB | All platforms (desktop default) |
 | Qwen 2.5 Coder 7B Instruct (GGUF Q4_K_M) | `bartowski/Qwen2.5-Coder-7B-Instruct-GGUF` | `Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf` | ~4.4 GB | Higher-memory devices |
-| Qwen 3 1.7B (GGUF Q4_K_M) | `bartowski/Qwen3-1.7B-GGUF` | `Qwen3-1.7B-Q4_K_M.gguf` | ~1.3 GB | All platforms |
-| Qwen 3 4B (GGUF Q4_K_M) | `bartowski/Qwen3-4B-GGUF` | `Qwen3-4B-Q4_K_M.gguf` | ~2.7 GB | All platforms |
-| Qwen 3 8B (GGUF Q4_K_M) | `bartowski/Qwen3-8B-GGUF` | `Qwen3-8B-Q4_K_M.gguf` | ~5 GB | Higher-memory devices |
-| Qwen 3 14B (GGUF Q4_K_M) | `bartowski/Qwen3-14B-GGUF` | `Qwen3-14B-Q4_K_M.gguf` | ~8.4 GB | Higher-memory devices |
+| Qwen 3 0.6B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-0.6B-GGUF` | `Qwen_Qwen3-0.6B-Q4_K_M.gguf` | ~0.5 GB | All platforms |
+| Qwen 3 1.7B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-1.7B-GGUF` | `Qwen_Qwen3-1.7B-Q4_K_M.gguf` | ~1.3 GB | All platforms |
+| Qwen 3 4B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-4B-GGUF` | `Qwen_Qwen3-4B-Q4_K_M.gguf` | ~2.7 GB | All platforms |
+| Qwen 3 8B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-8B-GGUF` | `Qwen_Qwen3-8B-Q4_K_M.gguf` | ~5 GB | Higher-memory devices |
+| Qwen 3 14B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-14B-GGUF` | `Qwen_Qwen3-14B-Q4_K_M.gguf` | ~8.4 GB | Higher-memory devices |
+| Qwen 3 32B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-32B-GGUF` | `Qwen_Qwen3-32B-Q4_K_M.gguf` | ~19.8 GB | High-memory desktop (32+ GB) |
+| Qwen 3 4B Instruct 2507 (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF` | `Qwen_Qwen3-4B-Instruct-2507-Q4_K_M.gguf` | ~2.5 GB | All platforms (latest non-thinking 4B) |
+| Qwen 3 4B Thinking 2507 (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF` | `Qwen_Qwen3-4B-Thinking-2507-Q4_K_M.gguf` | ~2.5 GB | All platforms (latest reasoning 4B) |
+| Qwen 3 30B-A3B Instruct 2507 (GGUF Q4_K_M, MoE) | `bartowski/Qwen_Qwen3-30B-A3B-Instruct-2507-GGUF` | `Qwen_Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf` | ~18.6 GB | High-memory desktop (32+ GB), `qwen3moe` |
 | DeepSeek Coder 6.7B Instruct (GGUF Q4_K_M) | `bartowski/deepseek-coder-6.7b-instruct-GGUF` | `deepseek-coder-6.7b-instruct-Q4_K_M.gguf` | ~3.8 GB | Higher-memory devices, custom chat template |
 | Qwen 2.5 Coder 7B Instruct (ISQ) | `Qwen/Qwen2.5-Coder-7B-Instruct` | safetensors (ISQ in-situ) | ~8 GB | macOS (ISQ pipeline) |
 
@@ -374,6 +379,15 @@ await engine.unloadModel()
 | `defaultModelConfig()` | `GgufModelConfig` | Platform-aware Coder default (1.5B on iOS/tvOS/Android, 3B on desktop) |
 | `qwen251_5bConfig()` | `GgufModelConfig` | Forces Qwen 2.5 1.5B regardless of platform |
 | `qwen253bConfig()` | `GgufModelConfig` | Forces Qwen 2.5 3B regardless of platform |
+| `qwen306bConfig()` | `GgufModelConfig` | Qwen 3 0.6B (~0.5 GB) |
+| `qwen317bConfig()` | `GgufModelConfig` | Qwen 3 1.7B (~1.3 GB) |
+| `qwen34bConfig()` | `GgufModelConfig` | Qwen 3 4B (~2.7 GB) |
+| `qwen38bConfig()` | `GgufModelConfig` | Qwen 3 8B (~5 GB) |
+| `qwen314bConfig()` | `GgufModelConfig` | Qwen 3 14B (~8.4 GB) |
+| `qwen332bConfig()` | `GgufModelConfig` | Qwen 3 32B (~19.8 GB) |
+| `qwen34bInstruct2507Config()` | `GgufModelConfig` | Qwen 3 4B Instruct 2507 — latest non-thinking 4B (~2.5 GB) |
+| `qwen34bThinking2507Config()` | `GgufModelConfig` | Qwen 3 4B Thinking 2507 — latest reasoning 4B (~2.5 GB) |
+| `qwen330bA3bInstruct2507Config()` | `GgufModelConfig` | Qwen 3 30B-A3B Instruct 2507 MoE (~18.6 GB) |
 | `defaultSamplingConfig()` | `SamplingConfig` | temp=0.7, top_p=0.95, max_tokens=512 |
 | `deterministicSamplingConfig()` | `SamplingConfig` | temp=0.0, greedy |
 | `mobileSamplingConfig()` | `SamplingConfig` | temp=0.7, max_tokens=128 |

diff --git a/sdk/dart/rust/src/api.rs b/sdk/dart/rust/src/api.rs
@@ -542,6 +542,62 @@ pub fn qwen25_coder_3b_config() -> GgufModelConfig {
     OndeGgufModelConfig::qwen25_coder_3b().into()
 }
 
+// ── Qwen 3 family ────────────────────────────────────────────────────────────
+
+/// Returns the `GgufModelConfig` for Qwen 3 0.6B Q4_K_M (~0.5 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_0_6b()`.
+#[frb(sync)]
+pub fn qwen3_0_6b_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_0_6b().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 1.7B Q4_K_M (~1.3 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_1_7b()`.
+#[frb(sync)]
+pub fn qwen3_1_7b_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_1_7b().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 4B Q4_K_M (~2.7 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_4b()`.
+#[frb(sync)]
+pub fn qwen3_4b_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_4b().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 8B Q4_K_M (~5 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_8b()`.
+#[frb(sync)]
+pub fn qwen3_8b_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_8b().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 14B Q4_K_M (~8.4 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_14b()`.
+#[frb(sync)]
+pub fn qwen3_14b_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_14b().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 32B Q4_K_M (~19.8 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_32b()`.
+#[frb(sync)]
+pub fn qwen3_32b_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_32b().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 4B Instruct 2507 Q4_K_M (~2.5 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_4b_instruct_2507()`.
+#[frb(sync)]
+pub fn qwen3_4b_instruct_2507_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_4b_instruct_2507().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 4B Thinking 2507 Q4_K_M (~2.5 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_4b_thinking_2507()`.
+#[frb(sync)]
+pub fn qwen3_4b_thinking_2507_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_4b_thinking_2507().into()
+}
+
+/// Returns the `GgufModelConfig` for Qwen 3 30B-A3B Instruct 2507 (MoE) Q4_K_M (~18.6 GB), converted via `.into()` from `OndeGgufModelConfig::qwen3_30b_a3b_instruct_2507()`.
+#[frb(sync)]
+pub fn qwen3_30b_a3b_instruct_2507_config() -> GgufModelConfig {
+    OndeGgufModelConfig::qwen3_30b_a3b_instruct_2507().into()
+}
+
 /// Default sampling config: `temperature=0.7`, `top_p=0.95`, `max_tokens=512`.
 #[frb(sync)]
 pub fn default_sampling_config() -> SamplingConfig {

diff --git a/sdk/kotlin/lib/src/shared/kotlin/com/ondeinference/onde/Convenience.kt b/sdk/kotlin/lib/src/shared/kotlin/com/ondeinference/onde/Convenience.kt
@@ -25,6 +25,26 @@ object OndeModels {
     fun qwen25_1_5b(): GgufModelConfig = uniffi.onde.qwen2515bConfig()
     /** Qwen 2.5 3B Instruct GGUF Q4_K_M (~1.93 GB). */
     fun qwen25_3b(): GgufModelConfig   = uniffi.onde.qwen253bConfig()
+
+    // ── Qwen 3 family ────────────────────────────────────────────────────────
+    /** Qwen 3 0.6B GGUF Q4_K_M (~0.5 GB) — smallest Qwen 3 variant; returns the config from `uniffi.onde.qwen306bConfig()`. */
+    fun qwen3_0_6b(): GgufModelConfig = uniffi.onde.qwen306bConfig()
+    /** Qwen 3 1.7B GGUF Q4_K_M (~1.3 GB); returns the config from `uniffi.onde.qwen317bConfig()`. */
+    fun qwen3_1_7b(): GgufModelConfig = uniffi.onde.qwen317bConfig()
+    /** Qwen 3 4B GGUF Q4_K_M (~2.7 GB); returns the config from `uniffi.onde.qwen34bConfig()`. */
+    fun qwen3_4b(): GgufModelConfig   = uniffi.onde.qwen34bConfig()
+    /** Qwen 3 8B GGUF Q4_K_M (~5 GB); returns the config from `uniffi.onde.qwen38bConfig()`. */
+    fun qwen3_8b(): GgufModelConfig   = uniffi.onde.qwen38bConfig()
+    /** Qwen 3 14B GGUF Q4_K_M (~8.4 GB); returns the config from `uniffi.onde.qwen314bConfig()`. */
+    fun qwen3_14b(): GgufModelConfig  = uniffi.onde.qwen314bConfig()
+    /** Qwen 3 32B GGUF Q4_K_M (~19.8 GB) — largest dense Qwen 3; returns the config from `uniffi.onde.qwen332bConfig()`. */
+    fun qwen3_32b(): GgufModelConfig  = uniffi.onde.qwen332bConfig()
+    /** Qwen 3 4B Instruct 2507 GGUF Q4_K_M (~2.5 GB) — latest non-thinking 4B; returns the config from `uniffi.onde.qwen34bInstruct2507Config()`. */
+    fun qwen3_4b_instruct_2507(): GgufModelConfig = uniffi.onde.qwen34bInstruct2507Config()
+    /** Qwen 3 4B Thinking 2507 GGUF Q4_K_M (~2.5 GB) — latest reasoning 4B; returns the config from `uniffi.onde.qwen34bThinking2507Config()`. */
+    fun qwen3_4b_thinking_2507(): GgufModelConfig = uniffi.onde.qwen34bThinking2507Config()
+    /** Qwen 3 30B-A3B Instruct 2507 GGUF Q4_K_M (~18.6 GB) — flagship MoE; returns the config from `uniffi.onde.qwen330bA3bInstruct2507Config()`. */
+    fun qwen3_30b_a3b_instruct_2507(): GgufModelConfig = uniffi.onde.qwen330bA3bInstruct2507Config()
 }
 
 /**

diff --git a/sdk/react-native/rust/src/lib.rs b/sdk/react-native/rust/src/lib.rs
@@ -470,6 +470,62 @@ pub extern "C" fn onde_qwen25_3b_config() -> *mut c_char {
     to_json_cstring(&GgufModelConfig::qwen25_3b())
 }
 
+// ── Qwen 3 family ────────────────────────────────────────────────────────────
+
+/// Return the Qwen 3 0.6B GGUF model config (`GgufModelConfig::qwen3_0_6b()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_0_6b_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_0_6b())
+}
+
+/// Return the Qwen 3 1.7B GGUF model config (`GgufModelConfig::qwen3_1_7b()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_1_7b_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_1_7b())
+}
+
+/// Return the Qwen 3 4B GGUF model config (`GgufModelConfig::qwen3_4b()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_4b_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_4b())
+}
+
+/// Return the Qwen 3 8B GGUF model config (`GgufModelConfig::qwen3_8b()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_8b_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_8b())
+}
+
+/// Return the Qwen 3 14B GGUF model config (`GgufModelConfig::qwen3_14b()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_14b_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_14b())
+}
+
+/// Return the Qwen 3 32B GGUF model config (`GgufModelConfig::qwen3_32b()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_32b_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_32b())
+}
+
+/// Return the Qwen 3 4B Instruct 2507 GGUF model config (`GgufModelConfig::qwen3_4b_instruct_2507()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_4b_instruct_2507_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_4b_instruct_2507())
+}
+
+/// Return the Qwen 3 4B Thinking 2507 GGUF model config (`GgufModelConfig::qwen3_4b_thinking_2507()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_4b_thinking_2507_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_4b_thinking_2507())
+}
+
+/// Return the Qwen 3 30B-A3B Instruct 2507 (MoE) GGUF model config (`GgufModelConfig::qwen3_30b_a3b_instruct_2507()`) as a JSON C string produced by `to_json_cstring`.
+#[no_mangle]
+pub extern "C" fn onde_qwen3_30b_a3b_instruct_2507_config() -> *mut c_char {
+    to_json_cstring(&GgufModelConfig::qwen3_30b_a3b_instruct_2507())
+}
+
 // ── Sampling presets ─────────────────────────────────────────────────────────
 
 /// Return the default sampling config as JSON.