Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions .agents/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,15 @@ All model constants live in `src/inference/models.rs`. When adding a new model:
| Qwen 2.5 Coder 1.5B Instruct (GGUF Q4_K_M) | `bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF` | `Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf` | ~941 MB | All platforms (mobile default) |
| Qwen 2.5 Coder 3B Instruct (GGUF Q4_K_M) | `bartowski/Qwen2.5-Coder-3B-Instruct-GGUF` | `Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf` | ~1.93 GB | All platforms (desktop default) |
| Qwen 2.5 Coder 7B Instruct (GGUF Q4_K_M) | `bartowski/Qwen2.5-Coder-7B-Instruct-GGUF` | `Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf` | ~4.4 GB | Higher-memory devices |
| Qwen 3 1.7B (GGUF Q4_K_M) | `bartowski/Qwen3-1.7B-GGUF` | `Qwen3-1.7B-Q4_K_M.gguf` | ~1.3 GB | All platforms |
| Qwen 3 4B (GGUF Q4_K_M) | `bartowski/Qwen3-4B-GGUF` | `Qwen3-4B-Q4_K_M.gguf` | ~2.7 GB | All platforms |
| Qwen 3 8B (GGUF Q4_K_M) | `bartowski/Qwen3-8B-GGUF` | `Qwen3-8B-Q4_K_M.gguf` | ~5 GB | Higher-memory devices |
| Qwen 3 14B (GGUF Q4_K_M) | `bartowski/Qwen3-14B-GGUF` | `Qwen3-14B-Q4_K_M.gguf` | ~8.4 GB | Higher-memory devices |
| Qwen 3 0.6B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-0.6B-GGUF` | `Qwen_Qwen3-0.6B-Q4_K_M.gguf` | ~0.5 GB | All platforms |
| Qwen 3 1.7B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-1.7B-GGUF` | `Qwen_Qwen3-1.7B-Q4_K_M.gguf` | ~1.3 GB | All platforms |
| Qwen 3 4B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-4B-GGUF` | `Qwen_Qwen3-4B-Q4_K_M.gguf` | ~2.7 GB | All platforms |
| Qwen 3 8B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-8B-GGUF` | `Qwen_Qwen3-8B-Q4_K_M.gguf` | ~5 GB | Higher-memory devices |
| Qwen 3 14B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-14B-GGUF` | `Qwen_Qwen3-14B-Q4_K_M.gguf` | ~8.4 GB | Higher-memory devices |
| Qwen 3 32B (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-32B-GGUF` | `Qwen_Qwen3-32B-Q4_K_M.gguf` | ~19.8 GB | High-memory desktop (32+ GB) |
| Qwen 3 4B Instruct 2507 (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF` | `Qwen_Qwen3-4B-Instruct-2507-Q4_K_M.gguf` | ~2.5 GB | All platforms (latest non-thinking 4B) |
| Qwen 3 4B Thinking 2507 (GGUF Q4_K_M) | `bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF` | `Qwen_Qwen3-4B-Thinking-2507-Q4_K_M.gguf` | ~2.5 GB | All platforms (latest reasoning 4B) |
| Qwen 3 30B-A3B Instruct 2507 (GGUF Q4_K_M, MoE) | `bartowski/Qwen_Qwen3-30B-A3B-Instruct-2507-GGUF` | `Qwen_Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf` | ~18.6 GB | High-memory desktop (32+ GB), `qwen3moe` |
| DeepSeek Coder 6.7B Instruct (GGUF Q4_K_M) | `bartowski/deepseek-coder-6.7b-instruct-GGUF` | `deepseek-coder-6.7b-instruct-Q4_K_M.gguf` | ~3.8 GB | Higher-memory devices, custom chat template |
| Qwen 2.5 Coder 7B Instruct (ISQ) | `Qwen/Qwen2.5-Coder-7B-Instruct` | safetensors (ISQ in-situ) | ~8 GB | macOS (ISQ pipeline) |

Expand Down Expand Up @@ -374,6 +379,15 @@ await engine.unloadModel()
| `defaultModelConfig()` | `GgufModelConfig` | Platform-aware Coder default (1.5B on iOS/tvOS/Android, 3B on desktop) |
| `qwen251_5bConfig()` | `GgufModelConfig` | Forces Qwen 2.5 1.5B regardless of platform |
| `qwen253bConfig()` | `GgufModelConfig` | Forces Qwen 2.5 3B regardless of platform |
| `qwen306bConfig()` | `GgufModelConfig` | Qwen 3 0.6B (~0.5 GB) |
| `qwen317bConfig()` | `GgufModelConfig` | Qwen 3 1.7B (~1.3 GB) |
| `qwen34bConfig()` | `GgufModelConfig` | Qwen 3 4B (~2.7 GB) |
| `qwen38bConfig()` | `GgufModelConfig` | Qwen 3 8B (~5 GB) |
| `qwen314bConfig()` | `GgufModelConfig` | Qwen 3 14B (~8.4 GB) |
| `qwen332bConfig()` | `GgufModelConfig` | Qwen 3 32B (~19.8 GB) |
| `qwen34bInstruct2507Config()` | `GgufModelConfig` | Qwen 3 4B Instruct 2507 — latest non-thinking 4B (~2.5 GB) |
| `qwen34bThinking2507Config()` | `GgufModelConfig` | Qwen 3 4B Thinking 2507 — latest reasoning 4B (~2.5 GB) |
| `qwen330bA3bInstruct2507Config()` | `GgufModelConfig` | Qwen 3 30B-A3B Instruct 2507 MoE (~18.6 GB) |
| `defaultSamplingConfig()` | `SamplingConfig` | temp=0.7, top_p=0.95, max_tokens=512 |
| `deterministicSamplingConfig()` | `SamplingConfig` | temp=0.0, greedy |
| `mobileSamplingConfig()` | `SamplingConfig` | temp=0.7, max_tokens=128 |
Expand Down
56 changes: 56 additions & 0 deletions sdk/dart/rust/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,62 @@ pub fn qwen25_coder_3b_config() -> GgufModelConfig {
OndeGgufModelConfig::qwen25_coder_3b().into()
}

// ── Qwen 3 family ────────────────────────────────────────────────────────────

/// Preset [`GgufModelConfig`] for the Qwen 3 0.6B model, Q4_K_M quantization (~0.5 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_0_6b()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_0_6b_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_0_6b();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 1.7B model, Q4_K_M quantization (~1.3 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_1_7b()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_1_7b_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_1_7b();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 4B model, Q4_K_M quantization (~2.7 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_4b()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_4b_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_4b();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 8B model, Q4_K_M quantization (~5 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_8b()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_8b_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_8b();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 14B model, Q4_K_M quantization (~8.4 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_14b()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_14b_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_14b();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 32B model, Q4_K_M quantization (~19.8 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_32b()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_32b_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_32b();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 4B Instruct 2507 model, Q4_K_M quantization (~2.5 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_4b_instruct_2507()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_4b_instruct_2507_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_4b_instruct_2507();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 4B Thinking 2507 model, Q4_K_M quantization (~2.5 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_4b_thinking_2507()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_4b_thinking_2507_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_4b_thinking_2507();
    preset.into()
}

/// Preset [`GgufModelConfig`] for the Qwen 3 30B-A3B Instruct 2507 mixture-of-experts model,
/// Q4_K_M quantization (~18.6 GB).
///
/// Obtains `OndeGgufModelConfig::qwen3_30b_a3b_instruct_2507()` and converts it with `Into`.
#[frb(sync)]
pub fn qwen3_30b_a3b_instruct_2507_config() -> GgufModelConfig {
    let preset = OndeGgufModelConfig::qwen3_30b_a3b_instruct_2507();
    preset.into()
}

/// Default sampling config: `temperature=0.7`, `top_p=0.95`, `max_tokens=512`.
#[frb(sync)]
pub fn default_sampling_config() -> SamplingConfig {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@ object OndeModels {
fun qwen25_1_5b(): GgufModelConfig = uniffi.onde.qwen2515bConfig()
/** Model config for Qwen 2.5 3B Instruct, GGUF Q4_K_M (~1.93 GB); delegates to the UniFFI binding. */
fun qwen25_3b(): GgufModelConfig {
    return uniffi.onde.qwen253bConfig()
}

// ── Qwen 3 family ────────────────────────────────────────────────────────
/** Model config for Qwen 3 0.6B, GGUF Q4_K_M (~0.5 GB) — the smallest Qwen 3 variant. */
fun qwen3_0_6b(): GgufModelConfig {
    return uniffi.onde.qwen306bConfig()
}
/** Model config for Qwen 3 1.7B, GGUF Q4_K_M (~1.3 GB); delegates to the UniFFI binding. */
fun qwen3_1_7b(): GgufModelConfig {
    return uniffi.onde.qwen317bConfig()
}
/** Model config for Qwen 3 4B, GGUF Q4_K_M (~2.7 GB); delegates to the UniFFI binding. */
fun qwen3_4b(): GgufModelConfig {
    return uniffi.onde.qwen34bConfig()
}
/** Model config for Qwen 3 8B, GGUF Q4_K_M (~5 GB); delegates to the UniFFI binding. */
fun qwen3_8b(): GgufModelConfig {
    return uniffi.onde.qwen38bConfig()
}
/** Model config for Qwen 3 14B, GGUF Q4_K_M (~8.4 GB); delegates to the UniFFI binding. */
fun qwen3_14b(): GgufModelConfig {
    return uniffi.onde.qwen314bConfig()
}
/** Model config for Qwen 3 32B, GGUF Q4_K_M (~19.8 GB) — the largest dense Qwen 3 model. */
fun qwen3_32b(): GgufModelConfig {
    return uniffi.onde.qwen332bConfig()
}
/** Model config for Qwen 3 4B Instruct 2507, GGUF Q4_K_M (~2.5 GB) — the latest non-thinking 4B. */
fun qwen3_4b_instruct_2507(): GgufModelConfig {
    return uniffi.onde.qwen34bInstruct2507Config()
}
/** Model config for Qwen 3 4B Thinking 2507, GGUF Q4_K_M (~2.5 GB) — the latest reasoning 4B. */
fun qwen3_4b_thinking_2507(): GgufModelConfig {
    return uniffi.onde.qwen34bThinking2507Config()
}
/** Model config for Qwen 3 30B-A3B Instruct 2507, GGUF Q4_K_M (~18.6 GB) — the flagship MoE model. */
fun qwen3_30b_a3b_instruct_2507(): GgufModelConfig {
    return uniffi.onde.qwen330bA3bInstruct2507Config()
}
}

/**
Expand Down
56 changes: 56 additions & 0 deletions sdk/react-native/rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,62 @@ pub extern "C" fn onde_qwen25_3b_config() -> *mut c_char {
to_json_cstring(&GgufModelConfig::qwen25_3b())
}

// ── Qwen 3 family ────────────────────────────────────────────────────────────

/// Serialize the Qwen 3 0.6B GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_0_6b()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_0_6b_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_0_6b();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 1.7B GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_1_7b()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_1_7b_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_1_7b();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 4B GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_4b()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_4b_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_4b();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 8B GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_8b()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_8b_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_8b();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 14B GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_14b()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_14b_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_14b();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 32B GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_32b()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_32b_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_32b();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 4B Instruct 2507 GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_4b_instruct_2507()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_4b_instruct_2507_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_4b_instruct_2507();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 4B Thinking 2507 GGUF model preset to JSON and hand it back as a C string.
///
/// Builds `GgufModelConfig::qwen3_4b_thinking_2507()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_4b_thinking_2507_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_4b_thinking_2507();
    to_json_cstring(&preset)
}

/// Serialize the Qwen 3 30B-A3B Instruct 2507 (MoE) GGUF model preset to JSON and hand it back
/// as a C string.
///
/// Builds `GgufModelConfig::qwen3_30b_a3b_instruct_2507()` and passes it to `to_json_cstring`.
#[no_mangle]
pub extern "C" fn onde_qwen3_30b_a3b_instruct_2507_config() -> *mut c_char {
    let preset = GgufModelConfig::qwen3_30b_a3b_instruct_2507();
    to_json_cstring(&preset)
}

// ── Sampling presets ─────────────────────────────────────────────────────────

/// Return the default sampling config as JSON.
Expand Down
Loading