Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/rust-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ jobs:
- ""
- "vision"
- "vlm"
- "ort-download-binaries"
- "ort-load-dynamic"
- "ort-download-binaries,ort-api-24"
- "ort-load-dynamic,ort-api-24"
- "video"
- "viewer"
- "annotator"
Expand All @@ -49,7 +49,7 @@ jobs:
- name: Clippy
run: |
if [ "${{ matrix.feature }}" = "all-features" ]; then
cargo clippy --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets -- -D warnings
cargo clippy --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24" --all-targets -- -D warnings
elif [ "${{ matrix.feature }}" = "" ]; then
cargo clippy --no-default-features --all-targets -- -D warnings
else
Expand All @@ -74,7 +74,7 @@ jobs:
uses: dtolnay/rust-toolchain@stable

- name: Check
run: cargo check --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets
run: cargo check --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24" --all-targets

test:
name: cargo-test
Expand All @@ -94,7 +94,7 @@ jobs:
uses: dtolnay/rust-toolchain@nightly

- name: Test
run: cargo +nightly test --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets
run: cargo +nightly test --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24" --all-targets

build-linux:
needs: test
Expand All @@ -120,4 +120,4 @@ jobs:
uses: dtolnay/rust-toolchain@stable

- name: Build
run: cargo build --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic"
run: cargo build --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24"
14 changes: 12 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ fast_image_resize = { version = "5.5.0", default-features = false, features = ["
minifb = { version = "0.28.0", optional = true }
video-rs = { version = "0.10.5", features = ["ndarray"], optional = true }
ndarray-npy = { version = "0.10", optional = true }
ort = { version = "=2.0.0-rc.11", default-features = false, features = [
ort = { version = "=2.0.0-rc.12", default-features = false, features = [
"tls-rustls",
"copy-dylibs",
"half",
Expand Down Expand Up @@ -75,12 +75,22 @@ strip = true


[features]
default = ["ort-download-binaries", "vision", "annotator"]
default = ["ort-download-binaries", "vision", "annotator", "ort-api-24"]

# ONNXRuntime loading strategies
ort-download-binaries = ["ort/download-binaries"]
ort-load-dynamic = ["ort/load-dynamic"]

# ONNXRuntime API version selection
ort-api-17 = ["ort/api-17"]
ort-api-18 = ["ort/api-18"]
ort-api-19 = ["ort/api-19"]
ort-api-20 = ["ort/api-20"]
ort-api-21 = ["ort/api-21"]
ort-api-22 = ["ort/api-22"]
ort-api-23 = ["ort/api-23"]
ort-api-24 = ["ort/api-24"]

# Cuda features (Internal use)
cuda-runtime = ["dep:cudarc"]
cuda-runtime-11040 = ["cuda-runtime", "cudarc/cuda-11040"]
Expand Down
95 changes: 95 additions & 0 deletions docs/cargo-features/ep.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Execution Providers

Hardware acceleration for inference. Enable the one matching your hardware.

## Execution Providers

| Feature | Platform | Description |
|---------|----------|-------------|
| `cuda` | NVIDIA GPU | CUDA execution provider |
| `tensorrt` | NVIDIA GPU | TensorRT execution provider |
| `nvrtx` | NVIDIA GPU | NVRTX execution provider |
| `coreml` | Apple Silicon | macOS/iOS inference |
| `openvino` | Intel | CPU/GPU/VPU acceleration |
| `directml` | Windows | DirectML acceleration |
| `rocm` | AMD GPU | ROCm acceleration |
| `onednn` | Intel | Deep Neural Network Library |
| `cann` | Huawei | Ascend NPU |
| `rknpu` | Rockchip | NPU acceleration |
| `armnn` | ARM | Neural Network SDK |
| `xnnpack` | Mobile | CPU optimization |
| `webgpu` | Web | WebGPU/Chrome |
| `nnapi` | Android | Neural Networks API |
| `qnn` | Qualcomm | Qualcomm AI Engine Direct (QNN SDK) acceleration |
| `tvm` | - | Apache TVM |
| `azure` | Azure | ML execution provider |
| `migraphx` | AMD | MIGraphX |
| `vitis` | Xilinx | Vitis AI |

---

## CUDA Image Processor

!!! info "Prerequisites"
Requires [cudarc](https://github.com/coreylowman/cudarc) for CUDA kernels.

Enable GPU-accelerated image preprocessing:

| Pattern | Description | Example |
|---------|-------------|---------|
| `<ep>-full` | Auto-detect CUDA version via `nvcc` | `cuda-full`, `tensorrt-full` |
| `<ep>-cuda-<ver>` | Specific CUDA version | `cuda-12040`, `tensorrt-cuda-12040` |

- **`<ep>`**: `cuda`, `tensorrt`, or `nvrtx`
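- **`<ver>`**: Specific CUDA version, encoded as major, two-digit minor, and a trailing `0` (e.g. `12040` for CUDA 12.4, `11080` for CUDA 11.8)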

### Supported CUDA Versions

| Version | Features |
|---------|----------|
| 11.x | `cuda-11040`, `cuda-11050`, `cuda-11060`, `cuda-11070`, `cuda-11080` |
| 12.x | `cuda-12000`, `cuda-12010`, `cuda-12020`, `cuda-12030`, `cuda-12040`, `cuda-12050`, `cuda-12060`, `cuda-12080`, `cuda-12090` |
| 13.x | `cuda-13000`, `cuda-13010` |

!!! note "TensorRT/NVRTX Versions"
Replace `cuda-` with `tensorrt-cuda-` or `nvrtx-cuda-` for TensorRT/NVRTX versions.
Example: `tensorrt-cuda-12040`, `nvrtx-cuda-12080`

### Feature & Device Combinations

| Scenario | Feature | Model Device | Processor | Speed |
|----------|---------|--------------|-----------|-------|
| CPU Only | `vision` (default) | `cpu` | `cpu` | Baseline |
| CUDA | `cuda` | `cuda` | `cpu` | Slow preprocess |
| CUDA (fast) | `cuda-full` | `cuda` | `cuda` | Fast preprocess |
| TensorRT | `tensorrt` | `tensorrt` | `cpu` | Slow preprocess |
| TensorRT (fast) | `tensorrt-full` | `tensorrt` | `cuda` | Fast preprocess |

!!! tip "TensorRT EP + CUDA EP + CUDA Image Processor"
```toml
features = ["tensorrt-full", "cuda"]
# Or
features = ["tensorrt", "cuda-full"]
```

!!! warning "Device Consistency"
Different EPs can use different devices (e.g., `tensorrt:0` + `cuda:1`).

However, when using **NVIDIA EP + CUDA image processor**, they **MUST** use the **same GPU ID**:
```bash
# ✅ Correct: same GPU
--device cuda:0 --processor-device cuda:0

# ❌ Wrong: different GPUs
--device cuda:0 --processor-device cuda:1
```


!!! danger "Don't mix CUDA versions"
```toml
# ❌ Wrong
features = ["cuda-12040", "cuda-11080"]

# ✅ Correct
features = ["tensorrt-full"]
```
111 changes: 23 additions & 88 deletions docs/cargo-features/ort.md
Original file line number Diff line number Diff line change
@@ -1,102 +1,37 @@
# Execution Providers
# ONNX Runtime
ONNX Runtime configuration and API version management.

Hardware acceleration for inference. Enable the one matching your hardware.

## ONNX Runtime
## Configuration

| Feature | Description | Default |
|---------|-------------|:-------:|
| `ort-download-binaries` | Auto-download ONNX Runtime binaries from [pyke](https://ort.pyke.io) | ✓ |
| `ort-load-dynamic` | Manual linking for custom builds. See [Linking Guide](https://ort.pyke.io/setup/linking) | x |

## Execution Providers

| Feature | Platform | Description |
|---------|----------|-------------|
| `cuda` | NVIDIA GPU | CUDA execution provider |
| `tensorrt` | NVIDIA GPU | TensorRT execution provider |
| `nvrtx` | NVIDIA GPU | NVRTX execution provider |
| `coreml` | Apple Silicon | macOS/iOS inference |
| `openvino` | Intel | CPU/GPU/VPU acceleration |
| `directml` | Windows | DirectML acceleration |
| `rocm` | AMD GPU | ROCm acceleration |
| `onednn` | Intel | Deep Neural Network Library |
| `cann` | Huawei | Ascend NPU |
| `rknpu` | Rockchip | NPU acceleration |
| `armnn` | ARM | Neural Network SDK |
| `xnnpack` | Mobile | CPU optimization |
| `webgpu` | Web | WebGPU/Chrome |
| `nnapi` | Android | Neural Networks API |
| `qnn` | Qualcomm | SNPE acceleration |
| `tvm` | - | Apache TVM |
| `azure` | Azure | ML execution provider |
| `migraphx` | AMD | MIGraphX |
| `vitis` | Xilinx | Vitis AI |

---

## CUDA Image Processor

!!! info "Prerequisites"
Requires [cudarc](https://github.com/coreylowman/cudarc) for CUDA kernels.

Enable GPU-accelerated image preprocessing:

| Pattern | Description | Example |
|---------|-------------|---------|
| `<ep>-full` | Auto-detect CUDA version via `nvcc` | `cuda-full`, `tensorrt-full` |
| `<ep>-cuda-<ver>` | Specific CUDA version | `cuda-12040`, `tensorrt-cuda-12040` |

- **`<ep>`**: `cuda`, `tensorrt`, or `nvrtx`
- **`<ver>`**: Specific CUDA version

### Supported CUDA Versions

| Version | Features |
|---------|----------|
| 11.x | `cuda-11040`, `cuda-11050`, `cuda-11060`, `cuda-11070`, `cuda-11080` |
| 12.x | `cuda-12000`, `cuda-12010`, `cuda-12020`, `cuda-12030`, `cuda-12040`, `cuda-12050`, `cuda-12060`, `cuda-12080`, `cuda-12090` |
| 13.x | `cuda-13000`, `cuda-13010` |
### API Version Selection

!!! note "TensorRT/NVRTX Versions"
Replace `cuda-` with `tensorrt-cuda-` or `nvrtx-cuda-` for TensorRT/NVRTX versions.
Example: `tensorrt-cuda-12040`, `nvrtx-cuda-12080`
This library supports ONNX Runtime versions 1.17 through 1.24 via API version features.

### Feature & Device Combinations
| Feature | ONNX Runtime | Notes |
|---------|--------------|--------------|
| `ort-api-17` | v1.17 | Baseline |
| `ort-api-18` | v1.18 | - |
| `ort-api-19` | v1.19 | - |
| `ort-api-20` | v1.20 | Adapter API available |
| `ort-api-21` | v1.21 | - |
| `ort-api-22` | v1.22 | - |
| `ort-api-23` | v1.23 | - |
| `ort-api-24` | v1.24 | **Default** - Latest features |

| Scenario | Feature | Model Device | Processor | Speed |
|----------|---------|--------------|-----------|-------|
| CPU Only | `vision` (default) | `cpu` | `cpu` | Baseline |
| CUDA | `cuda` | `cuda` | `cpu` | Slow preprocess |
| CUDA (fast) | `cuda-full` | `cuda` | `cuda` | Fast preprocess |
| TensorRT | `tensorrt` | `tensorrt` | `cpu` | Slow preprocess |
| TensorRT (fast) | `tensorrt-full` | `tensorrt` | `cuda` | Fast preprocess |

!!! tip "TensorRT EP + CUDA EP + CUDA Image Processor"
```toml
features = ["tensorrt-full", "cuda"]
# Or
features = ["tensorrt", "cuda-full"]
```

!!! warning "Device Consistency"
Different EPs can use different devices (e.g., `tensorrt:0` + `cuda:1`).

However, when using **NVIDIA EP + CUDA image processor**, they **MUST** use the **same GPU ID**:
!!! tip "API Version Selection"
```toml
# ✅ Correct: same GPU
--device cuda:0 --processor-device cuda:0
# Default uses api-24 (latest)
usls = { version = "0.2", features = ["vision"] }

# ❌ Wrong: different GPUs
--device cuda:0 --processor-device cuda:1
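# Specify an older API version explicitly. Cargo features are additive, so default
# features must be disabled; otherwise the default `ort-api-24` stays enabled too.
usls = { version = "0.2", default-features = false, features = ["ort-download-binaries", "vision", "annotator", "ort-api-20"] }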
```


!!! danger "Don't mix CUDA versions"
```toml
# ❌ Wrong
features = ["cuda-12040", "cuda-11080"]

# ✅ Correct
features = ["tensorrt-full"]
```
!!! note "Version Compatibility"
- Each API version includes all features from previous versions
- Check [ORT multiversion docs](https://ort.pyke.io/setup/multiversion) for minimum version requirements
3 changes: 2 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ nav:
- Integration: getting-started/integration.md
- Cargo Features:
- Overview: cargo-features/overview.md
- ONNX Runtime & EP: cargo-features/ort.md
- ONNX Runtime Version: cargo-features/ort.md
- Execution Provider: cargo-features/ep.md
- Image Formats: cargo-features/image-formats.md
- Model Categories: cargo-features/models.md
- Utilities: cargo-features/utils.md
Expand Down
2 changes: 1 addition & 1 deletion src/models/vision/pipeline/basemodel.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use anyhow::Result;
use ort::tensor::TensorElementType;
use ort::value::TensorElementType;

use crate::{
Config, Device, Engine, Engines, FromConfig, Image, ImageProcessor, Model, Module, Scale, Task,
Expand Down
6 changes: 3 additions & 3 deletions src/models/vlm/sam3_image/impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub struct Sam3Image {

impl Sam3Image {
fn extract_f32(val: &DynValue) -> Result<ArrayD<f32>> {
use ort::tensor::TensorElementType as TE;
use ort::value::TensorElementType as TE;
use ort::value::ValueType;
match val.dtype() {
ValueType::Tensor { ty, .. } => match ty {
Expand All @@ -54,7 +54,7 @@ impl Sam3Image {
}

use ort::memory::AllocationDevice;
use ort::tensor::TensorElementType as TE;
use ort::value::TensorElementType as TE;
use ort::value::ValueType;

let owned = text_feat
Expand Down Expand Up @@ -198,7 +198,7 @@ impl Sam3Image {
let mut res = Vec::with_capacity(texts.len());
for chunk in texts.chunks(self.text_batch) {
use ort::memory::AllocationDevice;
use ort::tensor::TensorElementType as TE;
use ort::value::TensorElementType as TE;
use ort::value::ValueType;

let encs = self.text_processor.encode_texts(chunk, true)?;
Expand Down
2 changes: 1 addition & 1 deletion src/ort/dtype.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ort::tensor::TensorElementType;
use ort::value::TensorElementType;

impl From<TensorElementType> for crate::DType {
fn from(dtype: TensorElementType) -> Self {
Expand Down
Loading
Loading