jamjamjon · jamjamjon · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026
diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
@@ -22,8 +22,8 @@ jobs:
           - ""
           - "vision"
           - "vlm"
-          - "ort-download-binaries"
-          - "ort-load-dynamic"
+          - "ort-download-binaries,ort-api-24"
+          - "ort-load-dynamic,ort-api-24"
           - "video"
           - "viewer"
           - "annotator"
@@ -49,7 +49,7 @@ jobs:
       - name: Clippy
         run: |
           if [ "${{ matrix.feature }}" = "all-features" ]; then
-            cargo clippy --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets -- -D warnings
+            cargo clippy --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24" --all-targets -- -D warnings
           elif [ "${{ matrix.feature }}" = "" ]; then
             cargo clippy --no-default-features --all-targets -- -D warnings
           else
@@ -74,7 +74,7 @@ jobs:
         uses: dtolnay/rust-toolchain@stable
 
       - name: Check
-        run: cargo check --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets
+        run: cargo check --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24" --all-targets
 
   test:
     name: cargo-test
@@ -94,7 +94,7 @@ jobs:
         uses: dtolnay/rust-toolchain@nightly
 
       - name: Test
-        run: cargo +nightly test --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets
+        run: cargo +nightly test --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24" --all-targets
 
   build-linux:
     needs: test
@@ -120,4 +120,4 @@ jobs:
         uses: dtolnay/rust-toolchain@stable
 
       - name: Build
-        run: cargo build --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic"
+        run: cargo build --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic,ort-api-24"
diff --git a/Cargo.toml b/Cargo.toml
@@ -40,7 +40,7 @@ fast_image_resize = { version = "5.5.0", default-features = false, features = ["
 minifb = { version = "0.28.0", optional = true }
 video-rs = { version = "0.10.5", features = ["ndarray"], optional = true }
 ndarray-npy = { version = "0.10", optional = true  }
-ort = { version = "=2.0.0-rc.11", default-features = false, features = [
+ort = { version = "=2.0.0-rc.12", default-features = false, features = [
   "tls-rustls",
   "copy-dylibs",
   "half",
@@ -75,12 +75,22 @@ strip = true
 
 
 [features]
-default = ["ort-download-binaries", "vision", "annotator"]
+default = ["ort-download-binaries", "vision", "annotator", "ort-api-24"]
 
 # ONNXRuntime loading strategies
 ort-download-binaries = ["ort/download-binaries"]
 ort-load-dynamic = ["ort/load-dynamic"] 
 
+# ONNXRuntime API version selection
+ort-api-17 = ["ort/api-17"]
+ort-api-18 = ["ort/api-18"]
+ort-api-19 = ["ort/api-19"]
+ort-api-20 = ["ort/api-20"]
+ort-api-21 = ["ort/api-21"]
+ort-api-22 = ["ort/api-22"]
+ort-api-23 = ["ort/api-23"]
+ort-api-24 = ["ort/api-24"]
+
 # Cuda features (Internal use)
 cuda-runtime = ["dep:cudarc"]
 cuda-runtime-11040 = ["cuda-runtime", "cudarc/cuda-11040"]

diff --git a/docs/cargo-features/ep.md b/docs/cargo-features/ep.md
@@ -0,0 +1,95 @@
+# Execution Providers
+
+Hardware acceleration for inference. Enable the one matching your hardware.
+
+## Supported Providers
+
+| Feature | Platform | Description |
+|---------|----------|-------------|
+| `cuda` | NVIDIA GPU | CUDA execution provider |
+| `tensorrt` | NVIDIA GPU | TensorRT execution provider |
+| `nvrtx` | NVIDIA GPU | TensorRT for RTX (NVRTX) execution provider |
+| `coreml` | Apple Silicon | macOS/iOS inference |
+| `openvino` | Intel | CPU/GPU/VPU acceleration |
+| `directml` | Windows | DirectML acceleration |
+| `rocm` | AMD GPU | ROCm acceleration |
+| `onednn` | Intel | Deep Neural Network Library |
+| `cann` | Huawei | Ascend NPU |
+| `rknpu` | Rockchip | NPU acceleration |
+| `armnn` | ARM | Neural Network SDK |
+| `xnnpack` | Mobile | CPU optimization |
+| `webgpu` | Web | WebGPU/Chrome |
+| `nnapi` | Android | Neural Networks API |
+| `qnn` | Qualcomm | Qualcomm AI Engine Direct (QNN SDK) acceleration |
+| `tvm` | - | Apache TVM |
+| `azure` | Azure | ML execution provider |
+| `migraphx` | AMD | MIGraphX |
+| `vitis` | Xilinx | Vitis AI |
+
+---
+
+## CUDA Image Processor
+
+!!! info "Prerequisites"
+    Requires [cudarc](https://github.com/coreylowman/cudarc) for CUDA kernels.
+
+Enable GPU-accelerated image preprocessing:
+
+| Pattern | Description | Example |
+|---------|-------------|---------|
+| `<ep>-full` | Auto-detect CUDA version via `nvcc` | `cuda-full`, `tensorrt-full` |
+| `cuda-<ver>` / `<ep>-cuda-<ver>` | Specific CUDA version (`cuda-<ver>` for the CUDA EP, `<ep>-cuda-<ver>` for `tensorrt`/`nvrtx`) | `cuda-12040`, `tensorrt-cuda-12040` |
+
+- **`<ep>`**: `cuda`, `tensorrt`, or `nvrtx`
+- **`<ver>`**: CUDA version written as a five-digit code (e.g. `12040` for CUDA 12.4, `11080` for CUDA 11.8)
+
+### Supported CUDA Versions
+
+| Version | Features |
+|---------|----------|
+| 11.x | `cuda-11040`, `cuda-11050`, `cuda-11060`, `cuda-11070`, `cuda-11080` |
+| 12.x | `cuda-12000`, `cuda-12010`, `cuda-12020`, `cuda-12030`, `cuda-12040`, `cuda-12050`, `cuda-12060`, `cuda-12080`, `cuda-12090` |
+| 13.x | `cuda-13000`, `cuda-13010` |
+
+!!! note "TensorRT/NVRTX Versions"
+    Replace `cuda-` with `tensorrt-cuda-` or `nvrtx-cuda-` for TensorRT/NVRTX versions.
+    Example: `tensorrt-cuda-12040`, `nvrtx-cuda-12080`
+
+### Feature & Device Combinations
+
+| Scenario | Feature | Model Device | Processor | Speed |
+|----------|---------|--------------|-----------|-------|
+| CPU Only | `vision` (default) | `cpu` | `cpu` | Baseline |
+| CUDA | `cuda` | `cuda` | `cpu` | Slow preprocess |
+| CUDA (fast) | `cuda-full` | `cuda` | `cuda` | Fast preprocess |
+| TensorRT | `tensorrt` | `tensorrt` | `cpu` | Slow preprocess |
+| TensorRT (fast) | `tensorrt-full` | `tensorrt` | `cuda` | Fast preprocess |
+
+!!! tip "TensorRT EP + CUDA EP + CUDA Image Processor"
+    ```toml
+    features = ["tensorrt-full", "cuda"]
+    # Or
+    features = ["tensorrt", "cuda-full"]
+    ```
+
+!!! warning "Device Consistency"
+    Different EPs can use different devices (e.g., `tensorrt:0` + `cuda:1`).
+
+    However, when using **NVIDIA EP + CUDA image processor**, they **MUST** use the **same GPU ID**:
+    ```bash
+    # ✅ Correct: same GPU
+    --device cuda:0 --processor-device cuda:0
+
+    # ❌ Wrong: different GPUs
+    --device cuda:0 --processor-device cuda:1
+    ```
+
+
+!!! danger "Don't mix CUDA versions"
+    ```toml
+    # ❌ Wrong
+    features = ["cuda-12040", "cuda-11080"]
+
+    # ✅ Correct
+    features = ["tensorrt-full"]
+    ```
diff --git a/docs/cargo-features/ort.md b/docs/cargo-features/ort.md
@@ -1,102 +1,37 @@
-# Execution Providers
+# ONNX Runtime
+ONNX Runtime configuration and API version management.
 
-Hardware acceleration for inference. Enable the one matching your hardware.
-
-## ONNX Runtime
+## Configuration
 
 | Feature | Description | Default |
 |---------|-------------|:-------:|
 | `ort-download-binaries` | Auto-download ONNX Runtime binaries from [pyke](https://ort.pyke.io) | ✓ |
 | `ort-load-dynamic` | Manual linking for custom builds. See [Linking Guide](https://ort.pyke.io/setup/linking) | x |
 
-## Execution Providers
-
-| Feature | Platform | Description |
-|---------|----------|-------------|
-| `cuda` | NVIDIA GPU | CUDA execution provider |
-| `tensorrt` | NVIDIA GPU | TensorRT execution provider |
-| `nvrtx` | NVIDIA GPU | NVRTX execution provider |
-| `coreml` | Apple Silicon | macOS/iOS inference |
-| `openvino` | Intel | CPU/GPU/VPU acceleration |
-| `directml` | Windows | DirectML acceleration |
-| `rocm` | AMD GPU | ROCm acceleration |
-| `onednn` | Intel | Deep Neural Network Library |
-| `cann` | Huawei | Ascend NPU |
-| `rknpu` | Rockchip | NPU acceleration |
-| `armnn` | ARM | Neural Network SDK |
-| `xnnpack` | Mobile | CPU optimization |
-| `webgpu` | Web | WebGPU/Chrome |
-| `nnapi` | Android | Neural Networks API |
-| `qnn` | Qualcomm | SNPE acceleration |
-| `tvm` | - | Apache TVM |
-| `azure` | Azure | ML execution provider |
-| `migraphx` | AMD | MIGraphX |
-| `vitis` | Xilinx | Vitis AI |
-
----
-
-## CUDA Image Processor
-
-!!! info "Prerequisites"
-    Requires [cudarc](https://github.com/coreylowman/cudarc) for CUDA kernels.
-
-Enable GPU-accelerated image preprocessing:
-
-| Pattern | Description | Example |
-|---------|-------------|---------|
-| `<ep>-full` | Auto-detect CUDA version via `nvcc` | `cuda-full`, `tensorrt-full` |
-| `<ep>-cuda-<ver>` | Specific CUDA version | `cuda-12040`, `tensorrt-cuda-12040` |
-
-- **`<ep>`**: `cuda`, `tensorrt`, or `nvrtx`
-- **`<ver>`**: Specific CUDA version
-
-### Supported CUDA Versions
-
-| Version | Features |
-|---------|----------|
-| 11.x | `cuda-11040`, `cuda-11050`, `cuda-11060`, `cuda-11070`, `cuda-11080` |
-| 12.x | `cuda-12000`, `cuda-12010`, `cuda-12020`, `cuda-12030`, `cuda-12040`, `cuda-12050`, `cuda-12060`, `cuda-12080`, `cuda-12090` |
-| 13.x | `cuda-13000`, `cuda-13010` |
+### API Version Selection
 
-!!! note "TensorRT/NVRTX Versions"
-    Replace `cuda-` with `tensorrt-cuda-` or `nvrtx-cuda-` for TensorRT/NVRTX versions.
-    Example: `tensorrt-cuda-12040`, `nvrtx-cuda-12080`
+This library supports ONNX Runtime versions 1.17 through 1.24 via API version features.
 
-### Feature & Device Combinations
+| Feature | ONNX Runtime | Notes |
+|---------|--------------|--------------|
+| `ort-api-17` | v1.17 | Baseline |
+| `ort-api-18` | v1.18 | - |
+| `ort-api-19` | v1.19 | - |
+| `ort-api-20` | v1.20 | Adapter API available |
+| `ort-api-21` | v1.21 | - |
+| `ort-api-22` | v1.22 | - |
+| `ort-api-23` | v1.23 | - |
+| `ort-api-24` | v1.24 | **Default** - Latest features |
 
-| Scenario | Feature | Model Device | Processor | Speed |
-|----------|---------|--------------|-----------|-------|
-| CPU Only | `vision` (default) | `cpu` | `cpu` | Baseline |
-| CUDA | `cuda` | `cuda` | `cpu` | Slow preprocess |
-| CUDA (fast) | `cuda-full` | `cuda` | `cuda` | Fast preprocess |
-| TensorRT | `tensorrt` | `tensorrt` | `cpu` | Slow preprocess |
-| TensorRT (fast) | `tensorrt-full` | `tensorrt` | `cuda` | Fast preprocess |
-
-!!! tip "TensorRT EP + CUDA EP + CUDA Image Processor"
-    ```toml
-    features = ["tensorrt-full", "cuda"]
-    # Or
-    features = ["tensorrt", "cuda-full"]
-    ```
-
-!!! warning "Device Consistency"
-    Different EPs can use different devices (e.g., `tensorrt:0` + `cuda:1`).
-
-    However, when using **NVIDIA EP + CUDA image processor**, they **MUST** use the **same GPU ID**:
+!!! tip "API Version Selection"
     ```toml
-    # ✅ Correct: same GPU
-    --device cuda:0 --processor-device cuda:0
+    # Default uses api-24 (latest)
+    usls = { version = "0.2", features = ["vision"] }
 
-    # ❌ Wrong: different GPUs
-    --device cuda:0 --processor-device cuda:1
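+    # Pin a lower API version: disable default features first, otherwise the default `ort-api-24` stays enabled
+    usls = { version = "0.2", default-features = false, features = ["ort-download-binaries", "vision", "annotator", "ort-api-20"] }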
     ```
 
-
-!!! danger "Don't mix CUDA versions"
-    ```toml
-    # ❌ Wrong
-    features = ["cuda-12040", "cuda-11080"]
-
-    # ✅ Correct
-    features = ["tensorrt-full"]
-    ```
+!!! note "Version Compatibility"
+    - Each API version includes all features from previous versions
+    - Check [ORT multiversion docs](https://ort.pyke.io/setup/multiversion) for minimum version requirements
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -69,7 +69,8 @@ nav:
     - Integration: getting-started/integration.md
   - Cargo Features: 
     - Overview: cargo-features/overview.md
-    - ONNX Runtime & EP: cargo-features/ort.md
+    - ONNX Runtime Version: cargo-features/ort.md
+    - Execution Provider: cargo-features/ep.md
     - Image Formats: cargo-features/image-formats.md
     - Model Categories: cargo-features/models.md
     - Utilities: cargo-features/utils.md

diff --git a/src/models/vision/pipeline/basemodel.rs b/src/models/vision/pipeline/basemodel.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use ort::tensor::TensorElementType;
+use ort::value::TensorElementType;
 
 use crate::{
     Config, Device, Engine, Engines, FromConfig, Image, ImageProcessor, Model, Module, Scale, Task,

diff --git a/src/models/vlm/sam3_image/impl.rs b/src/models/vlm/sam3_image/impl.rs
@@ -29,7 +29,7 @@ pub struct Sam3Image {
 
 impl Sam3Image {
     fn extract_f32(val: &DynValue) -> Result<ArrayD<f32>> {
-        use ort::tensor::TensorElementType as TE;
+        use ort::value::TensorElementType as TE;
         use ort::value::ValueType;
         match val.dtype() {
             ValueType::Tensor { ty, .. } => match ty {
@@ -54,7 +54,7 @@ impl Sam3Image {
         }
 
         use ort::memory::AllocationDevice;
-        use ort::tensor::TensorElementType as TE;
+        use ort::value::TensorElementType as TE;
         use ort::value::ValueType;
 
         let owned = text_feat
@@ -198,7 +198,7 @@ impl Sam3Image {
         let mut res = Vec::with_capacity(texts.len());
         for chunk in texts.chunks(self.text_batch) {
             use ort::memory::AllocationDevice;
-            use ort::tensor::TensorElementType as TE;
+            use ort::value::TensorElementType as TE;
             use ort::value::ValueType;
 
             let encs = self.text_processor.encode_texts(chunk, true)?;

diff --git a/src/ort/dtype.rs b/src/ort/dtype.rs
@@ -1,4 +1,4 @@
-use ort::tensor::TensorElementType;
+use ort::value::TensorElementType;
 
 impl From<TensorElementType> for crate::DType {
     fn from(dtype: TensorElementType) -> Self {