Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/rust-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- name: Clippy
run: |
if [ "${{ matrix.feature }}" = "all-features" ]; then
cargo clippy --no-default-features --features "all-models,video,viewer,annotator,hf-hub,ort-download-binaries,ort-load-dynamic" --all-targets -- -D warnings
cargo clippy --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets -- -D warnings
elif [ "${{ matrix.feature }}" = "" ]; then
cargo clippy --no-default-features --all-targets -- -D warnings
else
Expand All @@ -74,7 +74,7 @@ jobs:
uses: dtolnay/rust-toolchain@stable

- name: Check
run: cargo check --no-default-features --features "all-models,video,viewer,annotator,hf-hub,ort-download-binaries,ort-load-dynamic" --all-targets
run: cargo check --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets

test:
name: cargo-test
Expand All @@ -94,7 +94,7 @@ jobs:
uses: dtolnay/rust-toolchain@nightly

- name: Test
run: cargo +nightly test --no-default-features --features "all-models,video,viewer,annotator,hf-hub,ort-download-binaries,ort-load-dynamic" --all-targets
run: cargo +nightly test --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic" --all-targets

build-linux:
needs: test
Expand All @@ -120,4 +120,4 @@ jobs:
uses: dtolnay/rust-toolchain@stable

- name: Build
run: cargo build --no-default-features --features "all-models,video,viewer,annotator,hf-hub,ort-download-binaries,ort-load-dynamic"
run: cargo build --no-default-features --features "all-models,video,viewer,annotator,ort-download-binaries,ort-load-dynamic"
4 changes: 0 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ cudarc = { version = "0.19", optional = true, default-features = false, features
"dynamic-linking"
] }
ureq = { version = "3.1.4", default-features = false, features = ["rustls", "gzip"] }
hf-hub = { version = "0.4.3", default-features = false, features = ["ureq", "rustls-tls"], optional = true }
tokenizers = { version = "0.22.1", optional = true }
lru = { version = "0.16.2", default-features = false }

Expand Down Expand Up @@ -200,9 +199,6 @@ image-qoi = ["image/qoi"]
image-tga = ["image/tga"]
image-all-formats = ["image/default-formats"]

# Hugging Face hub support (for downloading models from Hugging Face)
hf-hub = ["dep:hf-hub"]

# Model Zoo
vision = []
vlm = ["vision", "dep:tokenizers", "dep:ndarray-npy"]
Expand Down
2 changes: 1 addition & 1 deletion docs/cargo-features/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

---

visualization, video I/O, model hub, and annotation utilities.
visualization, video I/O, and annotation utilities.

[:octicons-arrow-right-24: Utilities →](./utils.md)

Expand Down
1 change: 0 additions & 1 deletion docs/cargo-features/utils.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
| ***`annotator`*** | Annotation | Draw bounding boxes, keypoints, masks on images | `ab_glyph`, `imageproc` | ✓ |
| **`viewer`** | Visualization | Real-time image/video display (like OpenCV `imshow`) | `minifb` | x |
| **`video`** | I/O | Video read/write streaming support | `video-rs` | x |
| **`hf-hub`** | Model Hub | Download models from Hugging Face | `hf-hub` | x |

!!! tip "Usage Example"
```toml
Expand Down
86 changes: 86 additions & 0 deletions docs/guides/hub.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Hub

`Hub` downloads and caches files from **GitHub Releases** and **Hugging Face** repositories.

## Supported Formats

| Source | Format | Example |
|--------|--------|---------|
| Local file | File path | `"./model.onnx"` |
| GitHub Release | `<tag>/<file>` | `"yolo/v5-n-det.onnx"` |
| GitHub Release URL | Full URL | `"https://github.com/<owner>/<repo>/releases/download/<tag>/<file>"` |
| HF (inline) | `<owner>/<repo>/<file>` | `"BAAI/bge-m3/tokenizer.json"` |
| HF (dedicated) | `<file>` via `from_hf` | `"onnx/model.onnx"` |
| HF URL | Full URL (`resolve`/`blob`) | `"https://huggingface.co/<owner>/<repo>/blob/main/<file>"` |

!!! tip "HF Endpoint"
By default, Hugging Face downloads use `https://huggingface.co`.

Set the `HF_ENDPOINT` environment variable to use a mirror:
```bash
export HF_ENDPOINT=https://hf-mirror.com
```

## GitHub Release

!!! example "Default Repository"
Download files from the default GitHub repository (`jamjamjon/assets`):

```rust
let path = Hub::default().try_fetch("images/bus.jpg")?;
```

!!! example "Custom Repository"
```rust
let mut hub = Hub::new("owner", "repo");
let path = hub.try_fetch("<tag>/<file>")?;
```

!!! example "Direct GitHub URL"
```rust
let path = Hub::default().try_fetch(
"https://github.com/<owner>/<repo>/releases/download/<tag>/<file>"
)?;
```

## Hugging Face

!!! example "Inline Path (Recommended)"
Use the `<owner>/<repo>/<file>` format directly (the `<file>` part may include subfolders, e.g. `<folder>/<file>`) — no extra setup needed:

```rust
let path = Hub::default().try_fetch("<owner>/<repo>/<folder>/<file>")?;
```

!!! example "Dedicated Hub"
Bind a Hub to a specific HF repository:

```rust
let mut hub = Hub::from_hf("<owner>", "<repo>")?;
let path = hub.try_fetch("<file>")?;
let path = hub.try_fetch("<folder>/<file>")?;
```

!!! example "Direct HF URL"
Supports both `/resolve/` and `/blob/` URLs:

```rust
let path = Hub::default().try_fetch(
"https://huggingface.co/<owner>/<repo>/blob/main/<file>"
)?;
```

## Repository Info

!!! example "Inspect Repository"
```rust
Hub::default().info()?; // GitHub releases
Hub::from_hf("<owner>", "<repo>")?.info()?; // HF file tree with sizes
```

## Caching

!!! info "Cache Behavior"
- Files are cached locally after the first download (`~/.cache/usls/` or similar).
- GitHub release metadata: TTL-based (default 10 min, configurable via `with_ttl`).
- Failed or incomplete downloads are discarded (atomic write via temp files).
2 changes: 2 additions & 0 deletions docs/model-zoo/embedding.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,7 @@ hide:
| [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1) | Vision-Language Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [jina-clip-v2](https://huggingface.co/jinaai/jina-clip-v2) | Vision-Language Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [mobileclip](https://github.com/apple/ml-mobileclip) | Vision-Language Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [SigLIP](https://huggingface.co/collections/google/siglip) | Vision-Language Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [SigLIPv2](https://huggingface.co/collections/google/siglip2) | Vision-Language Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ❌ | ❌ | ❌ | ❌ |
| [DINOv3](https://github.com/facebookresearch/dinov3) | Vision Embedding | [demo](https://github.com/jamjamjon/usls/tree/main/examples/embedding) | ✅ | ❓ | ✅ | ✅ | ✅ | ✅ | ✅ |
60 changes: 45 additions & 15 deletions examples/embedding/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,51 @@ This directory contains examples for embedding models that extract feature repre
Vision-language model for image and text embeddings.

**Variants:**
- `clip-b16` - CLIP ViT-B/16
- `clip-b32` - CLIP ViT-B/32
- `clip-l14` - CLIP ViT-L/14
- `jina-clip-v1` - Jina CLIP v1
- `jina-clip-v2` - Jina CLIP v2
- `mobileclip-s0` - MobileCLIP S0
- `mobileclip-s1` - MobileCLIP S1
- `mobileclip-s2` - MobileCLIP S2
- `mobileclip-b` - MobileCLIP B
- `mobileclip-blt` - MobileCLIP BLT
- `mobileclip2-s0` - MobileCLIP2 S0 (default)
- `mobileclip2-s2` - MobileCLIP2 S2
- `mobileclip2-s4` - MobileCLIP2 S4
- `mobileclip2-b` - MobileCLIP2 B
- `mobileclip2-l14` - MobileCLIP2 L14

**OpenAI CLIP:**
- `clip-b16` - ViT-B/16 (85M params)
- `clip-b32` - ViT-B/32 (87M params)
- `clip-l14` - ViT-L/14 (304M params)

**Jina CLIP:**
- `jina-clip-v1` - Improved performance, 224x224
- `jina-clip-v2` - 512x512 resolution, better accuracy

**MobileCLIP (Apple):**
- `mobileclip-s0` - Small variant S0
- `mobileclip-s1` - Small variant S1
- `mobileclip-s2` - Small variant S2
- `mobileclip-b` - Base variant
- `mobileclip-blt` - Base with large text encoder

**MobileCLIP v2:**
- `mobileclip2-s0` - Enhanced small S0 (default)
- `mobileclip2-s2` - Enhanced small S2
- `mobileclip2-s4` - Enhanced small S4
- `mobileclip2-b` - Enhanced base
- `mobileclip2-l14` - Enhanced large

**SigLIP (Google DeepMind):**
- `siglip-b16-224` - Base, patch16, 224x224
- `siglip-b16-256` - Base, patch16, 256x256
- `siglip-b16-384` - Base, patch16, 384x384
- `siglip-b16-512` - Base, patch16, 512x512
- `siglip-l16-256` - Large, patch16, 256x256
- `siglip-l16-384` - Large, patch16, 384x384

**SigLIP v2 (Google DeepMind):**
- `siglip2-b16-224` - Base v2, patch16, 224x224
- `siglip2-b16-256` - Base v2, patch16, 256x256
- `siglip2-b16-384` - Base v2, patch16, 384x384
- `siglip2-b16-512` - Base v2, patch16, 512x512
- `siglip2-l16-256` - Large v2, patch16, 256x256
- `siglip2-l16-384` - Large v2, patch16, 384x384
- `siglip2-l16-512` - Large v2, patch16, 512x512
- `siglip2-so400m-patch14-224` - 400M, patch14, 224x224
- `siglip2-so400m-patch14-384` - 400M, patch14, 384x384
- `siglip2-so400m-patch16-256` - 400M, patch16, 256x256
- `siglip2-so400m-patch16-384` - 400M, patch16, 384x384
- `siglip2-so400m-patch16-512` - 400M, patch16, 512x512

**Usage:**
```bash
Expand Down
20 changes: 19 additions & 1 deletion examples/embedding/clip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use usls::{Config, DType, Device};

#[derive(Args, Debug)]
pub struct ClipArgs {
/// Variant: clip-b16, clip-b32, clip-l14, jina-clip-v1, jina-clip-v2, mobileclip-s0, mobileclip-s1, mobileclip-s2, mobileclip-b, mobileclip-blt, mobileclip2-s0, mobileclip2-s2, mobileclip2-s4, mobileclip2-b, mobileclip2-l14
/// Variant: clip-b16, clip-b32, clip-l14, jina-clip-v1, jina-clip-v2, mobileclip-s0, mobileclip-s1, mobileclip-s2, mobileclip-b, mobileclip-blt, mobileclip2-s0, mobileclip2-s2, mobileclip2-s4, mobileclip2-b, mobileclip2-l14, siglip-b16-224, siglip-b16-256, siglip-b16-384, siglip-b16-512, siglip-l16-256, siglip-l16-384, siglip2-b16-224, siglip2-b16-256, siglip2-b16-384, siglip2-b16-512, siglip2-l16-256, siglip2-l16-384, siglip2-l16-512, siglip2-so400m-patch14-224, siglip2-so400m-patch14-384, siglip2-so400m-patch16-256, siglip2-so400m-patch16-384, siglip2-so400m-patch16-512
#[arg(long, default_value = "mobileclip2-s0")]
pub variant: String,

Expand Down Expand Up @@ -62,6 +62,24 @@ pub fn config(args: &ClipArgs) -> Result<Config> {
"mobileclip2-s4" => Config::mobileclip2_s4(),
"mobileclip2-b" => Config::mobileclip2_b(),
"mobileclip2-l14" => Config::mobileclip2_l14(),
"siglip-b16-224" => Config::siglip_b16_224(),
"siglip-b16-256" => Config::siglip_b16_256(),
"siglip-b16-384" => Config::siglip_b16_384(),
"siglip-b16-512" => Config::siglip_b16_512(),
"siglip-l16-256" => Config::siglip_l16_256(),
"siglip-l16-384" => Config::siglip_l16_384(),
"siglip2-b16-224" => Config::siglip2_b16_224(),
"siglip2-b16-256" => Config::siglip2_b16_256(),
"siglip2-b16-384" => Config::siglip2_b16_384(),
"siglip2-b16-512" => Config::siglip2_b16_512(),
"siglip2-l16-256" => Config::siglip2_l16_256(),
"siglip2-l16-384" => Config::siglip2_l16_384(),
"siglip2-l16-512" => Config::siglip2_l16_512(),
"siglip2-so400m-patch14-224" => Config::siglip2_so400m_patch14_224(),
"siglip2-so400m-patch14-384" => Config::siglip2_so400m_patch14_384(),
"siglip2-so400m-patch16-256" => Config::siglip2_so400m_patch16_256(),
"siglip2-so400m-patch16-384" => Config::siglip2_so400m_patch16_384(),
"siglip2-so400m-patch16-512" => Config::siglip2_so400m_patch16_512(),
_ => anyhow::bail!("Unsupported CLIP variant: {}", args.variant),
}
.with_visual_dtype(args.visual_dtype)
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ nav:
- Guides:
- Overview: guides/overview.md
- Config System: guides/config.md
- Hub (GitHub / Hugging Face): guides/hub.md
- Data Loading: guides/dataloader.md
- Execution Providers: guides/ep.md
- DType & Quantization: guides/dtype.md
Expand Down
22 changes: 0 additions & 22 deletions src/config/impl_inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,18 +102,6 @@ impl crate::Config {
self
}

/// Set max tokens.
pub fn with_max_tokens(mut self, max_tokens: u64) -> Self {
self.inference.max_tokens = Some(max_tokens);
self
}

/// Set ignore eos flag.
pub fn with_ignore_eos(mut self, ignore_eos: bool) -> Self {
self.inference.ignore_eos = ignore_eos;
self
}

/// Get class confidences (accessor for inference params).
pub fn class_confs(&self) -> &[f32] {
&self.inference.class_confs
Expand All @@ -135,16 +123,6 @@ impl crate::Config {
self
}

// pub fn with_temperature(mut self, temperature: f32) -> Self {
// self.inference.temperature = temperature;
// self
// }

// pub fn with_topp(mut self, topp: f32) -> Self {
// self.inference.topp = topp;
// self
// }

/// Get text confidences.
pub fn text_confs(&self) -> &[f32] {
&self.inference.text_confs
Expand Down
32 changes: 21 additions & 11 deletions src/config/impl_text_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ impl crate::Config {
self
}

// /// Set maximum number of tokens to generate.
// pub fn with_max_tokens(mut self, n: u64) -> Self {
// self.text_processor.max_tokens = Some(n);
// self
// }

// /// Set whether to ignore the end-of-sequence token.
// pub fn with_ignore_eos(mut self, ignore_eos: bool) -> Self {
// self.text_processor.ignore_eos = ignore_eos;
// self
// }
/// Set maximum number of tokens to generate.
///
/// Builder-style setter: stores `Some(n)` in `text_processor.max_tokens`
/// and returns the updated config so calls can be chained.
pub fn with_max_tokens(mut self, n: u64) -> Self {
self.text_processor.max_tokens = Some(n);
self
}

/// Set whether to ignore the end-of-sequence token.
///
/// Builder-style setter: writes the flag to `text_processor.ignore_eos`
/// and returns the updated config so calls can be chained.
pub fn with_ignore_eos(mut self, ignore_eos: bool) -> Self {
self.text_processor.ignore_eos = ignore_eos;
self
}

/// Set special tokens map file.
pub fn with_special_tokens_map_file(mut self, file: impl Into<String>) -> Self {
Expand All @@ -40,4 +40,14 @@ impl crate::Config {
self.text_processor.config_file = Some(file.into());
self
}

/// Set the temperature parameter for text generation.
///
/// Builder-style setter: writes the value to `text_processor.temperature`
/// and returns the updated config so calls can be chained.
// NOTE(review): no range validation happens here; confirm whether the
// consumer expects a strictly positive value.
pub fn with_temperature(mut self, temperature: f32) -> Self {
self.text_processor.temperature = temperature;
self
}

/// Set the top-p parameter for nucleus sampling.
///
/// Builder-style setter: writes the value to `text_processor.topp`
/// and returns the updated config so calls can be chained.
// NOTE(review): no range validation happens here; presumably the value is
// meant to lie in (0, 1] — confirm against the sampling code.
pub fn with_topp(mut self, topp: f32) -> Self {
self.text_processor.topp = topp;
self
}
}
12 changes: 0 additions & 12 deletions src/config/inference_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,6 @@ pub struct InferenceParams {
pub db_unclip_ratio: Option<f32>,
pub db_binary_thresh: Option<f32>,
pub token_level_class: bool,
/// Maximum number of tokens to generate.
pub max_tokens: Option<u64>,
/// Whether to ignore the end-of-sequence token.
pub ignore_eos: bool,
// /// Temperature parameter for text generation.
// pub temperature: f32,
// /// Top-p parameter for nucleus sampling.
// pub topp: f32,

// Task-specific parameters
#[cfg(feature = "vision")]
Expand Down Expand Up @@ -83,10 +75,6 @@ impl Default for InferenceParams {
find_contours: Default::default(),
up_scale: 2.0,
text_names: Default::default(),
max_tokens: Default::default(),
ignore_eos: Default::default(),
// temperature: 1.0,
// topp: 0.9,
token_level_class: Default::default(),
#[cfg(feature = "vision")]
yolo_preds_format: Default::default(),
Expand Down
Loading