Skip to content

Commit fcdbc96

Browse files
Add onnx model for non rknn computer (#20)
* fix: Handle Event_UNK and final cleanup - Added `Unknown` variant to `SenseVoiceEvent` enum to handle "EVENT_UNK" tag. - Removed debug `println!` statements and unused variables/imports from `src/lib.rs`. - Synced `examples/basic.rs` with user's warning fixes. - Ran `cargo fmt`. - Verified compilation and tests pass without warnings. * Bump version --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent 0845663 commit fcdbc96

File tree

10 files changed

+602
-876
lines changed

10 files changed

+602
-876
lines changed

Cargo.lock

Lines changed: 61 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sensevoice-rs"
3-
version = "0.1.3"
3+
version = "0.1.4"
44
edition = "2021"
55
description = "A Rust-based, SenseVoiceSmall "
66
homepage = "https://github.com/darkautism/sensevoice-rs"
@@ -12,26 +12,28 @@ license = "MIT"
1212

1313
[dependencies]
1414
kaldi-fbank-rust-kautism = "0.1.0"
15-
rknn-rs = "0.1.2"
15+
rknn-rs = { version = "0.1.2", optional = true }
1616
hound = "3.5.1"
1717
ndarray = { version="0.16.1", features=["rayon"]}
1818
ndarray-npy = "0.9.1"
1919
sentencepiece = "0.12.0"
2020
ndarray-stats = "0.6.0"
2121
rayon = "1.11.0"
2222
hf-hub = "0.4.3"
23-
regex = "1.11.1"
24-
voice_activity_detector = { version = "0.2.1", optional = true }
23+
regex = "1.12.2"
24+
voice_activity_detector = "0.2.1"
2525
futures = { version = "0.3.31", optional = true }
2626
async-stream = { version = "0.3.6", optional = true }
27+
ort = { version = "2.0.0-rc.10", features = ["ndarray", "load-dynamic"] }
2728

29+
[dev-dependencies]
30+
tokio = { version = "1.36", features = ["full"] }
31+
futures = "0.3.31"
2832

2933
[features]
3034
default = []
31-
stream = ["futures", "async-stream", "voice_activity_detector"]
35+
stream = ["futures", "async-stream"]
36+
rknpu = ["dep:rknn-rs"]
3237

3338
[target.'cfg(target_os = "macos")'.dependencies]
3439
ort = { version = "2.0.0-rc.10", features = ["coreml", "ndarray", "load-dynamic"] }
35-
36-
[target.'cfg(not(target_os = "macos"))'.dependencies]
37-
ort = { version = "2.0.0-rc.10", features = ["ndarray", "load-dynamic"] }

README.md

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,48 @@
11
# SenseVoiceSmall [![dependency status](https://deps.rs/repo/github/darkautism/sensevoice-rs/status.svg)](https://deps.rs/repo/github/darkautism/sensevoice-rs)
22

3-
A Rust-based, Rknn as backend ASR. Running on the low cost SBC npu, fast and chep.
3+
A Rust-based ASR system with dual backends: ONNX Runtime for standard PCs and RKNN for Rockchip NPUs. It runs smoothly on common desktops and laptops, while also supporting low-cost SBC NPUs for accelerated inference.
44

5-
## Install
5+
## Rockchip Installation Only
66

7-
You should install rknn.so first.
7+
You need to install `rknn.so` first:
88

99
```bash
1010
sudo curl -L https://github.com/airockchip/rknn-toolkit2/raw/refs/heads/master/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so -o /lib/librknnrt.so
1111
```
1212

13-
## Example
13+
Then enable this crate's `rknpu` feature in your `Cargo.toml`.
14+
15+
## Installation
16+
17+
Download [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) and unzip it. Copy `libonnxruntime.so.1.xx.x` and `libonnxruntime.so` into `/lib/`, `/lib64/`, `/usr/lib/`, or another appropriate library directory.
18+
19+
On Windows, place the DLL files in the same directory as your executable (or working directory).
20+
21+
## Usage & Example
22+
23+
This library provides two methods: it can process either an audio file or an audio stream.
24+
25+
See the [examples](examples) directory for more details.
1426

15-
Also see [examples](examples) dictionary
1627
```Rust
1728
use hf_hub::api::sync::Api;
18-
use sensevoice_rs::SenseVoiceSmall;
19-
29+
use sensevoice_rs::{silero_vad::VadConfig, SenseVoiceSmall};
2030

2131
fn main() -> Result<(), Box<dyn std::error::Error>> {
22-
let mut svs = SenseVoiceSmall::init("happyme531/SenseVoiceSmall-RKNN2")?;
23-
32+
// init logic was changed to remove model_path argument
33+
let svs = SenseVoiceSmall::init(VadConfig::default())?;
34+
2435
let api = Api::new().unwrap();
36+
// happyme531/SenseVoiceSmall-RKNN2 has output.wav.
2537
let repo = api.model("happyme531/SenseVoiceSmall-RKNN2".to_owned());
38+
// If the download fails, `repo.get` returns an error that `?` propagates to the caller.
39+
// For basic example, we assume we can download it.
2640
let wav_path = repo.get("output.wav")?;
2741
let allseg = svs.infer_file(wav_path)?;
2842
for seg in allseg {
2943
println!("{:?}", seg);
3044
}
31-
45+
3246
Ok(svs.destroy()?)
3347
}
3448

@@ -37,11 +51,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
3751
## Output Example
3852

3953
```Rust
40-
VoiceText { start_ms: 60, end_ms: 6120, language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "大家好喵今天给大家分享的是在线一线语音生成网站的合集能够更加分富" }
41-
VoiceText { start_ms: 6060, end_ms: 12120, language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "方面大家选择自己想要生成的角色进入网站可以看到所有的删至" }
42-
VoiceText { start_ms: 12060, end_ms: 18120, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "模型都在这里选择你想要擅藏的角色点击进入就来到我" }
43-
VoiceText { start_ms: 18060, end_ms: 24120, language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "到了生成的页面在文本框内输入你想要生成的内容然后点击生成就好了" }
44-
VoiceText { start_ms: 24060, end_ms: 30120, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "另外呢因为每次的生成结果都会有一些不一样的地方如果您觉得第一次的生成结果" }
45-
VoiceText { start_ms: 30060, end_ms: 36120, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "生成效果不好的话可以尝试重新生成也可以稍微调取一下像的住址再生成试试" }
46-
VoiceText { start_ms: 36060, end_ms: 39840, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "使用时一定要遵守法律法规不可以损害刷害人的形象哦" }
54+
VoiceText { language: NoSpeech, emotion: Unknown, event: Unknown, punctuation_normalization: Woitn, content: "" }
55+
VoiceText { language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "大家好喵今天给大家分享的是在线一线语音生成网站的合集能够更加方便大家选择自己想要生成的角色进入网站" }
56+
VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "生成模型都在这里选择你想要深藏的角色点击进入就来到了" }
57+
VoiceText { language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "生成的页面在文本框内输入你想要生成的内容然后点击三层你的" }
58+
VoiceText { language: Ja, emotion: Unknown, event: Bgm, punctuation_normalization: Woitn, content: "" }
59+
VoiceText { language: NoSpeech, emotion: Unknown, event: Unknown, punctuation_normalization: Woitn, content: "" }
60+
VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "另外呢因为每次的生成结果都会有一些不一样的地方如果您觉得第一次的生成效果不好的话可以尝试重新生成也可以稍微调节一下现面的注意" }
61+
VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "在深造事实" }
62+
VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "同时一定要遵守法律法规不可以损害刷人的形象哦" }
63+
VoiceText { language: En, emotion: Unknown, event: Bgm, punctuation_normalization: Woitn, content: "" }
64+
4765
```

examples/basic.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
use hf_hub::api::sync::Api;
2-
use sensevoice_rs::{fsmn_vad::VADXOptions, SenseVoiceSmall};
3-
2+
use sensevoice_rs::{silero_vad::VadConfig, SenseVoiceSmall};
43

54
fn main() -> Result<(), Box<dyn std::error::Error>> {
6-
let mut svs = SenseVoiceSmall::init("happyme531/SenseVoiceSmall-RKNN2", VADXOptions::default())?;
7-
5+
// init logic was changed to remove model_path argument
6+
let svs = SenseVoiceSmall::init(VadConfig::default())?;
7+
88
let api = Api::new().unwrap();
9+
// happyme531/SenseVoiceSmall-RKNN2 has output.wav.
910
let repo = api.model("happyme531/SenseVoiceSmall-RKNN2".to_owned());
11+
// If the download fails, `repo.get` returns an error that `?` propagates to the caller.
12+
// For basic example, we assume we can download it.
1013
let wav_path = repo.get("output.wav")?;
1114
let allseg = svs.infer_file(wav_path)?;
1215
for seg in allseg {
1316
println!("{:?}", seg);
1417
}
15-
18+
1619
Ok(svs.destroy()?)
1720
}

examples/stream.rs

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
1+
use futures::stream::StreamExt;
12
use hf_hub::api::sync::Api;
2-
use sensevoice_rs::SenseVoiceSmall;
3+
use sensevoice_rs::{silero_vad::VadConfig, SenseVoiceSmall};
4+
use std::fs::File;
5+
use std::io::{Read, Seek, SeekFrom};
6+
use std::thread;
7+
use std::time::Duration; // For .next() in infer_stream if used, but here we pass Reader?
8+
// infer_stream expects Stream of Vec<i16>, not a Reader directly unless adapted.
9+
// Let's check lib.rs signature.
10+
// infer_stream<S>(input_stream: S) where S: Stream<Item = Vec<i16>>
11+
// The example code seems to be passing a Reader which is wrong unless there is an adapter.
12+
// But I should fix the compilation errors first.
313

414
// 自定義 DelayedReader 模擬流式輸入,每次讀取延遲 0.5 秒
515
struct DelayedReader {
@@ -23,7 +33,7 @@ impl DelayedReader {
2333
}
2434
}
2535

26-
fn fill_buffer(&mut self) -> Result<(), Box<dyn std::error::Error>> {
36+
fn fill_buffer(&mut self) -> std::io::Result<()> {
2737
self.buffer.clear();
2838
let mut temp_buf = vec![0u8; self.chunk_size];
2939
let bytes_read = self.file.read(&mut temp_buf)?;
@@ -54,19 +64,55 @@ impl Read for DelayedReader {
5464
}
5565
}
5666

57-
fn main() -> Result<(), Box<dyn std::error::Error>> {
58-
let mut svs = SenseVoiceSmall::init("happyme531/SenseVoiceSmall-RKNN2")?;
67+
// Need to adapt Reader to Stream for infer_stream
68+
use async_stream::stream;
69+
use futures::stream::Stream;
70+
71+
fn reader_to_stream(mut reader: DelayedReader) -> impl Stream<Item = Vec<i16>> {
72+
stream! {
73+
loop {
74+
let mut buf = vec![0u8; 1024]; // bytes
75+
match reader.read(&mut buf) {
76+
Ok(0) => break,
77+
Ok(n) => {
78+
// Convert bytes to i16
79+
let samples: Vec<i16> = buf[..n].chunks(2).map(|c| {
80+
if c.len() == 2 {
81+
i16::from_le_bytes([c[0], c[1]])
82+
} else {
83+
0
84+
}
85+
}).collect();
86+
yield samples;
87+
}
88+
Err(_) => break,
89+
}
90+
}
91+
}
92+
}
93+
94+
#[tokio::main]
95+
async fn main() -> Result<(), Box<dyn std::error::Error>> {
96+
let mut svs = SenseVoiceSmall::init(VadConfig::default())?;
5997

6098
let api = Api::new().unwrap();
6199
let repo = api.model("happyme531/SenseVoiceSmall-RKNN2".to_owned());
62100
let wav_path = repo.get("output.wav")?;
63101
let file_for_stream = File::open(&wav_path)?;
64-
let delayed_reader = DelayedReader::new(file_for_stream, 1024, Duration::from_millis(500));
102+
let delayed_reader = DelayedReader::new(file_for_stream, 1024, Duration::from_millis(50)); // Faster delay
103+
104+
let stream = Box::pin(reader_to_stream(delayed_reader));
105+
let mut stream_out = Box::pin(svs.infer_stream(stream));
65106

66-
let allseg = svs.infer_stream(delayed_reader)?;
67-
for seg in allseg {
68-
println!("{:?}", seg);
107+
while let Some(res) = stream_out.next().await {
108+
match res {
109+
Ok(seg) => println!("{:?}", seg),
110+
Err(e) => eprintln!("Error: {}", e),
111+
}
69112
}
70113

114+
// Explicitly drop stream_out to release borrow on svs
115+
drop(stream_out);
116+
71117
Ok(svs.destroy()?)
72118
}

src/config.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
use std::path::PathBuf;
2+
3+
/// 設定檔結構,用於手動載入模型
4+
/// Configuration struct for manual model loading
5+
#[derive(Debug, Clone)]
6+
pub struct SenseVoiceConfig {
7+
/// 主要模型路徑 (ONNX 或 RKNN)
8+
/// Path to the main model (ONNX or RKNN)
9+
pub model_path: PathBuf,
10+
11+
/// Tokenizer 模型路徑 (sentencepiece)
12+
/// Path to the tokenizer model
13+
pub tokenizer_path: PathBuf,
14+
15+
/// ASR CMVN 檔案路徑
16+
/// Path to the ASR CMVN file
17+
pub cmvn_path: Option<PathBuf>,
18+
}

0 commit comments

Comments
 (0)