darkautism
diff --git a/‎Cargo.lock‎
Lines changed: 61 additions & 5 deletions b/‎Cargo.lock‎
Lines changed: 61 additions & 5 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 10 additions & 8 deletions b/‎Cargo.toml‎
Lines changed: 10 additions & 8 deletions
diff --git a/‎README.md‎
Lines changed: 35 additions & 17 deletions b/‎README.md‎
Lines changed: 35 additions & 17 deletions
diff --git a/‎examples/basic.rs‎
Lines changed: 8 additions & 5 deletions b/‎examples/basic.rs‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎examples/stream.rs‎
Lines changed: 54 additions & 8 deletions b/‎examples/stream.rs‎
Lines changed: 54 additions & 8 deletions
diff --git a/‎src/config.rs‎
Lines changed: 18 additions & 0 deletions b/‎src/config.rs‎
Lines changed: 18 additions & 0 deletions
@@ -1,6 +1,6 @@
 [package]
 name = "sensevoice-rs"
-version = "0.1.3"
+version = "0.1.4"
 edition = "2021"
 description = "A Rust-based, SenseVoiceSmall "
 homepage = "https://github.com/darkautism/sensevoice-rs"
@@ -12,26 +12,28 @@ license = "MIT"
 
 [dependencies]
 kaldi-fbank-rust-kautism = "0.1.0"
-rknn-rs = "0.1.2"
+rknn-rs = { version = "0.1.2", optional = true }
 hound = "3.5.1"
 ndarray = { version="0.16.1", features=["rayon"]}
 ndarray-npy = "0.9.1"
 sentencepiece = "0.12.0"
 ndarray-stats = "0.6.0"
 rayon = "1.11.0"
 hf-hub = "0.4.3"
-regex = "1.11.1"
-voice_activity_detector = { version = "0.2.1", optional = true }
+regex = "1.12.2"
+voice_activity_detector = "0.2.1"
 futures = { version = "0.3.31", optional = true }
 async-stream = { version = "0.3.6", optional = true }
+ort = { version = "2.0.0-rc.10", features = ["ndarray", "load-dynamic"] }
 
+[dev-dependencies]
+tokio = { version = "1.36", features = ["full"] }
+futures = "0.3.31"
 
 [features]
 default = []
-stream = ["futures", "async-stream", "voice_activity_detector"]
+stream = ["futures", "async-stream"]
+rknpu = ["dep:rknn-rs"]
 
 [target.'cfg(target_os = "macos")'.dependencies]
 ort = { version = "2.0.0-rc.10", features = ["coreml", "ndarray", "load-dynamic"] }
-
-[target.'cfg(not(target_os = "macos"))'.dependencies]
-ort = { version = "2.0.0-rc.10", features = ["ndarray", "load-dynamic"] }
 
@@ -1,34 +1,48 @@
 # SenseVoiceSmall [![dependency status](https://deps.rs/repo/github/darkautism/sensevoice-rs/status.svg)](https://deps.rs/repo/github/darkautism/sensevoice-rs)
 
-A Rust-based, Rknn as backend ASR. Running on the low cost SBC npu, fast and chep.
+A Rust-based ASR system with dual backends: ONNX Runtime for standard PCs and RKNN for Rockchip NPUs. It runs smoothly on common desktops and laptops, while also supporting low-cost SBC NPUs for accelerated inference.
 
-## Install
+## Rockchip Installation Only
 
-You should install rknn.so first.
+You need to install `rknn.so` first:
 
 ```bash
 sudo curl -L https://github.com/airockchip/rknn-toolkit2/raw/refs/heads/master/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so -o /lib/librknnrt.so
 ```
 
-## Example
+Then enable the `rknpu` feature of this crate in your `Cargo.toml`.
+
+## Installation
+
+Download [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) and unzip it. Copy `libonnxruntime.so.1.xx.x` and `libonnxruntime.so` into `/lib/`, `/lib64/`, `/usr/lib/`, or another appropriate library directory.
+
+On Windows, place the DLL files in the same directory as your executable (or working directory).
+
+## Usage & Example
+
+This library provides two methods: it can process either an audio file or an audio stream.
+
+See the [examples](examples) directory for more details.
 
-Also see [examples](examples) dictionary
 ```Rust
 use hf_hub::api::sync::Api;
-use sensevoice_rs::SenseVoiceSmall;
-
+use sensevoice_rs::{silero_vad::VadConfig, SenseVoiceSmall};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let mut svs = SenseVoiceSmall::init("happyme531/SenseVoiceSmall-RKNN2")?;
-    
+    // init logic was changed to remove model_path argument
+    let svs = SenseVoiceSmall::init(VadConfig::default())?;
+
     let api = Api::new().unwrap();
+    // happyme531/SenseVoiceSmall-RKNN2 has output.wav.
     let repo = api.model("happyme531/SenseVoiceSmall-RKNN2".to_owned());
+    // Fetching the file can fail (e.g. no network access or a missing file);
+    // this basic example simply propagates the error with `?`.
     let wav_path = repo.get("output.wav")?;
     let allseg = svs.infer_file(wav_path)?;
     for seg in allseg {
         println!("{:?}", seg);
     }
-    
+
     Ok(svs.destroy()?)
 }
 
@@ -37,11 +51,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 ## Output Example
 
 ```Rust
-VoiceText { start_ms: 60, end_ms: 6120, language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "大家好喵今天给大家分享的是在线一线语音生成网站的合集能够更加分富" }
-VoiceText { start_ms: 6060, end_ms: 12120, language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "方面大家选择自己想要生成的角色进入网站可以看到所有的删至" }
-VoiceText { start_ms: 12060, end_ms: 18120, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "模型都在这里选择你想要擅藏的角色点击进入就来到我" }
-VoiceText { start_ms: 18060, end_ms: 24120, language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "到了生成的页面在文本框内输入你想要生成的内容然后点击生成就好了" }
-VoiceText { start_ms: 24060, end_ms: 30120, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "另外呢因为每次的生成结果都会有一些不一样的地方如果您觉得第一次的生成结果" }
-VoiceText { start_ms: 30060, end_ms: 36120, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "生成效果不好的话可以尝试重新生成也可以稍微调取一下像的住址再生成试试" }
-VoiceText { start_ms: 36060, end_ms: 39840, language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "使用时一定要遵守法律法规不可以损害刷害人的形象哦" }
+VoiceText { language: NoSpeech, emotion: Unknown, event: Unknown, punctuation_normalization: Woitn, content: "" }
+VoiceText { language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "大家好喵今天给大家分享的是在线一线语音生成网站的合集能够更加方便大家选择自己想要生成的角色进入网站" }
+VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "生成模型都在这里选择你想要深藏的角色点击进入就来到了" }
+VoiceText { language: Zh, emotion: Happy, event: Bgm, punctuation_normalization: Woitn, content: "生成的页面在文本框内输入你想要生成的内容然后点击三层你的" }
+VoiceText { language: Ja, emotion: Unknown, event: Bgm, punctuation_normalization: Woitn, content: "" }
+VoiceText { language: NoSpeech, emotion: Unknown, event: Unknown, punctuation_normalization: Woitn, content: "" }
+VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "另外呢因为每次的生成结果都会有一些不一样的地方如果您觉得第一次的生成效果不好的话可以尝试重新生成也可以稍微调节一下现面的注意" }
+VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "在深造事实" }
+VoiceText { language: Zh, emotion: Neutral, event: Bgm, punctuation_normalization: Woitn, content: "同时一定要遵守法律法规不可以损害刷人的形象哦" }
+VoiceText { language: En, emotion: Unknown, event: Bgm, punctuation_normalization: Woitn, content: "" }
+
 ```
@@ -1,17 +1,20 @@
 use hf_hub::api::sync::Api;
-use sensevoice_rs::{fsmn_vad::VADXOptions, SenseVoiceSmall};
-
+use sensevoice_rs::{silero_vad::VadConfig, SenseVoiceSmall};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let mut svs = SenseVoiceSmall::init("happyme531/SenseVoiceSmall-RKNN2", VADXOptions::default())?;
-    
+    // init logic was changed to remove model_path argument
+    let svs = SenseVoiceSmall::init(VadConfig::default())?;
+
     let api = Api::new().unwrap();
+    // happyme531/SenseVoiceSmall-RKNN2 has output.wav.
     let repo = api.model("happyme531/SenseVoiceSmall-RKNN2".to_owned());
+    // Fetching the file can fail (e.g. no network access or a missing file);
+    // this basic example simply propagates the error with `?`.
     let wav_path = repo.get("output.wav")?;
     let allseg = svs.infer_file(wav_path)?;
     for seg in allseg {
         println!("{:?}", seg);
     }
-    
+
     Ok(svs.destroy()?)
 }
@@ -1,5 +1,15 @@
+use futures::stream::StreamExt;
 use hf_hub::api::sync::Api;
-use sensevoice_rs::SenseVoiceSmall;
+use sensevoice_rs::{silero_vad::VadConfig, SenseVoiceSmall};
+use std::fs::File;
+use std::io::{Read, Seek, SeekFrom};
+use std::thread;
+use std::time::Duration;
+// NOTE: `infer_stream` expects a `Stream<Item = Vec<i16>>` rather than a reader,
+// so the `DelayedReader` is adapted with `reader_to_stream` further below.
 
 // 自定義 DelayedReader 模擬流式輸入，每次讀取延遲 0.5 秒
 struct DelayedReader {
@@ -23,7 +33,7 @@ impl DelayedReader {
         }
     }
 
-    fn fill_buffer(&mut self) -> Result<(), Box<dyn std::error::Error>> {
+    fn fill_buffer(&mut self) -> std::io::Result<()> {
         self.buffer.clear();
         let mut temp_buf = vec![0u8; self.chunk_size];
         let bytes_read = self.file.read(&mut temp_buf)?;
@@ -54,19 +64,55 @@ impl Read for DelayedReader {
     }
 }
 
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let mut svs = SenseVoiceSmall::init("happyme531/SenseVoiceSmall-RKNN2")?;
+// Need to adapt Reader to Stream for infer_stream
+use async_stream::stream;
+use futures::stream::Stream;
+
+/// Adapts a [`DelayedReader`] yielding raw bytes into a stream of `i16` sample chunks.
+///
+/// Every pair of bytes is decoded as one little-endian `i16`. A read may end in the
+/// middle of a sample; that dangling byte is carried over and joined with the first
+/// byte of the next read so that all later samples stay aligned. A dangling byte that
+/// is still pending when the reader is exhausted is discarded.
+///
+/// The stream ends on end of input (`Ok(0)`) or on the first read error. The error is
+/// reported on stderr because the item type (`Vec<i16>`) has no way to carry it.
+fn reader_to_stream(mut reader: DelayedReader) -> impl Stream<Item = Vec<i16>> {
+    stream! {
+        // Scratch buffer reused for every read (size in bytes).
+        let mut buf = vec![0u8; 1024];
+        // Low byte of a sample that was split across two reads.
+        let mut carry: Option<u8> = None;
+        loop {
+            let n = match reader.read(&mut buf) {
+                Ok(0) => break,
+                Ok(n) => n,
+                Err(e) => {
+                    eprintln!("Error: failed to read audio data: {}", e);
+                    break;
+                }
+            };
+
+            // Decode inside its own scope so no borrow of `buf` is alive at the `yield`.
+            let samples: Vec<i16> = {
+                let mut bytes = &buf[..n];
+                let mut decoded = Vec::with_capacity(n / 2 + 1);
+                if let Some(low) = carry.take() {
+                    // `n > 0` here, so `bytes[0]` exists.
+                    decoded.push(i16::from_le_bytes([low, bytes[0]]));
+                    bytes = &bytes[1..];
+                }
+                let pairs = bytes.chunks_exact(2);
+                // At most one byte can be left over; keep it for the next read.
+                carry = pairs.remainder().first().copied();
+                decoded.extend(pairs.map(|pair| i16::from_le_bytes([pair[0], pair[1]])));
+                decoded
+            };
+
+            // A one-byte read with nothing carried produces no complete sample yet.
+            if !samples.is_empty() {
+                yield samples;
+            }
+        }
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut svs = SenseVoiceSmall::init(VadConfig::default())?;
 
     let api = Api::new().unwrap();
     let repo = api.model("happyme531/SenseVoiceSmall-RKNN2".to_owned());
     let wav_path = repo.get("output.wav")?;
     let file_for_stream = File::open(&wav_path)?;
-    let delayed_reader = DelayedReader::new(file_for_stream, 1024, Duration::from_millis(500));
+    let delayed_reader = DelayedReader::new(file_for_stream, 1024, Duration::from_millis(50)); // Faster delay
+
+    let stream = Box::pin(reader_to_stream(delayed_reader));
+    let mut stream_out = Box::pin(svs.infer_stream(stream));
 
-    let allseg = svs.infer_stream(delayed_reader)?;
-    for seg in allseg {
-        println!("{:?}", seg);
+    while let Some(res) = stream_out.next().await {
+        match res {
+            Ok(seg) => println!("{:?}", seg),
+            Err(e) => eprintln!("Error: {}", e),
+        }
     }
 
+    // Explicitly drop stream_out to release borrow on svs
+    drop(stream_out);
+
     Ok(svs.destroy()?)
 }
@@ -0,0 +1,18 @@
+use std::path::PathBuf;
+
+/// Configuration struct for manual model loading.
+///
+/// Groups the file-system paths of the model artifacts.
+#[derive(Debug, Clone)]
+pub struct SenseVoiceConfig {
+    /// Path to the main model (ONNX or RKNN).
+    pub model_path: PathBuf,
+
+    /// Path to the tokenizer model (sentencepiece).
+    pub tokenizer_path: PathBuf,
+
+    /// Path to the ASR CMVN file; optional.
+    // NOTE(review): what happens when this is `None` is not visible here — confirm against the loader.
+    pub cmvn_path: Option<PathBuf>,
+}