From 3a2f63cc3706ec11b4e54d9e047030a2d71aca9d Mon Sep 17 00:00:00 2001
From: chenchaonan <2301835860@qq.com>
Date: Mon, 30 Mar 2026 14:49:18 +0800
Subject: [PATCH] Rename bundled fish_speech package to fish_speech_s2 to resolve the package conflict

---
 README.md                                          |  2 +-
 README_ZH.md                                       |  2 +-
 __init__.py                                        |  2 +-
 fish_speech_src/.gitignore                         |  4 ++--
 fish_speech_src/docs/ar/finetune.md                |  4 ++--
 fish_speech_src/docs/ar/inference.md               |  6 +++---
 fish_speech_src/docs/en/finetune.md                |  4 ++--
 fish_speech_src/docs/en/inference.md               |  6 +++---
 fish_speech_src/docs/ja/finetune.md                |  4 ++--
 fish_speech_src/docs/ja/inference.md               |  6 +++---
 fish_speech_src/docs/ko/finetune.md                |  4 ++--
 fish_speech_src/docs/ko/inference.md               |  6 +++---
 fish_speech_src/docs/pt/finetune.md                |  4 ++--
 fish_speech_src/docs/pt/inference.md               |  6 +++---
 fish_speech_src/docs/zh/finetune.md                |  4 ++--
 fish_speech_src/docs/zh/inference.md               |  6 +++---
 .../fish_speech/configs/lora/r_8_alpha_16.yaml     |  4 ----
 .../callbacks/__init__.py                          |  0
 .../callbacks/grad_norm.py                         |  0
 .../configs/base.yaml                              |  2 +-
 .../fish_speech_s2/configs/lora/r_8_alpha_16.yaml  |  4 ++++
 .../configs/modded_dac_vq.yaml                     | 10 +++++-----
 .../configs/text2semantic_finetune.yaml            | 14 +++++++-------
 .../content_sequence.py                            |  2 +-
 .../conversation.py                                |  4 ++--
 .../datasets/concat_repeat.py                      |  0
 .../datasets/protos/text-data.proto                |  0
 .../datasets/protos/text_data_pb2.py               |  0
 .../datasets/protos/text_data_stream.py            |  0
 .../datasets/semantic.py                           | 14 +++++++-------
 .../datasets/vqgan.py                              |  2 +-
 .../{fish_speech => fish_speech_s2}/i18n/README.md |  4 ++--
 .../i18n/__init__.py                               |  0
 .../{fish_speech => fish_speech_s2}/i18n/core.py   |  0
 .../i18n/locale/en_US.json                         |  0
 .../i18n/locale/es_ES.json                         |  0
 .../i18n/locale/ja_JP.json                         |  0
 .../i18n/locale/ko_KR.json                         |  0
 .../i18n/locale/pt_BR.json                         |  0
 .../i18n/locale/zh_CN.json                         |  0
 .../{fish_speech => fish_speech_s2}/i18n/scan.py   |  0
 .../inference_engine/__init__.py                   | 14 +++++++-------
 .../inference_engine/reference_loader.py           |  6 +++---
 .../inference_engine/utils.py                      |  0
 .../inference_engine/vq_manager.py                 |  2 +-
 .../models/dac/__init__.py                         |  0
 .../models/dac/inference.py                        |  2 +-
 .../models/dac/modded_dac.py                       |  2 +-
 .../models/dac/rvq.py                              |  0
 .../models/text2semantic/__init__.py               |  0
 .../models/text2semantic/inference.py              |  8 ++++----
 .../models/text2semantic/lit_module.py             |  4 ++--
 .../models/text2semantic/llama.py                  |  4 ++--
 .../models/text2semantic/lora.py                   |  0
 .../{fish_speech => fish_speech_s2}/scheduler.py   |  0
 .../text/__init__.py                               |  0
 .../{fish_speech => fish_speech_s2}/text/clean.py  |  0
 .../{fish_speech => fish_speech_s2}/tokenizer.py   |  0
 .../{fish_speech => fish_speech_s2}/train.py       |  2 +-
 .../utils/__init__.py                              |  0
 .../utils/braceexpand.py                           |  0
 .../utils/context.py                               |  0
 .../{fish_speech => fish_speech_s2}/utils/file.py  |  0
 .../utils/instantiators.py                         |  0
 .../utils/logger.py                                |  0
 .../utils/logging_utils.py                         |  2 +-
 .../utils/rich_utils.py                            |  2 +-
 .../utils/schema.py                                |  2 +-
 .../utils/spectrogram.py                           |  0
 .../{fish_speech => fish_speech_s2}/utils/utils.py |  0
 fish_speech_src/tools/api_client.py                |  4 ++--
 fish_speech_src/tools/llama/build_dataset.py       |  6 +++---
 fish_speech_src/tools/llama/eval_in_context.py     |  4 ++--
 fish_speech_src/tools/llama/merge_lora.py          |  6 +++---
 fish_speech_src/tools/llama/quantize.py            |  4 ++--
 fish_speech_src/tools/run_webui.py                 |  8 ++++----
 fish_speech_src/tools/server/api_utils.py          |  4 ++--
 fish_speech_src/tools/server/inference.py          |  4 ++--
 fish_speech_src/tools/server/model_manager.py      |  8 ++++----
 fish_speech_src/tools/server/views.py              |  2 +-
 fish_speech_src/tools/vqgan/create_train_split.py  |  2 +-
 fish_speech_src/tools/vqgan/extract_vq.py          |  4 ++--
 fish_speech_src/tools/webui/__init__.py            |  2 +-
 fish_speech_src/tools/webui/inference.py           |  4 ++--
 fish_speech_src/tools/webui/variables.py           |  2 +-
 nodes/loader.py                                    | 14 +++++++-------
 nodes/model_cache.py                               |  4 ++--
 nodes/multi_speaker_node.py                        |  4 ++--
 nodes/multi_speaker_split_node.py                  |  4 ++--
 nodes/tts_node.py                                  |  2 +-
 nodes/voice_clone_node.py                          |  2 +-
 91 files changed, 137 insertions(+), 137 deletions(-)
 delete mode 100644 fish_speech_src/fish_speech/configs/lora/r_8_alpha_16.yaml
 rename fish_speech_src/{fish_speech => fish_speech_s2}/callbacks/__init__.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/callbacks/grad_norm.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/configs/base.yaml (97%)
 create mode 100644 fish_speech_src/fish_speech_s2/configs/lora/r_8_alpha_16.yaml
 rename fish_speech_src/{fish_speech => fish_speech_s2}/configs/modded_dac_vq.yaml (76%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/configs/text2semantic_finetune.yaml (74%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/content_sequence.py (99%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/conversation.py (97%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/datasets/concat_repeat.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/datasets/protos/text-data.proto (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/datasets/protos/text_data_pb2.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/datasets/protos/text_data_stream.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/datasets/semantic.py (97%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/datasets/vqgan.py (98%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/README.md (95%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/__init__.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/core.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/locale/en_US.json (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/locale/es_ES.json (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/locale/ja_JP.json (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/locale/ko_KR.json (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/locale/pt_BR.json (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/locale/zh_CN.json (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/i18n/scan.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/inference_engine/__init__.py (93%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/inference_engine/reference_loader.py (98%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/inference_engine/utils.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/inference_engine/vq_manager.py (97%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/dac/__init__.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/dac/inference.py (98%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/dac/modded_dac.py (99%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/dac/rvq.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/text2semantic/__init__.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/text2semantic/inference.py (99%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/text2semantic/lit_module.py (98%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/text2semantic/llama.py (99%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/models/text2semantic/lora.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/scheduler.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/text/__init__.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/text/clean.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/tokenizer.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/train.py (99%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/__init__.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/braceexpand.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/context.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/file.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/instantiators.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/logger.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/logging_utils.py (96%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/rich_utils.py (98%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/schema.py (98%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/spectrogram.py (100%)
 rename fish_speech_src/{fish_speech => fish_speech_s2}/utils/utils.py (100%)

diff --git a/README.md b/README.md
index e7ec832..8c69f37 100644
--- a/README.md
+++ b/README.md
@@ -412,7 +412,7 @@ If you see `RuntimeError: Failed to create AudioDecoder ... MockDecoder() takes
 
 ### Conflicting `fish_speech` Package?
 
-If you see `ImportError: cannot import name 'AUDIO_EXTENSIONS' from 'fish_speech.utils.file'` pointing to another custom node's directory (e.g. `comfyui-mixlab-nodes`), a different node has its own `fish_speech` folder that conflicts with ours via `sys.path`. Disable the conflicting node or remove it. Do **not** pip-install `fish_speech` — it is bundled inside this node.
+If you see `ImportError: cannot import name 'AUDIO_EXTENSIONS' from 'fish_speech_s2.utils.file'` pointing to another custom node's directory (e.g. `comfyui-mixlab-nodes`), a different node has its own `fish_speech` folder that conflicts with ours via `sys.path`. Disable the conflicting node or remove it. Do **not** pip-install `fish_speech` — it is bundled inside this node.
 
 ### Out of Memory?
 
diff --git a/README_ZH.md b/README_ZH.md
index 6f6632f..45d2266 100644
--- a/README_ZH.md
+++ b/README_ZH.md
@@ -406,7 +406,7 @@ pip install "descript-audiotools>=0.7.2" --no-deps
 
 ### `fish_speech` 包冲突？
 
-如果看到 `ImportError: cannot import name 'AUDIO_EXTENSIONS' from 'fish_speech.utils.file'` 且路径指向其他自定义节点目录（如 `comfyui-mixlab-nodes`），说明另一个节点有自己的 `fish_speech` 文件夹，通过 `sys.path` 与本节点冲突。请禁用或删除冲突节点。**不要** 通过 pip 安装 `fish_speech` — 它已内置于本节点中。
+如果看到 `ImportError: cannot import name 'AUDIO_EXTENSIONS' from 'fish_speech_s2.utils.file'` 且路径指向其他自定义节点目录（如 `comfyui-mixlab-nodes`），说明另一个节点有自己的 `fish_speech` 文件夹，通过 `sys.path` 与本节点冲突。请禁用或删除冲突节点。**不要** 通过 pip 安装 `fish_speech` — 它已内置于本节点中。
 
 ### 显存不足？
 
diff --git a/__init__.py b/__init__.py
index fba8cc1..19d4f0b 100644
--- a/__init__.py
+++ b/__init__.py
@@ -167,7 +167,7 @@ def _ensure_fish_source() -> bool:
         sys.path.insert(0, fish_src_str)
 
     try:
-        import fish_speech.models  # noqa: F401
+        import fish_speech_s2.models  # noqa: F401
         return True
     except ImportError as e:
         logger.error(f"fish_speech not importable from {_FISH_SRC}: {e}")
diff --git a/fish_speech_src/.gitignore b/fish_speech_src/.gitignore
index 91c3d5f..3e1feef 100644
--- a/fish_speech_src/.gitignore
+++ b/fish_speech_src/.gitignore
@@ -58,7 +58,7 @@ venv.bak/
 # Project Dependencies
 # --------------------
 .pdm-python
-/fish_speech.egg-info
+/fish_speech_s2.egg-info
 
 # Data and Model Files
 # --------------------
@@ -86,7 +86,7 @@ filelists/
 *.pkl
 *.pickle
 *.lab
-/fish_speech/text/cmudict_cache.pickle
+/fish_speech_s2/text/cmudict_cache.pickle
 
 # Cache and Temporary Files
 # --------------------------
diff --git a/fish_speech_src/docs/ar/finetune.md b/fish_speech_src/docs/ar/finetune.md
index 929397e..bc33027 100644
--- a/fish_speech_src/docs/ar/finetune.md
+++ b/fish_speech_src/docs/ar/finetune.md
@@ -95,13 +95,13 @@ huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/ope
 أخيرًا، يمكنك بدء الضبط الدقيق عن طريق تشغيل الأمر التالي:
 
 ```bash
-python fish_speech/train.py --config-name text2semantic_finetune \
+python fish_speech_s2/train.py --config-name text2semantic_finetune \
     project=$project \
     +lora@model.model.lora_config=r_8_alpha_16
 ```
 
 !!! note "ملاحظة"
-    يمكنك تعديل معلمات التدريب مثل `batch_size`، `gradient_accumulation_steps`، وما إلى ذلك لتناسب ذاكرة وحدة معالجة الرسومات الخاصة بك عن طريق تعديل `fish_speech/configs/text2semantic_finetune.yaml`.
+    يمكنك تعديل معلمات التدريب مثل `batch_size`، `gradient_accumulation_steps`، وما إلى ذلك لتناسب ذاكرة وحدة معالجة الرسومات الخاصة بك عن طريق تعديل `fish_speech_s2/configs/text2semantic_finetune.yaml`.
 
 !!! note "ملاحظة"
     لمستخدمي Windows، يمكنك استخدام `trainer.strategy.process_group_backend=gloo` لتجنب مشكلات `nccl`.
diff --git a/fish_speech_src/docs/ar/inference.md b/fish_speech_src/docs/ar/inference.md
index 293144d..0957aff 100644
--- a/fish_speech_src/docs/ar/inference.md
+++ b/fish_speech_src/docs/ar/inference.md
@@ -18,7 +18,7 @@ hf download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 ### 1. الحصول على رموز VQ من الصوت المرجعي
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "test.wav" \
     --checkpoint-path "checkpoints/s2-pro/codec.pth"
 ```
@@ -28,7 +28,7 @@ python fish_speech/models/dac/inference.py \
 ### 2. توليد الرموز الدلالية (Semantic tokens) من النص:
 
 ```bash
-python fish_speech/models/text2semantic/inference.py \
+python fish_speech_s2/models/text2semantic/inference.py \
     --text "النص الذي تريد تحويله" \
     --prompt-text "النص المرجعي الخاص بك" \
     --prompt-tokens "fake.npy" \
@@ -47,7 +47,7 @@ python fish_speech/models/text2semantic/inference.py \
 ### 3. توليد الصوت من الرموز الدلالية:
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "codes_0.npy" \
 ```
 
diff --git a/fish_speech_src/docs/en/finetune.md b/fish_speech_src/docs/en/finetune.md
index 96954bb..63b4644 100644
--- a/fish_speech_src/docs/en/finetune.md
+++ b/fish_speech_src/docs/en/finetune.md
@@ -98,13 +98,13 @@ huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/ope
 Finally, you can start the fine-tuning by running the following command:
 
 ```bash
-python fish_speech/train.py --config-name text2semantic_finetune \
+python fish_speech_s2/train.py --config-name text2semantic_finetune \
     project=$project \
     +lora@model.model.lora_config=r_8_alpha_16
 ```
 
 !!! note
-    You can modify the training parameters such as `batch_size`, `gradient_accumulation_steps`, etc. to fit your GPU memory by modifying `fish_speech/configs/text2semantic_finetune.yaml`.
+    You can modify the training parameters such as `batch_size`, `gradient_accumulation_steps`, etc. to fit your GPU memory by modifying `fish_speech_s2/configs/text2semantic_finetune.yaml`.
 
 !!! note
     For Windows users, you can use `trainer.strategy.process_group_backend=gloo` to avoid `nccl` issues.
diff --git a/fish_speech_src/docs/en/inference.md b/fish_speech_src/docs/en/inference.md
index 64a312d..98607e6 100644
--- a/fish_speech_src/docs/en/inference.md
+++ b/fish_speech_src/docs/en/inference.md
@@ -18,7 +18,7 @@ hf download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 ### 1. Get VQ tokens from reference audio
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "test.wav" \
     --checkpoint-path "checkpoints/s2-pro/codec.pth"
 ```
@@ -28,7 +28,7 @@ You should get a `fake.npy` and a `fake.wav`.
 ### 2. Generate Semantic tokens from text:
 
 ```bash
-python fish_speech/models/text2semantic/inference.py \
+python fish_speech_s2/models/text2semantic/inference.py \
     --text "The text you want to convert" \
     --prompt-text "Your reference text" \
     --prompt-tokens "fake.npy" \
@@ -47,7 +47,7 @@ This command will create a `codes_N` file in the working directory, where N is a
 ### 3. Generate vocals from semantic tokens:
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "codes_0.npy" \
 ```
 
diff --git a/fish_speech_src/docs/ja/finetune.md b/fish_speech_src/docs/ja/finetune.md
index 5419831..45f1231 100644
--- a/fish_speech_src/docs/ja/finetune.md
+++ b/fish_speech_src/docs/ja/finetune.md
@@ -94,13 +94,13 @@ huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/ope
 最後に、次のコマンドを実行してファインチューニングを開始できます。
 
 ```bash
-python fish_speech/train.py --config-name text2semantic_finetune \
+python fish_speech_s2/train.py --config-name text2semantic_finetune \
     project=$project \
     +lora@model.model.lora_config=r_8_alpha_16
 ```
 
 !!! note
-    `fish_speech/configs/text2semantic_finetune.yaml` を変更することで、`batch_size` や `gradient_accumulation_steps` などのトレーニングパラメータをGPUメモリに合わせて変更できます。
+    `fish_speech_s2/configs/text2semantic_finetune.yaml` を変更することで、`batch_size` や `gradient_accumulation_steps` などのトレーニングパラメータをGPUメモリに合わせて変更できます。
 
 !!! note
     Windows ユーザーの場合、`trainer.strategy.process_group_backend=gloo` を使用して `nccl` の問題を回避できます。
diff --git a/fish_speech_src/docs/ja/inference.md b/fish_speech_src/docs/ja/inference.md
index 69ac9ee..94b9e5d 100644
--- a/fish_speech_src/docs/ja/inference.md
+++ b/fish_speech_src/docs/ja/inference.md
@@ -18,7 +18,7 @@ hf download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 ### 1. リファレンスオーディオから VQ トークンを取得する
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "test.wav" \
     --checkpoint-path "checkpoints/s2-pro/codec.pth"
 ```
@@ -28,7 +28,7 @@ python fish_speech/models/dac/inference.py \
 ### 2. テキストから Semantic トークンを生成する：
 
 ```bash
-python fish_speech/models/text2semantic/inference.py \
+python fish_speech_s2/models/text2semantic/inference.py \
     --text "変換したいテキスト" \
     --prompt-text "リファレンステキスト" \
     --prompt-tokens "fake.npy" \
@@ -47,7 +47,7 @@ python fish_speech/models/text2semantic/inference.py \
 ### 3. セマンティックトークンから音声を生成する：
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "codes_0.npy" \
 ```
 
diff --git a/fish_speech_src/docs/ko/finetune.md b/fish_speech_src/docs/ko/finetune.md
index 07b8dbd..f0c4695 100644
--- a/fish_speech_src/docs/ko/finetune.md
+++ b/fish_speech_src/docs/ko/finetune.md
@@ -95,13 +95,13 @@ huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/ope
 마지막으로, 다음 명령을 실행하여 미세 조정을 시작할 수 있습니다.
 
 ```bash
-python fish_speech/train.py --config-name text2semantic_finetune \
+python fish_speech_s2/train.py --config-name text2semantic_finetune \
     project=$project \
     +lora@model.model.lora_config=r_8_alpha_16
 ```
 
 !!! note
-    `fish_speech/configs/text2semantic_finetune.yaml` 파일을 수정하여 `batch_size`, `gradient_accumulation_steps` 등 훈련 매개변수를 GPU 메모리에 맞게 조정할 수 있습니다.
+    `fish_speech_s2/configs/text2semantic_finetune.yaml` 파일을 수정하여 `batch_size`, `gradient_accumulation_steps` 등 훈련 매개변수를 GPU 메모리에 맞게 조정할 수 있습니다.
 
 !!! note
     Windows 사용자의 경우, `trainer.strategy.process_group_backend=gloo`를 사용하여 `nccl` 관련 문제를 피할 수 있습니다.
diff --git a/fish_speech_src/docs/ko/inference.md b/fish_speech_src/docs/ko/inference.md
index b7afd78..3bb41ae 100644
--- a/fish_speech_src/docs/ko/inference.md
+++ b/fish_speech_src/docs/ko/inference.md
@@ -18,7 +18,7 @@ hf download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 ### 1. 참조 오디오에서 VQ 토큰 가져오기
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "test.wav" \
     --checkpoint-path "checkpoints/s2-pro/codec.pth"
 ```
@@ -28,7 +28,7 @@ python fish_speech/models/dac/inference.py \
 ### 2. 텍스트에서 Semantic 토큰 생성:
 
 ```bash
-python fish_speech/models/text2semantic/inference.py \
+python fish_speech_s2/models/text2semantic/inference.py \
     --text "변환하려는 텍스트" \
     --prompt-text "참조 텍스트" \
     --prompt-tokens "fake.npy" \
@@ -47,7 +47,7 @@ python fish_speech/models/text2semantic/inference.py \
 ### 3. 시맨틱 토큰에서 음성 생성:
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "codes_0.npy" \
 ```
 
diff --git a/fish_speech_src/docs/pt/finetune.md b/fish_speech_src/docs/pt/finetune.md
index 6333634..0ba4756 100644
--- a/fish_speech_src/docs/pt/finetune.md
+++ b/fish_speech_src/docs/pt/finetune.md
@@ -95,13 +95,13 @@ huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/ope
 Finalmente, você pode iniciar o ajuste fino executando o seguinte comando:
 
 ```bash
-python fish_speech/train.py --config-name text2semantic_finetune \
+python fish_speech_s2/train.py --config-name text2semantic_finetune \
     project=$project \
     +lora@model.model.lora_config=r_8_alpha_16
 ```
 
 !!! note
-    Você pode modificar os parâmetros de treinamento, como `batch_size`, `gradient_accumulation_steps`, etc., para se adequar à memória da sua GPU, modificando `fish_speech/configs/text2semantic_finetune.yaml`.
+    Você pode modificar os parâmetros de treinamento, como `batch_size`, `gradient_accumulation_steps`, etc., para se adequar à memória da sua GPU, modificando `fish_speech_s2/configs/text2semantic_finetune.yaml`.
 
 !!! note
     Para usuários do Windows, você pode usar `trainer.strategy.process_group_backend=gloo` para evitar problemas com `nccl`.
diff --git a/fish_speech_src/docs/pt/inference.md b/fish_speech_src/docs/pt/inference.md
index 580e08e..56ba3f5 100644
--- a/fish_speech_src/docs/pt/inference.md
+++ b/fish_speech_src/docs/pt/inference.md
@@ -18,7 +18,7 @@ hf download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 ### 1. Obter tokens VQ do áudio de referência
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "test.wav" \
     --checkpoint-path "checkpoints/s2-pro/codec.pth"
 ```
@@ -28,7 +28,7 @@ Você deve obter um `fake.npy` e um `fake.wav`.
 ### 2. Gerar tokens Semânticos a partir do texto:
 
 ```bash
-python fish_speech/models/text2semantic/inference.py \
+python fish_speech_s2/models/text2semantic/inference.py \
     --text "O texto que você deseja converter" \
     --prompt-text "Seu texto de referência" \
     --prompt-tokens "fake.npy" \
@@ -47,7 +47,7 @@ Este comando criará um arquivo `codes_N` no diretório de trabalho, onde N é u
 ### 3. Gerar vocais a partir de tokens semânticos:
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "codes_0.npy" \
 ```
 
diff --git a/fish_speech_src/docs/zh/finetune.md b/fish_speech_src/docs/zh/finetune.md
index 04ece3f..3617284 100644
--- a/fish_speech_src/docs/zh/finetune.md
+++ b/fish_speech_src/docs/zh/finetune.md
@@ -96,13 +96,13 @@ huggingface-cli download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 最后, 你可以运行以下命令来启动微调:
 
 ```bash
-python fish_speech/train.py --config-name text2semantic_finetune \
+python fish_speech_s2/train.py --config-name text2semantic_finetune \
     project=$project \
     +lora@model.model.lora_config=r_8_alpha_16
 ```
 
 !!! note
-    你可以通过修改 `fish_speech/configs/text2semantic_finetune.yaml` 来修改训练参数如 `batch_size`, `gradient_accumulation_steps` 等, 来适应你的显存.
+    你可以通过修改 `fish_speech_s2/configs/text2semantic_finetune.yaml` 来修改训练参数如 `batch_size`, `gradient_accumulation_steps` 等, 来适应你的显存.
 
 !!! note
     对于 Windows 用户, 你可以使用 `trainer.strategy.process_group_backend=gloo` 来避免 `nccl` 的问题.
diff --git a/fish_speech_src/docs/zh/inference.md b/fish_speech_src/docs/zh/inference.md
index 959cb98..78a770c 100644
--- a/fish_speech_src/docs/zh/inference.md
+++ b/fish_speech_src/docs/zh/inference.md
@@ -18,7 +18,7 @@ hf download fishaudio/s2-pro --local-dir checkpoints/s2-pro
 ### 1. 从参考音频获取 VQ tokens
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "test.wav" \
     --checkpoint-path "checkpoints/s2-pro/codec.pth"
 ```
@@ -28,7 +28,7 @@ python fish_speech/models/dac/inference.py \
 ### 2. 从文本生成 Semantic tokens：
 
 ```bash
-python fish_speech/models/text2semantic/inference.py \
+python fish_speech_s2/models/text2semantic/inference.py \
     --text "您想要转换的文本" \
     --prompt-text "您的参考文本" \
     --prompt-tokens "fake.npy" \
@@ -47,7 +47,7 @@ python fish_speech/models/text2semantic/inference.py \
 ### 3. 从语义令牌生成声音：
 
 ```bash
-python fish_speech/models/dac/inference.py \
+python fish_speech_s2/models/dac/inference.py \
     -i "codes_0.npy" \
 ```
 
diff --git a/fish_speech_src/fish_speech/configs/lora/r_8_alpha_16.yaml b/fish_speech_src/fish_speech/configs/lora/r_8_alpha_16.yaml
deleted file mode 100644
index aecc4d9..0000000
--- a/fish_speech_src/fish_speech/configs/lora/r_8_alpha_16.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-_target_: fish_speech.models.text2semantic.lora.LoraConfig
-r: 8
-lora_alpha: 16
-lora_dropout: 0.01
diff --git a/fish_speech_src/fish_speech/callbacks/__init__.py b/fish_speech_src/fish_speech_s2/callbacks/__init__.py
similarity index 100%
rename from fish_speech_src/fish_speech/callbacks/__init__.py
rename to fish_speech_src/fish_speech_s2/callbacks/__init__.py
diff --git a/fish_speech_src/fish_speech/callbacks/grad_norm.py b/fish_speech_src/fish_speech_s2/callbacks/grad_norm.py
similarity index 100%
rename from fish_speech_src/fish_speech/callbacks/grad_norm.py
rename to fish_speech_src/fish_speech_s2/callbacks/grad_norm.py
diff --git a/fish_speech_src/fish_speech/configs/base.yaml b/fish_speech_src/fish_speech_s2/configs/base.yaml
similarity index 97%
rename from fish_speech_src/fish_speech/configs/base.yaml
rename to fish_speech_src/fish_speech_s2/configs/base.yaml
index 99e6dab..f5fcbaa 100644
--- a/fish_speech_src/fish_speech/configs/base.yaml
+++ b/fish_speech_src/fish_speech_s2/configs/base.yaml
@@ -53,7 +53,7 @@ callbacks:
     log_momentum: false
 
   grad_norm_monitor:
-    _target_: fish_speech.callbacks.GradNormMonitor
+    _target_: fish_speech_s2.callbacks.GradNormMonitor
     norm_type: 2
     logging_interval: step
 
diff --git a/fish_speech_src/fish_speech_s2/configs/lora/r_8_alpha_16.yaml b/fish_speech_src/fish_speech_s2/configs/lora/r_8_alpha_16.yaml
new file mode 100644
index 0000000..28d3ec1
--- /dev/null
+++ b/fish_speech_src/fish_speech_s2/configs/lora/r_8_alpha_16.yaml
@@ -0,0 +1,4 @@
+_target_: fish_speech_s2.models.text2semantic.lora.LoraConfig
+r: 8
+lora_alpha: 16
+lora_dropout: 0.01
diff --git a/fish_speech_src/fish_speech/configs/modded_dac_vq.yaml b/fish_speech_src/fish_speech_s2/configs/modded_dac_vq.yaml
similarity index 76%
rename from fish_speech_src/fish_speech/configs/modded_dac_vq.yaml
rename to fish_speech_src/fish_speech_s2/configs/modded_dac_vq.yaml
index 18089ed..9ca3405 100644
--- a/fish_speech_src/fish_speech/configs/modded_dac_vq.yaml
+++ b/fish_speech_src/fish_speech_s2/configs/modded_dac_vq.yaml
@@ -1,4 +1,4 @@
-_target_: fish_speech.models.dac.modded_dac.DAC
+_target_: fish_speech_s2.models.dac.modded_dac.DAC
 # Model setup
 sample_rate: 44100
 encoder_dim: 64
@@ -8,7 +8,7 @@ decoder_rates: [8, 8, 4, 2]
 encoder_transformer_layers: [0, 0, 0, 4]
 decoder_transformer_layers: [4, 0, 0, 0]
 transformer_general_config:
-  _target_: fish_speech.models.dac.modded_dac.ModelArgs
+  _target_: fish_speech_s2.models.dac.modded_dac.ModelArgs
   _partial_: true
   block_size: 8192
   n_local_heads: -1
@@ -20,7 +20,7 @@ transformer_general_config:
   channels_first: true
 # Quantization
 quantizer:
-  _target_: fish_speech.models.dac.rvq.DownsampleResidualVectorQuantize
+  _target_: fish_speech_s2.models.dac.rvq.DownsampleResidualVectorQuantize
   input_dim: 1024
   n_codebooks: 9
   codebook_size: 1024
@@ -28,12 +28,12 @@ quantizer:
   quantizer_dropout: 0.5
   downsample_factor: [2, 2]
   post_module: &transformer_module
-    _target_: fish_speech.models.dac.modded_dac.WindowLimitedTransformer
+    _target_: fish_speech_s2.models.dac.modded_dac.WindowLimitedTransformer
     causal: true
     window_size: 128  # empirically this does not seem to matter
     input_dim: 1024
     config: &transformer_config
-      _target_: fish_speech.models.dac.modded_dac.ModelArgs
+      _target_: fish_speech_s2.models.dac.modded_dac.ModelArgs
       block_size: 2048
       n_layer: 8
       n_head: 16
diff --git a/fish_speech_src/fish_speech/configs/text2semantic_finetune.yaml b/fish_speech_src/fish_speech_s2/configs/text2semantic_finetune.yaml
similarity index 74%
rename from fish_speech_src/fish_speech/configs/text2semantic_finetune.yaml
rename to fish_speech_src/fish_speech_s2/configs/text2semantic_finetune.yaml
index 00f6905..5895fbd 100644
--- a/fish_speech_src/fish_speech/configs/text2semantic_finetune.yaml
+++ b/fish_speech_src/fish_speech_s2/configs/text2semantic_finetune.yaml
@@ -21,12 +21,12 @@ trainer:
 
 # Dataset Configuration
 tokenizer:
-  _target_: fish_speech.tokenizer.FishTokenizer
+  _target_: fish_speech_s2.tokenizer.FishTokenizer
   model_path: ${pretrained_ckpt_path}/tokenizer.tiktoken
 
 # Dataset Configuration
 train_dataset:
-  _target_: fish_speech.datasets.semantic.AutoTextSemanticInstructionIterableDataset
+  _target_: fish_speech_s2.datasets.semantic.AutoTextSemanticInstructionIterableDataset
   proto_files:
     - data/protos
   tokenizer: ${tokenizer}
@@ -36,7 +36,7 @@ train_dataset:
   interactive_prob: 0.7
 
 val_dataset:
-  _target_: fish_speech.datasets.semantic.AutoTextSemanticInstructionIterableDataset
+  _target_: fish_speech_s2.datasets.semantic.AutoTextSemanticInstructionIterableDataset
   proto_files:
     - data/protos
   tokenizer: ${tokenizer}
@@ -46,7 +46,7 @@ val_dataset:
   interactive_prob: 0.7
 
 data:
-  _target_: fish_speech.datasets.semantic.SemanticDataModule
+  _target_: fish_speech_s2.datasets.semantic.SemanticDataModule
   train_dataset: ${train_dataset}
   val_dataset: ${val_dataset}
   num_workers: 4
@@ -56,9 +56,9 @@ data:
 
 # Model Configuration
 model:
-  _target_: fish_speech.models.text2semantic.lit_module.TextToSemantic
+  _target_: fish_speech_s2.models.text2semantic.lit_module.TextToSemantic
   model: 
-    _target_: fish_speech.models.text2semantic.llama.BaseTransformer.from_pretrained
+    _target_: fish_speech_s2.models.text2semantic.llama.BaseTransformer.from_pretrained
     path: ${pretrained_ckpt_path}
     load_weights: true
     max_length: ${max_length}
@@ -76,7 +76,7 @@ model:
     _target_: torch.optim.lr_scheduler.LambdaLR
     _partial_: true
     lr_lambda:
-      _target_: fish_speech.scheduler.get_constant_schedule_with_warmup_lr_lambda
+      _target_: fish_speech_s2.scheduler.get_constant_schedule_with_warmup_lr_lambda
       _partial_: true
       num_warmup_steps: 10
 
diff --git a/fish_speech_src/fish_speech/content_sequence.py b/fish_speech_src/fish_speech_s2/content_sequence.py
similarity index 99%
rename from fish_speech_src/fish_speech/content_sequence.py
rename to fish_speech_src/fish_speech_s2/content_sequence.py
index c29f8c0..99bf780 100644
--- a/fish_speech_src/fish_speech/content_sequence.py
+++ b/fish_speech_src/fish_speech_s2/content_sequence.py
@@ -4,7 +4,7 @@
 import numpy as np
 import torch
 
-from fish_speech.tokenizer import (
+from fish_speech_s2.tokenizer import (
     IM_END_TOKEN,
     MODALITY_TOKENS,
     FishTokenizer,
diff --git a/fish_speech_src/fish_speech/conversation.py b/fish_speech_src/fish_speech_s2/conversation.py
similarity index 97%
rename from fish_speech_src/fish_speech/conversation.py
rename to fish_speech_src/fish_speech_s2/conversation.py
index d0fa5c2..94979c1 100644
--- a/fish_speech_src/fish_speech/conversation.py
+++ b/fish_speech_src/fish_speech_s2/conversation.py
@@ -5,7 +5,7 @@
 import torch
 from transformers import PreTrainedTokenizerFast
 
-from fish_speech.content_sequence import (
+from fish_speech_s2.content_sequence import (
     AudioPart,
     BasePart,
     ContentSequence,
@@ -13,7 +13,7 @@
     TextPart,
     VQPart,
 )
-from fish_speech.tokenizer import IM_END_TOKEN, IM_START_TOKEN, MODALITY_TOKENS
+from fish_speech_s2.tokenizer import IM_END_TOKEN, IM_START_TOKEN, MODALITY_TOKENS
 
 
 @dataclass(kw_only=True)
diff --git a/fish_speech_src/fish_speech/datasets/concat_repeat.py b/fish_speech_src/fish_speech_s2/datasets/concat_repeat.py
similarity index 100%
rename from fish_speech_src/fish_speech/datasets/concat_repeat.py
rename to fish_speech_src/fish_speech_s2/datasets/concat_repeat.py
diff --git a/fish_speech_src/fish_speech/datasets/protos/text-data.proto b/fish_speech_src/fish_speech_s2/datasets/protos/text-data.proto
similarity index 100%
rename from fish_speech_src/fish_speech/datasets/protos/text-data.proto
rename to fish_speech_src/fish_speech_s2/datasets/protos/text-data.proto
diff --git a/fish_speech_src/fish_speech/datasets/protos/text_data_pb2.py b/fish_speech_src/fish_speech_s2/datasets/protos/text_data_pb2.py
similarity index 100%
rename from fish_speech_src/fish_speech/datasets/protos/text_data_pb2.py
rename to fish_speech_src/fish_speech_s2/datasets/protos/text_data_pb2.py
diff --git a/fish_speech_src/fish_speech/datasets/protos/text_data_stream.py b/fish_speech_src/fish_speech_s2/datasets/protos/text_data_stream.py
similarity index 100%
rename from fish_speech_src/fish_speech/datasets/protos/text_data_stream.py
rename to fish_speech_src/fish_speech_s2/datasets/protos/text_data_stream.py
diff --git a/fish_speech_src/fish_speech/datasets/semantic.py b/fish_speech_src/fish_speech_s2/datasets/semantic.py
similarity index 97%
rename from fish_speech_src/fish_speech/datasets/semantic.py
rename to fish_speech_src/fish_speech_s2/datasets/semantic.py
index 8999e26..747078c 100644
--- a/fish_speech_src/fish_speech/datasets/semantic.py
+++ b/fish_speech_src/fish_speech_s2/datasets/semantic.py
@@ -15,16 +15,16 @@
 from torch.distributed import get_rank, get_world_size, is_initialized
 from torch.utils.data import DataLoader, Dataset, IterableDataset, get_worker_info
 
-from fish_speech.content_sequence import ContentSequence, TextPart, VQPart
+from fish_speech_s2.content_sequence import ContentSequence, TextPart, VQPart
 
 CODEBOOK_PAD_TOKEN_ID = 0
 
-from fish_speech.datasets.protos.text_data_pb2 import SampledData
-from fish_speech.datasets.protos.text_data_stream import read_pb_stream
-from fish_speech.text.clean import clean_text
-from fish_speech.tokenizer import FishTokenizer
-from fish_speech.utils import RankedLogger
-from fish_speech.utils.braceexpand import braceexpand
+from fish_speech_s2.datasets.protos.text_data_pb2 import SampledData
+from fish_speech_s2.datasets.protos.text_data_stream import read_pb_stream
+from fish_speech_s2.text.clean import clean_text
+from fish_speech_s2.tokenizer import FishTokenizer
+from fish_speech_s2.utils import RankedLogger
+from fish_speech_s2.utils.braceexpand import braceexpand
 
 log = RankedLogger(__name__, rank_zero_only=True)
 
diff --git a/fish_speech_src/fish_speech/datasets/vqgan.py b/fish_speech_src/fish_speech_s2/datasets/vqgan.py
similarity index 98%
rename from fish_speech_src/fish_speech/datasets/vqgan.py
rename to fish_speech_src/fish_speech_s2/datasets/vqgan.py
index a45583d..0d91abf 100644
--- a/fish_speech_src/fish_speech/datasets/vqgan.py
+++ b/fish_speech_src/fish_speech_s2/datasets/vqgan.py
@@ -8,7 +8,7 @@
 from lightning import LightningDataModule
 from torch.utils.data import DataLoader, Dataset
 
-from fish_speech.utils import RankedLogger
+from fish_speech_s2.utils import RankedLogger
 
 logger = RankedLogger(__name__, rank_zero_only=False)
 
diff --git a/fish_speech_src/fish_speech/i18n/README.md b/fish_speech_src/fish_speech_s2/i18n/README.md
similarity index 95%
rename from fish_speech_src/fish_speech/i18n/README.md
rename to fish_speech_src/fish_speech_s2/i18n/README.md
index 700902b..d8d78a0 100644
--- a/fish_speech_src/fish_speech/i18n/README.md
+++ b/fish_speech_src/fish_speech_s2/i18n/README.md
@@ -2,7 +2,7 @@
 
 The `i18n` folder within the `fish_speech` directory contains files initially sourced from the RVC project. In compliance with the MIT license under which these files were released, we acknowledge the original authors and sources below:
 
-### fish_speech/i18n/core.py
+### fish_speech_s2/i18n/core.py
 
 **Related code from RVC:**
 [https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/i18n.py](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/i18n.py)
@@ -13,7 +13,7 @@ add localization(添加本地化) [RVC-Project/Retrieval-based-Voice-Conversion-
 **Initial author:**
 [@L4Ph](https://github.com/L4Ph)
 
-### fish_speech/i18n/scan.py
+### fish_speech_s2/i18n/scan.py
 
 **Related code from RVC:**
 [https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/scan_i18n.py](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/83d6a64e675d9bbd6e92ee450c5f807ed2bb54d8/i18n/scan_i18n.py)
diff --git a/fish_speech_src/fish_speech/i18n/__init__.py b/fish_speech_src/fish_speech_s2/i18n/__init__.py
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/__init__.py
rename to fish_speech_src/fish_speech_s2/i18n/__init__.py
diff --git a/fish_speech_src/fish_speech/i18n/core.py b/fish_speech_src/fish_speech_s2/i18n/core.py
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/core.py
rename to fish_speech_src/fish_speech_s2/i18n/core.py
diff --git a/fish_speech_src/fish_speech/i18n/locale/en_US.json b/fish_speech_src/fish_speech_s2/i18n/locale/en_US.json
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/locale/en_US.json
rename to fish_speech_src/fish_speech_s2/i18n/locale/en_US.json
diff --git a/fish_speech_src/fish_speech/i18n/locale/es_ES.json b/fish_speech_src/fish_speech_s2/i18n/locale/es_ES.json
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/locale/es_ES.json
rename to fish_speech_src/fish_speech_s2/i18n/locale/es_ES.json
diff --git a/fish_speech_src/fish_speech/i18n/locale/ja_JP.json b/fish_speech_src/fish_speech_s2/i18n/locale/ja_JP.json
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/locale/ja_JP.json
rename to fish_speech_src/fish_speech_s2/i18n/locale/ja_JP.json
diff --git a/fish_speech_src/fish_speech/i18n/locale/ko_KR.json b/fish_speech_src/fish_speech_s2/i18n/locale/ko_KR.json
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/locale/ko_KR.json
rename to fish_speech_src/fish_speech_s2/i18n/locale/ko_KR.json
diff --git a/fish_speech_src/fish_speech/i18n/locale/pt_BR.json b/fish_speech_src/fish_speech_s2/i18n/locale/pt_BR.json
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/locale/pt_BR.json
rename to fish_speech_src/fish_speech_s2/i18n/locale/pt_BR.json
diff --git a/fish_speech_src/fish_speech/i18n/locale/zh_CN.json b/fish_speech_src/fish_speech_s2/i18n/locale/zh_CN.json
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/locale/zh_CN.json
rename to fish_speech_src/fish_speech_s2/i18n/locale/zh_CN.json
diff --git a/fish_speech_src/fish_speech/i18n/scan.py b/fish_speech_src/fish_speech_s2/i18n/scan.py
similarity index 100%
rename from fish_speech_src/fish_speech/i18n/scan.py
rename to fish_speech_src/fish_speech_s2/i18n/scan.py
diff --git a/fish_speech_src/fish_speech/inference_engine/__init__.py b/fish_speech_src/fish_speech_s2/inference_engine/__init__.py
similarity index 93%
rename from fish_speech_src/fish_speech/inference_engine/__init__.py
rename to fish_speech_src/fish_speech_s2/inference_engine/__init__.py
index ff34fc5..117a581 100644
--- a/fish_speech_src/fish_speech/inference_engine/__init__.py
+++ b/fish_speech_src/fish_speech_s2/inference_engine/__init__.py
@@ -6,17 +6,17 @@
 import torch
 from loguru import logger
 
-from fish_speech.inference_engine.reference_loader import ReferenceLoader
-from fish_speech.inference_engine.utils import InferenceResult, wav_chunk_header
-from fish_speech.inference_engine.vq_manager import VQManager
-from fish_speech.models.dac.modded_dac import DAC
-from fish_speech.models.text2semantic.inference import (
+from fish_speech_s2.inference_engine.reference_loader import ReferenceLoader
+from fish_speech_s2.inference_engine.utils import InferenceResult, wav_chunk_header
+from fish_speech_s2.inference_engine.vq_manager import VQManager
+from fish_speech_s2.models.dac.modded_dac import DAC
+from fish_speech_s2.models.text2semantic.inference import (
     GenerateRequest,
     GenerateResponse,
     WrappedGenerateResponse,
 )
-from fish_speech.utils import autocast_exclude_mps, set_seed
-from fish_speech.utils.schema import ServeTTSRequest
+from fish_speech_s2.utils import autocast_exclude_mps, set_seed
+from fish_speech_s2.utils.schema import ServeTTSRequest
 
 
 class TTSInferenceEngine(ReferenceLoader, VQManager):
diff --git a/fish_speech_src/fish_speech/inference_engine/reference_loader.py b/fish_speech_src/fish_speech_s2/inference_engine/reference_loader.py
similarity index 98%
rename from fish_speech_src/fish_speech/inference_engine/reference_loader.py
rename to fish_speech_src/fish_speech_s2/inference_engine/reference_loader.py
index d44e4d3..70e08c4 100644
--- a/fish_speech_src/fish_speech/inference_engine/reference_loader.py
+++ b/fish_speech_src/fish_speech_s2/inference_engine/reference_loader.py
@@ -7,14 +7,14 @@
 import torchaudio
 from loguru import logger
 
-from fish_speech.models.dac.modded_dac import DAC
-from fish_speech.utils.file import (
+from fish_speech_s2.models.dac.modded_dac import DAC
+from fish_speech_s2.utils.file import (
     AUDIO_EXTENSIONS,
     audio_to_bytes,
     list_files,
     read_ref_text,
 )
-from fish_speech.utils.schema import ServeReferenceAudio
+from fish_speech_s2.utils.schema import ServeReferenceAudio
 
 
 class ReferenceLoader:
diff --git a/fish_speech_src/fish_speech/inference_engine/utils.py b/fish_speech_src/fish_speech_s2/inference_engine/utils.py
similarity index 100%
rename from fish_speech_src/fish_speech/inference_engine/utils.py
rename to fish_speech_src/fish_speech_s2/inference_engine/utils.py
diff --git a/fish_speech_src/fish_speech/inference_engine/vq_manager.py b/fish_speech_src/fish_speech_s2/inference_engine/vq_manager.py
similarity index 97%
rename from fish_speech_src/fish_speech/inference_engine/vq_manager.py
rename to fish_speech_src/fish_speech_s2/inference_engine/vq_manager.py
index 4481580..58db326 100644
--- a/fish_speech_src/fish_speech/inference_engine/vq_manager.py
+++ b/fish_speech_src/fish_speech_s2/inference_engine/vq_manager.py
@@ -3,7 +3,7 @@
 import torch
 from loguru import logger
 
-from fish_speech.models.dac.modded_dac import DAC
+from fish_speech_s2.models.dac.modded_dac import DAC
 
 
 class VQManager:
diff --git a/fish_speech_src/fish_speech/models/dac/__init__.py b/fish_speech_src/fish_speech_s2/models/dac/__init__.py
similarity index 100%
rename from fish_speech_src/fish_speech/models/dac/__init__.py
rename to fish_speech_src/fish_speech_s2/models/dac/__init__.py
diff --git a/fish_speech_src/fish_speech/models/dac/inference.py b/fish_speech_src/fish_speech_s2/models/dac/inference.py
similarity index 98%
rename from fish_speech_src/fish_speech/models/dac/inference.py
rename to fish_speech_src/fish_speech_s2/models/dac/inference.py
index 236312b..478c1fb 100644
--- a/fish_speech_src/fish_speech/models/dac/inference.py
+++ b/fish_speech_src/fish_speech_s2/models/dac/inference.py
@@ -14,7 +14,7 @@
 
 pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
 
-from fish_speech.utils.file import AUDIO_EXTENSIONS
+from fish_speech_s2.utils.file import AUDIO_EXTENSIONS
 
 # register eval resolver (guard against duplicate registration when other
 # nodes or multiple imports have already registered it)
diff --git a/fish_speech_src/fish_speech/models/dac/modded_dac.py b/fish_speech_src/fish_speech_s2/models/dac/modded_dac.py
similarity index 99%
rename from fish_speech_src/fish_speech/models/dac/modded_dac.py
rename to fish_speech_src/fish_speech_s2/models/dac/modded_dac.py
index eb14b47..d5b5b59 100644
--- a/fish_speech_src/fish_speech/models/dac/modded_dac.py
+++ b/fish_speech_src/fish_speech_s2/models/dac/modded_dac.py
@@ -1000,7 +1000,7 @@ def forward(
     from omegaconf import OmegaConf
 
     # 配置路径
-    config_path = "fish_speech/configs/modded_dac_vq.yaml"
+    config_path = "fish_speech_s2/configs/modded_dac_vq.yaml"
     checkpoint_path = "checkpoints/s2-pro/codec.pth"
     codes_path = "./output/codes_0.npy"  # 你的 codes 文件路径
     output_path = "reconstructed_from_codes.wav"
diff --git a/fish_speech_src/fish_speech/models/dac/rvq.py b/fish_speech_src/fish_speech_s2/models/dac/rvq.py
similarity index 100%
rename from fish_speech_src/fish_speech/models/dac/rvq.py
rename to fish_speech_src/fish_speech_s2/models/dac/rvq.py
diff --git a/fish_speech_src/fish_speech/models/text2semantic/__init__.py b/fish_speech_src/fish_speech_s2/models/text2semantic/__init__.py
similarity index 100%
rename from fish_speech_src/fish_speech/models/text2semantic/__init__.py
rename to fish_speech_src/fish_speech_s2/models/text2semantic/__init__.py
diff --git a/fish_speech_src/fish_speech/models/text2semantic/inference.py b/fish_speech_src/fish_speech_s2/models/text2semantic/inference.py
similarity index 99%
rename from fish_speech_src/fish_speech/models/text2semantic/inference.py
rename to fish_speech_src/fish_speech_s2/models/text2semantic/inference.py
index a9b5d28..462cdd6 100644
--- a/fish_speech_src/fish_speech/models/text2semantic/inference.py
+++ b/fish_speech_src/fish_speech_s2/models/text2semantic/inference.py
@@ -16,12 +16,12 @@
 from loguru import logger
 from tqdm import tqdm
 
-from fish_speech.content_sequence import (
+from fish_speech_s2.content_sequence import (
     TextPart,
     VQPart,
 )
-from fish_speech.conversation import Conversation, Message
-from fish_speech.tokenizer import IM_END_TOKEN
+from fish_speech_s2.conversation import Conversation, Message
+from fish_speech_s2.tokenizer import IM_END_TOKEN
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 torch._inductor.config.coordinate_descent_tuning = True
@@ -33,7 +33,7 @@
 
 from torch.nn.attention import SDPBackend, sdpa_kernel
 
-from fish_speech.models.text2semantic.llama import (
+from fish_speech_s2.models.text2semantic.llama import (
     BaseTransformer,
     DualARTransformer,
     NaiveTransformer,
diff --git a/fish_speech_src/fish_speech/models/text2semantic/lit_module.py b/fish_speech_src/fish_speech_s2/models/text2semantic/lit_module.py
similarity index 98%
rename from fish_speech_src/fish_speech/models/text2semantic/lit_module.py
rename to fish_speech_src/fish_speech_s2/models/text2semantic/lit_module.py
index a13030c..212af76 100644
--- a/fish_speech_src/fish_speech/models/text2semantic/lit_module.py
+++ b/fish_speech_src/fish_speech_s2/models/text2semantic/lit_module.py
@@ -5,10 +5,10 @@
 import torch.nn.functional as F
 from lightning.pytorch.utilities.types import OptimizerLRScheduler
 
-import fish_speech.utils as utils
+import fish_speech_s2.utils as utils
 
 CODEBOOK_PAD_TOKEN_ID = 0
-from fish_speech.models.text2semantic.llama import NaiveTransformer
+from fish_speech_s2.models.text2semantic.llama import NaiveTransformer
 
 log = utils.RankedLogger(__name__, rank_zero_only=True)
 
diff --git a/fish_speech_src/fish_speech/models/text2semantic/llama.py b/fish_speech_src/fish_speech_s2/models/text2semantic/llama.py
similarity index 99%
rename from fish_speech_src/fish_speech/models/text2semantic/llama.py
rename to fish_speech_src/fish_speech_s2/models/text2semantic/llama.py
index 4583bdd..d586d33 100644
--- a/fish_speech_src/fish_speech/models/text2semantic/llama.py
+++ b/fish_speech_src/fish_speech_s2/models/text2semantic/llama.py
@@ -15,7 +15,7 @@
 from torch.nn.attention import SDPBackend, sdpa_kernel
 from torch.utils.checkpoint import checkpoint
 
-from fish_speech.models.text2semantic.lora import LoraConfig, setup_lora
+from fish_speech_s2.models.text2semantic.lora import LoraConfig, setup_lora
 
 
 # ---------------------------------------------------------------------------
@@ -702,7 +702,7 @@ def from_pretrained(
         Actual CUDA-side quantization occurs on the first forward pass.
         """
         # Import wrapper locally to avoid circular dependency or global import issues
-        from fish_speech.tokenizer import FishTokenizer
+        from fish_speech_s2.tokenizer import FishTokenizer
 
         config = BaseModelArgs.from_pretrained(str(path))
         if max_length is not None:
diff --git a/fish_speech_src/fish_speech/models/text2semantic/lora.py b/fish_speech_src/fish_speech_s2/models/text2semantic/lora.py
similarity index 100%
rename from fish_speech_src/fish_speech/models/text2semantic/lora.py
rename to fish_speech_src/fish_speech_s2/models/text2semantic/lora.py
diff --git a/fish_speech_src/fish_speech/scheduler.py b/fish_speech_src/fish_speech_s2/scheduler.py
similarity index 100%
rename from fish_speech_src/fish_speech/scheduler.py
rename to fish_speech_src/fish_speech_s2/scheduler.py
diff --git a/fish_speech_src/fish_speech/text/__init__.py b/fish_speech_src/fish_speech_s2/text/__init__.py
similarity index 100%
rename from fish_speech_src/fish_speech/text/__init__.py
rename to fish_speech_src/fish_speech_s2/text/__init__.py
diff --git a/fish_speech_src/fish_speech/text/clean.py b/fish_speech_src/fish_speech_s2/text/clean.py
similarity index 100%
rename from fish_speech_src/fish_speech/text/clean.py
rename to fish_speech_src/fish_speech_s2/text/clean.py
diff --git a/fish_speech_src/fish_speech/tokenizer.py b/fish_speech_src/fish_speech_s2/tokenizer.py
similarity index 100%
rename from fish_speech_src/fish_speech/tokenizer.py
rename to fish_speech_src/fish_speech_s2/tokenizer.py
diff --git a/fish_speech_src/fish_speech/train.py b/fish_speech_src/fish_speech_s2/train.py
similarity index 99%
rename from fish_speech_src/fish_speech/train.py
rename to fish_speech_src/fish_speech_s2/train.py
index e693f3a..4d84f23 100644
--- a/fish_speech_src/fish_speech/train.py
+++ b/fish_speech_src/fish_speech_s2/train.py
@@ -27,7 +27,7 @@
 # register eval resolver
 OmegaConf.register_new_resolver("eval", eval)
 
-import fish_speech.utils as utils
+import fish_speech_s2.utils as utils
 
 log = utils.RankedLogger(__name__, rank_zero_only=True)
 
diff --git a/fish_speech_src/fish_speech/utils/__init__.py b/fish_speech_src/fish_speech_s2/utils/__init__.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/__init__.py
rename to fish_speech_src/fish_speech_s2/utils/__init__.py
diff --git a/fish_speech_src/fish_speech/utils/braceexpand.py b/fish_speech_src/fish_speech_s2/utils/braceexpand.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/braceexpand.py
rename to fish_speech_src/fish_speech_s2/utils/braceexpand.py
diff --git a/fish_speech_src/fish_speech/utils/context.py b/fish_speech_src/fish_speech_s2/utils/context.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/context.py
rename to fish_speech_src/fish_speech_s2/utils/context.py
diff --git a/fish_speech_src/fish_speech/utils/file.py b/fish_speech_src/fish_speech_s2/utils/file.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/file.py
rename to fish_speech_src/fish_speech_s2/utils/file.py
diff --git a/fish_speech_src/fish_speech/utils/instantiators.py b/fish_speech_src/fish_speech_s2/utils/instantiators.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/instantiators.py
rename to fish_speech_src/fish_speech_s2/utils/instantiators.py
diff --git a/fish_speech_src/fish_speech/utils/logger.py b/fish_speech_src/fish_speech_s2/utils/logger.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/logger.py
rename to fish_speech_src/fish_speech_s2/utils/logger.py
diff --git a/fish_speech_src/fish_speech/utils/logging_utils.py b/fish_speech_src/fish_speech_s2/utils/logging_utils.py
similarity index 96%
rename from fish_speech_src/fish_speech/utils/logging_utils.py
rename to fish_speech_src/fish_speech_s2/utils/logging_utils.py
index 117c8d4..48d2f00 100644
--- a/fish_speech_src/fish_speech/utils/logging_utils.py
+++ b/fish_speech_src/fish_speech_s2/utils/logging_utils.py
@@ -5,7 +5,7 @@
     def rank_zero_only(fn):
         return fn
 
-from fish_speech.utils import logger as log
+from fish_speech_s2.utils import logger as log
 
 
 @rank_zero_only
diff --git a/fish_speech_src/fish_speech/utils/rich_utils.py b/fish_speech_src/fish_speech_s2/utils/rich_utils.py
similarity index 98%
rename from fish_speech_src/fish_speech/utils/rich_utils.py
rename to fish_speech_src/fish_speech_s2/utils/rich_utils.py
index ed77672..d3e7b5d 100644
--- a/fish_speech_src/fish_speech/utils/rich_utils.py
+++ b/fish_speech_src/fish_speech_s2/utils/rich_utils.py
@@ -13,7 +13,7 @@ def rank_zero_only(fn):
 from omegaconf import DictConfig, OmegaConf, open_dict
 from rich.prompt import Prompt
 
-from fish_speech.utils import logger as log
+from fish_speech_s2.utils import logger as log
 
 
 @rank_zero_only
diff --git a/fish_speech_src/fish_speech/utils/schema.py b/fish_speech_src/fish_speech_s2/utils/schema.py
similarity index 98%
rename from fish_speech_src/fish_speech/utils/schema.py
rename to fish_speech_src/fish_speech_s2/utils/schema.py
index 01d40de..1d7baa0 100644
--- a/fish_speech_src/fish_speech/utils/schema.py
+++ b/fish_speech_src/fish_speech_s2/utils/schema.py
@@ -9,7 +9,7 @@
 from pydantic.functional_validators import SkipValidation
 from typing_extensions import Annotated
 
-from fish_speech.content_sequence import TextPart, VQPart
+from fish_speech_s2.content_sequence import TextPart, VQPart
 
 
 class ServeVQPart(BaseModel):
diff --git a/fish_speech_src/fish_speech/utils/spectrogram.py b/fish_speech_src/fish_speech_s2/utils/spectrogram.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/spectrogram.py
rename to fish_speech_src/fish_speech_s2/utils/spectrogram.py
diff --git a/fish_speech_src/fish_speech/utils/utils.py b/fish_speech_src/fish_speech_s2/utils/utils.py
similarity index 100%
rename from fish_speech_src/fish_speech/utils/utils.py
rename to fish_speech_src/fish_speech_s2/utils/utils.py
diff --git a/fish_speech_src/tools/api_client.py b/fish_speech_src/tools/api_client.py
index 13d87b3..ea1c95d 100644
--- a/fish_speech_src/tools/api_client.py
+++ b/fish_speech_src/tools/api_client.py
@@ -9,8 +9,8 @@
 from pydub import AudioSegment
 from pydub.playback import play
 
-from fish_speech.utils.file import audio_to_bytes, read_ref_text
-from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+from fish_speech_s2.utils.file import audio_to_bytes, read_ref_text
+from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
 
 def parse_args():
diff --git a/fish_speech_src/tools/llama/build_dataset.py b/fish_speech_src/tools/llama/build_dataset.py
index 20e2219..6c68d40 100644
--- a/fish_speech_src/tools/llama/build_dataset.py
+++ b/fish_speech_src/tools/llama/build_dataset.py
@@ -11,9 +11,9 @@
 from loguru import logger
 from tqdm import tqdm
 
-from fish_speech.datasets.protos.text_data_pb2 import Semantics, Sentence, TextData
-from fish_speech.datasets.protos.text_data_stream import pack_pb_stream
-from fish_speech.utils.file import load_filelist
+from fish_speech_s2.datasets.protos.text_data_pb2 import Semantics, Sentence, TextData
+from fish_speech_s2.datasets.protos.text_data_stream import pack_pb_stream
+from fish_speech_s2.utils.file import load_filelist
 
 # To avoid CPU overload
 os.environ["MKL_NUM_THREADS"] = "1"
diff --git a/fish_speech_src/tools/llama/eval_in_context.py b/fish_speech_src/tools/llama/eval_in_context.py
index 41d6397..c6c08fd 100644
--- a/fish_speech_src/tools/llama/eval_in_context.py
+++ b/fish_speech_src/tools/llama/eval_in_context.py
@@ -9,8 +9,8 @@
 
 from torch.utils.data import DataLoader
 
-from fish_speech.datasets.semantic import AutoAugTextDataset, TextDataCollator
-from fish_speech.models.text2semantic.inference import load_model
+from fish_speech_s2.datasets.semantic import AutoAugTextDataset, TextDataCollator
+from fish_speech_s2.models.text2semantic.inference import load_model
 
 
 def smooth(
diff --git a/fish_speech_src/tools/llama/merge_lora.py b/fish_speech_src/tools/llama/merge_lora.py
index 1080ff5..7ffda35 100644
--- a/fish_speech_src/tools/llama/merge_lora.py
+++ b/fish_speech_src/tools/llama/merge_lora.py
@@ -9,8 +9,8 @@
 from hydra.utils import instantiate
 from loguru import logger
 
-from fish_speech.models.text2semantic.llama import BaseTransformer
-from fish_speech.models.text2semantic.lora import get_merged_state_dict
+from fish_speech_s2.models.text2semantic.llama import BaseTransformer
+from fish_speech_s2.models.text2semantic.lora import get_merged_state_dict
 
 
 @click.command()
@@ -24,7 +24,7 @@ def merge(lora_config, base_weight, lora_weight, output):
         f"Merging {base_weight} and {lora_weight} into {output} with {lora_config}"
     )
 
-    with initialize(version_base="1.3", config_path="../../fish_speech/configs/lora"):
+    with initialize(version_base="1.3", config_path="../../fish_speech_s2/configs/lora"):
         cfg = compose(config_name=lora_config)
 
     lora_config = instantiate(cfg)
diff --git a/fish_speech_src/tools/llama/quantize.py b/fish_speech_src/tools/llama/quantize.py
index c606c53..e2703f7 100644
--- a/fish_speech_src/tools/llama/quantize.py
+++ b/fish_speech_src/tools/llama/quantize.py
@@ -13,8 +13,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from fish_speech.models.text2semantic.inference import load_model
-from fish_speech.models.text2semantic.llama import find_multiple
+from fish_speech_s2.models.text2semantic.inference import load_model
+from fish_speech_s2.models.text2semantic.llama import find_multiple
 
 ##### Quantization Primitives ######
 
diff --git a/fish_speech_src/tools/run_webui.py b/fish_speech_src/tools/run_webui.py
index 37130a0..6660361 100644
--- a/fish_speech_src/tools/run_webui.py
+++ b/fish_speech_src/tools/run_webui.py
@@ -8,10 +8,10 @@
 
 pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
 
-from fish_speech.inference_engine import TTSInferenceEngine
-from fish_speech.models.dac.inference import load_model as load_decoder_model
-from fish_speech.models.text2semantic.inference import launch_thread_safe_queue
-from fish_speech.utils.schema import ServeTTSRequest
+from fish_speech_s2.inference_engine import TTSInferenceEngine
+from fish_speech_s2.models.dac.inference import load_model as load_decoder_model
+from fish_speech_s2.models.text2semantic.inference import launch_thread_safe_queue
+from fish_speech_s2.utils.schema import ServeTTSRequest
 from tools.webui import build_app
 from tools.webui.inference import get_inference_wrapper
 
diff --git a/fish_speech_src/tools/server/api_utils.py b/fish_speech_src/tools/server/api_utils.py
index 4fa9526..26932f1 100644
--- a/fish_speech_src/tools/server/api_utils.py
+++ b/fish_speech_src/tools/server/api_utils.py
@@ -13,8 +13,8 @@
 from loguru import logger
 from pydantic import BaseModel
 
-from fish_speech.inference_engine import TTSInferenceEngine
-from fish_speech.utils.schema import ServeTTSRequest
+from fish_speech_s2.inference_engine import TTSInferenceEngine
+from fish_speech_s2.utils.schema import ServeTTSRequest
 from tools.server.inference import inference_wrapper as inference
 
 
diff --git a/fish_speech_src/tools/server/inference.py b/fish_speech_src/tools/server/inference.py
index 060e24b..90bba3a 100644
--- a/fish_speech_src/tools/server/inference.py
+++ b/fish_speech_src/tools/server/inference.py
@@ -3,8 +3,8 @@
 import numpy as np
 from kui.asgi import HTTPException
 
-from fish_speech.inference_engine import TTSInferenceEngine
-from fish_speech.utils.schema import ServeTTSRequest
+from fish_speech_s2.inference_engine import TTSInferenceEngine
+from fish_speech_s2.utils.schema import ServeTTSRequest
 
 AMPLITUDE = 32768  # Needs an explaination
 
diff --git a/fish_speech_src/tools/server/model_manager.py b/fish_speech_src/tools/server/model_manager.py
index 4ec4bdf..685757e 100644
--- a/fish_speech_src/tools/server/model_manager.py
+++ b/fish_speech_src/tools/server/model_manager.py
@@ -1,10 +1,10 @@
 import torch
 from loguru import logger
 
-from fish_speech.inference_engine import TTSInferenceEngine
-from fish_speech.models.dac.inference import load_model as load_decoder_model
-from fish_speech.models.text2semantic.inference import launch_thread_safe_queue
-from fish_speech.utils.schema import ServeTTSRequest
+from fish_speech_s2.inference_engine import TTSInferenceEngine
+from fish_speech_s2.models.dac.inference import load_model as load_decoder_model
+from fish_speech_s2.models.text2semantic.inference import launch_thread_safe_queue
+from fish_speech_s2.utils.schema import ServeTTSRequest
 from tools.server.inference import inference_wrapper as inference
 
 
diff --git a/fish_speech_src/tools/server/views.py b/fish_speech_src/tools/server/views.py
index 5e47304..a745b66 100644
--- a/fish_speech_src/tools/server/views.py
+++ b/fish_speech_src/tools/server/views.py
@@ -24,7 +24,7 @@
 from loguru import logger
 from typing_extensions import Annotated
 
-from fish_speech.utils.schema import (
+from fish_speech_s2.utils.schema import (
     AddReferenceRequest,
     AddReferenceResponse,
     DeleteReferenceResponse,
diff --git a/fish_speech_src/tools/vqgan/create_train_split.py b/fish_speech_src/tools/vqgan/create_train_split.py
index 977afdf..747503f 100644
--- a/fish_speech_src/tools/vqgan/create_train_split.py
+++ b/fish_speech_src/tools/vqgan/create_train_split.py
@@ -7,7 +7,7 @@
 from pydub import AudioSegment
 from tqdm import tqdm
 
-from fish_speech.utils.file import AUDIO_EXTENSIONS, list_files, load_filelist
+from fish_speech_s2.utils.file import AUDIO_EXTENSIONS, list_files, load_filelist
 
 
 @click.command()
diff --git a/fish_speech_src/tools/vqgan/extract_vq.py b/fish_speech_src/tools/vqgan/extract_vq.py
index d50748c..70cc389 100644
--- a/fish_speech_src/tools/vqgan/extract_vq.py
+++ b/fish_speech_src/tools/vqgan/extract_vq.py
@@ -16,7 +16,7 @@
 from loguru import logger
 from omegaconf import OmegaConf
 
-from fish_speech.utils.file import AUDIO_EXTENSIONS, list_files, load_filelist
+from fish_speech_s2.utils.file import AUDIO_EXTENSIONS, list_files, load_filelist
 
 # register eval resolver
 OmegaConf.register_new_resolver("eval", eval)
@@ -50,7 +50,7 @@ def get_model(
     checkpoint_path: str = "checkpoints/openaudio-s1-mini/codec.pth",
     device: str | torch.device = "cuda",
 ):
-    with initialize(version_base="1.3", config_path="../../fish_speech/configs"):
+    with initialize(version_base="1.3", config_path="../../fish_speech_s2/configs"):
         cfg = compose(config_name=config_name)
 
     model = instantiate(cfg)
diff --git a/fish_speech_src/tools/webui/__init__.py b/fish_speech_src/tools/webui/__init__.py
index e9b9a02..52c1cd4 100644
--- a/fish_speech_src/tools/webui/__init__.py
+++ b/fish_speech_src/tools/webui/__init__.py
@@ -2,7 +2,7 @@
 
 import gradio as gr
 
-from fish_speech.i18n import i18n
+from fish_speech_s2.i18n import i18n
 from tools.webui.variables import HEADER_MD, TEXTBOX_PLACEHOLDER
 
 
diff --git a/fish_speech_src/tools/webui/inference.py b/fish_speech_src/tools/webui/inference.py
index e6cd1d7..6617487 100644
--- a/fish_speech_src/tools/webui/inference.py
+++ b/fish_speech_src/tools/webui/inference.py
@@ -2,8 +2,8 @@
 from functools import partial
 from typing import Any, Callable
 
-from fish_speech.i18n import i18n
-from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+from fish_speech_s2.i18n import i18n
+from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
 
 def inference_wrapper(
diff --git a/fish_speech_src/tools/webui/variables.py b/fish_speech_src/tools/webui/variables.py
index d32cc94..c72a680 100644
--- a/fish_speech_src/tools/webui/variables.py
+++ b/fish_speech_src/tools/webui/variables.py
@@ -1,4 +1,4 @@
-from fish_speech.i18n import i18n
+from fish_speech_s2.i18n import i18n
 
 HEADER_MD = f"""# Fish Speech
 
diff --git a/nodes/loader.py b/nodes/loader.py
index fe23ed2..4e030ea 100644
--- a/nodes/loader.py
+++ b/nodes/loader.py
@@ -375,9 +375,9 @@ def load_engine(
             )
             attention = "sdpa"
     try:
-        from fish_speech.models.dac.inference import load_model as load_decoder_model
-        from fish_speech.models.text2semantic.inference import launch_thread_safe_queue
-        from fish_speech.inference_engine import TTSInferenceEngine
+        from fish_speech_s2.models.dac.inference import load_model as load_decoder_model
+        from fish_speech_s2.models.text2semantic.inference import launch_thread_safe_queue
+        from fish_speech_s2.inference_engine import TTSInferenceEngine
     except ImportError as e:
         raise ImportError(
             f"fish_speech package not found: {e}\n"
@@ -465,7 +465,7 @@ def _make_attention_forward(attention: str):
 
     if attention == "sdpa":
         def _forward(self, x, freqs_cis, mask, input_pos=None):
-            from fish_speech.models.text2semantic.llama import apply_rotary_emb
+            from fish_speech_s2.models.text2semantic.llama import apply_rotary_emb
             import torch.nn.functional as F
             bsz, seqlen, _ = x.shape
             q_size = self.n_head * self.head_dim
@@ -497,7 +497,7 @@ def _forward(self, x, freqs_cis, mask, input_pos=None):
 
     if attention == "flash_attention":
         def _forward(self, x, freqs_cis, mask, input_pos=None):
-            from fish_speech.models.text2semantic.llama import apply_rotary_emb
+            from fish_speech_s2.models.text2semantic.llama import apply_rotary_emb
             from torch.nn.attention import SDPBackend, sdpa_kernel
             import torch.nn.functional as F
             bsz, seqlen, _ = x.shape
@@ -540,7 +540,7 @@ def _forward(self, x, freqs_cis, mask, input_pos=None):
             )
 
         def _forward(self, x, freqs_cis, mask, input_pos=None):
-            from fish_speech.models.text2semantic.llama import apply_rotary_emb
+            from fish_speech_s2.models.text2semantic.llama import apply_rotary_emb
             from sageattention import sageattn
             import torch.nn.functional as F
             bsz, seqlen, _ = x.shape
@@ -586,7 +586,7 @@ def _patch_attention_class(attention: str):
         return None, None
 
     try:
-        from fish_speech.models.text2semantic.llama import Attention
+        from fish_speech_s2.models.text2semantic.llama import Attention
     except ImportError as e:
         logger.warning(f"Cannot patch Attention class: {e}")
         return None, None
diff --git a/nodes/model_cache.py b/nodes/model_cache.py
index 610f16b..b9fd06e 100644
--- a/nodes/model_cache.py
+++ b/nodes/model_cache.py
@@ -85,7 +85,7 @@ def offload_engine_to_cpu() -> None:
     # our offload message as soon as it finishes that job. We use a long timeout
     # to cover the worst case (long generation cancelled mid-way).
     try:
-        from fish_speech.models.text2semantic.inference import GenerateRequest
+        from fish_speech_s2.models.text2semantic.inference import GenerateRequest
 
         offload_response: queue.Queue = queue.Queue()
         engine.llama_queue.put(
@@ -152,7 +152,7 @@ def resume_engine_to_cuda(device: str = "cuda") -> None:
 
     # --- Ask the LLaMA worker thread to move back to device ---
     try:
-        from fish_speech.models.text2semantic.inference import GenerateRequest
+        from fish_speech_s2.models.text2semantic.inference import GenerateRequest
 
         response_queue: queue.Queue = queue.Queue()
         engine.llama_queue.put(
diff --git a/nodes/multi_speaker_node.py b/nodes/multi_speaker_node.py
index 4cc6e16..57c8f8d 100644
--- a/nodes/multi_speaker_node.py
+++ b/nodes/multi_speaker_node.py
@@ -343,7 +343,7 @@ def execute(
 
             engine = _get_engine(model_path, device, precision, attention, compile_model, keep_model_loaded, offload_to_cpu)
 
-            from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+            from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
             # num_speakers is a dict from DynamicCombo:
             # {"num_speakers": "3", "speaker_1_audio": ..., "speaker_1_ref_text": ..., ...}
@@ -565,7 +565,7 @@ def generate(
 
             engine = _get_engine(model_path, device, precision, attention, compile_model, keep_model_loaded, offload_to_cpu)
 
-            from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+            from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
             # Build per-speaker reference map (0-based index)
             references = {}
diff --git a/nodes/multi_speaker_split_node.py b/nodes/multi_speaker_split_node.py
index 619f31d..b21f2d5 100644
--- a/nodes/multi_speaker_split_node.py
+++ b/nodes/multi_speaker_split_node.py
@@ -206,7 +206,7 @@ def execute(
 
             engine = _get_engine(model_path, device, precision, attention, compile_model, keep_model_loaded, offload_to_cpu)
 
-            from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+            from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
             n = int(num_speakers["num_speakers"])
 
@@ -399,7 +399,7 @@ def generate(
 
             engine = _get_engine(model_path, device, precision, attention, compile_model, keep_model_loaded, offload_to_cpu)
 
-            from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+            from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
             references = {}
             missing = []
diff --git a/nodes/tts_node.py b/nodes/tts_node.py
index 8532bc7..9f4223c 100644
--- a/nodes/tts_node.py
+++ b/nodes/tts_node.py
@@ -211,7 +211,7 @@ def generate(
 
         engine = self._get_engine(model_path, device, precision, attention, compile_model, keep_model_loaded, offload_to_cpu)
 
-        from fish_speech.utils.schema import ServeTTSRequest
+        from fish_speech_s2.utils.schema import ServeTTSRequest
 
         pbar = ProgressBar(3) if _PBAR else None
 
diff --git a/nodes/voice_clone_node.py b/nodes/voice_clone_node.py
index ddd5013..8c2a140 100644
--- a/nodes/voice_clone_node.py
+++ b/nodes/voice_clone_node.py
@@ -175,7 +175,7 @@ def generate(
 
         engine = self._get_engine(model_path, device, precision, attention, compile_model, keep_model_loaded, offload_to_cpu)
 
-        from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
+        from fish_speech_s2.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
         pbar = ProgressBar(4) if _PBAR else None