From 6bdc95478f999309e74249cd886615fe6118d178 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 17:50:21 +0800
Subject: [PATCH 01/25] init

---
 .gitignore                           |  3 +-
 build_android.sh                     | 61 +++++++++++++++++++++
 cmake/bazel.cmake                    |  3 +
 examples/c++/CMakeLists.txt          | 26 +++++++++
 examples/c++/build_android.sh        | 41 ++++++++++++++
 src/ailego/CMakeLists.txt            |  2 +-
 thirdparty/arrow/CMakeLists.txt      | 54 ++++++++++++------
 thirdparty/arrow/arrow.android.patch | 82 ++++++++++++++++++++++++++++
 thirdparty/glog/CMakeLists.txt       |  9 ++-
 thirdparty/glog/glog.android.patch   | 78 ++++++++++++++++++++++++++
 thirdparty/lz4/CMakeLists.txt        | 73 +++++++++++++++++++------
 11 files changed, 396 insertions(+), 36 deletions(-)
 create mode 100644 build_android.sh
 create mode 100644 examples/c++/build_android.sh
 create mode 100644 thirdparty/arrow/arrow.android.patch
 create mode 100644 thirdparty/glog/glog.android.patch

diff --git a/.gitignore b/.gitignore
index 755089d0..0827e539 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@ build*
 bin/*
 lib/*
 var/*
-thirdparty
 venv*
 tests/integration/conf/*
 tests/de_integration/conf/*
@@ -48,3 +47,5 @@ yarn-debug.log*
 yarn-error.log*
 
 allure-*
+
+!build_android.sh
\ No newline at end of file
diff --git a/build_android.sh b/build_android.sh
new file mode 100644
index 00000000..968db554
--- /dev/null
+++ b/build_android.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -e
+CURRENT_DIR=$(pwd)
+
+ABI=${1:-"arm64-v8a"}
+API_LEVEL=${2:-21}
+BUILD_TYPE=${3:-"Release"}
+
+# step1: use host env to compile protoc
+echo "step1: building protoc for host..."
+HOST_BUILD_DIR="build_host"
+mkdir -p $HOST_BUILD_DIR
+cd $HOST_BUILD_DIR
+
+cmake -DCMAKE_BUILD_TYPE="$BUILD_TYPE" ..
+make -j protoc
+PROTOC_EXECUTABLE=$CURRENT_DIR/$HOST_BUILD_DIR/bin/protoc
+cd $CURRENT_DIR
+
+echo "step1: Done!!!"
+
+# step2: cross build zvec based on android ndk
+echo "step2: building zvec for android..."
+export ANDROID_SDK_ROOT=$HOME/Library/Android/sdk
+export ANDROID_HOME=$ANDROID_SDK_ROOT
+export ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/28.2.13676358
+export CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake
+
+export PATH=$PATH:$ANDROID_SDK_ROOT/cmdline-tools/latest/bin
+export PATH=$PATH:$ANDROID_SDK_ROOT/platform-tools
+export PATH=$PATH:$ANDROID_NDK_HOME
+
+if [ -z "$ANDROID_NDK_HOME" ]; then
+    echo "error: ANDROID_NDK_HOME env not set"
+    echo "please install NDK and set env variable ANDROID_NDK_HOME"
+    exit 1
+fi
+
+BUILD_DIR="build_android_${ABI}_macos"
+mkdir -p $BUILD_DIR
+cd $BUILD_DIR
+
+echo "configure CMake..."
+cmake \
+    -DANDROID_NDK="$ANDROID_NDK_HOME" \
+    -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+    -DANDROID_ABI="$ABI" \
+    -DANDROID_NATIVE_API_LEVEL="$API_LEVEL" \
+    -DANDROID_STL="c++_static" \
+    -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
+    -DBUILD_PYTHON_BINDINGS=OFF \
+    -DBUILD_TOOLS=OFF \
+    -DCMAKE_INSTALL_PREFIX="./install" \
+    -DGLOBAL_CC_PROTOBUF_PROTOC=$PROTOC_EXECUTABLE \
+    ../
+
+echo "building..."
+CORE_COUNT=$(sysctl -n hw.ncpu)
+make -j$CORE_COUNT
+
+echo "step2: Done!!!"
\ No newline at end of file
diff --git a/cmake/bazel.cmake b/cmake/bazel.cmake
index deaf1656..f1effc6d 100644
--- a/cmake/bazel.cmake
+++ b/cmake/bazel.cmake
@@ -1313,6 +1313,9 @@ function(cc_proto_library)
 
   _find_protobuf("${CC_ARGS_PROTOBUF_VERSION}")
   set(CC_PROTOBUF_PROTOC ${CC_PROTOBUF_PROTOC_${CC_ARGS_PROTOBUF_VERSION}})
+  if(DEFINED GLOBAL_CC_PROTOBUF_PROTOC)
+    set(CC_PROTOBUF_PROTOC ${GLOBAL_CC_PROTOBUF_PROTOC})
+  endif()
   set(CC_PROTOBUF_INCS ${CC_PROTOBUF_INCS_${CC_ARGS_PROTOBUF_VERSION}})
   set(CC_PROTOBUF_LIBS ${CC_PROTOBUF_LIBS_${CC_ARGS_PROTOBUF_VERSION}})
 
diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt
index 0751bf9e..b29dce8e 100644
--- a/examples/c++/CMakeLists.txt
+++ b/examples/c++/CMakeLists.txt
@@ -84,6 +84,16 @@ elseif(APPLE)
         zvec-ailego
         ${zvec_core_deps}
     )
+elseif(ANDROID)
+    target_link_libraries(zvec-core INTERFACE
+        -Wl,--whole-archive
+        zvec_core
+        -Wl,--no-whole-archive
+        -Wl,--start-group
+        zvec-ailego
+        ${zvec_core_deps}
+        -Wl,--end-group
+    )
 else()
     message(FATAL_ERROR "Unsupported platform: ${CMAKE_SYSTEM_NAME}")
 endif()
@@ -106,6 +116,17 @@ elseif(APPLE)
         zvec-ailego
         ${zvec_db_deps}
     )
+elseif(ANDROID)
+    target_link_libraries(zvec-db INTERFACE
+        zvec_db
+        zvec-core
+        zvec-ailego
+        -Wl,--start-group
+        ${zvec_db_deps}
+        -Wl,--end-group
+    )
+else()
+    message(FATAL_ERROR "Unsupported platform: ${CMAKE_SYSTEM_NAME}")
 endif()
 
 
@@ -114,6 +135,11 @@ add_executable(db-example db/main.cc)
 target_link_libraries(db-example PRIVATE
     zvec-db
 )
+if(ANDROID)
+    target_link_libraries(db-example PRIVATE
+        log
+    )
+endif()
 
 add_executable(core-example core/main.cc)
 target_link_libraries(core-example PRIVATE
diff --git a/examples/c++/build_android.sh b/examples/c++/build_android.sh
new file mode 100644
index 00000000..946f8ed4
--- /dev/null
+++ b/examples/c++/build_android.sh
@@ -0,0 +1,41 @@
+export ANDROID_SDK_ROOT=$HOME/Library/Android/sdk
+export ANDROID_HOME=$ANDROID_SDK_ROOT
+export ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/28.2.13676358
+export CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake
+
+export PATH=$PATH:$ANDROID_SDK_ROOT/cmdline-tools/latest/bin
+export PATH=$PATH:$ANDROID_SDK_ROOT/platform-tools
+export PATH=$PATH:$ANDROID_NDK_HOME
+
+#!/bin/bash
+set -e
+
+if [ -z "$ANDROID_NDK_HOME" ]; then
+    echo "error: ANDROID_NDK_HOME env not set"
+    echo "please install NDK and set env variable ANDROID_NDK_HOME"
+    exit 1
+fi
+
+ABI=${1:-"arm64-v8a"}
+API_LEVEL=${2:-21}
+BUILD_TYPE=${3:-"Release"}
+
+BUILD_DIR="build_android_${ABI}_macos"
+mkdir -p $BUILD_DIR
+cd $BUILD_DIR
+
+echo "configure CMake..."
+cmake \
+    -DANDROID_NDK="$ANDROID_NDK_HOME" \
+    -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+    -DANDROID_ABI="$ABI" \
+    -DANDROID_NATIVE_API_LEVEL="$API_LEVEL" \
+    -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
+    -DBUILD_PYTHON_BINDINGS=OFF \
+    -DBUILD_TOOLS=OFF \
+    -DCMAKE_INSTALL_PREFIX="./install" \
+    ../
+
+echo "building..."
+CORE_COUNT=$(sysctl -n hw.ncpu)
+make -j$CORE_COUNT
diff --git a/src/ailego/CMakeLists.txt b/src/ailego/CMakeLists.txt
index b01df973..5fcaacac 100644
--- a/src/ailego/CMakeLists.txt
+++ b/src/ailego/CMakeLists.txt
@@ -3,7 +3,7 @@ include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
 
 find_package(Threads REQUIRED)
 
-if(UNIX AND NOT APPLE)
+if(UNIX AND NOT APPLE AND NOT ANDROID)
     find_library(LIB_RT NAMES rt)
 else()
     set(LIB_RT "")
diff --git a/thirdparty/arrow/CMakeLists.txt b/thirdparty/arrow/CMakeLists.txt
index eb8cad06..19c7e863 100644
--- a/thirdparty/arrow/CMakeLists.txt
+++ b/thirdparty/arrow/CMakeLists.txt
@@ -1,6 +1,11 @@
 set(ARROW_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0)
-set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.patch)
-apply_patch_once("arrow_fix" "${ARROW_SRC_DIR}" "${ARROW_PATCH}")
+if(ANDROID)
+        set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.android.patch)
+        apply_patch_once("arrow_android_fix" "${ARROW_SRC_DIR}" "${ARROW_PATCH}")
+else()
+        set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.patch)
+        apply_patch_once("arrow_fix" "${ARROW_SRC_DIR}" "${ARROW_PATCH}")
+endif()
 
 include(ExternalProject)
 include(ProcessorCount)
@@ -14,20 +19,37 @@ set(LIB_ACERO ${EXTERNAL_LIB_DIR}/libarrow_acero.a)
 set(LIB_ARROW_DEPENDS ${EXTERNAL_LIB_DIR}/libarrow_bundled_dependencies.a)
 set(LIB_ARROW_DATASET ${EXTERNAL_LIB_DIR}/libarrow_dataset.a)
 
-ExternalProject_Add(
-        ARROW.BUILD PREFIX arrow
-        SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0
-        DOWNLOAD_COMMAND ""
-        BUILD_IN_SOURCE false
-        CONFIGURE_COMMAND "${CMAKE_COMMAND}" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib "<SOURCE_DIR>/cpp"
-        BUILD_COMMAND "${CMAKE_COMMAND}" --build . --target all -- -j ${NPROC}
-        INSTALL_COMMAND "${CMAKE_COMMAND}" --install "<BINARY_DIR>" --prefix=${EXTERNAL_BINARY_DIR}/usr/local
-        BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}
-        LOG_DOWNLOAD ON
-        LOG_CONFIGURE ON
-        LOG_BUILD ON
-        LOG_INSTALL ON
-)
+if(ANDROID)
+        ExternalProject_Add(
+                ARROW.BUILD PREFIX arrow
+                SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0
+                DOWNLOAD_COMMAND ""
+                BUILD_IN_SOURCE false
+                CONFIGURE_COMMAND "${CMAKE_COMMAND}" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DANDROID_ABI=${ANDROID_ABI} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} -DARROW_WITH_MUSL=OFF "<SOURCE_DIR>/cpp"
+                BUILD_COMMAND "${CMAKE_COMMAND}" --build . --target all -- -j ${NPROC}
+                INSTALL_COMMAND "${CMAKE_COMMAND}" --install "<BINARY_DIR>" --prefix=${EXTERNAL_BINARY_DIR}/usr/local
+                BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}
+                LOG_DOWNLOAD ON
+                LOG_CONFIGURE ON
+                LOG_BUILD ON
+                LOG_INSTALL ON
+        )
+else()
+        ExternalProject_Add(
+                ARROW.BUILD PREFIX arrow
+                SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0
+                DOWNLOAD_COMMAND ""
+                BUILD_IN_SOURCE false
+                CONFIGURE_COMMAND "${CMAKE_COMMAND}" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib "<SOURCE_DIR>/cpp"
+                BUILD_COMMAND "${CMAKE_COMMAND}" --build . --target all -- -j ${NPROC}
+                INSTALL_COMMAND "${CMAKE_COMMAND}" --install "<BINARY_DIR>" --prefix=${EXTERNAL_BINARY_DIR}/usr/local
+                BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}
+                LOG_DOWNLOAD ON
+                LOG_CONFIGURE ON
+                LOG_BUILD ON
+                LOG_INSTALL ON
+        )
+endif()
 add_library(arrow UNKNOWN IMPORTED GLOBAL)
 add_dependencies(arrow ARROW.BUILD)
 
diff --git a/thirdparty/arrow/arrow.android.patch b/thirdparty/arrow/arrow.android.patch
new file mode 100644
index 00000000..a4e8bba2
--- /dev/null
+++ b/thirdparty/arrow/arrow.android.patch
@@ -0,0 +1,82 @@
+diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
+index 7fa4b66d4b..78bcb6d47e 100644
+--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
++++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
+@@ -950,6 +950,13 @@ set(EP_COMMON_CMAKE_ARGS
+     # https://github.com/apache/arrow/issues/45985
+     -DCMAKE_POLICY_VERSION_MINIMUM=3.5)
+ 
++if(ANDROID)
++  list(APPEND EP_COMMON_CMAKE_ARGS 
++  -DANDROID_ABI=${ANDROID_ABI}
++  -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL}
++  -DANDROID_NDK=${ANDROID_NDK})
++endif()
++
+ # if building with a toolchain file, pass that through
+ if(CMAKE_TOOLCHAIN_FILE)
+   list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
+diff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc
+index 0f58406760..cf68bfdcbe 100644
+--- a/cpp/src/arrow/acero/source_node.cc
++++ b/cpp/src/arrow/acero/source_node.cc
+@@ -407,7 +407,7 @@ struct SchemaSourceNode : public SourceNode {
+ struct RecordBatchReaderSourceNode : public SourceNode {
+   RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,
+                               arrow::AsyncGenerator<std::optional<ExecBatch>> generator)
+-      : SourceNode(plan, schema, generator) {}
++      : SourceNode(plan, schema, generator, Ordering::Implicit()) {}
+ 
+   static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
+                                 const ExecNodeOptions& options) {
+diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp
+index 2cf6c62a84..9e64b62297 100644
+--- a/cpp/src/arrow/vendored/datetime/tz.cpp
++++ b/cpp/src/arrow/vendored/datetime/tz.cpp
+@@ -605,7 +605,9 @@ tzdb_list
+ create_tzdb()
+ {
+     tzdb_list tz_db;
++#if !defined(ANDROID) && !defined(__ANDROID__)
+     tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release());
++#endif // !defined(ANDROID) && !defined(__ANDROID__)
+     return tz_db;
+ }
+ 
+@@ -3900,7 +3902,9 @@ reload_tzdb()
+     if (!v.empty() && v == remote_version())
+         return get_tzdb_list().front();
+ #endif  // AUTO_DOWNLOAD
++#if !defined(ANDROID) && !defined(__ANDROID__)
+     tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release());
++#endif  // !defined(ANDROID) && !defined(__ANDROID__)
+     return get_tzdb_list().front();
+ }
+ 
+diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h
+index 61ab3df106..d456d6765f 100644
+--- a/cpp/src/arrow/vendored/datetime/tz.h
++++ b/cpp/src/arrow/vendored/datetime/tz.h
+@@ -858,7 +858,9 @@ private:
+     load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
+                                  std::int32_t tzh_typecnt, std::int32_t tzh_charcnt);
+ # if defined(ANDROID) || defined(__ANDROID__)
++public:
+     void parse_from_android_tzdata(std::ifstream& inf, const std::size_t off);
++private:
+ # endif // defined(ANDROID) || defined(__ANDROID__)
+ #else  // !USE_OS_TZDB
+     DATE_API sys_info   get_info_impl(sys_seconds tp, int tz_int) const;
+diff --git a/cpp/src/arrow/vendored/musl/strptime.c b/cpp/src/arrow/vendored/musl/strptime.c
+index 41912fd1bb..9d0b4dc1bf 100644
+--- a/cpp/src/arrow/vendored/musl/strptime.c
++++ b/cpp/src/arrow/vendored/musl/strptime.c
+@@ -17,7 +17,7 @@
+ 
+ #undef HAVE_LANGINFO
+ 
+-#ifndef _WIN32
++#if !defined(_WIN32) && !defined(__ANDROID__)
+ #define HAVE_LANGINFO 1
+ #endif
+ 
diff --git a/thirdparty/glog/CMakeLists.txt b/thirdparty/glog/CMakeLists.txt
index 04c1d085..611f18ef 100644
--- a/thirdparty/glog/CMakeLists.txt
+++ b/thirdparty/glog/CMakeLists.txt
@@ -6,8 +6,13 @@ set(HAVE_LIB_GFLAGS TRUE CACHE BOOL "")
 add_compile_options(-Wno-deprecated-declarations)
 
 set(GLOG_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/glog-0.5.0)
-set(GLOG_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.patch)
-apply_patch_once("glog_fix" "${GLOG_SRC_DIR}" "${GLOG_PATCH}")
+if (ANDROID)
+    set(GLOG_ANDROID_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.android.patch)
+    apply_patch_once("glog_android_fix" "${GLOG_SRC_DIR}" "${GLOG_ANDROID_PATCH}")
+else()
+    set(GLOG_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.patch)
+    apply_patch_once("glog_fix" "${GLOG_SRC_DIR}" "${GLOG_PATCH}")
+endif()
 
 set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
diff --git a/thirdparty/glog/glog.android.patch b/thirdparty/glog/glog.android.patch
new file mode 100644
index 00000000..7b2d1a31
--- /dev/null
+++ b/thirdparty/glog/glog.android.patch
@@ -0,0 +1,78 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 62ebbcc..e17f67e 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -17,7 +17,7 @@ set (CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
+ set (CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
+ set (CPACK_PACKAGE_VERSION ${PROJECT_VERSION})
+ 
+-option (BUILD_SHARED_LIBS "Build shared libraries" ON)
++option (BUILD_STATIC_LIBS "Build shared libraries" ON)
+ option (PRINT_UNSYMBOLIZED_STACK_TRACES
+   "Print file offsets in traces instead of symbolizing" OFF)
+ option (WITH_CUSTOM_PREFIX "Enable support for user-generated message prefixes" OFF)
+@@ -802,12 +802,12 @@ if (BUILD_TESTING)
+     FIXTURES_REQUIRED "cmake_package_config;cmake_package_config_working")
+ endif (BUILD_TESTING)
+ 
+-install (TARGETS glog
+-  EXPORT glog-targets
+-  RUNTIME DESTINATION ${_glog_CMake_BINDIR}
+-  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog
+-  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}
+-  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})
++#install (TARGETS glog
++#  EXPORT glog-targets
++#  RUNTIME DESTINATION ${_glog_CMake_BINDIR}
++#  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog
++#  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}
++#  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})
+ 
+ if (WITH_PKGCONFIG)
+   install (
+@@ -840,8 +840,8 @@ write_basic_package_version_file (
+   ${CMAKE_CURRENT_BINARY_DIR}/glog-config-version.cmake
+   COMPATIBILITY SameMajorVersion)
+ 
+-export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)
+-export (PACKAGE glog)
++#export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)
++#export (PACKAGE glog)
+ 
+ get_filename_component (_PREFIX "${CMAKE_INSTALL_PREFIX}" ABSOLUTE)
+ 
+@@ -885,5 +885,5 @@ install (DIRECTORY ${_glog_BINARY_CMake_DATADIR}
+   FILES_MATCHING PATTERN "*.cmake"
+ )
+ 
+-install (EXPORT glog-targets NAMESPACE glog:: DESTINATION
+-  ${_glog_CMake_INSTALLDIR})
++#install (EXPORT glog-targets NAMESPACE glog:: DESTINATION
++#  ${_glog_CMake_INSTALLDIR})
+diff --git a/src/stacktrace_generic-inl.h b/src/stacktrace_generic-inl.h
+index fad81d3..67209ac 100644
+--- a/src/stacktrace_generic-inl.h
++++ b/src/stacktrace_generic-inl.h
+@@ -39,21 +39,7 @@ _START_GOOGLE_NAMESPACE_
+ 
+ // If you change this function, also change GetStackFrames below.
+ int GetStackTrace(void** result, int max_depth, int skip_count) {
+-  static const int kStackLength = 64;
+-  void * stack[kStackLength];
+-  int size;
+-
+-  size = backtrace(stack, kStackLength);
+-  skip_count++;  // we want to skip the current frame as well
+-  int result_count = size - skip_count;
+-  if (result_count < 0)
+-    result_count = 0;
+-  if (result_count > max_depth)
+-    result_count = max_depth;
+-  for (int i = 0; i < result_count; i++)
+-    result[i] = stack[i + skip_count];
+-
+-  return result_count;
++  return 0;
+ }
+ 
+ _END_GOOGLE_NAMESPACE_
diff --git a/thirdparty/lz4/CMakeLists.txt b/thirdparty/lz4/CMakeLists.txt
index ad5f96b1..7db62d4f 100644
--- a/thirdparty/lz4/CMakeLists.txt
+++ b/thirdparty/lz4/CMakeLists.txt
@@ -4,26 +4,67 @@ file(MAKE_DIRECTORY ${lz4_INCLUDE_DIR})
 file(MAKE_DIRECTORY ${lz4_LIBRARY_DIR})
 
 include(ExternalProject)
-ExternalProject_Add(
-        Lz4.BUILD
-        PREFIX lz4
-    URL "${CMAKE_CURRENT_SOURCE_DIR}/lz4-1.9.4"
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND env CFLAGS=-fPIC BUILD_SHARED=no make -j
-    INSTALL_COMMAND make DESTDIR=${EXTERNAL_BINARY_DIR} BUILD_SHARED=no install
-    BUILD_IN_SOURCE ON
-    LOG_DOWNLOAD ON
-    LOG_CONFIGURE ON
-    LOG_BUILD ON
-    LOG_INSTALL ON
-    BUILD_BYPRODUCTS ${lz4_LIBRARY_DIR}/liblz4.a
+
+set(_lz4_env "")
+if(ANDROID)
+  string(REGEX REPLACE "^android-([0-9]+)$" "\\1" ANDROID_API_LEVEL "${ANDROID_PLATFORM}")
+
+  if(ANDROID_ABI STREQUAL "arm64-v8a")
+    set(TARGET_TRIPLE "aarch64-linux-android")
+  elseif(ANDROID_ABI STREQUAL "armeabi-v7a")
+    set(TARGET_TRIPLE "armv7a-linux-androideabi")
+  elseif(ANDROID_ABI STREQUAL "x86")
+    set(TARGET_TRIPLE "i686-linux-android")
+  elseif(ANDROID_ABI STREQUAL "x86_64")
+    set(TARGET_TRIPLE "x86_64-linux-android")
+  else()
+    message(FATAL_ERROR "Unsupported ANDROID_ABI: ${ANDROID_ABI}")
+  endif()
+
+  set(SYSROOT "${ANDROID_NDK}/toolchains/llvm/prebuilt/${ANDROID_HOST_TAG}/sysroot")
+  set(COMMON_FLAGS
+    "--sysroot=${SYSROOT}"
+    "-target ${TARGET_TRIPLE}${ANDROID_API_LEVEL}"
+    "-fPIC"
+    "-D__ANDROID_API__=${ANDROID_API_LEVEL}"
+  )
+
+  list(APPEND COMMON_FLAGS ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE}})
+
+  string(JOIN " " _lz4_cflags ${COMMON_FLAGS})
+
+  list(APPEND _lz4_env
+    "CC=${CMAKE_C_COMPILER}"
+    "AR=${CMAKE_AR}"
+    "RANLIB=${CMAKE_RANLIB}"
+    "STRIP=${ANDROID_NDK}/toolchains/llvm/prebuilt/${ANDROID_HOST_TAG}/bin/llvm-strip"
+    "CFLAGS=${_lz4_cflags}"
   )
 
+else()
+  list(APPEND _lz4_env "CFLAGS=-fPIC")
+endif()
+
+ExternalProject_Add(
+  Lz4.BUILD
+  PREFIX lz4
+  URL "${CMAKE_CURRENT_SOURCE_DIR}/lz4-1.9.4"
+  CONFIGURE_COMMAND ""
+    BUILD_COMMAND env ${_lz4_env} BUILD_SHARED=no make -j
+  INSTALL_COMMAND make DESTDIR=${EXTERNAL_BINARY_DIR} BUILD_SHARED=no install
+  BUILD_IN_SOURCE ON
+  LOG_DOWNLOAD ON
+  LOG_CONFIGURE ON
+  LOG_BUILD ON
+  LOG_INSTALL ON
+  BUILD_BYPRODUCTS ${lz4_LIBRARY_DIR}/liblz4.a
+)
+
 add_library(lz4 STATIC IMPORTED GLOBAL)
 set_target_properties(
-    lz4 PROPERTIES
-    INTERFACE_INCLUDE_DIRECTORIES "${lz4_INCLUDE_DIR}"
-    IMPORTED_LOCATION "${lz4_LIBRARY_DIR}/liblz4.a"
+  lz4 PROPERTIES
+  INTERFACE_INCLUDE_DIRECTORIES "${lz4_INCLUDE_DIR}"
+  IMPORTED_LOCATION "${lz4_LIBRARY_DIR}/liblz4.a"
 )
 add_dependencies(lz4 Lz4.BUILD)
 

From dde93021930c1a4e1dc59338477c265b6bfdfdf8 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 18:09:00 +0800
Subject: [PATCH 02/25] fix

---
 build_android.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build_android.sh b/build_android.sh
index 968db554..7a5c40a6 100644
--- a/build_android.sh
+++ b/build_android.sh
@@ -21,6 +21,10 @@ echo "step1: Done!!!"
 
 # step2: cross build zvec based on android ndk
 echo "step2: building zvec for android..."
+
+# reset thirdparty directory
+git submodule foreach --recursive 'git stash --include-untracked'
+
 export ANDROID_SDK_ROOT=$HOME/Library/Android/sdk
 export ANDROID_HOME=$ANDROID_SDK_ROOT
 export ANDROID_NDK_HOME=$ANDROID_SDK_ROOT/ndk/28.2.13676358

From 7fad00234847d914fa6b9d4dc81a90a8e7fbdf42 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 20:26:11 +0800
Subject: [PATCH 03/25] fix ruff ignore

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index dee6728d..051bb136 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -182,6 +182,7 @@ exclude = [
     ".git/",
     ".venv/",
     "venv/",
+    "thirdparty",
 ]
 
 [tool.ruff.lint]

From 902b6efc25b2f30a28686a8734177c34cb540e9f Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 20:54:14 +0800
Subject: [PATCH 04/25] add android ci

---
 .github/workflows/android_build.yml | 72 +++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 .github/workflows/android_build.yml

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
new file mode 100644
index 00000000..d624768d
--- /dev/null
+++ b/.github/workflows/android_build.yml
@@ -0,0 +1,72 @@
+name: android-cross-build
+
+on:
+  push:
+    branches: [ "main" ]
+    paths-ignore:
+      - '**.md'
+  merge_group:
+  pull_request:
+    branches: [ "main" ]
+    paths-ignore:
+      - '**.md'
+  workflow_dispatch:
+
+jobs:
+  build-android:
+    runs-on: ubuntu-22.04
+    strategy:
+      fail-fast: false
+      matrix:
+        # abi: [arm64-v8a, armeabi-v7a, x86_64]
+        abi: [arm64-v8a]
+        api: [21]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+            cmake ninja-build git ca-certificates python3 \
+            build-essential make
+
+      - name: Setup Android NDK
+        uses: android-actions/setup-android@v3
+
+      - name: Install NDK (side by side)
+        shell: bash
+        run: |
+          yes | sdkmanager --licenses
+          sdkmanager "ndk;26.1.10909125"
+
+      - name: Use host env to compile protoc
+        shell: bash
+        run: |
+          cmake -S . -B build-host -G Ninja
+          cmake --build build-host --target protoc --parallel
+
+      - name: Configure (CMake)
+        shell: bash
+        run: |
+          git submodule foreach --recursive 'git stash --include-untracked'
+
+          export ANDROID_SDK_ROOT="$ANDROID_HOME"
+          export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/26.1.10909125"
+
+          cmake -S . -B build-android-${{ matrix.abi }} -G Ninja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+            -DANDROID_ABI=${{ matrix.abi }} \
+            -DANDROID_PLATFORM=android-${{ matrix.api }} \
+            -DANDROID_STL=c++_shared \
+            -DBUILD_PYTHON_BINDINGS=OFF \
+            -DBUILD_TOOLS=OFF \
+            -DGLOBAL_CC_PROTOBUF_PROTOC=build-host/bin/protoc \
+
+      - name: Build
+        shell: bash
+        run: |
+          cmake --build build-android-${{ matrix.abi }} --parallel

From 632a819caa258f89266b09e836106a963be7b605 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 20:59:30 +0800
Subject: [PATCH 05/25] fix

---
 .github/workflows/android_build.yml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index d624768d..6fcdd669 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -33,14 +33,20 @@ jobs:
             cmake ninja-build git ca-certificates python3 \
             build-essential make
 
+      - name: Setup Java 17
+        uses: actions/setup-java@v4
+        with:
+            distribution: temurin
+            java-version: '17'
+
       - name: Setup Android NDK
         uses: android-actions/setup-android@v3
 
       - name: Install NDK (side by side)
         shell: bash
         run: |
-          yes | sdkmanager --licenses
-          sdkmanager "ndk;26.1.10909125"
+            yes | sdkmanager --licenses
+            sdkmanager "ndk;26.1.10909125"
 
       - name: Use host env to compile protoc
         shell: bash

From a49cb0030841c19a596c2ce1dec538e89805cdcc Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 21:05:39 +0800
Subject: [PATCH 06/25] fix

---
 .github/workflows/android_build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 6fcdd669..c21f9a86 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -45,7 +45,7 @@ jobs:
       - name: Install NDK (side by side)
         shell: bash
         run: |
-            yes | sdkmanager --licenses
+            # yes | sdkmanager --licenses
             sdkmanager "ndk;26.1.10909125"
 
       - name: Use host env to compile protoc

From 8146a6c7463289fe8dc7fd81bf3bee7a0f830d68 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 21:08:51 +0800
Subject: [PATCH 07/25] fix

---
 .github/workflows/android_build.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index c21f9a86..a54e9ff8 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -51,6 +51,7 @@ jobs:
       - name: Use host env to compile protoc
         shell: bash
         run: |
+          git submodule update --init
           cmake -S . -B build-host -G Ninja
           cmake --build build-host --target protoc --parallel
 

From d2fddc13d088ad00e7dfc02796fecf5ebdd0ca39 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Tue, 10 Feb 2026 21:41:08 +0800
Subject: [PATCH 08/25] fix

---
 .github/workflows/android_build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index a54e9ff8..667975e7 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -71,7 +71,7 @@ jobs:
             -DANDROID_STL=c++_shared \
             -DBUILD_PYTHON_BINDINGS=OFF \
             -DBUILD_TOOLS=OFF \
-            -DGLOBAL_CC_PROTOBUF_PROTOC=build-host/bin/protoc \
+            -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
 
       - name: Build
         shell: bash

From 9c6c0ca5c260ac9d4ffd4c31c19d3b2d6b3e3043 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 11:19:16 +0800
Subject: [PATCH 09/25] add adb shell

---
 .github/workflows/android_build.yml | 46 +++++++++++++++++++++++++++++
 build_android.sh                    |  2 +-
 examples/c++/CMakeLists.txt         |  9 ++++--
 examples/c++/build_android.sh       |  4 +--
 4 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 667975e7..ae0bc9f8 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -77,3 +77,49 @@ jobs:
         shell: bash
         run: |
           cmake --build build-android-${{ matrix.abi }} --parallel
+
+      - name: Build examples
+        shell: bash
+        run: |
+          cd examples/c++
+          
+          cmake -S . -B  build-android-examples-${{ matrix.abi }} -D Ninja \
+            -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+            -DANDROID_ABI=${{ matrix.abi }} \
+            -DANDROID_PLATFORM=android-${{ matrix.api }} \
+            -DANDROID_STL=c++_shared \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
+          
+          cmake --build build-android-examples-${{ matrix.abi }} --parallel
+
+      - name: Install ADB and setup Android emulator
+        uses: reactivecircus/android-emulator-runner@v2
+        with:
+          api-level: ${{ matrix.api }}
+          abi: ${{ matrix.abi }}
+          script: |
+            # Wait for device to be ready
+            adb wait-for-device
+            
+            # Push executables to device
+            adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/
+            adb push examples/c++/build-android-examples-${{ matrix.abi }}/core-example /data/local/tmp/
+            adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/
+            
+            # Make executables executable
+            adb shell chmod +x /data/local/tmp/ailego-example
+            adb shell chmod +x /data/local/tmp/core-example
+            adb shell chmod +x /data/local/tmp/db-example
+            
+            echo "Running ailego example:"
+            adb shell 'cd /data/local/tmp/ && ./ailego-example'
+            echo "Exit code: $?"
+            
+            echo "Running core example:"
+            adb shell 'cd /data/local/tmp/ && ./core-example'
+            echo "Exit code: $?"
+            
+            echo "Running db example:"
+            adb shell 'cd /data/local/tmp/ && ./db-example'
+            echo "Exit code: $?"
diff --git a/build_android.sh b/build_android.sh
index 7a5c40a6..80ff80cd 100644
--- a/build_android.sh
+++ b/build_android.sh
@@ -40,7 +40,7 @@ if [ -z "$ANDROID_NDK_HOME" ]; then
     exit 1
 fi
 
-BUILD_DIR="build_android_${ABI}_macos"
+BUILD_DIR="build_android_${ABI}"
 mkdir -p $BUILD_DIR
 cd $BUILD_DIR
 
diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt
index b29dce8e..13d6711c 100644
--- a/examples/c++/CMakeLists.txt
+++ b/examples/c++/CMakeLists.txt
@@ -7,9 +7,14 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 # --- Paths to Zvec and dependencies ---
+# Allow custom host build directory, default to "build"
+if(NOT DEFINED HOST_BUILD_DIR)
+    set(HOST_BUILD_DIR "build")
+endif()
+
 set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include)
-set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../build/lib)
-set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../build/external/usr/local/lib)
+set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib)
+set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib)
 
 # Add include and library search paths
 include_directories(${ZVEC_INCLUDE_DIR})
diff --git a/examples/c++/build_android.sh b/examples/c++/build_android.sh
index 946f8ed4..64409c34 100644
--- a/examples/c++/build_android.sh
+++ b/examples/c++/build_android.sh
@@ -31,9 +31,7 @@ cmake \
     -DANDROID_ABI="$ABI" \
     -DANDROID_NATIVE_API_LEVEL="$API_LEVEL" \
     -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
-    -DBUILD_PYTHON_BINDINGS=OFF \
-    -DBUILD_TOOLS=OFF \
-    -DCMAKE_INSTALL_PREFIX="./install" \
+    -DHOST_BUILD_DIR="build_android_${ABI}" \
     ../
 
 echo "building..."

From 157a2bc014fa84e118d4c45b587d16aab9b0e354 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 11:48:07 +0800
Subject: [PATCH 10/25] add cache

---
 .github/workflows/android_build.yml | 104 ++++++++++++++++++++++------
 1 file changed, 81 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index ae0bc9f8..0c4f13c3 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -26,12 +26,21 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Cache dependencies
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.ccache
+          key: ${{ runner.os }}-dependencies-cache-${{ hashFiles('**/CMakeLists.txt', 'thirdparty/**') }}
+          restore-keys: |
+            ${{ runner.os }}-dependencies-cache-
+
       - name: Install dependencies
         run: |
           sudo apt-get update
           sudo apt-get install -y --no-install-recommends \
             cmake ninja-build git ca-certificates python3 \
-            build-essential make
+            build-essential make ccache
 
       - name: Setup Java 17
         uses: actions/setup-java@v4
@@ -48,12 +57,39 @@ jobs:
             # yes | sdkmanager --licenses
             sdkmanager "ndk;26.1.10909125"
 
+      - name: Cache host protoc build
+        uses: actions/cache@v3
+        with:
+          path: build-host
+          key: ${{ runner.os }}-host-protoc-${{ hashFiles('src/**', 'CMakeLists.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-host-protoc-
+
       - name: Use host env to compile protoc
         shell: bash
         run: |
           git submodule update --init
-          cmake -S . -B build-host -G Ninja
-          cmake --build build-host --target protoc --parallel
+          if [ ! -d "build-host" ]; then
+            # Setup ccache for host build
+            export CCACHE_BASEDIR="$GITHUB_WORKSPACE"
+            export CCACHE_NOHASHDIR=1
+            export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros
+            
+            cmake -S . -B build-host -G Ninja \
+              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            cmake --build build-host --target protoc --parallel
+          else
+            echo "Using cached host protoc build"
+          fi
+
+      - name: Cache Android build
+        uses: actions/cache@v3
+        with:
+          path: build-android-${{ matrix.abi }}
+          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**') }}
+          restore-keys: |
+            ${{ runner.os }}-android-build-${{ matrix.abi }}-
 
       - name: Configure (CMake)
         shell: bash
@@ -63,35 +99,57 @@ jobs:
           export ANDROID_SDK_ROOT="$ANDROID_HOME"
           export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/26.1.10909125"
 
-          cmake -S . -B build-android-${{ matrix.abi }} -G Ninja \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
-            -DANDROID_ABI=${{ matrix.abi }} \
-            -DANDROID_PLATFORM=android-${{ matrix.api }} \
-            -DANDROID_STL=c++_shared \
-            -DBUILD_PYTHON_BINDINGS=OFF \
-            -DBUILD_TOOLS=OFF \
-            -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
+          # Setup ccache
+          export CCACHE_BASEDIR="$GITHUB_WORKSPACE"
+          export CCACHE_NOHASHDIR=1
+          export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros
+          
+          if [ ! -d "build-android-${{ matrix.abi }}" ]; then
+            cmake -S . -B build-android-${{ matrix.abi }} -G Ninja \
+              -DCMAKE_BUILD_TYPE=Release \
+              -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+              -DANDROID_ABI=${{ matrix.abi }} \
+              -DANDROID_PLATFORM=android-${{ matrix.api }} \
+              -DANDROID_STL=c++_shared \
+              -DBUILD_PYTHON_BINDINGS=OFF \
+              -DBUILD_TOOLS=OFF \
+              -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
+              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+          else
+            echo "Using cached Android build directory"
+          fi
 
       - name: Build
         shell: bash
         run: |
           cmake --build build-android-${{ matrix.abi }} --parallel
 
+      - name: Cache examples build
+        uses: actions/cache@v3
+        with:
+          path: examples/c++/build-android-examples-${{ matrix.abi }}
+          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}
+          restore-keys: |
+            ${{ runner.os }}-examples-build-${{ matrix.abi }}-
+
       - name: Build examples
         shell: bash
         run: |
-          cd examples/c++
-          
-          cmake -S . -B  build-android-examples-${{ matrix.abi }} -D Ninja \
-            -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
-            -DANDROID_ABI=${{ matrix.abi }} \
-            -DANDROID_PLATFORM=android-${{ matrix.api }} \
-            -DANDROID_STL=c++_shared \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
-          
-          cmake --build build-android-examples-${{ matrix.abi }} --parallel
+          if [ ! -d "examples/c++/build-android-examples-${{ matrix.abi }}" ]; then
+            cmake -S examples/c++ -B examples/c++/build-android-examples-${{ matrix.abi }} -G Ninja \
+               -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+               -DANDROID_ABI=${{ matrix.abi }} \
+               -DANDROID_PLATFORM=android-${{ matrix.api }} \
+               -DANDROID_STL=c++_shared \
+               -DCMAKE_BUILD_TYPE=Release \
+               -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
+               -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+               -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+            cmake --build examples/c++/build-android-examples-${{ matrix.abi }} --parallel
+          else
+            echo "Using cached examples build"
+          fi
 
       - name: Install ADB and setup Android emulator
         uses: reactivecircus/android-emulator-runner@v2

From 6af5c6341779539bce8531c080abb2ebdeadf086 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 12:39:11 +0800
Subject: [PATCH 11/25] fix

---
 .github/workflows/android_build.yml | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 0c4f13c3..a5efa721 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -91,7 +91,7 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-android-build-${{ matrix.abi }}-
 
-      - name: Configure (CMake)
+      - name: Configure and Build
         shell: bash
         run: |
           git submodule foreach --recursive 'git stash --include-untracked'
@@ -115,15 +115,12 @@ jobs:
               -DBUILD_TOOLS=OFF \
               -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
               -DCMAKE_C_COMPILER_LAUNCHER=ccache \
-              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            cmake --build build-android-${{ matrix.abi }} --parallel
           else
             echo "Using cached Android build directory"
           fi
 
-      - name: Build
-        shell: bash
-        run: |
-          cmake --build build-android-${{ matrix.abi }} --parallel
 
       - name: Cache examples build
         uses: actions/cache@v3
@@ -145,7 +142,7 @@ jobs:
                -DCMAKE_BUILD_TYPE=Release \
                -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
                -DCMAKE_C_COMPILER_LAUNCHER=ccache \
-               -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+               -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
             cmake --build examples/c++/build-android-examples-${{ matrix.abi }} --parallel
           else
             echo "Using cached examples build"

From 4a24d0eecac50bc4fe4dffd725c8cec514149826 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 14:52:32 +0800
Subject: [PATCH 12/25] add strip

---
 .github/workflows/android_build.yml |  1 +
 examples/c++/CMakeLists.txt         | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index a5efa721..ba537cc8 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -140,6 +140,7 @@ jobs:
                -DANDROID_PLATFORM=android-${{ matrix.api }} \
                -DANDROID_STL=c++_shared \
                -DCMAKE_BUILD_TYPE=Release \
+               -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
                -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
                -DCMAKE_C_COMPILER_LAUNCHER=ccache \
                -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
diff --git a/examples/c++/CMakeLists.txt b/examples/c++/CMakeLists.txt
index 13d6711c..37d42d60 100644
--- a/examples/c++/CMakeLists.txt
+++ b/examples/c++/CMakeLists.txt
@@ -155,3 +155,24 @@ add_executable(ailego-example ailego/main.cc)
 target_link_libraries(ailego-example PRIVATE
     zvec-ailego
 )
+
+# Strip symbols to reduce executable size
+if(CMAKE_BUILD_TYPE STREQUAL "Release" OR ANDROID)
+    add_custom_command(TARGET db-example POST_BUILD
+        COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:db-example>"
+        COMMENT "Stripping symbols from db-example")
+    add_custom_command(TARGET core-example POST_BUILD
+        COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:core-example>"
+        COMMENT "Stripping symbols from core-example")
+    add_custom_command(TARGET ailego-example POST_BUILD
+        COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:ailego-example>"
+        COMMENT "Stripping symbols from ailego-example")
+endif()
+
+# Optimize for size
+if(CMAKE_BUILD_TYPE STREQUAL "Release" OR ANDROID)
+    set_property(TARGET db-example core-example ailego-example
+                 PROPERTY COMPILE_FLAGS "-Os")
+    set_property(TARGET db-example core-example ailego-example
+                 PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
+endif()

From 258c701fc3a8582e92870c35a17f2694304fd050 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 15:46:04 +0800
Subject: [PATCH 13/25] fix

---
 .github/workflows/android_build.yml | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index ba537cc8..955f140f 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -158,24 +158,34 @@ jobs:
             # Wait for device to be ready
             adb wait-for-device
             
+            # Check file sizes before pushing
+            echo "Checking binary sizes:"
+            ls -lah examples/c++/build-android-examples-${{ matrix.abi }}/
+            
             # Push executables to device
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/core-example /data/local/tmp/
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/
             
             # Make executables executable
-            adb shell chmod +x /data/local/tmp/ailego-example
-            adb shell chmod +x /data/local/tmp/core-example
-            adb shell chmod +x /data/local/tmp/db-example
+            adb shell 'chmod 755 /data/local/tmp/ailego-example'
+            adb shell 'chmod 755 /data/local/tmp/core-example'
+            adb shell 'chmod 755 /data/local/tmp/db-example'
+            
+            # Verify file integrity
+            echo "File info on device:"
+            adb shell 'ls -la /data/local/tmp/ailego-example'
+            adb shell 'ls -la /data/local/tmp/core-example'
+            adb shell 'ls -la /data/local/tmp/db-example'
             
             echo "Running ailego example:"
-            adb shell 'cd /data/local/tmp/ && ./ailego-example'
+            adb shell 'cd /data/local/tmp && chmod +x ailego-example && ./ailego-example'
             echo "Exit code: $?"
             
             echo "Running core example:"
-            adb shell 'cd /data/local/tmp/ && ./core-example'
+            adb shell 'cd /data/local/tmp && chmod +x core-example && ./core-example'
             echo "Exit code: $?"
             
             echo "Running db example:"
-            adb shell 'cd /data/local/tmp/ && ./db-example'
+            adb shell 'cd /data/local/tmp && chmod +x db-example && ./db-example'
             echo "Exit code: $?"

From 5708305e5a359c1597ecd2c29154b9b806068d1d Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 16:25:49 +0800
Subject: [PATCH 14/25] fix

---
 .github/workflows/android_build.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 955f140f..0793e6b3 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -179,13 +179,13 @@ jobs:
             adb shell 'ls -la /data/local/tmp/db-example'
             
             echo "Running ailego example:"
-            adb shell 'cd /data/local/tmp && chmod +x ailego-example && ./ailego-example'
+            adb shell 'cd /data/local/tmp && ./ailego-example'
             echo "Exit code: $?"
             
             echo "Running core example:"
-            adb shell 'cd /data/local/tmp && chmod +x core-example && ./core-example'
+            adb shell 'cd /data/local/tmp && ./core-example'
             echo "Exit code: $?"
             
             echo "Running db example:"
-            adb shell 'cd /data/local/tmp && chmod +x db-example && ./db-example'
+            adb shell 'cd /data/local/tmp && ./db-example'
             echo "Exit code: $?"

From cbeda2df80d02de44768848f9274e89a85cd807d Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Wed, 11 Feb 2026 16:36:34 +0800
Subject: [PATCH 15/25] debug

---
 .github/workflows/android_build.yml | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 0793e6b3..70d335ee 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -162,15 +162,33 @@ jobs:
             echo "Checking binary sizes:"
             ls -lah examples/c++/build-android-examples-${{ matrix.abi }}/
             
+            # Check device architecture
+            echo "Device architecture info:"
+            adb shell getprop ro.product.cpu.abi
+            adb shell getprop ro.product.cpu.abilist
+            adb shell uname -m
+            
+            # Check binary architecture (requires file command)
+            echo "Checking if file command is available:"
+            if command -v file >/dev/null 2>&1; then
+                file examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example
+                file examples/c++/build-android-examples-${{ matrix.abi }}/core-example
+                file examples/c++/build-android-examples-${{ matrix.abi }}/db-example
+            else
+                echo "file command not available on host"
+                # Check ELF header manually
+                hexdump -C examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example | head -1
+            fi
+            
             # Push executables to device
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/core-example /data/local/tmp/
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/
             
             # Make executables executable
-            adb shell 'chmod 755 /data/local/tmp/ailego-example'
-            adb shell 'chmod 755 /data/local/tmp/core-example'
-            adb shell 'chmod 755 /data/local/tmp/db-example'
+            adb shell 'chmod a+x /data/local/tmp/ailego-example'
+            adb shell 'chmod a+x /data/local/tmp/core-example'
+            adb shell 'chmod a+x /data/local/tmp/db-example'
             
             # Verify file integrity
             echo "File info on device:"

From d9100569230c80758efcb5411d7666b810bc41de Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 10:26:37 +0800
Subject: [PATCH 16/25] fix

---
 .github/workflows/android_build.yml | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 70d335ee..b7ffc7a6 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -153,7 +153,10 @@ jobs:
         uses: reactivecircus/android-emulator-runner@v2
         with:
           api-level: ${{ matrix.api }}
-          abi: ${{ matrix.abi }}
+          arch: ${{ matrix.abi }}
+          target: google_apis
+          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
+          disable-animations: true
           script: |
             # Wait for device to be ready
             adb wait-for-device
@@ -164,21 +167,8 @@ jobs:
             
             # Check device architecture
             echo "Device architecture info:"
-            adb shell getprop ro.product.cpu.abi
-            adb shell getprop ro.product.cpu.abilist
-            adb shell uname -m
-            
-            # Check binary architecture (requires file command)
-            echo "Checking if file command is available:"
-            if command -v file >/dev/null 2>&1; then
-                file examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example
-                file examples/c++/build-android-examples-${{ matrix.abi }}/core-example
-                file examples/c++/build-android-examples-${{ matrix.abi }}/db-example
-            else
-                echo "file command not available on host"
-                # Check ELF header manually
-                hexdump -C examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example | head -1
-            fi
+            adb shell 'getprop ro.product.cpu.abi'
+            adb shell 'getprop ro.product.cpu.abilist'
             
             # Push executables to device
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/

From a6df4ce0a5355da189687611be26be370be1be7f Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 11:13:50 +0800
Subject: [PATCH 17/25] fix

---
 .github/workflows/android_build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index b7ffc7a6..394014bb 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -19,7 +19,7 @@ jobs:
       fail-fast: false
       matrix:
         # abi: [arm64-v8a, armeabi-v7a, x86_64]
-        abi: [arm64-v8a]
+        abi: [x86_64]
         api: [21]
 
     steps:

From b0b0afa0b2375f24dafd9ce82eeb5889f56ec11a Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 15:01:31 +0800
Subject: [PATCH 18/25] fix

---
 .github/workflows/android_build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 394014bb..c79568ba 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -155,8 +155,8 @@ jobs:
           api-level: ${{ matrix.api }}
           arch: ${{ matrix.abi }}
           target: google_apis
-          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
-          disable-animations: true
+          # emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
+          # disable-animations: true
           script: |
             # Wait for device to be ready
             adb wait-for-device

From b6221ec1559a5a14edb8dfe47ee75c588464c386 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 15:39:06 +0800
Subject: [PATCH 19/25] fix

---
 .github/workflows/android_build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index c79568ba..937cf6d6 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -154,7 +154,7 @@ jobs:
         with:
           api-level: ${{ matrix.api }}
           arch: ${{ matrix.abi }}
-          target: google_apis
+          # target: google_apis
           # emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
           # disable-animations: true
           script: |

From c657d4ca1576f7d03f94da8d82a4b158a13bd37a Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 16:19:37 +0800
Subject: [PATCH 20/25] fix

---
 .github/workflows/android_build.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 937cf6d6..2f7fed15 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -110,7 +110,7 @@ jobs:
               -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
               -DANDROID_ABI=${{ matrix.abi }} \
               -DANDROID_PLATFORM=android-${{ matrix.api }} \
-              -DANDROID_STL=c++_shared \
+              -DANDROID_STL=c++_static \
               -DBUILD_PYTHON_BINDINGS=OFF \
               -DBUILD_TOOLS=OFF \
               -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
@@ -138,7 +138,7 @@ jobs:
                -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
                -DANDROID_ABI=${{ matrix.abi }} \
                -DANDROID_PLATFORM=android-${{ matrix.api }} \
-               -DANDROID_STL=c++_shared \
+               -DANDROID_STL=c++_static \
                -DCMAKE_BUILD_TYPE=Release \
                -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
                -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
@@ -176,9 +176,9 @@ jobs:
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/
             
             # Make executables executable
-            adb shell 'chmod a+x /data/local/tmp/ailego-example'
-            adb shell 'chmod a+x /data/local/tmp/core-example'
-            adb shell 'chmod a+x /data/local/tmp/db-example'
+            adb shell 'chmod 755 /data/local/tmp/ailego-example'
+            adb shell 'chmod 755 /data/local/tmp/core-example'
+            adb shell 'chmod 755 /data/local/tmp/db-example'
             
             # Verify file integrity
             echo "File info on device:"

From f711c07c2a5961a05abdfcb5db84f630fe825f9e Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 16:39:45 +0800
Subject: [PATCH 21/25] fix

---
 .github/workflows/android_build.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 2f7fed15..e249825f 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -31,7 +31,7 @@ jobs:
         with:
           path: |
             ~/.ccache
-          key: ${{ runner.os }}-dependencies-cache-${{ hashFiles('**/CMakeLists.txt', 'thirdparty/**') }}
+          key: ${{ runner.os }}-dependencies-cache-${{ hashFiles('**/CMakeLists.txt', 'thirdparty/**') }}-stl-fix
           restore-keys: |
             ${{ runner.os }}-dependencies-cache-
 
@@ -61,7 +61,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: build-host
-          key: ${{ runner.os }}-host-protoc-${{ hashFiles('src/**', 'CMakeLists.txt') }}
+          key: ${{ runner.os }}-host-protoc-${{ hashFiles('src/**', 'CMakeLists.txt') }}-stl-fix
           restore-keys: |
             ${{ runner.os }}-host-protoc-
 
@@ -87,7 +87,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: build-android-${{ matrix.abi }}
-          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**') }}
+          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**') }}-stl-fix
           restore-keys: |
             ${{ runner.os }}-android-build-${{ matrix.abi }}-
 
@@ -126,7 +126,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: examples/c++/build-android-examples-${{ matrix.abi }}
-          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}
+          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}-stl-fix
           restore-keys: |
             ${{ runner.os }}-examples-build-${{ matrix.abi }}-
 

From 4df824d135419c157cfcc47bd3606ea40f7fee57 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Thu, 12 Feb 2026 17:08:10 +0800
Subject: [PATCH 22/25] fix

---
 .github/workflows/android_build.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index e249825f..82667254 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -87,9 +87,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: build-android-${{ matrix.abi }}
-          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**') }}-stl-fix
-          restore-keys: |
-            ${{ runner.os }}-android-build-${{ matrix.abi }}-
+          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**', 'thirdparty/**') }}-stl-fix-2
 
       - name: Configure and Build
         shell: bash
@@ -126,9 +124,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: examples/c++/build-android-examples-${{ matrix.abi }}
-          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}-stl-fix
-          restore-keys: |
-            ${{ runner.os }}-examples-build-${{ matrix.abi }}-
+          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}-stl-fix-2
 
       - name: Build examples
         shell: bash

From 1f1c75550a196986358e3a733fe3061c0e8673d1 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Sat, 28 Feb 2026 16:32:50 +0800
Subject: [PATCH 23/25] merge main

---
 .github/dependabot.yml                        |   17 +
 .github/workflows/build_test_wheel.yml        |   10 +-
 .github/workflows/build_wheel.yml             |    8 +-
 .github/workflows/continuous_bench.yml        |   26 +
 .github/workflows/linux_arm64_docker_ci.yml   |  146 --
 .github/workflows/linux_x64_docker_ci.yml     |  146 --
 .github/workflows/mac_arm64_ci.yml            |  113 -
 .github/workflows/main.yml                    |  164 ++
 .github/workflows/nightly_coverage.yml        |   88 +-
 .github/workflows/scripts/run_vdb.sh          |   88 +
 .pre-commit-config.yaml                       |   13 +-
 CONTRIBUTING.md                               |    4 +-
 README.md                                     |   70 +-
 cmake/option.cmake                            |   10 +-
 pyproject.toml                                |   29 +-
 python/tests/test_embedding.py                | 2026 ++++++++++++++++-
 python/tests/test_reranker.py                 |  934 +++++++-
 python/tests/test_util.py                     |    5 -
 python/zvec/__init__.py                       |   37 +-
 python/zvec/common/constants.py               |   13 +-
 python/zvec/extension/__init__.py             |   33 +-
 .../zvec/extension/bm25_embedding_function.py |  375 +++
 python/zvec/extension/embedding.py            |  188 --
 python/zvec/extension/embedding_function.py   |  147 ++
 .../zvec/extension/jina_embedding_function.py |  240 ++
 python/zvec/extension/jina_function.py        |  182 ++
 .../zvec/extension/multi_vector_reranker.py   |  174 ++
 .../extension/openai_embedding_function.py    |  238 ++
 python/zvec/extension/openai_function.py      |  149 ++
 .../zvec/extension/qwen_embedding_function.py |  537 +++++
 python/zvec/extension/qwen_function.py        |  186 ++
 python/zvec/extension/qwen_rerank_function.py |  162 ++
 python/zvec/extension/rerank.py               |  343 ---
 python/zvec/extension/rerank_function.py      |   69 +
 ...sentence_transformer_embedding_function.py |  839 +++++++
 .../sentence_transformer_function.py          |  150 ++
 .../sentence_transformer_rerank_function.py   |  384 ++++
 python/zvec/tool/util.py                      |    2 +-
 src/core/algorithm/flat/flat_searcher.h       |    4 +-
 src/core/algorithm/flat/flat_streamer.cc      |    6 +-
 .../algorithm/flat/flat_streamer_context.h    |    2 +-
 .../mixed_reducer/mixed_streamer_reducer.cc   |    9 +-
 src/core/quantizer/cosine_converter.cc        |    6 +-
 .../combined_vector_column_indexer.cc         |   88 +-
 .../combined_vector_column_indexer.h          |    1 +
 tests/ailego/parallel/thread_queue_test.cc    |    2 +-
 .../cluster/opt_kmeans_cluster_test.cc        |    2 +-
 .../algorithm/flat/flat_searcher_test.cpp     |    2 +-
 .../flat/flat_streamer_buffer_test.cpp        |    8 +-
 .../core/algorithm/flat/flat_streamer_test.cc |   18 +-
 .../flat_sparse/flat_sparse_streamer_test.cc  |   10 +-
 .../hnsw/hnsw_streamer_buffer_test.cpp        |    2 +-
 .../core/algorithm/hnsw/hnsw_streamer_test.cc |   20 +-
 ..._test.cc => hnsw_sparse_searcher_test.cpp} |    0
 .../hnsw_sparse/hnsw_sparse_streamer_test.cc  |    8 +-
 tests/core/algorithm/ivf/ivf_searcher_test.cc |  229 +-
 .../metric/quantized_integer_metric_test.cc   |   20 +-
 tests/db/index/segment/segment_test.cc        |   60 +
 thirdparty/antlr/antlr4.patch                 |   24 +-
 thirdparty/rocksdb/CMakeLists.txt             |    1 +
 60 files changed, 7548 insertions(+), 1319 deletions(-)
 create mode 100644 .github/dependabot.yml
 create mode 100644 .github/workflows/continuous_bench.yml
 delete mode 100644 .github/workflows/linux_arm64_docker_ci.yml
 delete mode 100644 .github/workflows/linux_x64_docker_ci.yml
 delete mode 100644 .github/workflows/mac_arm64_ci.yml
 create mode 100644 .github/workflows/main.yml
 create mode 100644 .github/workflows/scripts/run_vdb.sh
 create mode 100644 python/zvec/extension/bm25_embedding_function.py
 delete mode 100644 python/zvec/extension/embedding.py
 create mode 100644 python/zvec/extension/embedding_function.py
 create mode 100644 python/zvec/extension/jina_embedding_function.py
 create mode 100644 python/zvec/extension/jina_function.py
 create mode 100644 python/zvec/extension/multi_vector_reranker.py
 create mode 100644 python/zvec/extension/openai_embedding_function.py
 create mode 100644 python/zvec/extension/openai_function.py
 create mode 100644 python/zvec/extension/qwen_embedding_function.py
 create mode 100644 python/zvec/extension/qwen_function.py
 create mode 100644 python/zvec/extension/qwen_rerank_function.py
 delete mode 100644 python/zvec/extension/rerank.py
 create mode 100644 python/zvec/extension/rerank_function.py
 create mode 100644 python/zvec/extension/sentence_transformer_embedding_function.py
 create mode 100644 python/zvec/extension/sentence_transformer_function.py
 create mode 100644 python/zvec/extension/sentence_transformer_rerank_function.py
 rename tests/core/algorithm/hnsw_sparse/{hnsw_sparse_searcher_test.cc => hnsw_sparse_searcher_test.cpp} (100%)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..30c1e44a
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,17 @@
+version: 2
+updates:
+  # GitHub Actions dependencies
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "monday"
+      time: "02:00"
+      timezone: "Asia/Shanghai"
+    labels:
+      - "dependencies"
+      - "github-actions"
+    commit-message:
+      prefix: "ci"
+      include: "scope"
+    open-pull-requests-limit: 5
diff --git a/.github/workflows/build_test_wheel.yml b/.github/workflows/build_test_wheel.yml
index 65362db2..8636d5e2 100644
--- a/.github/workflows/build_test_wheel.yml
+++ b/.github/workflows/build_test_wheel.yml
@@ -13,12 +13,12 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
 
       - name: Set up Python (for cibuildwheel controller)
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.11'
 
@@ -61,12 +61,12 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
 
       - name: Set up Python (for cibuildwheel controller)
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.11'
 
@@ -101,4 +101,4 @@ jobs:
           pip install --index-url https://test.pypi.org/simple/ zvec
           # Run a simple smoke test
           python -c "import zvec; print('Import OK:', zvec.__version__)"
-        shell: bash
+        shell: bash
\ No newline at end of file
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index b56af990..21cf3c40 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -13,12 +13,12 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
 
       - name: Set up Python (for cibuildwheel controller)
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.11'
 
@@ -63,12 +63,12 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
 
       - name: Set up Python (for cibuildwheel controller)
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.11'
 
diff --git a/.github/workflows/continuous_bench.yml b/.github/workflows/continuous_bench.yml
new file mode 100644
index 00000000..34fe527e
--- /dev/null
+++ b/.github/workflows/continuous_bench.yml
@@ -0,0 +1,26 @@
+name: Continuous Benchmark
+on:
+  push:
+    branches: [ "main", "ci/continuous_bench_squash" ]
+    paths-ignore:
+      - '**.md'
+  workflow_dispatch:
+
+concurrency:
+  group: cb-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  benchmark:
+    runs-on: vdbbench
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Run VectorDBBench
+        env:
+          DATABASE_URL: ${{ secrets.DATABASE_URL }}
+        run: |
+          bash .github/workflows/scripts/run_vdb.sh
\ No newline at end of file
diff --git a/.github/workflows/linux_arm64_docker_ci.yml b/.github/workflows/linux_arm64_docker_ci.yml
deleted file mode 100644
index 96a0f32d..00000000
--- a/.github/workflows/linux_arm64_docker_ci.yml
+++ /dev/null
@@ -1,146 +0,0 @@
-name: Zvec LinuxARM64 CI
-
-on:
-  push:
-    branches: [ "main" ]
-    paths-ignore:
-      - '**.md'
-  merge_group:
-  pull_request:
-    branches: [ "main" ]
-    paths-ignore:
-      - '**.md'
-  workflow_dispatch:
-
-concurrency:
-  group: pr-${{ github.workflow }}-${{ github.event.pull_request.number }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  build:
-    name: Zvec LinuxARM64 CI
-    runs-on: linux_arm64
-
-    strategy:
-      matrix:
-        python-version: ['3.10']
-      fail-fast: false
-
-    container:
-      image: quay.io/pypa/manylinux_2_28_aarch64:2024-03-10-4935fcc
-      options: --user root
-
-    steps:
-      - name: Set up Python path for manylinux
-        run: |
-          case "${{ matrix.python-version }}" in
-            "3.10") PY_PATH="/opt/python/cp310-cp310" ;;
-            "3.11") PY_PATH="/opt/python/cp311-cp311" ;;
-            "3.12") PY_PATH="/opt/python/cp312-cp312" ;;
-            *) echo "Unsupported Python version: ${{ matrix.python-version }}"; exit 1 ;;
-          esac
-          echo "PYTHON_BIN=$PY_PATH/bin/python" >> $GITHUB_ENV
-          echo "PIP_BIN=$PY_PATH/bin/pip" >> $GITHUB_ENV
-          echo "CLANG_FORMATTER_BIN=$PY_PATH/bin/clang-format" >> $GITHUB_ENV
-          $PY_PATH/bin/python --version
-        shell: bash
-
-      - name: Prepare clean build directory
-        run: |
-          export CLEAN_WORKSPACE="/tmp/zvec"
-          mkdir -p "$CLEAN_WORKSPACE"
-          cd "$CLEAN_WORKSPACE"
-
-          git config --global --add safe.directory "$CLEAN_WORKSPACE"
-          git clone --recursive "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" .
-
-          if [ -n "${{ github.event.number }}" ]; then
-            git fetch origin "pull/${{ github.event.number }}/head"
-            git checkout FETCH_HEAD
-          else
-            git checkout "${{ github.sha }}"
-          fi
-
-          echo "CLEAN_WORKSPACE=$CLEAN_WORKSPACE" >> $GITHUB_ENV
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        shell: bash
-
-      - name: Install Ruff
-        run: |
-          ${{ env.PIP_BIN }} install --upgrade pip ruff
-        shell: bash
-
-      - name: Run Ruff Linter
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ${{ env.PYTHON_BIN }} -m ruff check .
-        shell: bash
-
-      - name: Run Ruff Formatter Check
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ${{ env.PYTHON_BIN }} -m ruff format --check .
-        shell: bash
-
-      - name: Run clang-format Check
-        run: |
-          ${{ env.PIP_BIN }} install clang-format==18.1.8
-          cd "$CLEAN_WORKSPACE"
-          
-
-          CPP_FILES=$(find . -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cc" -o -name "*.cxx" \) \
-            ! -path "./build/*" \
-            ! -path "./tests/*" \
-            ! -path "./scripts/*" \
-            ! -path "./python/*" \
-            ! -path "./thirdparty/*" \
-            ! -path "./.git/*")
-
-          if [ -z "$CPP_FILES" ]; then
-            echo "No C++ files found to check."
-            exit 0
-          fi
-
-          ${{ env.CLANG_FORMATTER_BIN }} --dry-run --Werror $CPP_FILES
-        shell: bash
-
-      - name: Install Python dependencies and build package
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          NPROC=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2)
-
-          ${{ env.PIP_BIN }} install cmake ninja
-
-          CMAKE_GENERATOR="Unix Makefiles" \
-          CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
-          ${{ env.PIP_BIN }} install -v . --config-settings='cmake.define.BUILD_TOOLS="ON"'
-        shell: bash
-
-      - name: Install test dependencies
-        run: |
-          ${{ env.PIP_BIN }} install pytest pytest-cov
-        shell: bash
-
-      - name: Run Python Tests with Coverage
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ${{ env.PYTHON_BIN }} -m pytest python/tests/ --cov=zvec --cov-report=xml --no-cov-on-fail
-        shell: bash
-
-      - name: Run Cpp Tests
-        run: |
-          ${{ env.PIP_BIN }} install pybind11==3.0
-          cd "$CLEAN_WORKSPACE/build"
-          make unittest -j$(nproc)
-        shell: bash
-
-      - name: Run Cpp Examples
-        run: |
-          cd "$CLEAN_WORKSPACE/examples/c++"
-          mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release
-          make -j $(nproc) && ./db-example && ./core-example && ./ailego-example
-        shell: bash
diff --git a/.github/workflows/linux_x64_docker_ci.yml b/.github/workflows/linux_x64_docker_ci.yml
deleted file mode 100644
index 2edd0995..00000000
--- a/.github/workflows/linux_x64_docker_ci.yml
+++ /dev/null
@@ -1,146 +0,0 @@
-name: Zvec LinuxX64 CI
-
-on:
-  push:
-    branches: [ "main" ]
-    paths-ignore:
-      - '**.md'
-  merge_group:
-  pull_request:
-    branches: [ "main" ]
-    paths-ignore:
-      - '**.md'
-  workflow_dispatch:
-
-concurrency:
-  group: pr-${{ github.workflow }}-${{ github.event.pull_request.number }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  build:
-    name: Zvec LinuxX64 CI
-    runs-on: linux_x64
-
-    strategy:
-      matrix:
-        python-version: ['3.10']
-      fail-fast: false
-
-    container:
-      image: quay.io/pypa/manylinux_2_28_x86_64:2024-03-10-4935fcc
-      options: --user root
-
-    steps:
-      - name: Set up Python path for manylinux
-        run: |
-          case "${{ matrix.python-version }}" in
-            "3.10") PY_PATH="/opt/python/cp310-cp310" ;;
-            "3.11") PY_PATH="/opt/python/cp311-cp311" ;;
-            "3.12") PY_PATH="/opt/python/cp312-cp312" ;;
-            *) echo "Unsupported Python version: ${{ matrix.python-version }}"; exit 1 ;;
-          esac
-          echo "PYTHON_BIN=$PY_PATH/bin/python" >> $GITHUB_ENV
-          echo "PIP_BIN=$PY_PATH/bin/pip" >> $GITHUB_ENV
-          echo "CLANG_FORMATTER_BIN=$PY_PATH/bin/clang-format" >> $GITHUB_ENV
-          $PY_PATH/bin/python --version
-        shell: bash
-
-      - name: Prepare clean build directory
-        run: |
-          export CLEAN_WORKSPACE="/tmp/zvec"
-          mkdir -p "$CLEAN_WORKSPACE"
-          cd "$CLEAN_WORKSPACE"
-
-          git config --global --add safe.directory "$CLEAN_WORKSPACE"
-          git clone --recursive "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" .
-
-          if [ -n "${{ github.event.number }}" ]; then
-            git fetch origin "pull/${{ github.event.number }}/head"
-            git checkout FETCH_HEAD
-          else
-            git checkout "${{ github.sha }}"
-          fi
-
-          echo "CLEAN_WORKSPACE=$CLEAN_WORKSPACE" >> $GITHUB_ENV
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        shell: bash
-
-      - name: Install Ruff
-        run: |
-          ${{ env.PIP_BIN }} install --upgrade pip ruff
-        shell: bash
-
-      - name: Run Ruff Linter
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ${{ env.PYTHON_BIN }} -m ruff check .
-        shell: bash
-
-      - name: Run Ruff Formatter Check
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ${{ env.PYTHON_BIN }} -m ruff format --check .
-        shell: bash
-
-      - name: Run clang-format Check
-        run: |
-          ${{ env.PIP_BIN }} install clang-format==18.1.8 
-          cd "$CLEAN_WORKSPACE"
-          
-          
-          CPP_FILES=$(find . -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cc" -o -name "*.cxx" \) \
-            ! -path "./build/*" \
-            ! -path "./tests/*" \
-            ! -path "./scripts/*" \
-            ! -path "./python/*" \
-            ! -path "./thirdparty/*" \
-            ! -path "./.git/*")
-
-          if [ -z "$CPP_FILES" ]; then
-            echo "No C++ files found to check."
-            exit 0
-          fi
-
-          ${{ env.CLANG_FORMATTER_BIN }} --dry-run --Werror $CPP_FILES
-        shell: bash
-
-      - name: Install Python dependencies and build package
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          NPROC=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2)
-
-          ${{ env.PIP_BIN }} install cmake ninja
-
-          CMAKE_GENERATOR="Unix Makefiles" \
-          CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
-          ${{ env.PIP_BIN }} install -v . --config-settings='cmake.define.BUILD_TOOLS="ON"'
-        shell: bash
-
-      - name: Install test dependencies
-        run: |
-          ${{ env.PIP_BIN }} install pytest pytest-cov
-        shell: bash
-
-      - name: Run Python Tests with Coverage
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ${{ env.PYTHON_BIN }} -m pytest python/tests/ --cov=zvec --cov-report=xml --no-cov-on-fail
-        shell: bash
-
-      - name: Run Cpp Tests
-        run: |
-          ${{ env.PIP_BIN }} install pybind11==3.0
-          cd "$CLEAN_WORKSPACE/build"
-          make unittest -j$(nproc)
-        shell: bash
-      
-      - name: Run Cpp Examples
-        run: |
-          cd "$CLEAN_WORKSPACE/examples/c++"
-          mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release
-          make -j $(nproc) && ./db-example && ./core-example && ./ailego-example
-        shell: bash
diff --git a/.github/workflows/mac_arm64_ci.yml b/.github/workflows/mac_arm64_ci.yml
deleted file mode 100644
index 73aa9227..00000000
--- a/.github/workflows/mac_arm64_ci.yml
+++ /dev/null
@@ -1,113 +0,0 @@
-name: Zvec MacArm64 CI
-
-on:
-  push:
-    branches: [ "main" ]
-    paths-ignore:
-      - '**.md'
-  merge_group:
-  pull_request:
-    branches: [ "main" ]
-    paths-ignore:
-      - '**.md'
-  workflow_dispatch:
-
-concurrency:
-  group: pr-${{ github.workflow }}-${{ github.event.pull_request.number }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
-jobs:
-  build:
-    name: Zvec MacArm64 CI
-    runs-on: mac_m1_arm
-
-    steps:
-      - name: Run Ruff Linter
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ruff check .
-        shell: bash
-
-      - name: Run Ruff Formatter Check (ensure code is formatted)
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          ruff format --check .
-        shell: bash
-
-      - name: Run clang-format Check
-        run: |
-          cd "$CLEAN_WORKSPACE"
-
-          CPP_FILES=$(find . -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cc" -o -name "*.cxx" \) \
-            ! -path "./build/*" \
-            ! -path "./tests/*" \
-            ! -path "./scripts/*" \
-            ! -path "./python/*" \
-            ! -path "./thirdparty/*" \
-            ! -path "./.git/*")
-
-          if [ -z "$CPP_FILES" ]; then
-            echo "No C++ files found to check."
-            exit 0
-          fi
-
-          clang-format --dry-run --Werror $CPP_FILES
-        shell: bash
-
-      - name: Prepare clean build directory
-        run: |
-          export CLEAN_WORKSPACE="/tmp/zvec"
-          rm -rf "$CLEAN_WORKSPACE"
-          mkdir -p "$CLEAN_WORKSPACE"
-          cd "$CLEAN_WORKSPACE"
-
-          git config --global --add safe.directory "$CLEAN_WORKSPACE"
-          git clone --recursive "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" .
-
-          if [ -n "${{ github.event.number }}" ]; then
-            git fetch origin "pull/${{ github.event.number }}/head"
-            git checkout FETCH_HEAD
-          else
-            git checkout "${{ github.sha }}"
-          fi
-
-          echo "CLEAN_WORKSPACE=$CLEAN_WORKSPACE" >> $GITHUB_ENV
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        shell: bash
-
-      - name: Install Python dependencies and build package
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          pip install --upgrade pip pytest pytest-cov ruff
-          
-          NPROC=$(nproc 2>/dev/null || echo $(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2))
-          echo "ParallelGroup: Using $NPROC parallel jobs for build"
-          
-          CMAKE_GENERATOR="Unix Makefiles" \
-          CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
-          pip install -v . \
-            --config-settings='cmake.define.BUILD_TOOLS="ON"'
-        shell: bash
-
-      - name: Run Python Tests with Coverage
-        run: |
-          cd "$CLEAN_WORKSPACE"
-          python -m pytest python/tests/ --cov=zvec --cov-report=xml --no-cov-on-fail
-        shell: bash
-
-      - name: Run Cpp Tests with Coverage
-        run: |
-          cd "$CLEAN_WORKSPACE/build"
-          make unittest -j 16
-        shell: bash
-
-      - name: Run Cpp Examples
-        run: |
-          cd "$CLEAN_WORKSPACE/examples/c++"
-          mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release
-          make -j 16 && ./db-example && ./core-example && ./ailego-example
-        shell: bash
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..0e9eab62
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,164 @@
+name: Main
+
+on:
+  push:
+    branches: [ "main" ]
+    paths-ignore:
+      - '**.md'
+  merge_group:
+  pull_request:
+    branches: [ "main" ]
+    paths-ignore:
+      - '**.md'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  # Code quality checks (fast, run first)
+  lint:
+    name: Code Quality Checks
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+          cache-dependency-path: 'pyproject.toml'
+
+      - name: Install linting tools
+        run: |
+          python -m pip install --upgrade pip \
+            ruff==v0.14.4 \
+            clang-format==18.1.8
+        shell: bash
+
+      - name: Run Ruff Linter
+        run: python -m ruff check .
+        shell: bash
+
+      - name: Run Ruff Formatter Check
+        run: python -m ruff format --check .
+        shell: bash
+
+      - name: Run clang-format Check
+        run: |
+          CPP_FILES=$(find . -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cc" -o -name "*.cxx" \) \
+            ! -path "./build/*" \
+            ! -path "./tests/*" \
+            ! -path "./scripts/*" \
+            ! -path "./python/*" \
+            ! -path "./thirdparty/*" \
+            ! -path "./.git/*")
+
+          if [ -z "$CPP_FILES" ]; then
+            echo "No C++ files found to check."
+            exit 0
+          fi
+
+          clang-format --dry-run --Werror $CPP_FILES
+        shell: bash
+
+  # Build and test matrix (parallel execution)
+  build-and-test:
+    name: Build & Test (${{ matrix.platform }}, py${{ matrix.python-version }})
+    needs: lint
+    runs-on: ${{ matrix.os }}
+    
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.10']
+        include:
+          - os: macos-15
+            platform: macos-arm64
+            arch_flag: ""  # ARM64 uses auto-detection
+          - os: ubuntu-24.04-arm
+            platform: linux-arm64
+            arch_flag: ""  # ARM64 uses auto-detection
+          - os: ubuntu-24.04
+            platform: linux-x64
+            arch_flag: ""  # Use native CPU microarchitecture
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+          cache-dependency-path: 'pyproject.toml'
+
+      - name: Set up environment variables
+        run: |
+          # Set number of processors for parallel builds
+          if [[ "${{ matrix.platform }}" == "macos-arm64" ]]; then
+            NPROC=$(sysctl -n hw.ncpu 2>/dev/null || echo 2)
+          else
+            NPROC=$(nproc 2>/dev/null || echo 2)
+          fi
+          echo "NPROC=$NPROC" >> $GITHUB_ENV
+          echo "Using $NPROC parallel jobs for builds"
+          
+          # Add Python user base bin to PATH for pip-installed CLI tools
+          echo "$(python -c 'import site; print(site.USER_BASE)')/bin" >> $GITHUB_PATH
+        shell: bash
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip \
+            pybind11==3.0 \
+            cmake==3.30.0 \
+            ninja==1.11.1 \
+            pytest \
+            scikit-build-core \
+            setuptools_scm
+        shell: bash
+
+      - name: Build from source
+        run: |
+          cd "$GITHUB_WORKSPACE"
+          
+          CMAKE_GENERATOR="Unix Makefiles" \
+          CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
+          python -m pip install -v . \
+            --no-build-isolation \
+            --config-settings='cmake.define.BUILD_TOOLS="ON"' \
+            ${{ matrix.arch_flag }}
+        shell: bash
+
+      - name: Run C++ Tests
+        run: |
+          cd "$GITHUB_WORKSPACE/build"
+          make unittest -j$NPROC
+        shell: bash
+
+      - name: Run Python Tests
+        run: |
+          cd "$GITHUB_WORKSPACE"
+          python -m pytest python/tests/
+        shell: bash
+
+      - name: Run C++ Examples
+        run: |
+          cd "$GITHUB_WORKSPACE/examples/c++"
+          mkdir build && cd build
+          cmake .. -DCMAKE_BUILD_TYPE=Release
+          make -j $NPROC
+          ./db-example
+          ./core-example
+          ./ailego-example
+        shell: bash
diff --git a/.github/workflows/nightly_coverage.yml b/.github/workflows/nightly_coverage.yml
index b9642716..e100bf4a 100644
--- a/.github/workflows/nightly_coverage.yml
+++ b/.github/workflows/nightly_coverage.yml
@@ -13,87 +13,81 @@ permissions:
 jobs:
   coverage:
     name: Nightly Coverage Report
-    runs-on: linux_x64
+    runs-on: ubuntu-24.04
 
     strategy:
       matrix:
         python-version: ['3.10']
       fail-fast: false
 
-    container:
-      image: zvec-registry.cn-hongkong.cr.aliyuncs.com/zvec/zvec:0.0.2
-      options: --user root
-
     steps:
-      - name: Activate Conda environment
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          ref: main  # Always use main for nightly
+          submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+          cache-dependency-path: 'pyproject.toml'
+
+      - name: Set up environment variables
         run: |
-          if [[ "${{ matrix.python-version }}" == "3.10" ]]; then
-            ENV_NAME="py310"
-          elif [[ "${{ matrix.python-version }}" == "3.11" ]]; then
-            ENV_NAME="py311"
-          elif [[ "${{ matrix.python-version }}" == "3.12" ]]; then
-            ENV_NAME="py312"
-          else
-            echo "Unsupported Python version"
-            exit 1
-          fi
-          echo "CONDA_ENV_NAME=$ENV_NAME" >> $GITHUB_ENV
-          source /opt/miniforge3/bin/activate "$ENV_NAME"
-          python --version
+          # Set number of processors for parallel builds
+          NPROC=$(nproc 2>/dev/null || echo 2)
+          echo "NPROC=$NPROC" >> $GITHUB_ENV
+          echo "Using $NPROC parallel jobs for builds"
+          
+          # Add Python user base bin to PATH for pip-installed CLI tools
+          echo "$(python -c 'import site; print(site.USER_BASE)')/bin" >> $GITHUB_PATH
         shell: bash
 
-      - name: Prepare clean build directory
+      - name: Install dependencies
         run: |
-          export CLEAN_WORKSPACE="/tmp/zvec"
-          mkdir -p "$CLEAN_WORKSPACE"
-          cd "$CLEAN_WORKSPACE"
-
-          git config --global --add safe.directory "$CLEAN_WORKSPACE"
-          git clone --recursive "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" .
-
-          git checkout main  # Always use main for nightly
-
-          echo "CLEAN_WORKSPACE=$CLEAN_WORKSPACE" >> $GITHUB_ENV
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          python -m pip install --upgrade pip \
+            cmake==3.30.0 \
+            ninja==1.11.1 \
+            pytest \
+            pytest-cov \
+            scikit-build-core \
+            setuptools_scm
         shell: bash
 
       - name: Build with COVERAGE config
         run: |
-          source /opt/miniforge3/bin/activate "${{ env.CONDA_ENV_NAME }}"
-          cd "$CLEAN_WORKSPACE"
-          pip install --upgrade pip pytest pytest-cov
-          
-          NPROC=$(nproc 2>/dev/null || echo 2)
-          echo "Using $NPROC parallel jobs"
+          cd "$GITHUB_WORKSPACE"
           
           CMAKE_GENERATOR="Unix Makefiles" \
           CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
-          pip install -v . \
-            --config-settings="cmake.build-type=COVERAGE"
+          python -m pip install -v . \
+            --no-build-isolation \
+            --config-settings="cmake.build-type=COVERAGE" \
+            --config-settings='cmake.define.ENABLE_ZEN3="ON"'
         shell: bash
 
       - name: Run Python Tests with Coverage
         run: |
-          source /opt/miniforge3/bin/activate "${{ env.CONDA_ENV_NAME }}"
-          cd "$CLEAN_WORKSPACE"
+          cd "$GITHUB_WORKSPACE"
           python -m pytest python/tests/ --cov=zvec --cov-report=xml
         shell: bash
 
       - name: Run C++ Tests and Generate Coverage
         run: |
-          cd "$CLEAN_WORKSPACE/build"
-          make unittest -j$(nproc)  # Run all (nightly can afford it)
-          cd "$CLEAN_WORKSPACE"
+          cd "$GITHUB_WORKSPACE/build"
+          make unittest -j$NPROC
+          cd "$GITHUB_WORKSPACE"
           # Ensure gcov.sh is executable
           chmod +x scripts/gcov.sh
           bash scripts/gcov.sh -k
         shell: bash
 
       - name: Upload Coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         with:
-          files: ${{ env.CLEAN_WORKSPACE }}/proxima-zvec-filtered.lcov.info,${{ env.CLEAN_WORKSPACE }}/coverage.xml
+          files: ./proxima-zvec-filtered.lcov.info,./coverage.xml
           flags: python,cpp,nightly
           name: nightly-linux-py${{ matrix.python-version }}
           token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/scripts/run_vdb.sh b/.github/workflows/scripts/run_vdb.sh
new file mode 100644
index 00000000..f153a598
--- /dev/null
+++ b/.github/workflows/scripts/run_vdb.sh
@@ -0,0 +1,88 @@
+set -e
+
+QUANTIZE_TYPE_LIST="int8 int4 fp16 fp32"
+CASE_TYPE_LIST="Performance768D1M Performance768D10M Performance1536D500K" # respectively test cosine, ip # Performance960D1M l2 metrics
+LOG_FILE="bench.log"
+DATE=$(date +%Y-%m-%d_%H-%M-%S)
+NPROC=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2)
+
+# COMMIT_ID = branch-date-sha
+COMMIT_ID=${GITHUB_REF_NAME}-"$DATE"-$(echo ${GITHUB_WORKFLOW_SHA} | cut -c1-8)
+COMMIT_ID=$(echo "$COMMIT_ID" | sed 's/\//_/g')
+echo "COMMIT_ID: $COMMIT_ID"
+echo "GITHUB_WORKFLOW_SHA: $GITHUB_WORKFLOW_SHA"
+echo "workspace: $GITHUB_WORKSPACE"
+DB_LABEL_PREFIX="Zvec16c64g-$COMMIT_ID"
+
+# install zvec
+git submodule update --init
+
+# for debug
+#cd ..
+#export SKBUILD_BUILD_DIR="$GITHUB_WORKSPACE/../build"
+pwd
+
+python3 -m venv .venv
+source .venv/bin/activate
+pip install cmake ninja psycopg2-binary loguru fire
+pip install -e /opt/VectorDBBench
+
+CMAKE_GENERATOR="Unix Makefiles" \
+CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
+pip install -v "$GITHUB_WORKSPACE"
+
+for CASE_TYPE in $CASE_TYPE_LIST; do
+    echo "Running VectorDBBench for $CASE_TYPE"
+    DATASET_DESC=""
+    if [ "$CASE_TYPE" == "Performance768D1M" ]; then
+        DATASET_DESC="Performance768D1M - Cohere Cosine"
+    elif [ "$CASE_TYPE" == "Performance768D10M" ]; then
+        DATASET_DESC="Performance768D10M - Cohere Cosine"
+    else
+        DATASET_DESC="Performance1536D500K - OpenAI IP"
+    fi
+
+    for QUANTIZE_TYPE in $QUANTIZE_TYPE_LIST; do
+        DB_LABEL="$DB_LABEL_PREFIX-$CASE_TYPE-$QUANTIZE_TYPE"
+        echo "Running VectorDBBench for $DB_LABEL"
+
+        VDB_PARAMS="--path ${DB_LABEL} --db-label ${DB_LABEL} --case-type ${CASE_TYPE} --num-concurrency 12,14,16,18,20"
+        if [ "$CASE_TYPE" == "Performance768D1M" ]; then
+            VDB_PARAMS="${VDB_PARAMS} --m 15 --ef-search 180"
+        elif [ "$CASE_TYPE" == "Performance768D10M" ]; then
+            VDB_PARAMS="${VDB_PARAMS} --m 50 --ef-search 118 --is-using-refiner"
+        else #Performance1536D500K using default params + refiner to monitor performance degradation
+            VDB_PARAMS="${VDB_PARAMS} --m 50 --ef-search 100 --is-using-refiner"
+        fi
+
+        if [ "$QUANTIZE_TYPE" == "fp32" ]; then
+            vectordbbench zvec ${VDB_PARAMS} 2>&1 | tee $LOG_FILE
+        else
+            vectordbbench zvec ${VDB_PARAMS} --quantize-type "${QUANTIZE_TYPE}" 2>&1 | tee $LOG_FILE
+        fi
+
+        RESULT_JSON_PATH=$(grep -o "/opt/VectorDBBench/.*\.json" $LOG_FILE)
+        QPS=$(jq -r '.results[0].metrics.qps' "$RESULT_JSON_PATH")
+        RECALL=$(jq -r '.results[0].metrics.recall' "$RESULT_JSON_PATH")
+        LATENCY_P99=$(jq -r '.results[0].metrics.serial_latency_p99' "$RESULT_JSON_PATH")
+        LOAD_DURATION=$(jq -r '.results[0].metrics.load_duration' "$RESULT_JSON_PATH")
+
+        #quote the var to avoid space in the label
+        label_list="case_type=\"${CASE_TYPE}\",dataset_desc=\"${DATASET_DESC}\",db_label=\"${DB_LABEL}\",commit=\"${COMMIT_ID}\",date=\"${DATE}\",quantize_type=\"${QUANTIZE_TYPE}\""
+        # replace `/` with `_` in label_list
+        label_list=$(echo "$label_list" | sed 's/\//_/g')
+        cat <<EOF > prom_metrics.txt
+        # TYPE vdb_bench_qps gauge
+        vdb_bench_qps{$label_list} $QPS
+        # TYPE vdb_bench_recall gauge
+        vdb_bench_recall{$label_list} $RECALL
+        # TYPE vdb_bench_latency_p99 gauge
+        vdb_bench_latency_p99{$label_list} $LATENCY_P99
+        # TYPE vdb_bench_load_duration gauge
+        vdb_bench_load_duration{$label_list} $LOAD_DURATION
+EOF
+        echo "prom_metrics:"
+        cat prom_metrics.txt
+        curl --data-binary @prom_metrics.txt "http://47.93.34.27:9091/metrics/job/benchmarks-${CASE_TYPE}/case_type/${CASE_TYPE}/quantize_type/${QUANTIZE_TYPE}" -v
+    done
+done
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index abe63c6b..39808c89 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,7 @@
+default_install_hook_types:
+  - pre-commit
+  - commit-msg
+  
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.14.4
@@ -31,14 +35,11 @@ repos:
 
 
   - repo: https://github.com/compilerla/conventional-pre-commit
-    rev: v3.0.0
+    rev: v4.3.0
     hooks:
       - id: conventional-pre-commit
-        stages: [ commit-msg ]
-        args: [
-          --types, feat,fix,docs,style,refactor,test,chore,perf,ci,build,revert,
-          --scope-optional
-        ]
+        stages: [commit-msg]
+        args: [--verbose]
 
 
   - repo: local
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index deff5af9..625ab54a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,14 +12,14 @@ By participating, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md
 ## Development Setup
 
 ### Prerequisites
-- Python ≥ 3.9
+- Python 3.10 - 3.12
 - CMake ≥ 3.26, < 4.0 (`cmake --version`)
 - A C++17-compatible compiler (e.g., `g++-11+`, `clang++`, Apple Clang on macOS)
 
 ### Clone & Initialize
 
 ```bash
-git clone --recursive https://github.com/your-org/zvec.git
+git clone --recursive https://github.com/alibaba/zvec.git
 cd zvec
 ```
 
diff --git a/README.md b/README.md
index 4a9f9611..226d4f15 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-<div align=" center">
+<div align="center">
   <picture>
     <source media="(prefers-color-scheme: dark)" srcset="https://zvec.oss-cn-hongkong.aliyuncs.com/logo/github_log_2.svg" />
     <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/logo/github_logo_1.svg" width="400" alt="zvec logo" />
@@ -6,23 +6,23 @@
 </div>
 
 <p align="center">
-  <a href="https://github.com/alibaba/zvec/actions/workflows/linux_x64_docker_ci.yml"><img src="https://github.com/alibaba/zvec/actions/workflows/linux_x64_docker_ci.yml/badge.svg?branch=main" alt="Linux x64 CI"/></a>
-  <a href="https://github.com/alibaba/zvec/actions/workflows/linux_arm64_docker_ci.yml"><img src="https://github.com/alibaba/zvec/actions/workflows/linux_arm64_docker_ci.yml/badge.svg?branch=main" alt="Linux ARM64 CI"/></a>
-  <a href="https://github.com/alibaba/zvec/actions/workflows/mac_arm64_ci.yml"><img src="https://github.com/alibaba/zvec/actions/workflows/mac_arm64_ci.yml/badge.svg?branch=main" alt="macOS ARM64 CI"/></a>
-  <br>
   <a href="https://codecov.io/github/alibaba/zvec"><img src="https://codecov.io/github/alibaba/zvec/graph/badge.svg?token=O81CT45B66" alt="Code Coverage"/></a>
+  <a href="https://github.com/alibaba/zvec/actions/workflows/main.yml"><img src="https://github.com/alibaba/zvec/actions/workflows/main.yml/badge.svg?branch=main" alt="Main"/></a>
   <a href="https://pypi.org/project/zvec/"><img src="https://img.shields.io/pypi/v/zvec.svg" alt="PyPI Release"/></a>
   <a href="https://pypi.org/project/zvec/"><img src="https://img.shields.io/pypi/pyversions/zvec.svg" alt="Python Versions"/></a>
   <a href="https://github.com/alibaba/zvec/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-blue.svg" alt="License"/></a>
 </p>
 
+<p align="center">
+  <a href="https://trendshift.io/repositories/20830" target="_blank"><img src="https://trendshift.io/api/badge/repositories/20830" alt="alibaba%2Fzvec | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</p>
+
 <p align="center">
   <a href="https://zvec.org/en/docs/quickstart/">🚀 <strong>Quickstart</strong> </a> |
   <a href="https://zvec.org/en/">🏠 <strong>Home</strong> </a> |
   <a href="https://zvec.org/en/docs/">📚 <strong>Docs</strong> </a> |
   <a href="https://zvec.org/en/docs/benchmarks/">📊 <strong>Benchmarks</strong> </a> |
-  <a href="https://discord.gg/rKddFBBu9z">🎮 <strong>Discord</strong> </a> |
-  <a href="https://x.com/zvec_ai">🐦 <strong>X (Twitter)</strong> </a>
+  <a href="https://discord.gg/rKddFBBu9z">🎮 <strong>Discord</strong> </a>
 </p>
 
 **Zvec** is an open-source, in-process vector database — lightweight, lightning-fast, and designed to embed directly into applications. Built on **Proxima** (Alibaba's battle-tested vector search engine), it delivers production-grade, low-latency, scalable similarity search with minimal setup.
@@ -30,25 +30,33 @@
 ## 💫 Features
 
 - **Blazing Fast**: Searches billions of vectors in milliseconds.
-- **Simple, Just Works**: Install with `pip install zvec` and start searching in seconds. No servers, no config, no fuss.
+- **Simple, Just Works**: [Install](#-installation) and start searching in seconds. No servers, no config, no fuss.
 - **Dense + Sparse Vectors**: Work with both dense and sparse embeddings, with native support for multi-vector queries in a single call.
 - **Hybrid Search**: Combine semantic similarity with structured filters for precise results.
 - **Runs Anywhere**: As an in-process library, Zvec runs wherever your code runs — notebooks, servers, CLI tools, or even edge devices.
 
 ## 📦 Installation
 
-Install Zvec from PyPI with a single command:
+### [Python](https://pypi.org/project/zvec/)
+
+**Requirements**: Python 3.10 - 3.12
 
 ```bash
 pip install zvec
 ```
 
-**Requirements**:
+### [Node.js](https://www.npmjs.com/package/@zvec/zvec)
+
+```bash
+npm install @zvec/zvec
+```
+
+### ✅ Supported Platforms
 
-- Python 3.10 - 3.12
-- **Supported platforms**:
-  - Linux (x86_64/ARM64)
-  - macOS (ARM64)
+- Linux (x86_64, ARM64)
+- macOS (ARM64)
+
+### 🛠️ Building from Source
 
 If you prefer to build Zvec from source, please check the [Building from Source](https://zvec.org/en/docs/build/) guide.
 
@@ -64,7 +72,7 @@ schema = zvec.CollectionSchema(
 )
 
 # Create collection
-collection = zvec.create_and_open(path="./zvec_example", schema=schema,)
+collection = zvec.create_and_open(path="./zvec_example", schema=schema)
 
 # Insert documents
 collection.insert([
@@ -96,30 +104,14 @@ For detailed benchmark methodology, configurations, and complete results, please
 
 Stay updated and get support — scan or click:
 
-<table align="center" style="border-collapse: collapse; margin: 16px auto; width: 100%; max-width: 520px;">
-  <tr>
-    <td align="center" style="padding: 8px; width: 25%;">
-      <div style="font-weight: 600; font-size: 14px; margin-bottom: 6px;">💬 DingTalk</div>
-      <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/dingding.png" width="100" style="border-radius: 8px; border: 1px solid #ddd;">
-    </td>
-    <td align="center" style="padding: 8px; width: 25%;">
-      <div style="font-weight: 600; font-size: 14px; margin-bottom: 6px;">📱 WeChat</div>
-      <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/wechat.png" width="100" style="border-radius: 8px; border: 1px solid #ddd;">
-    </td>
-    <td align="center" style="padding: 8px; width: 25%;">
-      <div style="font-weight: 600; font-size: 14px; margin-bottom: 6px;">🎮 Discord</div>
-      <a href="https://discord.gg/rKddFBBu9z" target="_blank" style="display: inline-block; width: 100px; height: 100px; background: #5865F2; border-radius: 8px; text-decoration: none; color: white; font-size: 12px; display: flex; align-items: center; justify-content: center; line-height: 1;">
-        Join Server
-      </a>
-    </td>
-    <td align="center" style="padding: 8px; width: 25%;">
-      <div style="font-weight: 600; font-size: 14px; margin-bottom: 6px;">🐦 X (Twitter)</div>
-      <a href="https://x.com/zvec_ai" target="_blank" style="display: inline-block; width: 100px; height: 100px; background: #000; border-radius: 8px; text-decoration: none; color: white; font-size: 12px; display: flex; align-items: center; justify-content: center; line-height: 1;">
-        Follow @zvec_ai
-      </a>
-    </td>
-  </tr>
-</table>
+<div align="center">
+
+| 💬 DingTalk | 📱 WeChat | 🎮 Discord |
+|:---:|:---:|:---:|
+| <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/dingding.png" width="150"/> | <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/wechat.png?v=2" width="150"/> | [![Discord](https://img.shields.io/badge/Discord-Join%20Server-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/rKddFBBu9z) |
+| Scan to join | Scan to join | Click to join |
+
+</div>
 
 </div>
 
diff --git a/cmake/option.cmake b/cmake/option.cmake
index ca74cf05..01388564 100644
--- a/cmake/option.cmake
+++ b/cmake/option.cmake
@@ -13,6 +13,8 @@ option(ENABLE_SAPPHIRERAPIDS "Enable Intel Sapphire Rapids Server CPU microarchi
 option(ENABLE_EMERALDRAPIDS "Enable Intel Emerald Rapids Server CPU microarchitecture" OFF)
 option(ENABLE_GRANITERAPIDS "Enable Intel Granite Rapids Server CPU microarchitecture" OFF)
 
+option(ENABLE_NATIVE "Enable native CPU microarchitecture" ON)
+
 ## AMD Microarchitectures
 option(ENABLE_ZEN1 "Enable AMD Zen+ Family 17h CPU microarchitecture" OFF)
 option(ENABLE_ZEN2 "Enable AMD Zen 2 Family 17h CPU microarchitecture" OFF)
@@ -36,9 +38,10 @@ set(ARCH_OPTIONS
   ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3
   ENABLE_ARMV8A ENABLE_ARMV8.1A ENABLE_ARMV8.2A ENABLE_ARMV8.3A ENABLE_ARMV8.4A
   ENABLE_ARMV8.5A ENABLE_ARMV8.6A
+  ENABLE_NATIVE
 )
 
-set(AUTO_DETECT_ARCH ON)
+option(AUTO_DETECT_ARCH "Auto detect CPU microarchitecture" ON)
 foreach(opt IN LISTS ARCH_OPTIONS)
   if(${opt})
     set(AUTO_DETECT_ARCH OFF)
@@ -122,8 +125,11 @@ if(MSVC)
   return()
 endif()
 
-
 if(NOT AUTO_DETECT_ARCH)
+  if(ENABLE_NATIVE)
+    add_arch_flag("-march=native" NATIVE ENABLE_NATIVE)
+  endif()
+
   if(ENABLE_ZEN3)
     add_arch_flag("-march=znver3" ZNVER3 ENABLE_ZEN3)
   endif()
diff --git a/pyproject.toml b/pyproject.toml
index 051bb136..5e99edfa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,8 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Topic :: Database",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Software Development :: Libraries :: Python Modules",
@@ -80,7 +82,13 @@ dev = [
 # BUILD SYSTEM CONFIGURATION (scikit-build-core)
 ######################################################################################################
 [build-system]
-requires = ["scikit-build-core >=0.11", "pybind11 >=3.0", "setuptools_scm>=8.0"]
+requires = [
+    "scikit-build-core >=0.11",
+    "pybind11 >=3.0",
+    "setuptools_scm>=8.0",
+    "cmake>=3.26,<4.0",
+    "ninja>=1.11",
+]
 build-backend = "scikit_build_core.build"
 
 [tool.scikit-build]
@@ -155,6 +163,8 @@ build = [
     "cp310-*",
     "cp311-*",
     "cp312-*",
+    "cp313-*",
+    "cp314-*",
 ]
 build-frontend = "build"
 test-requires = ["pytest", "numpy"]
@@ -165,7 +175,8 @@ archs = ["auto"]
 test-command = "cd {project} && pytest python/tests -v --tb=short"
 manylinux-x86_64-image = "manylinux_2_28"
 manylinux-aarch64-image = "manylinux_2_28"
-skip = "*musllinux*"
+# Skip 32-bit builds and musllinux
+skip = ["*-manylinux_i686", "*-musllinux*"]
 
 [tool.cibuildwheel.macos]
 archs = ["arm64"]
@@ -218,10 +229,21 @@ ignore = [
     "E731",     # Lambda assignment (used in callbacks)
     "B019",     # `functools.lru_cache` on methods (handled manually)
     "PLR0912",  # Too many branches
+    "PLC0105",  # Ignore contravariant
+    "RUF002",   # Ignore Unicode
 ]
 fixable = ["ALL"]
 unfixable = []
 
+# Ignore all errors in docstrings
+[tool.ruff.lint.pydocstyle]
+convention = "google"  # or "numpy", "pep257"
+ignore-decorators = ["typing.overload"]
+
+[tool.ruff.lint.flake8-type-checking]
+# Don't check code examples in docstrings
+quote-annotations = true
+
 [tool.ruff.lint.isort]
 required-imports = ["from __future__ import annotations"]
 known-first-party = ["zvec"]
@@ -238,6 +260,9 @@ known-first-party = ["zvec"]
 "python/zvec/model/doc.py" = [
     "RUF023",   # Unused sort (for __slot__)
 ]
+"python/zvec/extension/**" = [
+    "PLC0415",  # Import outside top-level (dynamic imports in _get_model)
+]
 
 [tool.ruff.format]
 indent-style = "space"
diff --git a/python/tests/test_embedding.py b/python/tests/test_embedding.py
index 0eb5d6b8..e0a57a17 100644
--- a/python/tests/test_embedding.py
+++ b/python/tests/test_embedding.py
@@ -15,20 +15,31 @@
 
 import os
 from http import HTTPStatus
-from unittest.mock import MagicMock, patch
-
+from unittest.mock import MagicMock, patch, Mock
 
+import numpy as np
 import pytest
-from zvec.extension import QwenEmbeddingFunction
+from zvec.extension import (
+    BM25EmbeddingFunction,
+    DefaultLocalDenseEmbedding,
+    DefaultLocalSparseEmbedding,
+    OpenAIDenseEmbedding,
+    QwenDenseEmbedding,
+    QwenSparseEmbedding,
+)
+
+# Environment variable to control integration tests
+# Set ZVEC_RUN_INTEGRATION_TESTS=1 to run real API/model tests
+RUN_INTEGRATION_TESTS = os.environ.get("ZVEC_RUN_INTEGRATION_TESTS", "0") == "1"
 
 
 # ----------------------------
-# QwenEmbeddingFunction Test Case
+# QwenDenseEmbedding Test Case
 # ----------------------------
-class TestQwenEmbeddingFunction:
+class TestQwenDenseEmbedding:
     def test_init_with_api_key(self):
         # Test initialization with explicit API key
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="test_key")
+        embedding_func = QwenDenseEmbedding(dimension=128, api_key="test_key")
         assert embedding_func.dimension == 128
         assert embedding_func.model == "text-embedding-v4"
         assert embedding_func._api_key == "test_key"
@@ -36,33 +47,28 @@ def test_init_with_api_key(self):
     @patch.dict(os.environ, {"DASHSCOPE_API_KEY": "env_key"})
     def test_init_with_env_api_key(self):
         # Test initialization with API key from environment
-        embedding_func = QwenEmbeddingFunction(dimension=128)
+        embedding_func = QwenDenseEmbedding(dimension=128)
         assert embedding_func._api_key == "env_key"
 
-    def test_init_without_api_key(self):
-        # Test initialization without API key raises ValueError
-        with pytest.raises(ValueError, match="DashScope API key is required"):
-            QwenEmbeddingFunction(dimension=128)
-
     @patch.dict(os.environ, {"DASHSCOPE_API_KEY": ""})
     def test_init_with_empty_env_api_key(self):
         # Test initialization with empty API key from environment
         with pytest.raises(ValueError, match="DashScope API key is required"):
-            QwenEmbeddingFunction(dimension=128)
+            QwenDenseEmbedding(dimension=128)
 
     def test_model_property(self):
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="test_key")
+        embedding_func = QwenDenseEmbedding(dimension=128, api_key="test_key")
         assert embedding_func.model == "text-embedding-v4"
 
-        embedding_func = QwenEmbeddingFunction(
+        embedding_func = QwenDenseEmbedding(
             dimension=128, model="custom-model", api_key="test_key"
         )
         assert embedding_func.model == "custom-model"
 
-    @patch("zvec.extension.embedding.require_module")
+    @patch("zvec.extension.qwen_function.require_module")
     def test_embed_with_empty_text(self, mock_require_module):
         # Test embed method with empty text raises ValueError
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="test_key")
+        embedding_func = QwenDenseEmbedding(dimension=128, api_key="test_key")
 
         with pytest.raises(
             ValueError, match="Input text cannot be empty or whitespace only"
@@ -72,7 +78,7 @@ def test_embed_with_empty_text(self, mock_require_module):
         with pytest.raises(TypeError):
             embedding_func.embed(None)
 
-    @patch("zvec.extension.embedding.require_module")
+    @patch("zvec.extension.qwen_function.require_module")
     def test_embed_success(self, mock_require_module):
         # Test successful embedding
         mock_dashscope = MagicMock()
@@ -82,18 +88,20 @@ def test_embed_success(self, mock_require_module):
         mock_dashscope.TextEmbedding.call.return_value = mock_response
         mock_require_module.return_value = mock_dashscope
 
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="test_key")
+        embedding_func = QwenDenseEmbedding(dimension=3, api_key="test_key")
+        # Clear cache to avoid interference
+        embedding_func.embed.cache_clear()
         result = embedding_func.embed("test text")
 
         assert result == [0.1, 0.2, 0.3]
         mock_dashscope.TextEmbedding.call.assert_called_once_with(
             model="text-embedding-v4",
             input="test text",
-            dimension=128,
+            dimension=3,
             output_type="dense",
         )
 
-    @patch("zvec.extension.embedding.require_module")
+    @patch("zvec.extension.qwen_function.require_module")
     def test_embed_http_error(self, mock_require_module):
         # Test embedding with HTTP error
         mock_dashscope = MagicMock()
@@ -103,29 +111,1989 @@ def test_embed_http_error(self, mock_require_module):
         mock_dashscope.TextEmbedding.call.return_value = mock_response
         mock_require_module.return_value = mock_dashscope
 
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="test_key")
+        embedding_func = QwenDenseEmbedding(dimension=128, api_key="test_key")
+        embedding_func.embed.cache_clear()
 
         with pytest.raises(ValueError):
             embedding_func.embed("test text")
 
-    @patch("zvec.extension.embedding.require_module")
+    @patch("zvec.extension.qwen_function.require_module")
     def test_embed_invalid_response(self, mock_require_module):
         # Test embedding with invalid response (wrong number of embeddings)
         mock_dashscope = MagicMock()
         mock_response = MagicMock()
         mock_response.status_code = HTTPStatus.OK
-        mock_response.output.embeddings = []
+        mock_response.output = {"embeddings": []}
         mock_dashscope.TextEmbedding.call.return_value = mock_response
         mock_require_module.return_value = mock_dashscope
 
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="test_key")
+        embedding_func = QwenDenseEmbedding(dimension=128, api_key="test_key")
+        embedding_func.embed.cache_clear()
 
         with pytest.raises(ValueError):
             embedding_func.embed("test text")
 
-    @pytest.mark.skip(reason="Qwen Embedding is not available in CI")
-    def test_embed(self):
-        # Test embedding with invalid dimension
-        embedding_func = QwenEmbeddingFunction(dimension=128, api_key="xxx")
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_embed_success(self):
+        """Integration test with real DashScope API.
+
+        To run this test, set environment variable:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+            export DASHSCOPE_API_KEY=your-api-key
+        """
+        embedding_func = QwenDenseEmbedding(dimension=128)
         dense = embedding_func("test text")
         assert len(dense) == 128
+
+
+# ----------------------------
+# QwenSparseEmbedding Test Case
+# ----------------------------
+class TestQwenSparseEmbedding:
+    """Test suite for QwenSparseEmbedding (Qwen sparse embedding via DashScope API)."""
+
+    def test_init_with_api_key(self):
+        """Test initialization with explicit API key."""
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        assert embedding_func._dimension == 1024
+        assert embedding_func.model == "text-embedding-v4"
+        assert embedding_func._api_key == "test_key"
+        # encoding_type defaults to "query" via extra_params
+        assert embedding_func.extra_params.get("encoding_type", "query") == "query"
+
+    def test_init_with_custom_encoding_type(self):
+        """Test initialization with custom encoding type."""
+        embedding_func = QwenSparseEmbedding(
+            dimension=1024, encoding_type="document", api_key="test_key"
+        )
+        assert embedding_func.extra_params.get("encoding_type") == "document"
+
+    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": "env_key"})
+    def test_init_with_env_api_key(self):
+        """Test initialization with API key from environment."""
+        embedding_func = QwenSparseEmbedding(dimension=1024)
+        assert embedding_func._api_key == "env_key"
+
+    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": ""})
+    def test_init_without_api_key(self):
+        """Test initialization fails without API key."""
+        with pytest.raises(ValueError, match="DashScope API key is required"):
+            QwenSparseEmbedding(dimension=1024)
+
+    def test_model_property(self):
+        """Test model property."""
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        assert embedding_func.model == "text-embedding-v4"
+
+        embedding_func = QwenSparseEmbedding(
+            dimension=1024, model="text-embedding-v3", api_key="test_key"
+        )
+        assert embedding_func.model == "text-embedding-v3"
+
+    def test_encoding_type_property(self):
+        """Test encoding_type via extra_params."""
+        query_emb = QwenSparseEmbedding(
+            dimension=1024, encoding_type="query", api_key="test_key"
+        )
+        assert query_emb.extra_params.get("encoding_type") == "query"
+
+        doc_emb = QwenSparseEmbedding(
+            dimension=1024, encoding_type="document", api_key="test_key"
+        )
+        assert doc_emb.extra_params.get("encoding_type") == "document"
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_with_empty_text(self, mock_require_module):
+        """Test embed method with empty text raises ValueError."""
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+
+        with pytest.raises(
+            ValueError, match="Input text cannot be empty or whitespace only"
+        ):
+            embedding_func.embed("")
+
+        with pytest.raises(
+            ValueError, match="Input text cannot be empty or whitespace only"
+        ):
+            embedding_func.embed("   ")
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_with_non_string_input(self, mock_require_module):
+        """Test embed method with non-string input raises TypeError."""
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            embedding_func.embed(123)
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            embedding_func.embed(None)
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_success(self, mock_require_module):
+        """Test successful sparse embedding generation."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        # Sparse embedding returns array of {index, value, token} objects
+        mock_response.output = {
+            "embeddings": [
+                {
+                    "sparse_embedding": [
+                        {"index": 10, "value": 0.5, "token": "机器"},
+                        {"index": 245, "value": 0.8, "token": "学习"},
+                        {"index": 1023, "value": 1.2, "token": "算法"},
+                    ]
+                }
+            ]
+        }
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        # Clear cache to avoid interference
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test text")
+
+        # Verify result is a dict
+        assert isinstance(result, dict)
+        # Verify keys are integers
+        assert all(isinstance(k, int) for k in result.keys())
+        # Verify values are floats
+        assert all(isinstance(v, float) for v in result.values())
+        # Verify all values are positive
+        assert all(v > 0 for v in result.values())
+        # Verify sorted by indices
+        keys = list(result.keys())
+        assert keys == sorted(keys)
+        # Verify specific keys
+        assert keys == [10, 245, 1023]
+
+        mock_dashscope.TextEmbedding.call.assert_called_once_with(
+            model="text-embedding-v4",
+            input="test text",
+            dimension=1024,
+            output_type="sparse",
+            text_type="query",
+        )
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_with_document_encoding_type(self, mock_require_module):
+        """Test embedding with document encoding type."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        mock_response.output = {
+            "embeddings": [
+                {
+                    "sparse_embedding": [
+                        {"index": 5, "value": 0.3, "token": "文档"},
+                        {"index": 100, "value": 0.7, "token": "内容"},
+                        {"index": 500, "value": 0.9, "token": "检索"},
+                    ]
+                }
+            ]
+        }
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(
+            dimension=1024, encoding_type="document", api_key="test_key"
+        )
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test document")
+
+        assert isinstance(result, dict)
+        assert list(result.keys()) == [5, 100, 500]
+
+        # Verify text_type parameter is "document"
+        call_args = mock_dashscope.TextEmbedding.call.call_args
+        assert call_args[1]["text_type"] == "document"
+        assert call_args[1]["output_type"] == "sparse"
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_output_sorted_by_indices(self, mock_require_module):
+        """Test that output is always sorted by indices in ascending order."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        # Return unsorted indices
+        mock_response.output = {
+            "embeddings": [
+                {
+                    "sparse_embedding": [
+                        {"index": 9999, "value": 1.5, "token": "A"},
+                        {"index": 5, "value": 2.0, "token": "B"},
+                        {"index": 1234, "value": 0.8, "token": "C"},
+                        {"index": 77, "value": 3.2, "token": "D"},
+                        {"index": 500, "value": 1.1, "token": "E"},
+                    ]
+                }
+            ]
+        }
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test sorting")
+
+        # Verify keys are sorted
+        result_keys = list(result.keys())
+        assert result_keys == sorted(result_keys)
+        # Verify expected sorted order
+        assert result_keys == [5, 77, 500, 1234, 9999]
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_filters_zero_values(self, mock_require_module):
+        """Test that zero and negative values are filtered out."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        # Include zero and negative values
+        mock_response.output = {
+            "embeddings": [
+                {
+                    "sparse_embedding": [
+                        {"index": 10, "value": 0.5, "token": "正"},
+                        {
+                            "index": 20,
+                            "value": 0.0,
+                            "token": "零",
+                        },  # Should be filtered
+                        {
+                            "index": 30,
+                            "value": -0.3,
+                            "token": "负",
+                        },  # Should be filtered
+                        {"index": 40, "value": 0.8, "token": "正"},
+                        {
+                            "index": 50,
+                            "value": 0.0,
+                            "token": "零",
+                        },  # Should be filtered
+                    ]
+                }
+            ]
+        }
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test filtering")
+
+        # Only positive values should remain
+        assert list(result.keys()) == [10, 40]
+        assert all(v > 0 for v in result.values())
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_http_error(self, mock_require_module):
+        """Test embedding with HTTP error."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.BAD_REQUEST
+        mock_response.message = "Bad Request"
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(ValueError, match="DashScope API error"):
+            embedding_func.embed("test text")
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_invalid_response_no_embeddings(self, mock_require_module):
+        """Test embedding with invalid response (no embeddings)."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        mock_response.output = {"embeddings": []}
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(ValueError, match="Expected exactly 1 embedding"):
+            embedding_func.embed("test text")
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_invalid_response_not_dict(self, mock_require_module):
+        """Test embedding with invalid response (sparse_embedding not list)."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        # sparse_embedding should be list, not dict
+        mock_response.output = {
+            "embeddings": [{"sparse_embedding": {"index": 10, "value": 0.5}}]
+        }
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(
+            ValueError, match="'sparse_embedding' field is missing or not a list"
+        ):
+            embedding_func.embed("test text")
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_callable_interface(self, mock_require_module):
+        """Test that embedding function is callable."""
+        mock_dashscope = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = HTTPStatus.OK
+        mock_response.output = {
+            "embeddings": [
+                {
+                    "sparse_embedding": [
+                        {"index": 100, "value": 1.0, "token": "测试"},
+                        {"index": 200, "value": 0.5, "token": "调用"},
+                    ]
+                }
+            ]
+        }
+        mock_dashscope.TextEmbedding.call.return_value = mock_response
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+
+        # Test calling the function directly
+        result = embedding_func("test text")
+        assert isinstance(result, dict)
+        assert list(result.keys()) == [100, 200]
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_embed_api_connection_error(self, mock_require_module):
+        """Test handling of API connection errors."""
+        mock_dashscope = MagicMock()
+        mock_dashscope.TextEmbedding.call.side_effect = Exception("Connection timeout")
+        mock_require_module.return_value = mock_dashscope
+
+        embedding_func = QwenSparseEmbedding(dimension=1024, api_key="test_key")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(RuntimeError, match="Failed to call DashScope API"):
+            embedding_func.embed("test text")
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_embed_success(self):
+        """Integration test with real DashScope API.
+
+        To run this test, set environment variable:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+            export DASHSCOPE_API_KEY=your-api-key
+        """
+        # Test query embedding
+        query_emb = QwenSparseEmbedding(dimension=1024, encoding_type="query")
+        query_vec = query_emb.embed("machine learning")
+
+        assert isinstance(query_vec, dict)
+        assert len(query_vec) > 0
+        assert all(isinstance(k, int) for k in query_vec.keys())
+        assert all(isinstance(v, float) and v > 0 for v in query_vec.values())
+
+        # Verify sorted output
+        keys = list(query_vec.keys())
+        assert keys == sorted(keys)
+
+        # Test document embedding
+        doc_emb = QwenSparseEmbedding(dimension=1024, encoding_type="document")
+        doc_vec = doc_emb.embed("Machine learning is a subset of AI")
+
+        assert isinstance(doc_vec, dict)
+        assert len(doc_vec) > 0
+
+        # Verify sorted output
+        doc_keys = list(doc_vec.keys())
+        assert doc_keys == sorted(doc_keys)
+
+
+# ----------------------------
+# OpenAIDenseEmbedding Test Case
+# ----------------------------
+class TestOpenAIDenseEmbedding:
+    def test_init_with_api_key(self):
+        """Test initialization with explicit API key."""
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test-key")
+        assert embedding_func.dimension == 1536  # Default for text-embedding-3-small
+        assert embedding_func.model == "text-embedding-3-small"
+        assert embedding_func._api_key == "sk-test-key"
+
+    @patch.dict(os.environ, {"OPENAI_API_KEY": "sk-env-key"})
+    def test_init_with_env_api_key(self):
+        """Test initialization with API key from environment."""
+        embedding_func = OpenAIDenseEmbedding()
+        assert embedding_func._api_key == "sk-env-key"
+
+    @patch.dict(os.environ, {"OPENAI_API_KEY": ""})
+    def test_init_without_api_key(self):
+        """Test initialization fails without API key."""
+        with pytest.raises(ValueError, match="OpenAI API key is required"):
+            OpenAIDenseEmbedding()
+
+    def test_init_with_custom_dimension(self):
+        """Test initialization with custom dimension."""
+        embedding_func = OpenAIDenseEmbedding(
+            model="text-embedding-3-large", dimension=1024, api_key="sk-test"
+        )
+        assert embedding_func.dimension == 1024
+        assert embedding_func.model == "text-embedding-3-large"
+
+    def test_init_with_base_url(self):
+        """Test initialization with custom base URL."""
+        embedding_func = OpenAIDenseEmbedding(
+            api_key="sk-test", base_url="https://custom.openai.com/"
+        )
+        assert embedding_func._base_url == "https://custom.openai.com/"
+
+    def test_model_property(self):
+        """Test model property."""
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+        assert embedding_func.model == "text-embedding-3-small"
+
+        embedding_func = OpenAIDenseEmbedding(
+            model="text-embedding-ada-002", api_key="sk-test"
+        )
+        assert embedding_func.model == "text-embedding-ada-002"
+
+    def test_extra_params(self):
+        """Test extra_params property."""
+        # Test without extra params
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+        assert embedding_func.extra_params == {}
+
+        # Test with extra params
+        embedding_func = OpenAIDenseEmbedding(
+            api_key="sk-test",
+            encoding_format="float",
+            user="test-user",
+        )
+        assert embedding_func.extra_params == {
+            "encoding_format": "float",
+            "user": "test-user",
+        }
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_with_empty_text(self, mock_require_module):
+        """Test embed method with empty text raises ValueError."""
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+
+        with pytest.raises(
+            ValueError, match="Input text cannot be empty or whitespace only"
+        ):
+            embedding_func.embed("")
+
+        with pytest.raises(
+            ValueError, match="Input text cannot be empty or whitespace only"
+        ):
+            embedding_func.embed("   ")
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_with_non_string_input(self, mock_require_module):
+        """Test embed method with non-string input raises TypeError."""
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            embedding_func.embed(123)
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            embedding_func.embed(None)
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_success(self, mock_require_module):
+        """Test successful embedding generation."""
+        # Mock OpenAI client
+        mock_openai = Mock()
+        mock_client = Mock()
+        mock_response = Mock()
+
+        # Create mock embedding data
+        fake_embedding = [0.1, 0.2, 0.3]
+        mock_embedding_obj = Mock()
+        mock_embedding_obj.embedding = fake_embedding
+        mock_response.data = [mock_embedding_obj]
+
+        mock_client.embeddings.create.return_value = mock_response
+        mock_openai.OpenAI.return_value = mock_client
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(dimension=3, api_key="sk-test")
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test text")
+
+        assert result == [0.1, 0.2, 0.3]
+        mock_client.embeddings.create.assert_called_once_with(
+            model="text-embedding-3-small", input="test text", dimensions=3
+        )
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_with_custom_model(self, mock_require_module):
+        """Test embedding with custom model."""
+        mock_openai = Mock()
+        mock_client = Mock()
+        mock_response = Mock()
+
+        fake_embedding = [0.1] * 1536
+        mock_embedding_obj = Mock()
+        mock_embedding_obj.embedding = fake_embedding
+        mock_response.data = [mock_embedding_obj]
+
+        mock_client.embeddings.create.return_value = mock_response
+        mock_openai.OpenAI.return_value = mock_client
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(
+            model="text-embedding-ada-002", api_key="sk-test"
+        )
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test text")
+
+        assert len(result) == 1536
+        mock_client.embeddings.create.assert_called_once_with(
+            model="text-embedding-ada-002", input="test text"
+        )
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_api_error(self, mock_require_module):
+        """Test handling of API errors."""
+        mock_openai = Mock()
+        mock_client = Mock()
+
+        # Simulate API error
+        api_error = Mock()
+        api_error.__class__.__name__ = "APIError"
+        mock_openai.APIError = type("APIError", (Exception,), {})
+        mock_openai.APIConnectionError = type("APIConnectionError", (Exception,), {})
+
+        mock_client.embeddings.create.side_effect = mock_openai.APIError(
+            "Rate limit exceeded"
+        )
+        mock_openai.OpenAI.return_value = mock_client
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(RuntimeError, match="Failed to call OpenAI API"):
+            embedding_func.embed("test text")
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_invalid_response(self, mock_require_module):
+        """Test handling of invalid API response."""
+        mock_openai = Mock()
+        mock_client = Mock()
+        mock_response = Mock()
+
+        # Empty response data
+        mock_response.data = []
+
+        mock_client.embeddings.create.return_value = mock_response
+        mock_openai.OpenAI.return_value = mock_client
+        mock_openai.APIError = type("APIError", (Exception,), {})
+        mock_openai.APIConnectionError = type("APIConnectionError", (Exception,), {})
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(ValueError, match="no embedding data returned"):
+            embedding_func.embed("test text")
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_dimension_mismatch(self, mock_require_module):
+        """Test handling of dimension mismatch."""
+        mock_openai = Mock()
+        mock_client = Mock()
+        mock_response = Mock()
+
+        # Return embedding with wrong dimension
+        fake_embedding = [0.1] * 512
+        mock_embedding_obj = Mock()
+        mock_embedding_obj.embedding = fake_embedding
+        mock_response.data = [mock_embedding_obj]
+
+        mock_client.embeddings.create.return_value = mock_response
+        mock_openai.OpenAI.return_value = mock_client
+        mock_openai.APIError = type("APIError", (Exception,), {})
+        mock_openai.APIConnectionError = type("APIConnectionError", (Exception,), {})
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(dimension=1536, api_key="sk-test")
+        embedding_func.embed.cache_clear()
+
+        with pytest.raises(ValueError, match="Dimension mismatch"):
+            embedding_func.embed("test text")
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_callable(self, mock_require_module):
+        """Test that embedding function is callable."""
+        mock_openai = Mock()
+        mock_client = Mock()
+        mock_response = Mock()
+
+        fake_embedding = [0.1] * 1536
+        mock_embedding_obj = Mock()
+        mock_embedding_obj.embedding = fake_embedding
+        mock_response.data = [mock_embedding_obj]
+
+        mock_client.embeddings.create.return_value = mock_response
+        mock_openai.OpenAI.return_value = mock_client
+        mock_openai.APIError = type("APIError", (Exception,), {})
+        mock_openai.APIConnectionError = type("APIConnectionError", (Exception,), {})
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
+        embedding_func.embed.cache_clear()
+
+        # Test calling the function directly
+        result = embedding_func("test text")
+        assert isinstance(result, list)
+        assert len(result) == 1536
+
+    @patch("zvec.extension.openai_function.require_module")
+    def test_embed_with_base_url(self, mock_require_module):
+        """Test embedding with custom base URL."""
+        mock_openai = Mock()
+        mock_client = Mock()
+        mock_response = Mock()
+
+        fake_embedding = [0.1] * 1536
+        mock_embedding_obj = Mock()
+        mock_embedding_obj.embedding = fake_embedding
+        mock_response.data = [mock_embedding_obj]
+
+        mock_client.embeddings.create.return_value = mock_response
+        mock_openai.OpenAI.return_value = mock_client
+        mock_openai.APIError = type("APIError", (Exception,), {})
+        mock_openai.APIConnectionError = type("APIConnectionError", (Exception,), {})
+        mock_require_module.return_value = mock_openai
+
+        embedding_func = OpenAIDenseEmbedding(
+            api_key="sk-test", base_url="https://custom.openai.com/"
+        )
+        embedding_func.embed.cache_clear()
+        result = embedding_func.embed("test text")
+
+        # Verify client was created with custom base URL
+        mock_openai.OpenAI.assert_called_once_with(
+            api_key="sk-test", base_url="https://custom.openai.com/"
+        )
+        assert len(result) == 1536
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_embed_success(self):
+        """Integration test with real OpenAI API.
+
+        To run this test, set environment variable:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+            export OPENAI_API_KEY=sk-...
+        """
+        embedding_func = OpenAIDenseEmbedding(
+            model="text-embedding-v4",
+            dimension=256,
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+        vector = embedding_func.embed("Hello, world!")
+        assert len(vector) == 256
+        assert isinstance(vector, list)
+        assert all(isinstance(x, float) for x in vector)
+
+
+# ----------------------------
+# DefaultLocalDenseEmbedding Test Case
+# ----------------------------
+class TestDefaultLocalDenseEmbedding:
+    """Test cases for DefaultLocalDenseEmbedding."""
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_init_success(self, mock_require_module):
+        """Test successful initialization with mocked model."""
+        # Mock sentence_transformers module
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_model.device = "cpu"
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        # Initialize embedding function
+        emb_func = DefaultLocalDenseEmbedding()
+
+        # Assertions
+        assert emb_func.dimension == 384
+        assert emb_func.model_name == "all-MiniLM-L6-v2"
+        assert emb_func.model_source == "huggingface"
+        assert emb_func.device == "cpu"
+        mock_st.SentenceTransformer.assert_called_once_with(
+            "all-MiniLM-L6-v2", device=None, trust_remote_code=True
+        )
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_init_with_custom_device(self, mock_require_module):
+        """Test initialization with custom device."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_model.device = "cuda"
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding(device="cuda")
+
+        assert emb_func.device == "cuda"
+        mock_st.SentenceTransformer.assert_called_once_with(
+            "all-MiniLM-L6-v2", device="cuda", trust_remote_code=True
+        )
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_init_with_modelscope(self, mock_require_module):
+        """Test initialization with ModelScope as model source."""
+        mock_st = Mock()
+        mock_ms = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_model.device = "cpu"
+        mock_st.SentenceTransformer.return_value = mock_model
+
+        def require_module_side_effect(module_name):
+            if module_name == "sentence_transformers":
+                return mock_st
+            elif module_name == "modelscope":
+                return mock_ms
+            raise ImportError(f"No module named '{module_name}'")
+
+        mock_require_module.side_effect = require_module_side_effect
+
+        # Mock snapshot_download at the correct import location
+        with patch(
+            "modelscope.hub.snapshot_download.snapshot_download",
+            return_value="/path/to/cached/model",
+        ):
+            emb_func = DefaultLocalDenseEmbedding(model_source="modelscope")
+
+        # Assertions
+        assert emb_func.dimension == 384
+        assert emb_func.model_name == "iic/nlp_gte_sentence-embedding_chinese-small"
+        assert emb_func.model_source == "modelscope"
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_init_with_invalid_model_source(self, mock_require_module):
+        """Test initialization with invalid model_source raises ValueError."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        with pytest.raises(ValueError, match="Invalid model_source"):
+            DefaultLocalDenseEmbedding(model_source="invalid_source")
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_success(self, mock_require_module):
+        """Test successful embedding generation."""
+        # Mock embedding output
+        fake_embedding = np.random.rand(384).astype(np.float32)
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+
+        # Configure encode method
+        mock_model.encode = Mock(return_value=fake_embedding)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding()
+        result = emb_func.embed("Hello, world!")
+
+        # Assertions
+        assert isinstance(result, list)
+        assert len(result) == 384
+        assert all(isinstance(x, float) for x in result)
+        mock_model.encode.assert_called_once_with(
+            "Hello, world!",
+            convert_to_numpy=True,
+            normalize_embeddings=True,
+            batch_size=32,
+        )
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_with_normalization(self, mock_require_module):
+        """Test embedding with L2 normalization."""
+        # Create a normalized vector
+        fake_embedding = np.random.rand(384).astype(np.float32)
+        fake_embedding = fake_embedding / np.linalg.norm(fake_embedding)
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+
+        # Configure encode method
+        mock_model.encode = Mock(return_value=fake_embedding)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding(normalize_embeddings=True)
+        result = emb_func.embed("Test sentence")
+
+        # Check if vector is normalized (L2 norm should be close to 1.0)
+        result_array = np.array(result)
+        norm = np.linalg.norm(result_array)
+        assert abs(norm - 1.0) < 1e-5
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_empty_string(self, mock_require_module):
+        """Test embedding with empty string raises ValueError."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding()
+
+        with pytest.raises(ValueError, match="Input text cannot be empty"):
+            emb_func.embed("")
+
+        with pytest.raises(ValueError, match="Input text cannot be empty"):
+            emb_func.embed("   ")
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_non_string_input(self, mock_require_module):
+        """Test embedding with non-string input raises TypeError."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding()
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            emb_func.embed(123)
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            emb_func.embed(None)
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_callable(self, mock_require_module):
+        """Test that embedding function is callable."""
+        fake_embedding = np.random.rand(384).astype(np.float32)
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+
+        # Configure encode method
+        mock_model.encode = Mock(return_value=fake_embedding)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding()
+
+        # Test calling the function directly
+        result = emb_func("Test text")
+        assert isinstance(result, list)
+        assert len(result) == 384
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_semantic_similarity(self, mock_require_module):
+        """Test semantic similarity between similar and different texts."""
+        # Create mock embeddings for similar and different texts
+        similar_emb_1 = np.array([1.0, 0.0, 0.0] + [0.0] * 381, dtype=np.float32)
+        similar_emb_2 = np.array([0.9, 0.1, 0.0] + [0.0] * 381, dtype=np.float32)
+        different_emb = np.array([0.0, 0.0, 1.0] + [0.0] * 381, dtype=np.float32)
+
+        # Normalize
+        similar_emb_1 = similar_emb_1 / np.linalg.norm(similar_emb_1)
+        similar_emb_2 = similar_emb_2 / np.linalg.norm(similar_emb_2)
+        different_emb = different_emb / np.linalg.norm(different_emb)
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+
+        # Configure encode method with side_effect for multiple calls
+        mock_model.encode = Mock(
+            side_effect=[similar_emb_1, similar_emb_2, different_emb]
+        )
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding()
+
+        v1 = emb_func.embed("The cat sits on the mat")
+        v2 = emb_func.embed("A feline rests on a rug")
+        v3 = emb_func.embed("Python programming")
+
+        # Calculate similarities
+        similarity_high = np.dot(v1, v2)
+        similarity_low = np.dot(v1, v3)
+
+        assert similarity_high > similarity_low
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_model_loading_error(self, mock_require_module):
+        """Test handling of model loading failure."""
+        # Clear model cache
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+        mock_st = Mock()
+        mock_st.SentenceTransformer.side_effect = Exception("Model not found")
+        mock_require_module.return_value = mock_st
+
+        with pytest.raises(
+            ValueError, match="Failed to load Sentence Transformer model"
+        ):
+            DefaultLocalDenseEmbedding()
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_modelscope_import_error(self, mock_require_module):
+        """Test handling of ModelScope import error."""
+        mock_st = Mock()
+
+        def require_module_side_effect(module_name):
+            if module_name == "sentence_transformers":
+                return mock_st
+            elif module_name == "modelscope":
+                raise ImportError("No module named 'modelscope'")
+
+        mock_require_module.side_effect = require_module_side_effect
+
+        with pytest.raises(
+            ImportError, match="ModelScope support requires the 'modelscope' package"
+        ):
+            DefaultLocalDenseEmbedding(model_source="modelscope")
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_dimension_mismatch(self, mock_require_module):
+        """Test handling of dimension mismatch in embedding output."""
+        # Return embedding with wrong dimension
+        fake_embedding = np.random.rand(256).astype(np.float32)
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+
+        # Configure encode method
+        mock_model.encode = Mock(return_value=fake_embedding)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        emb_func = DefaultLocalDenseEmbedding()
+
+        with pytest.raises(ValueError, match="Dimension mismatch"):
+            emb_func.embed("Test text")
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_embedding_generation(self):
+        """Integration test with real model (requires sentence-transformers).
+
+        To run this test, set environment variable:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+
+        Note: First run will download the model (~80MB).
+        """
+        emb_func = DefaultLocalDenseEmbedding()
+
+        # Test basic embedding
+        vector = emb_func.embed("Hello, world!")
+        assert len(vector) == 384
+        assert isinstance(vector, list)
+        assert all(isinstance(x, float) for x in vector)
+
+        # Test normalization
+        norm = np.linalg.norm(vector)
+        assert abs(norm - 1.0) < 1e-5
+
+        # Test semantic similarity
+        v1 = emb_func.embed("The cat sits on the mat")
+        v2 = emb_func.embed("A feline rests on a rug")
+        v3 = emb_func.embed("Python programming language")
+
+        similarity_high = np.dot(v1, v2)
+        similarity_low = np.dot(v1, v3)
+        assert similarity_high > similarity_low
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_model_properties(self, mock_require_module):
+        """Test model_name and model_source properties."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.get_sentence_embedding_dimension.return_value = 384
+        mock_model.device = "cpu"
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        # Test Hugging Face
+        emb_func_hf = DefaultLocalDenseEmbedding(model_source="huggingface")
+        assert emb_func_hf.model_name == "all-MiniLM-L6-v2"
+        assert emb_func_hf.model_source == "huggingface"
+
+        # Test ModelScope
+        with patch(
+            "modelscope.hub.snapshot_download.snapshot_download",
+            return_value="/path/to/model",
+        ):
+            mock_ms = Mock()
+            mock_require_module.side_effect = (
+                lambda m: mock_st if m == "sentence_transformers" else mock_ms
+            )
+            emb_func_ms = DefaultLocalDenseEmbedding(model_source="modelscope")
+            assert (
+                emb_func_ms.model_name == "iic/nlp_gte_sentence-embedding_chinese-small"
+            )
+            assert emb_func_ms.model_source == "modelscope"
+
+
+# -----------------------------------
+# DefaultLocalSparseEmbedding Test Case
+# -----------------------------------
+class TestDefaultLocalSparseEmbedding:
+    """Test suite for DefaultLocalSparseEmbedding (SPLADE sparse embedding).
+
+    Note:
+        DefaultLocalSparseEmbedding uses naver/splade-cocondenser-ensembledistil
+        instead of naver/splade-v3 because:
+
+        - splade-v3 is a gated model requiring Hugging Face authentication
+        - cocondenser-ensembledistil is publicly accessible
+        - Performance difference is minimal (~2%)
+        - Avoids "Access to model is restricted" errors
+
+        This allows all users to run tests without authentication setup.
+    """
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_init_success(self, mock_require_module):
+        """Test successful initialization.
+
+        Verifies that DefaultLocalSparseEmbedding initializes with the publicly
+        accessible naver/splade-cocondenser-ensembledistil model instead of
+        the gated naver/splade-v3 model.
+        """
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        assert sparse_emb.model_name == "naver/splade-cocondenser-ensembledistil"
+        assert sparse_emb.model_source == "huggingface"
+        assert sparse_emb.device == "cpu"
+        mock_st.SentenceTransformer.assert_called_once_with(
+            "naver/splade-cocondenser-ensembledistil",
+            device=None,
+            trust_remote_code=True,
+        )
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_init_with_custom_device(self, mock_require_module):
+        """Test initialization with custom device."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cuda"
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding(device="cuda")
+
+        assert sparse_emb.device == "cuda"
+        mock_st.SentenceTransformer.assert_called_once_with(
+            "naver/splade-cocondenser-ensembledistil",
+            device="cuda",
+            trust_remote_code=True,
+        )
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_success(self, mock_require_module):
+        """Test successful sparse embedding generation with official API."""
+        import numpy as np
+
+        # Clear model cache to ensure fresh mock
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        # Create a mock sparse matrix that simulates scipy.sparse behavior
+        # The code will call: sparse_matrix[0].toarray().flatten()
+        mock_sparse_matrix = Mock()
+
+        # Create a dense array representation with vocab_size=30522
+        vocab_size = 30522
+        dense_array = np.zeros(vocab_size)
+        # Set specific non-zero values at indices [10, 245, 1023, 5678]
+        dense_array[10] = 0.5
+        dense_array[245] = 0.8
+        dense_array[1023] = 1.2
+        dense_array[5678] = 0.3
+
+        # Mock the method chain: sparse_matrix[0].toarray().flatten()
+        mock_row = Mock()
+        mock_dense = Mock()
+        mock_row.toarray.return_value = mock_dense
+        mock_dense.flatten.return_value = dense_array
+        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)
+
+        # Also mock hasattr check for 'toarray'
+        mock_sparse_matrix.toarray = Mock()
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+
+        # Configure mock methods to return sparse matrix
+        # Must set return_value BEFORE hasattr() check in the code
+        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)
+        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+        result = sparse_emb.embed("machine learning")
+
+        # Verify result is a dictionary
+        assert isinstance(result, dict)
+        # Verify keys are integers and values are floats
+        assert all(isinstance(k, int) for k in result.keys())
+        assert all(isinstance(v, float) for v in result.values())
+        # Verify all values are positive
+        assert all(v > 0 for v in result.values())
+        # Sparse vectors should have specific dimensions
+        assert len(result) == 4
+
+        # Verify output is sorted by indices (keys)
+        keys = list(result.keys())
+        assert keys == sorted(keys), (
+            "Sparse vector keys must be sorted in ascending order"
+        )
+
+        # Verify expected keys
+        assert keys == [10, 245, 1023, 5678]
+
+        # Verify encode_query was called with a list
+        mock_model.encode_query.assert_called_once()
+        call_args = mock_model.encode_query.call_args[0][0]
+        assert isinstance(call_args, list)
+        assert call_args == ["machine learning"]
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_empty_input(self, mock_require_module):
+        """Test embedding with empty input."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        with pytest.raises(ValueError, match="Input text cannot be empty"):
+            sparse_emb.embed("")
+
+        with pytest.raises(ValueError, match="Input text cannot be empty"):
+            sparse_emb.embed("   ")
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_embed_non_string_input(self, mock_require_module):
+        """Test embedding with non-string input."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            sparse_emb.embed(123)
+
+        with pytest.raises(TypeError, match="Expected 'input' to be str"):
+            sparse_emb.embed(["text"])
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_callable_interface(self, mock_require_module):
+        """Test that DefaultSparseEmbedding is callable."""
+        import numpy as np
+
+        # Clear model cache
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        # Create a mock sparse matrix
+        mock_sparse_matrix = Mock()
+
+        # Create a dense array representation with vocab_size=30522
+        vocab_size = 30522
+        dense_array = np.zeros(vocab_size)
+        # Set specific non-zero values at indices [100, 200, 300]
+        dense_array[100] = 1.0
+        dense_array[200] = 0.5
+        dense_array[300] = 0.8
+
+        # Mock the method chain: sparse_matrix[0].toarray().flatten()
+        mock_row = Mock()
+        mock_dense = Mock()
+        mock_row.toarray.return_value = mock_dense
+        mock_dense.flatten.return_value = dense_array
+        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)
+
+        # Also mock hasattr check for 'toarray'
+        mock_sparse_matrix.toarray = Mock()
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+
+        # Configure mock methods
+        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)
+        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        # Test callable interface
+        result = sparse_emb("test input")
+        assert isinstance(result, dict)
+        assert all(isinstance(k, int) for k in result.keys())
+
+        # Verify sorted output
+        keys = list(result.keys())
+        assert keys == sorted(keys), "Callable interface must also return sorted keys"
+        assert keys == [100, 200, 300]
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_model_loading_failure(self, mock_require_module):
+        """Test handling of model loading failure."""
+        # Clear model cache to ensure the test actually tries to load the model
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        mock_st = Mock()
+        mock_st.SentenceTransformer.side_effect = Exception("Model not found")
+        mock_require_module.return_value = mock_st
+
+        with pytest.raises(
+            ValueError, match="Failed to load Sentence Transformer model"
+        ):
+            DefaultLocalSparseEmbedding()
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_inference_failure(self, mock_require_module):
+        """Test handling of inference failure."""
+        # Clear model cache
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+
+        # Configure mock methods to raise RuntimeError
+        mock_model.encode_query = Mock(side_effect=RuntimeError("CUDA out of memory"))
+        mock_model.encode_document = Mock(
+            side_effect=RuntimeError("CUDA out of memory")
+        )
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        with pytest.raises(RuntimeError, match="Failed to generate sparse embedding"):
+            sparse_emb.embed("test input")
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_sparse_vector_properties(self, mock_require_module):
+        """Test properties of sparse vectors (sparsity, non-zero values, sorted order)."""
+        import numpy as np
+
+        # Clear model cache
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        # Create a mock sparse matrix that simulates scipy.sparse behavior
+        # The code will call: sparse_matrix[0].toarray().flatten()
+        mock_sparse_matrix = Mock()
+
+        # Create a dense array representation with vocab_size=30522
+        vocab_size = 30522
+        dense_array = np.zeros(vocab_size)
+        # Set specific non-zero values at indices [50, 100, 200, 400, 500]
+        dense_array[50] = 3.0
+        dense_array[100] = 2.0
+        dense_array[200] = 1.5
+        dense_array[400] = 2.5
+        dense_array[500] = 1.8
+
+        # Mock the method chain: sparse_matrix[0].toarray().flatten()
+        mock_row = Mock()
+        mock_dense = Mock()
+        mock_row.toarray.return_value = mock_dense
+        mock_dense.flatten.return_value = dense_array
+        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)
+
+        # Also mock hasattr check for 'toarray'
+        mock_sparse_matrix.toarray = Mock()
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+
+        # Configure mock methods
+        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)
+        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+        result = sparse_emb.embed("test")
+
+        # Verify sparsity: result should have much fewer dimensions than vocab_size
+        assert len(result) < vocab_size
+        # All values should be positive
+        assert all(v > 0 for v in result.values())
+
+        # Verify keys are sorted in ascending order
+        keys = list(result.keys())
+        assert keys == sorted(keys), "Sparse vector keys must be sorted"
+
+        # Verify the specific non-zero indices are present and sorted
+        # Expected order: [50, 100, 200, 400, 500] (sorted)
+        expected_keys = [50, 100, 200, 400, 500]
+        assert keys == expected_keys, f"Expected {expected_keys}, got {keys}"
+
+        # First key should be smallest
+        if len(result) > 0:
+            first_key = next(iter(result.keys()))
+            assert first_key == min(result.keys()), "First key must be the smallest"
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_output_sorted_by_indices(self, mock_require_module):
+        """Test that output dictionary is always sorted by indices (keys) in ascending order."""
+        import numpy as np
+
+        # Clear model cache
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        # Create sparse output with deliberately out-of-order indices
+        # Non-sequential indices: 9999, 5, 1234, 77, 500
+        mock_sparse_matrix = Mock()
+
+        # Create a dense array representation with vocab_size=30522
+        vocab_size = 30522
+        dense_array = np.zeros(vocab_size)
+        # Set specific non-zero values at out-of-order indices
+        dense_array[9999] = 1.5
+        dense_array[5] = 2.0
+        dense_array[1234] = 0.8
+        dense_array[77] = 3.2
+        dense_array[500] = 1.1
+
+        # Mock the method chain: sparse_matrix[0].toarray().flatten()
+        mock_row = Mock()
+        mock_dense = Mock()
+        mock_row.toarray.return_value = mock_dense
+        mock_dense.flatten.return_value = dense_array
+        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)
+
+        # Also mock hasattr check for 'toarray'
+        mock_sparse_matrix.toarray = Mock()
+
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+
+        # Configure mock methods
+        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)
+        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)
+
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+        result = sparse_emb.embed("test sorting")
+
+        # Extract keys from result
+        result_keys = list(result.keys())
+
+        # Verify keys are sorted
+        assert result_keys == sorted(result_keys), (
+            f"Keys must be sorted in ascending order. "
+            f"Got: {result_keys}, Expected: {sorted(result_keys)}"
+        )
+
+        # Verify expected keys are present and in correct order
+        # Expected sorted order: [5, 77, 500, 1234, 9999]
+        expected_sorted_keys = [5, 77, 500, 1234, 9999]
+        assert result_keys == expected_sorted_keys, (
+            f"All expected keys should be present in sorted order. "
+            f"Expected: {expected_sorted_keys}, Got: {result_keys}"
+        )
+
+        # Verify first and last keys
+        assert result_keys[0] == 5, "First key must be minimum"
+        assert result_keys[-1] == 9999, "Last key must be maximum"
+
+        # Verify iteration order matches sorted order
+        for i, (key, value) in enumerate(result.items()):
+            if i > 0:
+                prev_key = list(result.keys())[i - 1]
+                assert key > prev_key, (
+                    f"Key at position {i} must be greater than previous key"
+                )
+
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_device_property(self, mock_require_module):
+        """Test device property returns correct device."""
+        mock_st = Mock()
+        mock_model = Mock()
+        mock_model.device = "cuda"
+        mock_st.SentenceTransformer.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        sparse_emb = DefaultLocalSparseEmbedding(device="cuda")
+        assert sparse_emb.device == "cuda"
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1 and model download",
+    )
+    @patch("zvec.extension.sentence_transformer_function.require_module")
+    def test_modelscope_source(self, mock_require_module):
+        """Test initialization with ModelScope source."""
+        mock_st = Mock()
+        mock_ms = Mock()
+        mock_model = Mock()
+        mock_model.device = "cpu"
+        mock_st.SentenceTransformer.return_value = mock_model
+
+        # Mock ModelScope snapshot_download
+        with patch(
+            "modelscope.hub.snapshot_download.snapshot_download",
+            return_value="/cache/splade-cocondenser",
+        ):
+            mock_require_module.side_effect = (
+                lambda m: mock_st if m == "sentence_transformers" else mock_ms
+            )
+
+            sparse_emb = DefaultLocalSparseEmbedding(model_source="modelscope")
+
+            assert sparse_emb.model_name == "naver/splade-cocondenser-ensembledistil"
+            assert sparse_emb.model_source == "modelscope"
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1 and model download",
+    )
+    def test_integration_real_model(self):
+        """Integration test with real SPLADE model (requires model download).
+
+        This test uses naver/splade-cocondenser-ensembledistil instead of
+        naver/splade-v3 because splade-v3 requires Hugging Face authentication.
+        The cocondenser-ensembledistil model is publicly accessible and provides
+        comparable performance.
+
+        To run this test:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+            pytest tests/test_embedding.py::TestDefaultSparseEmbedding::test_integration_real_model -v
+
+        Note: First run will download ~100MB model from Hugging Face.
+
+        Alternative models:
+            If you have access to splade-v3, you can create a custom embedding
+            class following the example in DefaultSparseEmbedding docstring.
+        """
+        # Clear model cache to ensure fresh load
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        # Test with real input
+        text = "machine learning and artificial intelligence"
+        result = sparse_emb.embed(text)
+
+        # Verify result structure
+        assert isinstance(result, dict)
+        assert len(result) > 0
+        assert all(isinstance(k, int) and k >= 0 for k in result.keys())
+        assert all(isinstance(v, float) and v > 0 for v in result.values())
+
+        # SPLADE typically produces 100-300 non-zero dimensions
+        assert 50 < len(result) < 500
+
+        # Verify keys are sorted in ascending order
+        keys = list(result.keys())
+        assert keys == sorted(keys), "Real model output must be sorted by indices"
+
+        # Test callable interface
+        result2 = sparse_emb(text)
+        assert result == result2
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1",
+    )
+    def test_integration_multiple_inputs(self):
+        """Integration test with multiple different inputs."""
+        # Clear model cache
+        from zvec.extension.sentence_transformer_embedding_function import (
+            DefaultLocalSparseEmbedding,
+        )
+
+        DefaultLocalSparseEmbedding.clear_cache()
+
+        sparse_emb = DefaultLocalSparseEmbedding()
+
+        texts = [
+            "Hello, world!",
+            "Machine learning is fascinating",
+            "Python programming language",
+        ]
+
+        results = [sparse_emb.embed(text) for text in texts]
+
+        # All results should be different
+        assert len(results) == 3
+        assert all(isinstance(r, dict) for r in results)
+
+        # Different inputs should produce different sparse vectors
+        assert results[0] != results[1]
+        assert results[1] != results[2]
+
+        # All results must be sorted by indices
+        for i, result in enumerate(results):
+            keys = list(result.keys())
+            assert keys == sorted(keys), f"Result {i} must have sorted keys"
+
+
+# ----------------------------
+# BM25EmbeddingFunction Test Case
+# ----------------------------
+class TestBM25EmbeddingFunction:
+    """Test suite for BM25EmbeddingFunction (BM25-based sparse embedding using DashText SDK)."""
+
+    def test_init_with_built_in_encoder(self):
+        """Test successful initialization with built-in encoder (no corpus)."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            # Test with default language (Chinese)
+            bm25 = BM25EmbeddingFunction()
+
+            assert bm25.corpus_size == 0
+            assert bm25.encoding_type == "query"
+            assert bm25.language == "zh"
+            mock_dashtext.SparseVectorEncoder.default.assert_called_once_with(name="zh")
+
+    def test_init_with_custom_encoder(self):
+        """Test successful initialization with custom encoder (with corpus)."""
+        corpus = [
+            "a cat is a feline and likes to purr",
+            "a dog is the human's best friend",
+            "a bird is a beautiful animal that can fly",
+        ]
+
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction(corpus=corpus, b=0.75, k1=1.2)
+
+            assert bm25.corpus_size == 3
+            assert bm25.encoding_type == "query"
+            mock_dashtext.SparseVectorEncoder.assert_called_once_with(b=0.75, k1=1.2)
+            mock_encoder.train.assert_called_once_with(corpus)
+
+    def test_init_with_empty_corpus(self):
+        """Test initialization with empty corpus raises ValueError."""
+        with pytest.raises(ValueError, match="Corpus must be a non-empty list"):
+            BM25EmbeddingFunction(corpus=[])
+
+    def test_init_with_invalid_corpus(self):
+        """Test initialization with invalid corpus elements."""
+        with pytest.raises(ValueError, match="All corpus documents must be strings"):
+            BM25EmbeddingFunction(corpus=["text", 123, "another"])
+
+        with pytest.raises(ValueError, match="All corpus documents must be strings"):
+            BM25EmbeddingFunction(corpus=[None, "text"])
+
+    def test_init_with_language_parameter(self):
+        """Test initialization with different language settings."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            # Test English language
+            bm25_en = BM25EmbeddingFunction(language="en")
+            assert bm25_en.language == "en"
+            mock_dashtext.SparseVectorEncoder.default.assert_called_with(name="en")
+
+    def test_init_with_encoding_type(self):
+        """Test initialization with different encoding types."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            # Test document encoding type
+            bm25_doc = BM25EmbeddingFunction(encoding_type="document")
+            assert bm25_doc.encoding_type == "document"
+
+    def test_init_with_missing_dashtext_library(self):
+        """Test initialization fails when dashtext library is not installed."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_require.side_effect = ImportError("dashtext package is required")
+
+            with pytest.raises(ImportError, match="dashtext package is required"):
+                BM25EmbeddingFunction()
+
+    def test_embed_with_query_encoding(self):
+        """Test successful sparse embedding generation with query encoding."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+
+            # Mock encode_queries to return sparse vector
+            mock_encoder.encode_queries.return_value = {
+                5: 0.89,
+                12: 1.45,
+                23: 0.67,
+                45: 1.12,
+            }
+
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction(encoding_type="query")
+            # Clear LRU cache to ensure fresh call
+            bm25.embed.cache_clear()
+            result = bm25.embed("cat purr loud")
+
+            # Verify result structure
+            assert isinstance(result, dict)
+            assert all(isinstance(k, int) for k in result.keys())
+            assert all(isinstance(v, float) for v in result.values())
+
+            # Verify all values are positive
+            assert all(v > 0 for v in result.values())
+
+            # Verify output is sorted by indices
+            keys = list(result.keys())
+            assert keys == sorted(keys), "Output must be sorted by indices"
+
+            # Verify expected keys from mock response
+            assert result == {5: 0.89, 12: 1.45, 23: 0.67, 45: 1.12}
+
+            # Verify encode_queries was called
+            mock_encoder.encode_queries.assert_called_once_with("cat purr loud")
+
+    def test_embed_with_document_encoding(self):
+        """Test successful sparse embedding generation with document encoding."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+
+            # Mock encode_documents to return sparse vector
+            mock_encoder.encode_documents.return_value = {10: 1.5, 20: 2.3}
+
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction(encoding_type="document")
+            bm25.embed.cache_clear()
+            result = bm25.embed("document text")
+
+            assert result == {10: 1.5, 20: 2.3}
+            mock_encoder.encode_documents.assert_called_once_with("document text")
+
+    def test_embed_with_empty_input(self):
+        """Test embedding with empty input raises ValueError."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction()
+
+            with pytest.raises(ValueError, match="Input text cannot be empty"):
+                bm25.embed("")
+
+            with pytest.raises(ValueError, match="Input text cannot be empty"):
+                bm25.embed("   ")
+
+    def test_embed_with_non_string_input(self):
+        """Test embedding with non-string input raises TypeError."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction()
+
+            # Test with hashable non-string types - should get our custom error message
+            with pytest.raises(TypeError, match="Expected 'input' to be str"):
+                bm25.embed(123)
+
+            with pytest.raises(TypeError, match="Expected 'input' to be str"):
+                bm25.embed(None)
+
+            # Test with unhashable type (list)
+            # Note: lru_cache raises TypeError("unhashable type: 'list'") before our type check
+            # This is still a valid type error, just caught at a different layer
+            with pytest.raises(TypeError, match="unhashable type"):
+                bm25.embed(["text"])
+
+    def test_embed_callable_interface(self):
+        """Test that BM25EmbeddingFunction is callable."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_encoder.encode_queries.return_value = {10: 1.5}
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction()
+            bm25.embed.cache_clear()
+
+            # Test callable interface
+            result = bm25("test query")
+            assert isinstance(result, dict)
+            assert 10 in result
+
+    def test_embed_output_sorted_by_indices(self):
+        """Test that output is always sorted by indices in ascending order."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+
+            # Mock encode_queries with unsorted indices
+            mock_encoder.encode_queries.return_value = {
+                9999: 1.5,
+                5: 2.0,
+                1234: 0.8,
+                77: 3.2,
+                500: 1.1,
+            }
+
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction()
+            bm25.embed.cache_clear()
+            result = bm25.embed("test query")
+
+            # Verify keys are sorted
+            result_keys = list(result.keys())
+            assert result_keys == sorted(result_keys), (
+                f"Keys must be sorted. Got: {result_keys}, Expected: {sorted(result_keys)}"
+            )
+
+            # Verify expected sorted order: [5, 77, 500, 1234, 9999]
+            expected_keys = [5, 77, 500, 1234, 9999]
+            assert result_keys == expected_keys
+
+    def test_embed_filters_zero_values(self):
+        """Test that zero and negative values are filtered out."""
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+
+            # Mock encode_queries with zero and negative values
+            mock_encoder.encode_queries.return_value = {
+                0: 1.5,  # Positive - should be included
+                1: 0.0,  # Zero - should be filtered
+                2: -0.5,  # Negative - should be filtered
+            }
+
+            mock_dashtext.SparseVectorEncoder.default.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction()
+            bm25.embed.cache_clear()
+            result = bm25.embed("test")
+
+            # Only positive token should be in result
+            assert 0 in result
+            assert 1 not in result  # Zero value filtered
+            assert 2 not in result  # Negative value filtered
+            assert all(v > 0 for v in result.values())
+
+    def test_properties(self):
+        """Test property accessors."""
+        corpus = ["doc1", "doc2", "doc3"]
+
+        with patch(
+            "zvec.extension.bm25_embedding_function.require_module"
+        ) as mock_require:
+            mock_dashtext = Mock()
+            mock_encoder = Mock()
+            mock_dashtext.SparseVectorEncoder.return_value = mock_encoder
+            mock_require.return_value = mock_dashtext
+
+            bm25 = BM25EmbeddingFunction(
+                corpus=corpus,
+                encoding_type="document",
+                language="en",
+                b=0.8,
+                k1=1.5,
+                custom_param="test",
+            )
+
+            assert bm25.corpus_size == 3
+            assert bm25.encoding_type == "document"
+            assert bm25.language == "en"
+            assert bm25.extra_params == {"custom_param": "test"}
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_dashtext_bm25_embedding(self):
+        """Integration test with real DashText library.
+
+        To run this test:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+            pip install dashtext
+
+        Note: This test requires the dashtext package to be installed.
+        """
+        # Test built-in encoder (Chinese)
+        bm25_zh = BM25EmbeddingFunction(language="zh", encoding_type="query")
+
+        query_zh = "什么是向量检索服务"
+        result_zh = bm25_zh.embed(query_zh)
+
+        assert isinstance(result_zh, dict)
+        assert len(result_zh) > 0
+        assert all(isinstance(k, int) for k in result_zh.keys())
+        assert all(isinstance(v, float) and v > 0 for v in result_zh.values())
+
+        # Verify sorted output
+        keys = list(result_zh.keys())
+        assert keys == sorted(keys), "Real DashText BM25 output must be sorted"
+
+        # Test custom corpus
+        corpus = [
+            "The cat sits on the mat",
+            "The dog plays in the garden",
+            "Birds fly in the sky",
+            "Fish swim in the water",
+        ]
+
+        bm25_custom = BM25EmbeddingFunction(corpus=corpus, encoding_type="query")
+
+        query_en = "cat on mat"
+        result_en = bm25_custom.embed(query_en)
+
+        assert isinstance(result_en, dict)
+        assert len(result_en) > 0
+        assert all(isinstance(k, int) for k in result_en.keys())
+        assert all(isinstance(v, float) and v > 0 for v in result_en.values())
+
+        # Test callable interface
+        result2 = bm25_custom(query_en)
+        assert result_en == result2
+
+        # Verify properties
+        assert bm25_custom.corpus_size == 4
diff --git a/python/tests/test_reranker.py b/python/tests/test_reranker.py
index 5b2c177d..dced1dd7 100644
--- a/python/tests/test_reranker.py
+++ b/python/tests/test_reranker.py
@@ -13,11 +13,23 @@
 # limitations under the License.
 from __future__ import annotations
 
-from unittest.mock import patch
+from unittest.mock import patch, MagicMock
 import pytest
 import math
+import os
 
-from zvec import RrfReRanker, WeightedReRanker, Doc, MetricType
+from zvec import Doc, MetricType
+from zvec.extension.multi_vector_reranker import (
+    RrfReRanker,
+    WeightedReRanker,
+)
+from zvec.extension.sentence_transformer_rerank_function import (
+    DefaultLocalReRanker,
+)
+from zvec.extension.qwen_rerank_function import QwenReRanker
+
+# Set ZVEC_RUN_INTEGRATION_TESTS=1 to run real API tests
+RUN_INTEGRATION_TESTS = os.environ.get("ZVEC_RUN_INTEGRATION_TESTS", "0") == "1"
 
 
 # ----------------------------
@@ -25,23 +37,20 @@
 # ----------------------------
 class TestRrfReRanker:
     def test_init(self):
-        reranker = RrfReRanker(
-            query="test", topn=5, rerank_field="content", rank_constant=100
-        )
-        assert reranker.query == "test"
+        reranker = RrfReRanker(topn=5, rerank_field="content", rank_constant=100)
         assert reranker.topn == 5
         assert reranker.rerank_field == "content"
         assert reranker.rank_constant == 100
 
     def test_rrf_score(self):
-        reranker = RrfReRanker(query="test", rank_constant=60)
+        reranker = RrfReRanker(rank_constant=60)
         # 根据公式 1.0 / (k + rank + 1)，其中k=60
         assert reranker._rrf_score(0) == 1.0 / (60 + 0 + 1)
         assert reranker._rrf_score(1) == 1.0 / (60 + 1 + 1)
         assert reranker._rrf_score(10) == 1.0 / (60 + 10 + 1)
 
     def test_rerank(self):
-        reranker = RrfReRanker(query="test", topn=3)
+        reranker = RrfReRanker(topn=3)
 
         doc1 = Doc(id="1", score=0.8)
         doc2 = Doc(id="2", score=0.7)
@@ -68,20 +77,18 @@ class TestWeightedReRanker:
     def test_init(self):
         weights = {"vector1": 0.7, "vector2": 0.3}
         reranker = WeightedReRanker(
-            query="test",
             topn=5,
             rerank_field="content",
             metric=MetricType.L2,
             weights=weights,
         )
-        assert reranker.query == "test"
         assert reranker.topn == 5
         assert reranker.rerank_field == "content"
         assert reranker.metric == MetricType.L2
         assert reranker.weights == weights
 
     def test_normalize_score(self):
-        reranker = WeightedReRanker(query="test")
+        reranker = WeightedReRanker()
 
         score = reranker._normalize_score(1.0, MetricType.L2)
         expected = 1.0 - 2 * math.atan(1.0) / math.pi
@@ -100,9 +107,7 @@ def test_normalize_score(self):
 
     def test_rerank(self):
         weights = {"vector1": 0.7, "vector2": 0.3}
-        reranker = WeightedReRanker(
-            query="test", topn=3, weights=weights, metric=MetricType.L2
-        )
+        reranker = WeightedReRanker(topn=3, weights=weights, metric=MetricType.L2)
 
         doc1 = Doc(id="1", score=0.8)
         doc2 = Doc(id="2", score=0.7)
@@ -121,64 +126,843 @@ def test_rerank(self):
         assert scores == sorted(scores, reverse=True)
 
 
-# # ----------------------------
-# # QwenReRanker Test Case
-# # ----------------------------
-# class TestQwenReRanker:
-#     def test_init_without_query(self):
-#         with pytest.raises(ValueError):
-#             QwenReRanker()
-#
-#     def test_init_without_api_key(self):
-#         with patch.dict(os.environ, {"DASHSCOPE_API_KEY": ""}):
-#             with pytest.raises(ValueError, match="DashScope API key is required"):
-#                 QwenReRanker(query="test")
-#
-#     @patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"})
-#     def test_init_with_env_api_key(self):
-#         reranker = QwenReRanker(query="test")
-#         assert reranker.query == "test"
-#         assert reranker._api_key == "test_key"
-#
-#     def test_model_property(self):
-#         reranker = QwenReRanker(query="test", api_key="test_key")
-#         assert reranker.model == "gte-rerank-v2"
-#
-#         reranker = QwenReRanker(query="test", model="custom-model", api_key="test_key")
-#         assert reranker.model == "custom-model"
-#
-#     def test_rerank_empty_results(self):
-#         reranker = QwenReRanker(query="test", api_key="test_key")
-#         results = reranker.rerank({})
-#         assert results == []
-#
-#     def test_rerank_no_documents(self):
-#         reranker = QwenReRanker(query="test", api_key="test_key")
-#         query_results = {"vector1": [Doc(id="1")]}
-#         with pytest.raises(ValueError, match="No documents to rerank"):
-#             reranker.rerank(query_results)
-#
-#     @pytest.mark.skip(reason="Qwen ReRanker is not available in CI")
-#     def test_rerank_success(self):
-#         reranker = QwenReRanker(
-#             topn=3,
-#             query="test",
-#             api_key="*",
-#             rerank_field="content",
-#         )
-#         query_results = {
-#             "vector1": [
-#                 Doc(id="1", fields={"content": "This is a test document."}),
-#                 Doc(id="2", fields={"content": "Another test document."}),
-#                 Doc(id="3", fields={"content": "Yet another test document."}),
-#                 Doc(id="4", fields={"content": "One more test document."}),
-#             ],
-#             "vector2": [
-#                 Doc(id="5", fields={"content": "This is a test document2."}),
-#                 Doc(id="6", fields={"content": "Another test document2."}),
-#                 Doc(id="7", fields={"content": "Yet another test document2."}),
-#                 Doc(id="8", fields={"content": "One more test document2."}),
-#             ],
-#         }
-#         results = reranker.rerank(query_results)
-#         assert len(results) == 3
+# ----------------------------
+# QwenReRanker Test Case
+# ----------------------------
+class TestQwenReRanker:
+    def test_init_without_query(self):
+        with pytest.raises(ValueError, match="Query is required for QwenReRanker"):
+            QwenReRanker(api_key="test_key")
+
+    def test_init_without_api_key(self):
+        with patch.dict(os.environ, {}, clear=True):
+            with pytest.raises(ValueError, match="DashScope API key is required"):
+                QwenReRanker(query="test")
+
+    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"})
+    def test_init_with_env_api_key(self):
+        reranker = QwenReRanker(query="test", rerank_field="content")
+        assert reranker.query == "test"
+        assert reranker._api_key == "test_key"
+        assert reranker.rerank_field == "content"
+
+    def test_init_with_explicit_api_key(self):
+        reranker = QwenReRanker(
+            query="test", api_key="explicit_key", rerank_field="content"
+        )
+        assert reranker.query == "test"
+        assert reranker._api_key == "explicit_key"
+
+    def test_model_property(self):
+        reranker = QwenReRanker(
+            query="test", api_key="test_key", rerank_field="content"
+        )
+        assert reranker.model == "gte-rerank-v2"
+
+        reranker = QwenReRanker(
+            query="test",
+            model="custom-model",
+            api_key="test_key",
+            rerank_field="content",
+        )
+        assert reranker.model == "custom-model"
+
+    def test_query_property(self):
+        reranker = QwenReRanker(
+            query="test query", api_key="test_key", rerank_field="content"
+        )
+        assert reranker.query == "test query"
+
+    def test_topn_property(self):
+        reranker = QwenReRanker(
+            query="test", topn=5, api_key="test_key", rerank_field="content"
+        )
+        assert reranker.topn == 5
+
+    def test_rerank_field_property(self):
+        reranker = QwenReRanker(query="test", api_key="test_key", rerank_field="title")
+        assert reranker.rerank_field == "title"
+
+    def test_rerank_empty_results(self):
+        reranker = QwenReRanker(
+            query="test", api_key="test_key", rerank_field="content"
+        )
+        results = reranker.rerank({})
+        assert results == []
+
+    def test_rerank_no_valid_documents(self):
+        reranker = QwenReRanker(
+            query="test", api_key="test_key", rerank_field="content"
+        )
+        # Document without the rerank_field
+        query_results = {"vector1": [Doc(id="1")]}
+        with pytest.raises(ValueError, match="No documents to rerank"):
+            reranker.rerank(query_results)
+
+    def test_rerank_skip_empty_content(self):
+        reranker = QwenReRanker(
+            query="test", api_key="test_key", rerank_field="content"
+        )
+        query_results = {
+            "vector1": [
+                Doc(id="1", fields={"content": ""}),
+                Doc(id="2", fields={"content": "   "}),
+            ]
+        }
+        with pytest.raises(ValueError, match="No documents to rerank"):
+            reranker.rerank(query_results)
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_rerank_success(self, mock_require_module):
+        # Mock dashscope module
+        mock_dashscope = MagicMock()
+        mock_require_module.return_value = mock_dashscope
+
+        # Mock API response
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.output = {
+            "results": [
+                {"index": 0, "relevance_score": 0.95},
+                {"index": 1, "relevance_score": 0.85},
+            ]
+        }
+        mock_dashscope.TextReRank.call.return_value = mock_response
+
+        reranker = QwenReRanker(
+            query="test query", topn=2, api_key="test_key", rerank_field="content"
+        )
+
+        query_results = {
+            "vector1": [
+                Doc(id="1", fields={"content": "Document 1"}),
+                Doc(id="2", fields={"content": "Document 2"}),
+            ]
+        }
+
+        results = reranker.rerank(query_results)
+
+        assert len(results) == 2
+        assert results[0].id == "1"
+        assert results[0].score == 0.95
+        assert results[1].id == "2"
+        assert results[1].score == 0.85
+
+        # Verify API call
+        mock_dashscope.TextReRank.call.assert_called_once_with(
+            model="gte-rerank-v2",
+            query="test query",
+            documents=["Document 1", "Document 2"],
+            top_n=2,
+            return_documents=False,
+        )
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_rerank_deduplicate_documents(self, mock_require_module):
+        # Mock dashscope module
+        mock_dashscope = MagicMock()
+        mock_require_module.return_value = mock_dashscope
+
+        # Mock API response
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.output = {
+            "results": [
+                {"index": 0, "relevance_score": 0.9},
+            ]
+        }
+        mock_dashscope.TextReRank.call.return_value = mock_response
+
+        reranker = QwenReRanker(
+            query="test", topn=5, api_key="test_key", rerank_field="content"
+        )
+
+        # Same document in multiple vector results
+        doc1 = Doc(id="1", fields={"content": "Document 1"})
+        query_results = {"vector1": [doc1], "vector2": [doc1]}
+
+        results = reranker.rerank(query_results)
+
+        # Should only call API with document once
+        call_args = mock_dashscope.TextReRank.call.call_args
+        assert len(call_args[1]["documents"]) == 1
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_rerank_api_error(self, mock_require_module):
+        # Mock dashscope module
+        mock_dashscope = MagicMock()
+        mock_require_module.return_value = mock_dashscope
+
+        # Mock API error response
+        mock_response = MagicMock()
+        mock_response.status_code = 400
+        mock_response.message = "Invalid request"
+        mock_response.code = "InvalidParameter"
+        mock_dashscope.TextReRank.call.return_value = mock_response
+
+        reranker = QwenReRanker(
+            query="test", api_key="test_key", rerank_field="content"
+        )
+
+        query_results = {"vector1": [Doc(id="1", fields={"content": "Document 1"})]}
+
+        with pytest.raises(ValueError, match="DashScope API error"):
+            reranker.rerank(query_results)
+
+    @patch("zvec.extension.qwen_function.require_module")
+    def test_rerank_runtime_error(self, mock_require_module):
+        # Mock dashscope module that raises exception
+        mock_dashscope = MagicMock()
+        mock_require_module.return_value = mock_dashscope
+        mock_dashscope.TextReRank.call.side_effect = Exception("Network error")
+
+        reranker = QwenReRanker(
+            query="test", api_key="test_key", rerank_field="content"
+        )
+
+        query_results = {"vector1": [Doc(id="1", fields={"content": "Document 1"})]}
+
+        with pytest.raises(RuntimeError, match="Failed to call DashScope API"):
+            reranker.rerank(query_results)
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_qwen_rerank(self):
+        """Integration test with real DashScope TextReRank API.
+
+        To run this test, set environment variables:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+            export DASHSCOPE_API_KEY=your-api-key
+        """
+        # Create reranker with real API
+        reranker = QwenReRanker(
+            query="What is machine learning?",
+            topn=3,
+            rerank_field="content",
+            model="gte-rerank-v2",
+        )
+
+        # Prepare test documents
+        query_results = {
+            "vector1": [
+                Doc(
+                    id="1",
+                    score=0.8,
+                    fields={
+                        "content": "Machine learning is a subset of artificial intelligence that focuses on building systems that can learn from data."
+                    },
+                ),
+                Doc(
+                    id="2",
+                    score=0.7,
+                    fields={
+                        "content": "The weather is nice today with clear skies and sunshine."
+                    },
+                ),
+                Doc(
+                    id="3",
+                    score=0.75,
+                    fields={
+                        "content": "Deep learning is a specialized branch of machine learning using neural networks with multiple layers."
+                    },
+                ),
+            ],
+            "vector2": [
+                Doc(
+                    id="4",
+                    score=0.6,
+                    fields={
+                        "content": "Python is a popular programming language for data science and machine learning applications."
+                    },
+                ),
+                Doc(
+                    id="5",
+                    score=0.65,
+                    fields={
+                        "content": "A recipe for chocolate cake includes flour, sugar, eggs, and cocoa powder."
+                    },
+                ),
+            ],
+        }
+
+        # Call real API
+        results = reranker.rerank(query_results)
+
+        # Verify results
+        assert len(results) <= 3, "Should return at most topn documents"
+        assert len(results) > 0, "Should return at least one document"
+
+        # All results should have valid scores
+        for doc in results:
+            assert hasattr(doc, "score"), "Each document should have a score"
+            assert isinstance(doc.score, (int, float)), "Score should be numeric"
+            assert doc.score > 0, "Score should be positive"
+
+        # Verify scores are in descending order
+        scores = [doc.score for doc in results]
+        assert scores == sorted(scores, reverse=True), (
+            "Results should be sorted by score in descending order"
+        )
+
+        # Verify relevant documents are ranked higher
+        # Document 1 and 3 are about machine learning, should rank higher than weather/recipe docs
+        result_ids = [doc.id for doc in results]
+
+        # At least one of the ML-related documents should be in top results
+        ml_related_docs = {"1", "3", "4"}
+        assert any(doc_id in ml_related_docs for doc_id in result_ids[:2]), (
+            "ML-related documents should rank higher"
+        )
+
+        # Print results for manual verification (useful during development)
+        print("\nReranking results:")
+        for i, doc in enumerate(results, 1):
+            print(f"{i}. ID={doc.id}, Score={doc.score:.4f}")
+            if doc.fields:
+                content = doc.field("content")
+                if content:
+                    print(f"   Content: {content[:80]}...")
+
+
+# ----------------------------
+# DefaultLocalReRanker Test Case
+# ----------------------------
+class TestDefaultLocalReRanker:
+    """Test cases for DefaultLocalReRanker."""
+
+    def test_init_without_query(self):
+        """Test initialization fails without query."""
+        with pytest.raises(
+            ValueError, match="Query is required for DefaultLocalReRanker"
+        ):
+            DefaultLocalReRanker(rerank_field="content")
+
+    def test_init_with_empty_query(self):
+        """Test initialization fails with empty query."""
+        with pytest.raises(
+            ValueError, match="Query is required for DefaultLocalReRanker"
+        ):
+            DefaultLocalReRanker(query="", rerank_field="content")
+
+    @patch("zvec.extension.sentence_transformer_rerank_function.require_module")
+    def test_init_success(self, mock_require_module):
+        """Test successful initialization with mocked model."""
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()  # Cross-encoder has predict method
+        mock_model.device = "cpu"
+        mock_st.CrossEncoder.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        reranker = DefaultLocalReRanker(
+            query="test query",
+            topn=5,
+            rerank_field="content",
+            model_name="cross-encoder/ms-marco-MiniLM-L6-v2",
+        )
+
+        assert reranker.query == "test query"
+        assert reranker.topn == 5
+        assert reranker.rerank_field == "content"
+        assert reranker.model_name == "cross-encoder/ms-marco-MiniLM-L6-v2"
+        assert reranker.model_source == "huggingface"
+        assert reranker.batch_size == 32
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    @patch("zvec.extension.sentence_transformer_rerank_function.require_module")
+    def test_init_with_custom_params(self, mock_require_module):
+        """Test initialization with custom parameters."""
+        mock_st = MagicMock()
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+        mock_model.device = "cuda"
+        mock_st.CrossEncoder.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        reranker = DefaultLocalReRanker(
+            query="custom query",
+            topn=10,
+            rerank_field="title",
+            model_name="cross-encoder/ms-marco-MiniLM-L12-v2",
+            model_source="modelscope",
+            device="cuda",
+            batch_size=64,
+        )
+
+        assert reranker.query == "custom query"
+        assert reranker.topn == 10
+        assert reranker.rerank_field == "title"
+        assert reranker.model_name == "cross-encoder/ms-marco-MiniLM-L12-v2"
+        assert reranker.model_source == "modelscope"
+        assert reranker.batch_size == 64
+
+    @patch("zvec.extension.sentence_transformer_rerank_function.require_module")
+    def test_init_invalid_model(self, mock_require_module):
+        """Test initialization fails with non-cross-encoder model."""
+        # Mock a model without predict method (not a cross-encoder)
+        mock_st = MagicMock()
+        mock_model = MagicMock(spec=[])  # No predict method
+        mock_st.CrossEncoder.return_value = mock_model
+        mock_require_module.return_value = mock_st
+
+        with pytest.raises(ValueError, match="does not appear to be a cross-encoder"):
+            DefaultLocalReRanker(query="test", rerank_field="content")
+
+    def test_query_property(self):
+        """Test query property."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(query="test query", rerank_field="content")
+            assert reranker.query == "test query"
+
+    def test_topn_property(self):
+        """Test topn property."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test", topn=15, rerank_field="content"
+            )
+            assert reranker.topn == 15
+
+    def test_rerank_field_property(self):
+        """Test rerank_field property."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(query="test", rerank_field="title")
+            assert reranker.rerank_field == "title"
+
+    def test_batch_size_property(self):
+        """Test batch_size property."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test", rerank_field="content", batch_size=128
+            )
+            assert reranker.batch_size == 128
+
+    def test_rerank_empty_results(self):
+        """Test rerank with empty query_results."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(query="test", rerank_field="content")
+            results = reranker.rerank({})
+            assert results == []
+
+    def test_rerank_no_valid_documents(self):
+        """Test rerank with documents missing rerank_field."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(query="test", rerank_field="content")
+
+            # Document without the rerank_field
+            query_results = {"vector1": [Doc(id="1")]}
+            with pytest.raises(ValueError, match="No documents to rerank"):
+                reranker.rerank(query_results)
+
+    def test_rerank_skip_empty_content(self):
+        """Test rerank skips documents with empty content."""
+        mock_model = MagicMock()
+        mock_model.predict = MagicMock()
+
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(query="test", rerank_field="content")
+
+            query_results = {
+                "vector1": [
+                    Doc(id="1", fields={"content": ""}),
+                    Doc(id="2", fields={"content": "   "}),
+                ]
+            }
+            with pytest.raises(ValueError, match="No documents to rerank"):
+                reranker.rerank(query_results)
+
+    def test_rerank_success(self):
+        """Test successful rerank with mocked model."""
+        # Mock standard cross-encoder model
+        mock_model = MagicMock()
+
+        # Mock predict method to return scores
+        import numpy as np
+
+        mock_scores = np.array([0.95, 0.85, 0.75])
+        mock_model.predict.return_value = mock_scores
+        mock_model.device = "cpu"
+
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test query", topn=3, rerank_field="content"
+            )
+
+            query_results = {
+                "vector1": [
+                    Doc(id="1", score=0.8, fields={"content": "Document 1"}),
+                    Doc(id="2", score=0.7, fields={"content": "Document 2"}),
+                    Doc(id="3", score=0.6, fields={"content": "Document 3"}),
+                ]
+            }
+
+            results = reranker.rerank(query_results)
+
+            # Verify results
+            assert len(results) == 3
+            assert results[0].id == "1"
+            assert results[0].score == 0.95
+            assert results[1].id == "2"
+            assert results[1].score == 0.85
+            assert results[2].id == "3"
+            assert results[2].score == 0.75
+
+            # Verify model.predict was called correctly
+            assert mock_model.predict.called
+            call_args = mock_model.predict.call_args
+            pairs = call_args[0][0]
+            assert len(pairs) == 3
+            assert pairs[0] == ["test query", "Document 1"]
+            assert pairs[1] == ["test query", "Document 2"]
+            assert pairs[2] == ["test query", "Document 3"]
+            assert call_args[1]["batch_size"] == 32
+            assert call_args[1]["show_progress_bar"] is False
+
+    def test_rerank_with_topn_limit(self):
+        """Test rerank respects topn limit."""
+        mock_model = MagicMock()
+
+        import numpy as np
+
+        mock_scores = np.array([0.9, 0.8, 0.7, 0.6, 0.5])
+        mock_model.predict.return_value = mock_scores
+
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test", topn=2, rerank_field="content"
+            )
+
+            query_results = {
+                "vector1": [
+                    Doc(id="1", fields={"content": "Doc 1"}),
+                    Doc(id="2", fields={"content": "Doc 2"}),
+                    Doc(id="3", fields={"content": "Doc 3"}),
+                    Doc(id="4", fields={"content": "Doc 4"}),
+                    Doc(id="5", fields={"content": "Doc 5"}),
+                ]
+            }
+
+            results = reranker.rerank(query_results)
+
+            # Should only return top 2
+            assert len(results) == 2
+            assert results[0].id == "1"
+            assert results[0].score == 0.9
+            assert results[1].id == "2"
+            assert results[1].score == 0.8
+
+    def test_rerank_deduplicate_documents(self):
+        """Test rerank deduplicates documents across multiple vectors."""
+        mock_model = MagicMock()
+
+        import numpy as np
+
+        mock_scores = np.array([0.95, 0.85])
+        mock_model.predict.return_value = mock_scores
+
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test", topn=5, rerank_field="content"
+            )
+
+            # Same document in multiple vector results
+            doc1 = Doc(id="1", fields={"content": "Document 1"})
+            doc2 = Doc(id="2", fields={"content": "Document 2"})
+
+            query_results = {
+                "vector1": [doc1, doc2],
+                "vector2": [doc1],  # doc1 appears in both
+            }
+
+            results = reranker.rerank(query_results)
+
+            # Should only process each document once
+            assert len(results) == 2
+            assert mock_model.predict.call_count == 1
+
+            call_args = mock_model.predict.call_args
+            pairs = call_args[0][0]
+            assert len(pairs) == 2  # Only 2 unique documents
+
+    def test_rerank_sorting(self):
+        """Test rerank sorts documents by score in descending order."""
+        mock_model = MagicMock()
+
+        import numpy as np
+
+        # Return scores in non-sorted order
+        mock_scores = np.array([0.6, 0.9, 0.7])
+        mock_model.predict.return_value = mock_scores
+
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test", topn=3, rerank_field="content"
+            )
+
+            query_results = {
+                "vector1": [
+                    Doc(id="1", fields={"content": "Doc 1"}),
+                    Doc(id="2", fields={"content": "Doc 2"}),
+                    Doc(id="3", fields={"content": "Doc 3"}),
+                ]
+            }
+
+            results = reranker.rerank(query_results)
+
+            # Should be sorted by score (descending)
+            assert len(results) == 3
+            assert results[0].id == "2"  # score 0.9
+            assert results[0].score == 0.9
+            assert results[1].id == "3"  # score 0.7
+            assert results[1].score == 0.7
+            assert results[2].id == "1"  # score 0.6
+            assert results[2].score == 0.6
+
+    def test_rerank_model_error(self):
+        """Test rerank handles model prediction errors."""
+        mock_model = MagicMock()
+
+        # Mock predict to raise exception
+        mock_model.predict.side_effect = Exception("Model inference error")
+
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(query="test", rerank_field="content")
+
+            query_results = {"vector1": [Doc(id="1", fields={"content": "Document 1"})]}
+
+            with pytest.raises(RuntimeError, match="Failed to compute rerank scores"):
+                reranker.rerank(query_results)
+
+    def test_rerank_with_custom_batch_size(self):
+        """Test rerank uses custom batch_size."""
+        mock_model = MagicMock()
+
+        import numpy as np
+
+        mock_scores = np.array([0.9, 0.8])
+        mock_model.predict.return_value = mock_scores
+
+        # Mock sentence_transformers module
+        mock_st = MagicMock()
+        mock_st.CrossEncoder.return_value = mock_model
+
+        with patch(
+            "zvec.extension.sentence_transformer_rerank_function.require_module",
+            return_value=mock_st,
+        ):
+            reranker = DefaultLocalReRanker(
+                query="test", rerank_field="content", batch_size=64
+            )
+
+            query_results = {
+                "vector1": [
+                    Doc(id="1", fields={"content": "Doc 1"}),
+                    Doc(id="2", fields={"content": "Doc 2"}),
+                ]
+            }
+
+            reranker.rerank(query_results)
+
+            # Verify batch_size is passed to predict
+            call_args = mock_model.predict.call_args
+            assert call_args[1]["batch_size"] == 64
+
+    @pytest.mark.skipif(
+        not RUN_INTEGRATION_TESTS,
+        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
+    )
+    def test_real_sentence_transformer_rerank(self):
+        """Integration test with real SentenceTransformer cross-encoder model.
+
+        To run this test, set environment variable:
+            export ZVEC_RUN_INTEGRATION_TESTS=1
+
+        Note: This test requires sentence-transformers package and will
+        download the MS MARCO MiniLM model (~80MB) on first run.
+        """
+        # Create reranker with real model (using default lightweight model)
+        reranker = DefaultLocalReRanker(
+            query="What is machine learning?",
+            topn=3,
+            rerank_field="content",
+        )
+
+        # Prepare test documents
+        query_results = {
+            "vector1": [
+                Doc(
+                    id="1",
+                    score=0.8,
+                    fields={
+                        "content": "Machine learning is a subset of artificial intelligence that focuses on building systems that can learn from data."
+                    },
+                ),
+                Doc(
+                    id="2",
+                    score=0.7,
+                    fields={
+                        "content": "The weather is nice today with clear skies and sunshine."
+                    },
+                ),
+                Doc(
+                    id="3",
+                    score=0.75,
+                    fields={
+                        "content": "Deep learning is a specialized branch of machine learning using neural networks with multiple layers."
+                    },
+                ),
+            ],
+            "vector2": [
+                Doc(
+                    id="4",
+                    score=0.6,
+                    fields={
+                        "content": "Python is a popular programming language for data science and machine learning applications."
+                    },
+                ),
+                Doc(
+                    id="5",
+                    score=0.65,
+                    fields={
+                        "content": "A recipe for chocolate cake includes flour, sugar, eggs, and cocoa powder."
+                    },
+                ),
+            ],
+        }
+
+        # Call real model
+        results = reranker.rerank(query_results)
+
+        # Verify results
+        assert len(results) <= 3, "Should return at most topn documents"
+        assert len(results) > 0, "Should return at least one document"
+
+        # All results should have valid scores
+        for doc in results:
+            assert hasattr(doc, "score"), "Each document should have a score"
+            assert isinstance(doc.score, (int, float)), "Score should be numeric"
+
+        # Verify scores are in descending order
+        scores = [doc.score for doc in results]
+        assert scores == sorted(scores, reverse=True), (
+            "Results should be sorted by score in descending order"
+        )
+
+        # Verify relevant documents are ranked higher
+        # Documents 1, 3, and 4 are about machine learning, should rank higher
+        result_ids = [doc.id for doc in results]
+
+        # At least one of the ML-related documents should be in top results
+        ml_related_docs = {"1", "3", "4"}
+        assert any(doc_id in ml_related_docs for doc_id in result_ids[:2]), (
+            "ML-related documents should rank higher"
+        )
+
+        # Print results for manual verification (useful during development)
+        print("\nSentenceTransformer Reranking results:")
+        for i, doc in enumerate(results, 1):
+            print(f"{i}. ID={doc.id}, Score={doc.score:.4f}")
+            if doc.fields:
+                content = doc.field("content")
+                if content:
+                    print(f"   Content: {content[:80]}...")
diff --git a/python/tests/test_util.py b/python/tests/test_util.py
index bac8926a..c5a56c1b 100644
--- a/python/tests/test_util.py
+++ b/python/tests/test_util.py
@@ -87,8 +87,3 @@ def test_require_module_calls_importlib(mock_import_module):
 
     mock_import_module.assert_called_once_with("test_module")
     assert result is mock_module
-
-
-def test_require_module_with_openai():
-    with pytest.raises(ImportError) as exc_info:
-        require_module("openai")
diff --git a/python/zvec/__init__.py b/python/zvec/__init__.py
index ec35829d..1c8fdfc0 100644
--- a/python/zvec/__init__.py
+++ b/python/zvec/__init__.py
@@ -27,8 +27,27 @@
 
 from . import model as model
 
-# —— Extensions & typing ——
-from .extension import DenseEmbeddingFunction, ReRanker, RrfReRanker, WeightedReRanker
+# —— Extensions ——
+from .extension import (
+    BM25EmbeddingFunction,
+    DefaultLocalDenseEmbedding,
+    DefaultLocalReRanker,
+    DefaultLocalSparseEmbedding,
+    DenseEmbeddingFunction,
+    OpenAIDenseEmbedding,
+    OpenAIFunctionBase,
+    QwenDenseEmbedding,
+    QwenFunctionBase,
+    QwenReRanker,
+    QwenSparseEmbedding,
+    ReRanker,
+    RrfReRanker,
+    SentenceTransformerFunctionBase,
+    SparseEmbeddingFunction,
+    WeightedReRanker,
+)
+
+# —— Typing ——
 from .model import param as param
 from .model import schema as schema
 
@@ -100,10 +119,22 @@
     "HnswQueryParam",
     "IVFQueryParam",
     # Extensions
-    "ReRanker",
     "DenseEmbeddingFunction",
+    "SparseEmbeddingFunction",
+    "QwenFunctionBase",
+    "OpenAIFunctionBase",
+    "SentenceTransformerFunctionBase",
+    "ReRanker",
+    "DefaultLocalDenseEmbedding",
+    "DefaultLocalSparseEmbedding",
+    "BM25EmbeddingFunction",
+    "OpenAIDenseEmbedding",
+    "QwenDenseEmbedding",
+    "QwenSparseEmbedding",
     "RrfReRanker",
     "WeightedReRanker",
+    "DefaultLocalReRanker",
+    "QwenReRanker",
     # Typing
     "DataType",
     "MetricType",
diff --git a/python/zvec/common/constants.py b/python/zvec/common/constants.py
index 56b82fde..6a1654df 100644
--- a/python/zvec/common/constants.py
+++ b/python/zvec/common/constants.py
@@ -13,10 +13,21 @@
 # limitations under the License.
 from __future__ import annotations
 
-from typing import Optional, Union
+from typing import Optional, TypeVar, Union
 
 import numpy as np
 
+# VectorType: DenseVectorType | SparseVectorType
 DenseVectorType = Union[list[float], list[int], np.ndarray]
 SparseVectorType = dict[int, float]
 VectorType = Optional[Union[DenseVectorType, SparseVectorType]]
+
+# Embeddable: Text | Image | Audio
+TEXT = str
+IMAGE = Union[str, bytes, np.ndarray]  # file path, raw bytes, or numpy array
+AUDIO = Union[str, bytes, np.ndarray]  # file path, raw bytes, or numpy array
+
+Embeddable = Optional[Union[TEXT, IMAGE, AUDIO]]
+
+# Multimodal Embeddable
+MD = TypeVar("MD", bound=Embeddable, contravariant=True)
diff --git a/python/zvec/extension/__init__.py b/python/zvec/extension/__init__.py
index 83421b50..cc9401f8 100644
--- a/python/zvec/extension/__init__.py
+++ b/python/zvec/extension/__init__.py
@@ -13,14 +13,41 @@
 # limitations under the License.
 from __future__ import annotations
 
-from .embedding import DenseEmbeddingFunction, QwenEmbeddingFunction
-from .rerank import QwenReRanker, ReRanker, RrfReRanker, WeightedReRanker
+from .bm25_embedding_function import BM25EmbeddingFunction
+from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
+from .jina_embedding_function import JinaDenseEmbedding
+from .jina_function import JinaFunctionBase
+from .multi_vector_reranker import RrfReRanker, WeightedReRanker
+from .openai_embedding_function import OpenAIDenseEmbedding
+from .openai_function import OpenAIFunctionBase
+from .qwen_embedding_function import QwenDenseEmbedding, QwenSparseEmbedding
+from .qwen_function import QwenFunctionBase
+from .qwen_rerank_function import QwenReRanker
+from .rerank_function import RerankFunction as ReRanker
+from .sentence_transformer_embedding_function import (
+    DefaultLocalDenseEmbedding,
+    DefaultLocalSparseEmbedding,
+)
+from .sentence_transformer_function import SentenceTransformerFunctionBase
+from .sentence_transformer_rerank_function import DefaultLocalReRanker
 
 __all__ = [
+    "BM25EmbeddingFunction",
+    "DefaultLocalDenseEmbedding",
+    "DefaultLocalReRanker",
+    "DefaultLocalSparseEmbedding",
     "DenseEmbeddingFunction",
-    "QwenEmbeddingFunction",
+    "JinaDenseEmbedding",
+    "JinaFunctionBase",
+    "OpenAIDenseEmbedding",
+    "OpenAIFunctionBase",
+    "QwenDenseEmbedding",
+    "QwenFunctionBase",
     "QwenReRanker",
+    "QwenSparseEmbedding",
     "ReRanker",
     "RrfReRanker",
+    "SentenceTransformerFunctionBase",
+    "SparseEmbeddingFunction",
     "WeightedReRanker",
 ]
diff --git a/python/zvec/extension/bm25_embedding_function.py b/python/zvec/extension/bm25_embedding_function.py
new file mode 100644
index 00000000..51ab5ac5
--- /dev/null
+++ b/python/zvec/extension/bm25_embedding_function.py
@@ -0,0 +1,375 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Literal, Optional
+
+from ..common.constants import TEXT, SparseVectorType
+from ..tool import require_module
+from .embedding_function import SparseEmbeddingFunction
+
+
+class BM25EmbeddingFunction(SparseEmbeddingFunction[TEXT]):
+    """BM25-based sparse embedding function using DashText SDK.
+
+    This class provides text-to-sparse-vector embedding capabilities using
+    the DashText library with BM25 algorithm. BM25 (Best Matching 25) is a
+    probabilistic retrieval function used for lexical search and document
+    ranking based on term frequency and inverse document frequency.
+
+    BM25 generates sparse vectors where each dimension corresponds to a term in
+    the vocabulary, and the value represents the BM25 score for that term. It's
+    particularly effective for:
+
+    - Lexical search and keyword matching
+    - Document ranking and information retrieval
+    - Combining with dense embeddings for hybrid search
+    - Traditional IR tasks where exact term matching is important
+
+    This implementation uses DashText's SparseVectorEncoder, which provides
+    efficient BM25 computation for Chinese and English text using either a
+    built-in encoder or custom corpus training.
+
+    Args:
+        corpus (Optional[list[str]], optional): List of documents to train the
+            BM25 encoder. If provided, creates a custom encoder trained on this
+            corpus for better domain-specific accuracy. If ``None``, uses the
+            built-in encoder. Defaults to ``None``.
+        encoding_type (Literal["query", "document"], optional): Encoding mode
+            for text processing. Use ``"query"`` for search queries (default) and
+            ``"document"`` for document indexing. This distinction optimizes the
+            BM25 scoring for asymmetric retrieval tasks. Defaults to ``"query"``.
+        language (Literal["zh", "en"], optional): Language for built-in encoder.
+            Only used when corpus is None. ``"zh"`` for Chinese (trained on Chinese
+            Wikipedia), ``"en"`` for English. Defaults to ``"zh"``.
+        b (float, optional): Document length normalization parameter for BM25.
+            Range [0, 1]. 0 means no normalization, 1 means full normalization.
+            Only used with custom corpus. Defaults to ``0.75``.
+        k1 (float, optional): Term frequency saturation parameter for BM25.
+            Higher values give more weight to term frequency. Only used with
+            custom corpus. Defaults to ``1.2``.
+        **kwargs: Additional parameters for DashText encoder customization.
+
+    Attributes:
+        corpus_size (int): Number of documents in the training corpus (0 if using built-in encoder).
+        encoding_type (str): The encoding type being used ("query" or "document").
+        language (str): The language of the built-in encoder ("zh" or "en").
+
+    Raises:
+        ValueError: If corpus is provided but empty or contains non-string elements.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If DashText encoder initialization or training fails.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``dashtext`` package: ``pip install dashtext``
+        - Two encoder options available:
+
+          1. **Built-in encoder** (no corpus needed): Pre-trained models for
+             Chinese (zh) and English (en), good generalization, works out-of-the-box
+          2. **Custom encoder** (corpus required): Better accuracy for domain-specific
+             terminology, requires training on your full corpus with BM25 parameters
+
+        - Encoding types:
+
+          * ``encoding_type="query"``: Optimized for search queries (shorter text)
+          * ``encoding_type="document"``: Optimized for document indexing (longer text)
+
+        - BM25 parameters (b, k1) only apply to custom encoder training
+        - Output is sorted by indices (vocabulary term IDs) for consistency
+        - Results are cached (LRU cache, maxsize=10) to reduce computation
+        - No API key or network connectivity required (local computation)
+
+    Examples:
+        >>> # Option 1: Using built-in encoder for Chinese (no corpus needed)
+        >>> from zvec.extension import BM25EmbeddingFunction
+        >>>
+        >>> # For query encoding (Chinese)
+        >>> bm25_query_zh = BM25EmbeddingFunction(language="zh", encoding_type="query")
+        >>> query_vec = bm25_query_zh.embed("什么是机器学习")
+        >>> isinstance(query_vec, dict)
+        True
+        >>> # query_vec: {1169440797: 0.29, 2045788977: 0.70, ...}
+
+        >>> # For document encoding (Chinese)
+        >>> bm25_doc_zh = BM25EmbeddingFunction(language="zh", encoding_type="document")
+        >>> doc_vec = bm25_doc_zh.embed("机器学习是人工智能的一个重要分支...")
+        >>> isinstance(doc_vec, dict)
+        True
+
+        >>> # Using built-in encoder for English
+        >>> bm25_query_en = BM25EmbeddingFunction(language="en", encoding_type="query")
+        >>> query_vec_en = bm25_query_en.embed("what is vector search service")
+        >>> isinstance(query_vec_en, dict)
+        True
+
+        >>> # Option 2: Using custom corpus for domain-specific accuracy
+        >>> corpus = [
+        ...     "机器学习是人工智能的一个重要分支",
+        ...     "深度学习使用多层神经网络进行特征提取",
+        ...     "自然语言处理技术用于理解和生成人类语言"
+        ... ]
+        >>> bm25_custom = BM25EmbeddingFunction(
+        ...     corpus=corpus,
+        ...     encoding_type="query",
+        ...     b=0.75,
+        ...     k1=1.2
+        ... )
+        >>> custom_vec = bm25_custom.embed("机器学习算法")
+        >>> isinstance(custom_vec, dict)
+        True
+
+        >>> # Hybrid search: combining with dense embeddings
+        >>> from zvec.extension import DefaultLocalDenseEmbedding
+        >>> dense_emb = DefaultLocalDenseEmbedding()
+        >>> bm25_emb = BM25EmbeddingFunction(language="zh", encoding_type="query")
+        >>>
+        >>> query = "machine learning algorithms"
+        >>> dense_vec = dense_emb.embed(query)  # Semantic similarity
+        >>> sparse_vec = bm25_emb.embed(query)  # Lexical matching
+        >>> # Combine scores for hybrid retrieval
+
+        >>> # Callable interface
+        >>> sparse_vec = bm25_query_zh("information retrieval")
+        >>> isinstance(sparse_vec, dict)
+        True
+
+        >>> # Error handling
+        >>> try:
+        ...     bm25_query_zh.embed("")  # Empty query
+        ... except ValueError as e:
+        ...     print(f"Error: {e}")
+        Error: Input text cannot be empty or whitespace only
+
+    See Also:
+        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
+        - ``DefaultLocalSparseEmbedding``: SPLADE-based sparse embedding
+        - ``QwenSparseEmbedding``: API-based sparse embedding using Qwen
+        - ``DefaultLocalDenseEmbedding``: Dense embedding for semantic search
+
+    References:
+        - DashText Documentation: https://help.aliyun.com/zh/document_detail/2546039.html
+        - DashText PyPI: https://pypi.org/project/dashtext/
+        - BM25 Algorithm: Robertson & Zaragoza (2009)
+    """
+
+    def __init__(
+        self,
+        corpus: Optional[list[str]] = None,
+        encoding_type: Literal["query", "document"] = "query",
+        language: Literal["zh", "en"] = "zh",
+        b: float = 0.75,
+        k1: float = 1.2,
+        **kwargs,
+    ):
+        """Initialize the BM25 embedding function.
+
+        Args:
+            corpus (Optional[list[str]]): Optional corpus for training custom encoder.
+                If None, uses built-in encoder. Defaults to None.
+            encoding_type (Literal["query", "document"]): Text encoding mode.
+                Use "query" for search queries, "document" for indexing.
+                Defaults to "query".
+            language (Literal["zh", "en"]): Language for built-in encoder.
+                "zh" for Chinese, "en" for English. Defaults to "zh".
+            b (float): Document length normalization for BM25 [0, 1].
+                Only used with custom corpus. Defaults to 0.75.
+            k1 (float): Term frequency saturation for BM25.
+                Only used with custom corpus. Defaults to 1.2.
+            **kwargs: Additional DashText encoder parameters.
+
+        Raises:
+            ValueError: If corpus is provided but empty or invalid.
+            ImportError: If dashtext package is not installed.
+            RuntimeError: If encoder initialization or training fails.
+        """
+        # Validate corpus if provided
+        if corpus is not None:
+            if not corpus or not isinstance(corpus, list):
+                raise ValueError("Corpus must be a non-empty list of strings")
+
+            if not all(isinstance(doc, str) for doc in corpus):
+                raise ValueError("All corpus documents must be strings")
+
+        # Import dashtext
+        self._dashtext = require_module("dashtext")
+
+        self._corpus = corpus
+        self._encoding_type = encoding_type
+        self._language = language
+        self._b = b
+        self._k1 = k1
+        self._extra_params = kwargs
+
+        # Initialize the BM25 encoder
+        self._build_encoder()
+
+    def _build_encoder(self):
+        """Build the BM25 sparse vector encoder.
+
+        Creates either a built-in encoder (pre-trained) or a custom encoder
+        trained on the provided corpus.
+
+        Raises:
+            RuntimeError: If encoder initialization or training fails.
+            ImportError: If dashtext package is not installed.
+        """
+        try:
+            if self._corpus is None:
+                # Use built-in encoder (pre-trained on Wikipedia)
+                # language: 'zh' for Chinese, 'en' for English
+                self._encoder = self._dashtext.SparseVectorEncoder.default(
+                    name=self._language
+                )
+            else:
+                # Create custom encoder with BM25 parameters
+                self._encoder = self._dashtext.SparseVectorEncoder(
+                    b=self._b, k1=self._k1, **self._extra_params
+                )
+
+                # Train encoder with the corpus
+                self._encoder.train(self._corpus)
+
+        except ImportError as e:
+            raise ImportError(
+                "dashtext package is required for BM25EmbeddingFunction. "
+                "Install it with: pip install dashtext"
+            ) from e
+        except Exception as e:
+            if isinstance(e, (ValueError, RuntimeError)):
+                raise
+            raise RuntimeError(f"Failed to build BM25 encoder: {e!s}") from e
+
+    @property
+    def corpus_size(self) -> int:
+        """int: Number of documents in the training corpus (0 if using built-in encoder)."""
+        return len(self._corpus) if self._corpus is not None else 0
+
+    @property
+    def encoding_type(self) -> str:
+        """str: The encoding type being used ("query" or "document")."""
+        return self._encoding_type
+
+    @property
+    def language(self) -> str:
+        """str: The language of the built-in encoder ("zh" or "en")."""
+        return self._language
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for DashText encoder customization."""
+        return self._extra_params
+
+    def __call__(self, input: TEXT) -> SparseVectorType:
+        """Make the embedding function callable.
+
+        Args:
+            input (TEXT): Input text to embed.
+
+        Returns:
+            SparseVectorType: Sparse vector as dictionary.
+        """
+        return self.embed(input)
+
+    @lru_cache(maxsize=10)
+    def embed(self, input: TEXT) -> SparseVectorType:
+        """Generate BM25 sparse embedding for the input text.
+
+        This method computes BM25 scores for the input text using DashText's
+        SparseVectorEncoder. The encoding behavior depends on the encoding_type:
+
+        - ``encoding_type="query"``: Uses ``encode_queries()`` for search queries
+        - ``encoding_type="document"``: Uses ``encode_documents()`` for documents
+
+        The result is a sparse vector where keys are term indices in the
+        vocabulary and values are BM25 scores.
+
+        Args:
+            input (TEXT): Input text string to embed. Must be non-empty after
+                stripping whitespace.
+
+        Returns:
+            SparseVectorType: A dictionary mapping vocabulary term index to BM25 score.
+                Only non-zero scores are included. The dictionary is sorted by indices
+                (keys) in ascending order for consistent output.
+                Example: ``{1169440797: 0.29, 2045788977: 0.70, ...}``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty or whitespace-only.
+            RuntimeError: If BM25 encoding fails.
+
+        Examples:
+            >>> bm25 = BM25EmbeddingFunction(language="zh", encoding_type="query")
+            >>> sparse_vec = bm25.embed("query text")
+            >>> isinstance(sparse_vec, dict)
+            True
+            >>> all(isinstance(k, int) and isinstance(v, float) for k, v in sparse_vec.items())
+            True
+
+            >>> # Verify sorted output
+            >>> keys = list(sparse_vec.keys())
+            >>> keys == sorted(keys)
+            True
+
+            >>> # Error: empty input
+            >>> bm25.embed("   ")
+            ValueError: Input text cannot be empty or whitespace only
+
+            >>> # Error: non-string input
+            >>> bm25.embed(123)
+            TypeError: Expected 'input' to be str, got int
+
+        Note:
+            - BM25 scores are relative to the vocabulary statistics
+            - Output dictionary is always sorted by indices for consistency
+            - Terms not in the vocabulary will have zero scores (not included)
+            - This method is cached (maxsize=10) for performance
+            - DashText automatically handles Chinese/English text segmentation
+        """
+        if not isinstance(input, str):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        try:
+            # Encode based on encoding_type
+            if self._encoding_type == "query":
+                sparse_vector = self._encoder.encode_queries(input)
+            else:  # encoding_type == "document"
+                sparse_vector = self._encoder.encode_documents(input)
+
+            # DashText returns dict with int/long keys and float values
+            # Convert to standard format: {int: float}
+            sparse_dict: dict[int, float] = {}
+            for key, value in sparse_vector.items():
+                try:
+                    idx = int(key)
+                    val = float(value)
+                    if val > 0:
+                        sparse_dict[idx] = val
+                except (ValueError, TypeError):
+                    # Skip invalid entries
+                    continue
+
+            # Sort by indices (keys) to ensure consistent ordering
+            return dict(sorted(sparse_dict.items()))
+
+        except Exception as e:
+            if isinstance(e, (TypeError, ValueError)):
+                raise
+            raise RuntimeError(f"Failed to generate BM25 embedding: {e!s}") from e
diff --git a/python/zvec/extension/embedding.py b/python/zvec/extension/embedding.py
deleted file mode 100644
index 1bbb0969..00000000
--- a/python/zvec/extension/embedding.py
+++ /dev/null
@@ -1,188 +0,0 @@
-# Copyright 2025-present the zvec project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import annotations
-
-import os
-from abc import ABC, abstractmethod
-from functools import lru_cache
-from http import HTTPStatus
-from typing import Optional, Union
-
-from ..tool import require_module
-from ..typing import DataType
-
-
-class DenseEmbeddingFunction(ABC):
-    """Abstract base class for dense vector embedding functions.
-
-    Dense embedding functions map text to fixed-length real-valued vectors.
-    Subclasses must implement the ``embed()`` method.
-
-    Args:
-        dimension (int): Dimensionality of the output embedding vector.
-        data_type (DataType, optional): Numeric type of the embedding.
-            Defaults to ``DataType.VECTOR_FP32``.
-
-    Note:
-        This class is callable: ``embedding_func("text")`` is equivalent to
-        ``embedding_func.embed("text")``.
-    """
-
-    def __init__(self, dimension: int, data_type: DataType = DataType.VECTOR_FP32):
-        self._dimension = dimension
-        self._data_type = data_type
-
-    @property
-    def dimension(self) -> int:
-        """int: The expected dimensionality of the embedding vector."""
-        return self._dimension
-
-    @property
-    def data_type(self) -> DataType:
-        """DataType: The numeric data type of the embedding (e.g., VECTOR_FP32)."""
-        return self._data_type
-
-    @abstractmethod
-    def embed(self, text: str) -> list[Union[int, float]]:
-        """Generate a dense embedding vector for the input text.
-
-        Args:
-            text (str): Input text to embed.
-
-        Returns:
-            list[Union[int, float]]: A list of numbers representing the embedding.
-                Length must equal ``self.dimension``.
-        """
-        raise NotImplementedError
-
-    def __call__(self, text: str) -> list[Union[int, float]]:
-        return self.embed(text)
-
-
-class SparseEmbeddingFunction(ABC):
-    """Abstract base class for sparse vector embedding functions.
-
-    Sparse embedding functions map text to a dictionary of {index: weight},
-    where only non-zero dimensions are stored.
-
-    Note:
-        Subclasses must implement the ``embed()`` method.
-    """
-
-    @abstractmethod
-    def embed(self, text: str) -> dict[int, float]:
-        """Generate a sparse embedding for the input text.
-        Args:
-            text (str): Input text to embed.
-
-        Returns:
-            dict[int, float]: Mapping from dimension index to non-zero weight.
-        """
-        raise NotImplementedError
-
-
-class QwenEmbeddingFunction(DenseEmbeddingFunction):
-    """Dense embedding function using Qwen (DashScope) Text Embedding API.
-
-    This implementation uses the DashScope service to generate embeddings
-    via Qwen's text embedding models (e.g., ``text-embedding-v4``).
-
-    Args:
-        dimension (int): Desired embedding dimension (e.g., 1024).
-        model (str, optional): DashScope embedding model name.
-            Defaults to ``"text-embedding-v4"``.
-        api_key (Optional[str], optional): DashScope API key. If not provided,
-            reads from ``DASHSCOPE_API_KEY`` environment variable.
-
-    Raises:
-        ValueError: If API key is missing or input text is invalid.
-
-    Note:
-        Requires the ``dashscope`` Python package.
-        Embedding results are cached using ``functools.lru_cache`` (maxsize=10).
-    """
-
-    def __init__(
-        self,
-        dimension: int,
-        model: str = "text-embedding-v4",
-        api_key: Optional[str] = None,
-    ):
-        super().__init__(dimension, DataType.VECTOR_FP32)
-        self._model = model
-        self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY")
-        if not self._api_key:
-            raise ValueError("DashScope API key is required")
-
-    @property
-    def model(self) -> str:
-        """str: The DashScope embedding model name in use."""
-        return self._model
-
-    def _connection(self):
-        dashscope = require_module("dashscope")
-        dashscope.api_key = self._api_key
-        return dashscope
-
-    @lru_cache(maxsize=10)
-    def embed(self, text: str) -> list[Union[int, float]]:
-        """
-        Generate embedding for a given text using Qwen (via DashScope).
-
-        Args:
-            text (str): Input text to embed. Must be non-empty and valid string.
-
-        Returns:
-            list[Union[int, float]]: The dense embedding vector.
-
-        Raises:
-            ValueError: If input is invalid or API response is malformed.
-            RuntimeError: If network or internal error occurs during API call.
-        """
-        if not isinstance(text, str):
-            raise TypeError(f"Expected 'text' to be str, got {type(text).__name__}")
-
-        text = text.strip()
-        if not text:
-            raise ValueError("Input text cannot be empty or whitespace only")
-
-        resp = self._connection().TextEmbedding.call(
-            model=self.model, input=text, dimension=self.dimension, output_type="dense"
-        )
-
-        if resp.status_code != HTTPStatus.OK:
-            error_msg = getattr(resp, "message", "Unknown error")
-            error_detail = f"Status={resp.status_code}, Message={error_msg}"
-            raise ValueError(f"QwenEmbedding failed: {error_detail}")
-
-        output = getattr(resp, "output", None)
-        if not isinstance(output, dict):
-            raise ValueError("Invalid response: missing or malformed 'output' field")
-
-        embeddings = output.get("embeddings")
-        if not isinstance(embeddings, list):
-            raise ValueError(
-                "Invalid response: 'embeddings' field is missing or not a list"
-            )
-
-        if len(embeddings) != 1:
-            raise ValueError(
-                f"Expected 1 embedding, got {len(embeddings)}. Response: {resp}"
-            )
-
-        first_emb = embeddings[0]
-        if not isinstance(first_emb, dict):
-            raise ValueError("Invalid response: embedding item is not a dictionary")
-
-        return list(first_emb.get("embedding"))
diff --git a/python/zvec/extension/embedding_function.py b/python/zvec/extension/embedding_function.py
new file mode 100644
index 00000000..a421f1ec
--- /dev/null
+++ b/python/zvec/extension/embedding_function.py
@@ -0,0 +1,147 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from abc import abstractmethod
+from typing import Protocol, runtime_checkable
+
+from ..common.constants import MD, DenseVectorType, SparseVectorType
+
+
+@runtime_checkable
+class DenseEmbeddingFunction(Protocol[MD]):
+    """Protocol for dense vector embedding functions.
+
+    Dense embedding functions map multimodal input (text, image, or audio) to
+    fixed-length real-valued vectors. This is a Protocol class that defines
+    the interface - implementations should provide their own initialization
+    and properties.
+
+    Type Parameters:
+        MD: The type of input data (bound to Embeddable: TEXT, IMAGE, or AUDIO).
+
+    Note:
+        - This is a Protocol class - it only defines the ``embed()`` interface.
+        - Implementations are free to define their own ``__init__``, properties,
+          and additional methods as needed.
+        - The ``embed()`` method is the only required interface.
+
+    Examples:
+        >>> # Custom text embedding implementation
+        >>> class MyTextEmbedding:
+        ...     def __init__(self, dimension: int, model_name: str):
+        ...         self.dimension = dimension
+        ...         self.model = load_model(model_name)
+        ...
+        ...     def embed(self, input: str) -> list[float]:
+        ...         return self.model.encode(input).tolist()
+
+        >>> # Custom image embedding implementation
+        >>> class MyImageEmbedding:
+        ...     def __init__(self, dimension: int = 512):
+        ...         self.dimension = dimension
+        ...         self.model = load_image_model()
+        ...
+        ...     def embed(self, input: Union[str, bytes, np.ndarray]) -> list[float]:
+        ...         if isinstance(input, str):
+        ...             image = load_image_from_path(input)
+        ...         else:
+        ...             image = input
+        ...         return self.model.extract_features(image).tolist()
+
+        >>> # Using built-in implementations
+        >>> from zvec.extension import QwenDenseEmbedding
+        >>> text_emb = QwenDenseEmbedding(dimension=768, api_key="sk-xxx")
+        >>> vector = text_emb.embed("Hello world")
+    """
+
+    @abstractmethod
+    def embed(self, input: MD) -> DenseVectorType:
+        """Generate a dense embedding vector for the input data.
+
+        Args:
+            input (MD): Multimodal input data to embed. Can be:
+                - TEXT (str): Text string
+                - IMAGE (str | bytes | np.ndarray): Image file path, raw bytes, or array
+                - AUDIO (str | bytes | np.ndarray): Audio file path, raw bytes, or array
+
+        Returns:
+            DenseVectorType: A dense vector representing the embedding.
+                Can be list[float], list[int], or np.ndarray.
+                Length should match the implementation's dimension.
+        """
+        ...
+
+
+@runtime_checkable
+class SparseEmbeddingFunction(Protocol[MD]):
+    """Abstract base class for sparse vector embedding functions.
+
+    Sparse embedding functions map multimodal input (text, image, or audio) to
+    a dictionary of {index: weight}, where only non-zero dimensions are stored.
+    You can inherit this class to create custom sparse embedding functions.
+
+    Type Parameters:
+        MD: The type of input data (bound to Embeddable: TEXT, IMAGE, or AUDIO).
+
+    Note:
+        Subclasses must implement the ``embed()`` method.
+
+    Examples:
+        >>> # Using built-in text sparse embedding (e.g., BM25, TF-IDF)
+        >>> sparse_emb = SomeSparseEmbedding()
+        >>> vector = sparse_emb.embed("Hello world")
+        >>> # Returns: {0: 0.5, 42: 1.2, 100: 0.8}
+
+        >>> # Custom BM25 sparse embedding function
+        >>> class MyBM25Embedding(SparseEmbeddingFunction):
+        ...     def __init__(self, vocab_size: int = 10000):
+        ...         self.vocab_size = vocab_size
+        ...         self.tokenizer = MyTokenizer()
+        ...
+        ...     def embed(self, input: str) -> dict[int, float]:
+        ...         tokens = self.tokenizer.tokenize(input)
+        ...         sparse_vector = {}
+        ...         for token_id, weight in self._calculate_bm25(tokens):
+        ...             if weight > 0:
+        ...                 sparse_vector[token_id] = weight
+        ...         return sparse_vector
+        ...
+        ...     def _calculate_bm25(self, tokens):
+        ...         # BM25 calculation logic
+        ...         pass
+
+        >>> # Custom sparse image feature extractor
+        >>> class MySparseImageEmbedding(SparseEmbeddingFunction):
+        ...     def embed(self, input: Union[str, bytes, np.ndarray]) -> dict[int, float]:
+        ...         image = self._load_image(input)
+        ...         features = self._extract_sparse_features(image)
+        ...         return {idx: val for idx, val in enumerate(features) if val != 0}
+    """
+
+    @abstractmethod
+    def embed(self, input: MD) -> SparseVectorType:
+        """Generate a sparse embedding for the input data.
+
+        Args:
+            input (MD): Multimodal input data to embed. Can be:
+                - TEXT (str): Text string
+                - IMAGE (str | bytes | np.ndarray): Image file path, raw bytes, or array
+                - AUDIO (str | bytes | np.ndarray): Audio file path, raw bytes, or array
+
+        Returns:
+            SparseVectorType: Mapping from dimension index to non-zero weight.
+                Only dimensions with non-zero values are included.
+        """
+        ...
diff --git a/python/zvec/extension/jina_embedding_function.py b/python/zvec/extension/jina_embedding_function.py
new file mode 100644
index 00000000..2f8b02aa
--- /dev/null
+++ b/python/zvec/extension/jina_embedding_function.py
@@ -0,0 +1,240 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Optional
+
+from ..common.constants import TEXT, DenseVectorType
+from .embedding_function import DenseEmbeddingFunction
+from .jina_function import JinaFunctionBase
+
+
+class JinaDenseEmbedding(JinaFunctionBase, DenseEmbeddingFunction[TEXT]):
+    """Dense text embedding function using Jina AI API.
+
+    This class provides text-to-vector embedding capabilities using Jina AI's
+    embedding models. It inherits from ``DenseEmbeddingFunction`` and implements
+    dense text embedding via the Jina Embeddings API (OpenAI-compatible).
+
+    Jina Embeddings v5 models support task-specific embedding through the
+    ``task`` parameter, which optimizes the embedding for different use cases
+    such as retrieval, text matching, or classification. They also support
+    Matryoshka Representation Learning, allowing flexible output dimensions.
+
+    Args:
+        model (str, optional): Jina embedding model identifier.
+            Defaults to ``"jina-embeddings-v5-text-nano"``. Available models:
+            - ``"jina-embeddings-v5-text-nano"``: 768 dims, 239M params, 8K context
+            - ``"jina-embeddings-v5-text-small"``: 1024 dims, 677M params, 32K context
+        dimension (Optional[int], optional): Desired output embedding dimension.
+            If ``None``, uses model's default dimension. Supports Matryoshka
+            dimensions: 32, 64, 128, 256, 512, 768 (nano) / 1024 (small).
+            Defaults to ``None``.
+        api_key (Optional[str], optional): Jina API authentication key.
+            If ``None``, reads from ``JINA_API_KEY`` environment variable.
+            Obtain your key from: https://jina.ai/api-dashboard
+        task (Optional[str], optional): Task type to optimize embeddings for.
+            Defaults to ``None``. Valid values:
+            - ``"retrieval.query"``: For search queries
+            - ``"retrieval.passage"``: For documents/passages to be searched
+            - ``"text-matching"``: For symmetric text similarity
+            - ``"classification"``: For text classification
+            - ``"separation"``: For clustering/separation tasks
+
+    Attributes:
+        dimension (int): The embedding vector dimension.
+        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.
+        model (str): The Jina model name being used.
+        task (Optional[str]): The task type for embedding optimization.
+
+    Raises:
+        ValueError: If API key is not provided and not found in environment,
+            if task is not a valid task type, or if API returns an error response.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If network error or Jina service error occurs.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``openai`` package: ``pip install openai``
+        - Jina API is OpenAI-compatible, so it uses the ``openai`` Python client
+        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
+        - For retrieval tasks, use ``"retrieval.query"`` for queries and
+          ``"retrieval.passage"`` for documents
+        - API usage requires a Jina API key from https://jina.ai/api-dashboard
+
+    Examples:
+        >>> # Basic usage with default model
+        >>> from zvec.extension import JinaDenseEmbedding
+        >>> import os
+        >>> os.environ["JINA_API_KEY"] = "jina_..."
+        >>>
+        >>> emb_func = JinaDenseEmbedding()
+        >>> vector = emb_func.embed("Hello, world!")
+        >>> len(vector)
+        768
+
+        >>> # Retrieval use case: embed queries and documents differently
+        >>> query_emb = JinaDenseEmbedding(task="retrieval.query")
+        >>> doc_emb = JinaDenseEmbedding(task="retrieval.passage")
+        >>>
+        >>> query_vector = query_emb.embed("What is machine learning?")
+        >>> doc_vector = doc_emb.embed("Machine learning is a subset of AI...")
+
+        >>> # Using larger model with custom dimension (Matryoshka)
+        >>> emb_func = JinaDenseEmbedding(
+        ...     model="jina-embeddings-v5-text-small",
+        ...     dimension=256,
+        ...     api_key="jina_...",
+        ...     task="text-matching",
+        ... )
+        >>> vector = emb_func.embed("Semantic similarity comparison")
+        >>> len(vector)
+        256
+
+        >>> # Using with zvec collection
+        >>> import zvec
+        >>> emb_func = JinaDenseEmbedding(task="retrieval.passage")
+        >>> schema = zvec.CollectionSchema(
+        ...     name="docs",
+        ...     vectors=zvec.VectorSchema(
+        ...         "embedding", zvec.DataType.VECTOR_FP32, emb_func.dimension
+        ...     ),
+        ... )
+        >>> collection = zvec.create_and_open(path="./my_docs", schema=schema)
+
+    See Also:
+        - ``DenseEmbeddingFunction``: Base class for dense embeddings
+        - ``OpenAIDenseEmbedding``: Alternative using OpenAI API
+        - ``QwenDenseEmbedding``: Alternative using Qwen/DashScope API
+        - ``DefaultLocalDenseEmbedding``: Local model without API calls
+    """
+
+    def __init__(
+        self,
+        model: str = "jina-embeddings-v5-text-nano",
+        dimension: Optional[int] = None,
+        api_key: Optional[str] = None,
+        task: Optional[str] = None,
+        **kwargs,
+    ):
+        """Initialize the Jina dense embedding function.
+
+        Args:
+            model (str): Jina model name. Defaults to "jina-embeddings-v5-text-nano".
+            dimension (Optional[int]): Target embedding dimension or None for default.
+            api_key (Optional[str]): API key or None to use environment variable.
+            task (Optional[str]): Task type for embedding optimization or None.
+            **kwargs: Additional parameters for API calls.
+
+        Raises:
+            ValueError: If API key is not provided and not in environment,
+                or if task is not a valid task type.
+        """
+        # Initialize base class for API connection
+        JinaFunctionBase.__init__(self, model=model, api_key=api_key, task=task)
+
+        # Store dimension configuration
+        self._custom_dimension = dimension
+
+        # Determine actual dimension
+        if dimension is None:
+            self._dimension = self._MODEL_DIMENSIONS.get(model, 768)
+        else:
+            self._dimension = dimension
+
+        # Store extra attributes
+        self._extra_params = kwargs
+
+    @property
+    def dimension(self) -> int:
+        """int: The expected dimensionality of the embedding vector."""
+        return self._dimension
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for model-specific customization."""
+        return self._extra_params
+
+    def __call__(self, input: TEXT) -> DenseVectorType:
+        """Make the embedding function callable."""
+        return self.embed(input)
+
+    @lru_cache(maxsize=10)
+    def embed(self, input: TEXT) -> DenseVectorType:
+        """Generate dense embedding vector for the input text.
+
+        This method calls the Jina Embeddings API to convert input text
+        into a dense vector representation. Results are cached to improve
+        performance for repeated inputs.
+
+        Args:
+            input (TEXT): Input text string to embed. Must be non-empty after
+                stripping whitespace. Maximum length depends on model:
+                8192 tokens for v5-nano, 32768 tokens for v5-small.
+
+        Returns:
+            DenseVectorType: A list of floats representing the embedding vector.
+                Length equals ``self.dimension``. Example:
+                ``[0.123, -0.456, 0.789, ...]``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty/whitespace-only, or if the API returns
+                an error or malformed response.
+            RuntimeError: If network connectivity issues or Jina service
+                errors occur.
+
+        Examples:
+            >>> emb = JinaDenseEmbedding(task="retrieval.query")
+            >>> vector = emb.embed("What is deep learning?")
+            >>> len(vector)
+            768
+            >>> isinstance(vector[0], float)
+            True
+
+            >>> # Error: empty input
+            >>> emb.embed("   ")
+            ValueError: Input text cannot be empty or whitespace only
+
+            >>> # Error: non-string input
+            >>> emb.embed(123)
+            TypeError: Expected 'input' to be str, got int
+
+        Note:
+            - This method is cached (maxsize=10). Identical inputs return cached results.
+            - The cache is based on exact string match (case-sensitive).
+            - Task type affects embedding optimization but not caching behavior.
+        """
+        if not isinstance(input, TEXT):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        # Call API
+        embedding_vector = self._call_text_embedding_api(
+            input=input,
+            dimension=self._custom_dimension,
+        )
+
+        # Verify dimension
+        if len(embedding_vector) != self.dimension:
+            raise ValueError(
+                f"Dimension mismatch: expected {self.dimension}, "
+                f"got {len(embedding_vector)}"
+            )
+
+        return embedding_vector
diff --git a/python/zvec/extension/jina_function.py b/python/zvec/extension/jina_function.py
new file mode 100644
index 00000000..f20b679c
--- /dev/null
+++ b/python/zvec/extension/jina_function.py
@@ -0,0 +1,182 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import os
+from typing import ClassVar, Optional
+
+from ..common.constants import TEXT
+from ..tool import require_module
+
+
+class JinaFunctionBase:
+    """Base class for Jina AI functions.
+
+    This base class provides common functionality for calling Jina AI APIs
+    and handling responses. It supports embeddings (dense) operations via
+    the OpenAI-compatible Jina Embeddings API.
+
+    This class is not meant to be used directly. Use concrete implementations:
+    - ``JinaDenseEmbedding`` for dense embeddings
+
+    Args:
+        model (str): Jina embedding model identifier.
+        api_key (Optional[str]): Jina API authentication key.
+        task (Optional[str]): Task type for the embedding model.
+
+    Note:
+        - This is an internal base class for code reuse across Jina features
+        - Subclasses should inherit from appropriate Protocol
+        - Provides unified API connection and response handling
+        - Jina API is OpenAI-compatible, using the ``openai`` Python client
+    """
+
+    _BASE_URL: ClassVar[str] = "https://api.jina.ai/v1"
+
+    # Model default dimensions
+    _MODEL_DIMENSIONS: ClassVar[dict[str, int]] = {
+        "jina-embeddings-v5-text-nano": 768,
+        "jina-embeddings-v5-text-small": 1024,
+    }
+
+    # Model max tokens
+    _MODEL_MAX_TOKENS: ClassVar[dict[str, int]] = {
+        "jina-embeddings-v5-text-nano": 8192,
+        "jina-embeddings-v5-text-small": 32768,
+    }
+
+    # Valid task types
+    _VALID_TASKS: ClassVar[tuple[str, ...]] = (
+        "retrieval.query",
+        "retrieval.passage",
+        "text-matching",
+        "classification",
+        "separation",
+    )
+
+    def __init__(
+        self,
+        model: str,
+        api_key: Optional[str] = None,
+        task: Optional[str] = None,
+    ):
+        """Initialize the base Jina functionality.
+
+        Args:
+            model (str): Jina model name.
+            api_key (Optional[str]): API key or None to use environment variable.
+            task (Optional[str]): Task type for the embedding model.
+                Valid values: "retrieval.query", "retrieval.passage",
+                "text-matching", "classification", "separation".
+
+        Raises:
+            ValueError: If API key is not provided and not in environment,
+                or if task is not a valid task type.
+        """
+        self._model = model
+        self._api_key = api_key or os.environ.get("JINA_API_KEY")
+        self._task = task
+
+        if not self._api_key:
+            raise ValueError(
+                "Jina API key is required. Please provide 'api_key' parameter "
+                "or set the 'JINA_API_KEY' environment variable. "
+                "Get your key from: https://jina.ai/api-dashboard"
+            )
+
+        if task is not None and task not in self._VALID_TASKS:
+            raise ValueError(
+                f"Invalid task '{task}'. Valid tasks: {', '.join(self._VALID_TASKS)}"
+            )
+
+    @property
+    def model(self) -> str:
+        """str: The Jina model name currently in use."""
+        return self._model
+
+    @property
+    def task(self) -> Optional[str]:
+        """Optional[str]: The task type for the embedding model."""
+        return self._task
+
+    def _get_client(self):
+        """Get OpenAI-compatible client instance configured for Jina API.
+
+        Returns:
+            OpenAI: Configured OpenAI client pointing to Jina API.
+
+        Raises:
+            ImportError: If openai package is not installed.
+        """
+        openai = require_module("openai")
+        return openai.OpenAI(api_key=self._api_key, base_url=self._BASE_URL)
+
+    def _call_text_embedding_api(
+        self,
+        input: TEXT,
+        dimension: Optional[int] = None,
+    ) -> list:
+        """Call Jina Embeddings API.
+
+        Args:
+            input (TEXT): Input text to embed.
+            dimension (Optional[int]): Target dimension for Matryoshka embeddings.
+
+        Returns:
+            list: Embedding vector as list of floats.
+
+        Raises:
+            RuntimeError: If API call fails.
+            ValueError: If API returns error response.
+        """
+        try:
+            client = self._get_client()
+
+            # Prepare embedding parameters
+            params = {"model": self.model, "input": input}
+
+            # Add dimension parameter for Matryoshka support
+            if dimension is not None:
+                params["dimensions"] = dimension
+
+            # Add task parameter via extra_body
+            if self._task is not None:
+                params["extra_body"] = {"task": self._task}
+
+            # Call Jina API (OpenAI-compatible)
+            response = client.embeddings.create(**params)
+
+        except Exception as e:
+            # Check if it's an OpenAI API error
+            openai = require_module("openai")
+            if isinstance(e, (openai.APIError, openai.APIConnectionError)):
+                raise RuntimeError(f"Failed to call Jina API: {e!s}") from e
+            raise RuntimeError(f"Unexpected error during API call: {e!s}") from e
+
+        # Extract embedding from response
+        try:
+            if not response.data:
+                raise ValueError("Invalid API response: no embedding data returned")
+
+            embedding_vector = response.data[0].embedding
+
+            if not isinstance(embedding_vector, list):
+                raise ValueError(
+                    "Invalid API response: embedding is not a list of numbers"
+                )
+
+            return embedding_vector
+
+        except (AttributeError, IndexError, TypeError) as e:
+            raise ValueError(f"Failed to parse API response: {e!s}") from e
diff --git a/python/zvec/extension/multi_vector_reranker.py b/python/zvec/extension/multi_vector_reranker.py
new file mode 100644
index 00000000..ba3a2363
--- /dev/null
+++ b/python/zvec/extension/multi_vector_reranker.py
@@ -0,0 +1,174 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import heapq
+import math
+from collections import defaultdict
+from typing import Optional
+
+from ..model.doc import Doc
+from ..typing import MetricType
+from .rerank_function import RerankFunction
+
+
+class RrfReRanker(RerankFunction):
+    """Re-ranker using Reciprocal Rank Fusion (RRF) for multi-vector search.
+
+    RRF combines results from multiple vector queries without requiring relevance scores.
+    It assigns higher weight to documents that appear early in multiple result lists.
+
+    The RRF score for a document at rank ``r`` is: ``1 / (k + r + 1)``,
+    where ``k`` is the rank constant.
+
+    Note:
+        This re-ranker is specifically designed for multi-vector scenarios where
+        query results from multiple vector fields need to be combined.
+
+    Args:
+        topn (int, optional): Number of top documents to return. Defaults to 10.
+        rerank_field (Optional[str], optional): Ignored by RRF. Defaults to None.
+        rank_constant (int, optional): Smoothing constant ``k`` in RRF formula.
+            Larger values reduce the impact of early ranks. Defaults to 60.
+    """
+
+    def __init__(
+        self,
+        topn: int = 10,
+        rerank_field: Optional[str] = None,
+        rank_constant: int = 60,
+    ):
+        super().__init__(topn=topn, rerank_field=rerank_field)
+        self._rank_constant = rank_constant
+
+    @property
+    def rank_constant(self) -> int:
+        return self._rank_constant
+
+    def _rrf_score(self, rank: int) -> float:
+        return 1.0 / (self._rank_constant + rank + 1)
+
+    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
+        """Apply Reciprocal Rank Fusion to combine multiple query results.
+
+        Args:
+            query_results (dict[str, list[Doc]]): Results from one or more vector queries.
+
+        Returns:
+            list[Doc]: Re-ranked documents with RRF scores in the ``score`` field.
+        """
+        rrf_scores: dict[str, float] = defaultdict(float)
+        id_to_doc: dict[str, Doc] = {}
+
+        for _, query_result in query_results.items():
+            for rank, doc in enumerate(query_result):
+                doc_id = doc.id
+                rrf_score = self._rrf_score(rank)
+                rrf_scores[doc_id] += rrf_score
+                if doc_id not in id_to_doc:
+                    id_to_doc[doc_id] = doc
+
+        top_docs = heapq.nlargest(self.topn, rrf_scores.items(), key=lambda x: x[1])
+        results: list[Doc] = []
+        for doc_id, rrf_score in top_docs:
+            doc = id_to_doc[doc_id]
+            new_doc = doc._replace(score=rrf_score)
+            results.append(new_doc)
+        return results
+
+
+class WeightedReRanker(RerankFunction):
+    """Re-ranker that combines scores from multiple vector fields using weights.
+
+    Each vector field's relevance score is normalized based on its metric type,
+    then scaled by a user-provided weight. Final scores are summed across fields.
+
+    Note:
+        This re-ranker is specifically designed for multi-vector scenarios where
+        query results from multiple vector fields need to be combined with
+        configurable weights.
+
+    Args:
+        topn (int, optional): Number of top documents to return. Defaults to 10.
+        rerank_field (Optional[str], optional): Ignored. Defaults to None.
+        metric (MetricType, optional): Distance metric used for score normalization.
+            Defaults to ``MetricType.L2``.
+        weights (Optional[dict[str, float]], optional): Weight per vector field.
+            Fields not listed use weight 1.0. Defaults to None.
+
+    Note:
+        Supported metrics: L2, IP, COSINE. Scores are normalized to [0, 1].
+    """
+
+    def __init__(
+        self,
+        topn: int = 10,
+        rerank_field: Optional[str] = None,
+        metric: MetricType = MetricType.L2,
+        weights: Optional[dict[str, float]] = None,
+    ):
+        super().__init__(topn=topn, rerank_field=rerank_field)
+        self._weights = weights or {}
+        self._metric = metric
+
+    @property
+    def weights(self) -> dict[str, float]:
+        """dict[str, float]: Weight mapping for vector fields."""
+        return self._weights
+
+    @property
+    def metric(self) -> MetricType:
+        """MetricType: Distance metric used for score normalization."""
+        return self._metric
+
+    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
+        """Combine scores from multiple vector fields using weighted sum.
+
+        Args:
+            query_results (dict[str, list[Doc]]): Results per vector field.
+
+        Returns:
+            list[Doc]: Re-ranked documents with combined scores in ``score`` field.
+        """
+        weighted_scores: dict[str, float] = defaultdict(float)
+        id_to_doc: dict[str, Doc] = {}
+
+        for vector_name, query_result in query_results.items():
+            for _, doc in enumerate(query_result):
+                doc_id = doc.id
+                weighted_score = self._normalize_score(
+                    doc.score, self.metric
+                ) * self.weights.get(vector_name, 1.0)
+                weighted_scores[doc_id] += weighted_score
+                if doc_id not in id_to_doc:
+                    id_to_doc[doc_id] = doc
+
+        top_docs = heapq.nlargest(
+            self.topn, weighted_scores.items(), key=lambda x: x[1]
+        )
+        results: list[Doc] = []
+        for doc_id, weighted_score in top_docs:
+            doc = id_to_doc[doc_id]
+            new_doc = doc._replace(score=weighted_score)
+            results.append(new_doc)
+        return results
+
+    def _normalize_score(self, score: float, metric: MetricType) -> float:
+        if metric == MetricType.L2:
+            return 1.0 - 2 * math.atan(score) / math.pi
+        if metric == MetricType.IP:
+            return 0.5 + math.atan(score) / math.pi
+        if metric == MetricType.COSINE:
+            return 1.0 - score / 2.0
+        raise ValueError("Unsupported metric type")
diff --git a/python/zvec/extension/openai_embedding_function.py b/python/zvec/extension/openai_embedding_function.py
new file mode 100644
index 00000000..03a34ede
--- /dev/null
+++ b/python/zvec/extension/openai_embedding_function.py
@@ -0,0 +1,238 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Optional
+
+from ..common.constants import TEXT, DenseVectorType
+from .embedding_function import DenseEmbeddingFunction
+from .openai_function import OpenAIFunctionBase
+
+
+class OpenAIDenseEmbedding(OpenAIFunctionBase, DenseEmbeddingFunction[TEXT]):
+    """Dense text embedding function using OpenAI API.
+
+    This class provides text-to-vector embedding capabilities using OpenAI's
+    embedding models. It inherits from ``DenseEmbeddingFunction`` and implements
+    dense text embedding via the OpenAI API.
+
+    The implementation supports various OpenAI embedding models with different
+    dimensions and includes automatic result caching for improved performance.
+
+    Args:
+        model (str, optional): OpenAI embedding model identifier.
+            Defaults to ``"text-embedding-3-small"``. Common options:
+            - ``"text-embedding-3-small"``: 1536 dims, cost-efficient, good performance
+            - ``"text-embedding-3-large"``: 3072 dims, highest quality
+            - ``"text-embedding-ada-002"``: 1536 dims, legacy model
+        dimension (Optional[int], optional): Desired output embedding dimension.
+            If ``None``, uses model's default dimension. For text-embedding-3 models,
+            you can specify custom dimensions (e.g., 256, 512, 1024, 1536).
+            Defaults to ``None``.
+        api_key (Optional[str], optional): OpenAI API authentication key.
+            If ``None``, reads from ``OPENAI_API_KEY`` environment variable.
+            Obtain your key from: https://platform.openai.com/api-keys
+        base_url (Optional[str], optional): Custom API base URL for OpenAI-compatible
+            services. Defaults to ``None`` (uses official OpenAI endpoint).
+
+    Attributes:
+        dimension (int): The embedding vector dimension.
+        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.
+        model (str): The OpenAI model name being used.
+
+    Raises:
+        ValueError: If API key is not provided and not found in environment,
+            or if API returns an error response.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If network error or OpenAI service error occurs.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``openai`` package: ``pip install openai``
+        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
+        - Network connectivity to OpenAI API endpoints is required
+        - API usage incurs costs based on your OpenAI subscription plan
+        - Rate limits apply based on your OpenAI account tier
+
+    Examples:
+        >>> # Basic usage with default model
+        >>> from zvec.extension import OpenAIDenseEmbedding
+        >>> import os
+        >>> os.environ["OPENAI_API_KEY"] = "sk-..."
+        >>>
+        >>> emb_func = OpenAIDenseEmbedding()
+        >>> vector = emb_func.embed("Hello, world!")
+        >>> len(vector)
+        1536
+
+        >>> # Using specific model with custom dimension
+        >>> emb_func = OpenAIDenseEmbedding(
+        ...     model="text-embedding-3-large",
+        ...     dimension=1024,
+        ...     api_key="sk-..."
+        ... )
+        >>> vector = emb_func.embed("Machine learning is fascinating")
+        >>> len(vector)
+        1024
+
+        >>> # Using with custom base URL (e.g., Azure OpenAI)
+        >>> emb_func = OpenAIDenseEmbedding(
+        ...     model="text-embedding-ada-002",
+        ...     api_key="your-azure-key",
+        ...     base_url="https://your-resource.openai.azure.com/"
+        ... )
+        >>> vector = emb_func("Natural language processing")
+        >>> isinstance(vector, list)
+        True
+
+        >>> # Batch processing with caching benefit
+        >>> texts = ["First text", "Second text", "First text"]
+        >>> vectors = [emb_func.embed(text) for text in texts]
+        >>> # Third call uses cached result for "First text"
+
+        >>> # Error handling
+        >>> try:
+        ...     emb_func.embed("")  # Empty string
+        ... except ValueError as e:
+        ...     print(f"Error: {e}")
+        Error: Input text cannot be empty or whitespace only
+
+    See Also:
+        - ``DenseEmbeddingFunction``: Base class for dense embeddings
+        - ``QwenDenseEmbedding``: Alternative using Qwen/DashScope API
+        - ``DefaultDenseEmbedding``: Local model without API calls
+        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
+    """
+
+    def __init__(
+        self,
+        model: str = "text-embedding-3-small",
+        dimension: Optional[int] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        **kwargs,
+    ):
+        """Initialize the OpenAI dense embedding function.
+
+        Args:
+            model (str): OpenAI model name. Defaults to "text-embedding-3-small".
+            dimension (Optional[int]): Target embedding dimension or None for default.
+            api_key (Optional[str]): API key or None to use environment variable.
+            base_url (Optional[str]): Custom API base URL or None for default.
+            **kwargs: Additional parameters for API calls. Examples:
+                - ``encoding_format`` (str): Format of embeddings, "float" or "base64".
+                - ``user`` (str): User identifier for tracking.
+
+        Raises:
+            ValueError: If API key is not provided and not in environment.
+        """
+        # Initialize base class for API connection
+        OpenAIFunctionBase.__init__(
+            self, model=model, api_key=api_key, base_url=base_url
+        )
+
+        # Store dimension configuration
+        self._custom_dimension = dimension
+
+        # Determine actual dimension
+        if dimension is None:
+            # Use model default dimension
+            self._dimension = self._MODEL_DIMENSIONS.get(model, 1536)
+        else:
+            self._dimension = dimension
+
+        # Store dense-specific attributes
+        self._extra_params = kwargs
+
+    @property
+    def dimension(self) -> int:
+        """int: The expected dimensionality of the embedding vector."""
+        return self._dimension
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for model-specific customization."""
+        return self._extra_params
+
+    def __call__(self, input: TEXT) -> DenseVectorType:
+        """Make the embedding function callable."""
+        return self.embed(input)
+
+    @lru_cache(maxsize=10)
+    def embed(self, input: TEXT) -> DenseVectorType:
+        """Generate dense embedding vector for the input text.
+
+        This method calls the OpenAI Embeddings API to convert input text
+        into a dense vector representation. Results are cached to improve
+        performance for repeated inputs.
+
+        Args:
+            input (TEXT): Input text string to embed. Must be non-empty after
+                stripping whitespace. Maximum length is 8191 tokens for most models.
+
+        Returns:
+            DenseVectorType: A list of floats representing the embedding vector.
+                Length equals ``self.dimension``. Example:
+                ``[0.123, -0.456, 0.789, ...]``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty/whitespace-only, or if the API returns
+                an error or malformed response.
+            RuntimeError: If network connectivity issues or OpenAI service
+                errors occur.
+
+        Examples:
+            >>> emb = OpenAIDenseEmbedding()
+            >>> vector = emb.embed("Natural language processing")
+            >>> len(vector)
+            1536
+            >>> isinstance(vector[0], float)
+            True
+
+            >>> # Error: empty input
+            >>> emb.embed("   ")
+            ValueError: Input text cannot be empty or whitespace only
+
+            >>> # Error: non-string input
+            >>> emb.embed(123)
+            TypeError: Expected 'input' to be str, got int
+
+        Note:
+            - This method is cached (maxsize=10). Identical inputs return cached results.
+            - The cache is based on exact string match (case-sensitive).
+            - Consider pre-processing text (lowercasing, normalization) for better caching.
+        """
+        if not isinstance(input, TEXT):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        # Call API
+        embedding_vector = self._call_text_embedding_api(
+            input=input,
+            dimension=self._custom_dimension,
+        )
+
+        # Verify dimension
+        if len(embedding_vector) != self.dimension:
+            raise ValueError(
+                f"Dimension mismatch: expected {self.dimension}, "
+                f"got {len(embedding_vector)}"
+            )
+
+        return embedding_vector
diff --git a/python/zvec/extension/openai_function.py b/python/zvec/extension/openai_function.py
new file mode 100644
index 00000000..d3f4de2d
--- /dev/null
+++ b/python/zvec/extension/openai_function.py
@@ -0,0 +1,149 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import os
+from typing import ClassVar, Optional
+
+from ..common.constants import TEXT
+from ..tool import require_module
+
+
+class OpenAIFunctionBase:
+    """Base class for OpenAI functions.
+
+    This base class provides common functionality for calling OpenAI APIs
+    and handling responses. It supports embeddings (dense) operations.
+
+    This class is not meant to be used directly. Use concrete implementations:
+    - ``OpenAIDenseEmbedding`` for dense embeddings
+
+    Args:
+        model (str): OpenAI model identifier.
+        api_key (Optional[str]): OpenAI API authentication key.
+        base_url (Optional[str]): Custom API base URL.
+
+    Note:
+        - This is an internal base class for code reuse across OpenAI features
+        - Subclasses should inherit from appropriate Protocol
+        - Provides unified API connection and response handling
+    """
+
+    # Model default dimensions
+    _MODEL_DIMENSIONS: ClassVar[dict[str, int]] = {
+        "text-embedding-3-small": 1536,
+        "text-embedding-3-large": 3072,
+        "text-embedding-ada-002": 1536,
+    }
+
+    def __init__(
+        self,
+        model: str,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+    ):
+        """Initialize the base OpenAI functionality.
+
+        Args:
+            model (str): OpenAI model name.
+            api_key (Optional[str]): API key or None to use environment variable.
+            base_url (Optional[str]): Custom API base URL or None for default.
+
+        Raises:
+            ValueError: If API key is not provided and not in environment.
+        """
+        self._model = model
+        self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
+        self._base_url = base_url
+
+        if not self._api_key:
+            raise ValueError(
+                "OpenAI API key is required. Please provide 'api_key' parameter "
+                "or set the 'OPENAI_API_KEY' environment variable."
+            )
+
+    @property
+    def model(self) -> str:
+        """str: The OpenAI model name currently in use."""
+        return self._model
+
+    def _get_client(self):
+        """Get OpenAI client instance.
+
+        Returns:
+            OpenAI: Configured OpenAI client.
+
+        Raises:
+            ImportError: If openai package is not installed.
+        """
+        openai = require_module("openai")
+
+        if self._base_url:
+            return openai.OpenAI(api_key=self._api_key, base_url=self._base_url)
+        return openai.OpenAI(api_key=self._api_key)
+
+    def _call_text_embedding_api(
+        self,
+        input: TEXT,
+        dimension: Optional[int] = None,
+    ) -> list:
+        """Call OpenAI Embeddings API.
+
+        Args:
+            input (TEXT): Input text to embed.
+            dimension (Optional[int]): Target dimension (for models that support it).
+
+        Returns:
+            list: Embedding vector as list of floats.
+
+        Raises:
+            RuntimeError: If API call fails.
+            ValueError: If API returns error response.
+        """
+        try:
+            client = self._get_client()
+
+            # Prepare embedding parameters
+            params = {"model": self.model, "input": input}
+
+            # Add dimension parameter for models that support it
+            if dimension is not None:
+                params["dimensions"] = dimension
+
+            # Call OpenAI API
+            response = client.embeddings.create(**params)
+
+        except Exception as e:
+            # Check if it's an OpenAI API error
+            openai = require_module("openai")
+            if isinstance(e, (openai.APIError, openai.APIConnectionError)):
+                raise RuntimeError(f"Failed to call OpenAI API: {e!s}") from e
+            raise RuntimeError(f"Unexpected error during API call: {e!s}") from e
+
+        # Extract embedding from response
+        try:
+            if not response.data:
+                raise ValueError("Invalid API response: no embedding data returned")
+
+            embedding_vector = response.data[0].embedding
+
+            if not isinstance(embedding_vector, list):
+                raise ValueError(
+                    "Invalid API response: embedding is not a list of numbers"
+                )
+
+            return embedding_vector
+
+        except (AttributeError, IndexError, TypeError) as e:
+            raise ValueError(f"Failed to parse API response: {e!s}") from e
diff --git a/python/zvec/extension/qwen_embedding_function.py b/python/zvec/extension/qwen_embedding_function.py
new file mode 100644
index 00000000..7bdb69b5
--- /dev/null
+++ b/python/zvec/extension/qwen_embedding_function.py
@@ -0,0 +1,537 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Optional
+
+from ..common.constants import TEXT, DenseVectorType, SparseVectorType
+from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
+from .qwen_function import QwenFunctionBase
+
+
+class QwenDenseEmbedding(QwenFunctionBase, DenseEmbeddingFunction[TEXT]):
+    """Dense text embedding function using Qwen (DashScope) API.
+
+    This class provides text-to-vector embedding capabilities using Alibaba Cloud's
+    DashScope service and Qwen embedding models. It inherits from
+    ``DenseEmbeddingFunction`` and implements dense text embedding.
+
+    The implementation supports various Qwen embedding models with configurable
+    dimensions and includes automatic result caching for improved performance.
+
+    Args:
+        dimension (int): Desired output embedding dimension. Common values:
+            - 512: Balanced performance and accuracy
+            - 1024: Higher accuracy, larger storage
+            - 1536: Maximum accuracy for supported models
+        model (str, optional): DashScope embedding model identifier.
+            Defaults to ``"text-embedding-v4"``. Other options include:
+            - ``"text-embedding-v3"``
+            - ``"text-embedding-v2"``
+            - ``"text-embedding-v1"``
+        api_key (Optional[str], optional): DashScope API authentication key.
+            If ``None``, reads from ``DASHSCOPE_API_KEY`` environment variable.
+            Obtain your key from: https://dashscope.console.aliyun.com/
+        **kwargs: Additional DashScope API parameters. Supported options:
+            - ``text_type`` (str): Specifies the text role in retrieval tasks.
+              Options: ``"query"`` (search query) or ``"document"`` (indexed content).
+              This parameter optimizes embeddings for asymmetric search scenarios.
+
+            Reference: https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api
+
+    Attributes:
+        dimension (int): The embedding vector dimension.
+        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.
+        model (str): The DashScope model name being used.
+
+    Raises:
+        ValueError: If API key is not provided and not found in environment,
+            or if API returns an error response.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If network error or DashScope service error occurs.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``dashscope`` package: ``pip install dashscope``
+        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
+        - Network connectivity to DashScope API endpoints is required
+        - API usage may incur costs based on your DashScope subscription plan
+
+        **Parameter Guidelines:**
+
+        - Use ``text_type="query"`` for search queries and ``text_type="document"``
+          for indexed content to optimize asymmetric retrieval tasks.
+        - For detailed API specifications and parameter usage, refer to:
+          https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api
+
+    Examples:
+        >>> # Basic usage with default model
+        >>> from zvec.extension import QwenDenseEmbedding
+        >>> import os
+        >>> os.environ["DASHSCOPE_API_KEY"] = "your-api-key"
+        >>>
+        >>> emb_func = QwenDenseEmbedding(dimension=1024)
+        >>> vector = emb_func.embed("Hello, world!")
+        >>> len(vector)
+        1024
+
+        >>> # Using specific model with explicit API key
+        >>> emb_func = QwenDenseEmbedding(
+        ...     dimension=512,
+        ...     model="text-embedding-v3",
+        ...     api_key="sk-xxxxx"
+        ... )
+        >>> vector = emb_func("Machine learning is fascinating")
+        >>> isinstance(vector, list)
+        True
+
+        >>> # Using with custom parameters (text_type)
+        >>> # For search queries - optimize for query-document matching
+        >>> emb_func = QwenDenseEmbedding(
+        ...     dimension=1024,
+        ...     text_type="query"
+        ... )
+        >>> query_vector = emb_func.embed("What is machine learning?")
+        >>>
+        >>> # For document embeddings - optimize for being matched by queries
+        >>> doc_emb_func = QwenDenseEmbedding(
+        ...     dimension=1024,
+        ...     text_type="document"
+        ... )
+        >>> doc_vector = doc_emb_func.embed(
+        ...     "Machine learning is a subset of artificial intelligence..."
+        ... )
+
+        >>> # Batch processing with caching benefit
+        >>> texts = ["First text", "Second text", "First text"]
+        >>> vectors = [emb_func.embed(text) for text in texts]
+        >>> # Third call uses cached result for "First text"
+
+        >>> # Error handling
+        >>> try:
+        ...     emb_func.embed("")  # Empty string
+        ... except ValueError as e:
+        ...     print(f"Error: {e}")
+        Error: Input text cannot be empty or whitespace only
+
+    See Also:
+        - ``DenseEmbeddingFunction``: Base class for dense embeddings
+        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
+    """
+
+    def __init__(
+        self,
+        dimension: int,
+        model: str = "text-embedding-v4",
+        api_key: Optional[str] = None,
+        **kwargs,
+    ):
+        """Initialize the Qwen dense embedding function.
+
+        Args:
+            dimension (int): Target embedding dimension.
+            model (str): DashScope model name. Defaults to "text-embedding-v4".
+            api_key (Optional[str]): API key or None to use environment variable.
+            **kwargs: Additional DashScope API parameters. Supported options:
+                - ``text_type`` (str): Text role in asymmetric retrieval.
+                  * ``"query"``: Optimize for search queries (short, question-like).
+                  * ``"document"``: Optimize for indexed documents (longer content).
+                  Using appropriate text_type improves retrieval accuracy by
+                  optimizing the embedding space for query-document matching.
+
+                For detailed API documentation, see:
+                https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api
+
+        Raises:
+            ValueError: If API key is not provided and not in environment.
+        """
+        # Initialize base class for API connection
+        QwenFunctionBase.__init__(self, model=model, api_key=api_key)
+
+        # Store dense-specific attributes
+        self._dimension = dimension
+        self._extra_params = kwargs
+
+    @property
+    def dimension(self) -> int:
+        """int: The expected dimensionality of the embedding vector."""
+        return self._dimension
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for model-specific customization."""
+        return self._extra_params
+
+    def __call__(self, input: TEXT) -> DenseVectorType:
+        """Make the embedding function callable."""
+        return self.embed(input)
+
+    @lru_cache(maxsize=10)
+    def embed(self, input: TEXT) -> DenseVectorType:
+        """Generate dense embedding vector for the input text.
+
+        This method calls the DashScope TextEmbedding API to convert input text
+        into a dense vector representation. Results are cached to improve
+        performance for repeated inputs.
+
+        Args:
+            input (TEXT): Input text string to embed. Must be non-empty after
+                stripping whitespace. Maximum length depends on the model used
+                (typically 2048-8192 tokens).
+
+        Returns:
+            DenseVectorType: A list of floats representing the embedding vector.
+                Length equals ``self.dimension``. Example:
+                ``[0.123, -0.456, 0.789, ...]``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty/whitespace-only, or if the API returns
+                an error or malformed response.
+            RuntimeError: If network connectivity issues or DashScope service
+                errors occur.
+
+        Examples:
+            >>> emb = QwenDenseEmbedding(dimension=1024)
+            >>> vector = emb.embed("Natural language processing")
+            >>> len(vector)
+            1024
+            >>> isinstance(vector[0], float)
+            True
+
+            >>> # Error: empty input
+            >>> emb.embed("   ")
+            ValueError: Input text cannot be empty or whitespace only
+
+            >>> # Error: non-string input
+            >>> emb.embed(123)
+            TypeError: Expected 'input' to be str, got int
+
+        Note:
+            - This method is cached (maxsize=10). Identical inputs return cached results.
+            - The cache is based on exact string match (case-sensitive).
+            - Consider pre-processing text (lowercasing, normalization) for better caching.
+        """
+        if not isinstance(input, TEXT):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        # Call API with dense output type
+        output = self._call_text_embedding_api(
+            input=input,
+            dimension=self.dimension,
+            output_type="dense",
+            text_type=self.extra_params.get("text_type"),
+        )
+
+        embeddings = output.get("embeddings")
+        if not isinstance(embeddings, list):
+            raise ValueError(
+                "Invalid API response: 'embeddings' field is missing or not a list"
+            )
+
+        if len(embeddings) != 1:
+            raise ValueError(
+                f"Expected exactly 1 embedding in response, got {len(embeddings)}"
+            )
+
+        first_emb = embeddings[0]
+        if not isinstance(first_emb, dict):
+            raise ValueError("Invalid API response: embedding item is not a dictionary")
+
+        embedding_vector = first_emb.get("embedding")
+        if not isinstance(embedding_vector, list):
+            raise ValueError(
+                "Invalid API response: 'embedding' field is missing or not a list"
+            )
+
+        if len(embedding_vector) != self.dimension:
+            raise ValueError(
+                f"Dimension mismatch: expected {self.dimension}, "
+                f"got {len(embedding_vector)}"
+            )
+
+        return list(embedding_vector)
+
+
+class QwenSparseEmbedding(QwenFunctionBase, SparseEmbeddingFunction[TEXT]):
+    """Sparse text embedding function using Qwen (DashScope) API.
+
+    This class provides text-to-sparse-vector embedding capabilities using
+    Alibaba Cloud's DashScope service and Qwen embedding models. It generates
+    sparse keyword-weighted vectors suitable for lexical matching and BM25-style
+    retrieval scenarios.
+
+    Sparse embeddings are particularly useful for:
+    - Keyword-based search and exact matching
+    - Hybrid retrieval (combining with dense embeddings)
+    - Interpretable search results (weights show term importance)
+
+    Args:
+        dimension (int): Desired output embedding dimension. Common values:
+            - 512: Balanced performance and accuracy
+            - 1024: Higher accuracy, larger storage
+            - 1536: Maximum accuracy for supported models
+        model (str, optional): DashScope embedding model identifier.
+            Defaults to ``"text-embedding-v4"``. Other options include:
+            - ``"text-embedding-v3"``
+            - ``"text-embedding-v2"``
+        api_key (Optional[str], optional): DashScope API authentication key.
+            If ``None``, reads from ``DASHSCOPE_API_KEY`` environment variable.
+            Obtain your key from: https://dashscope.console.aliyun.com/
+        **kwargs: Additional DashScope API parameters. Supported options:
+            - ``encoding_type`` (Literal["query", "document"]): Encoding type.
+              * ``"query"``: Optimize for search queries (default).
+              * ``"document"``: Optimize for indexed documents.
+              This distinction is important for asymmetric retrieval tasks.
+
+    Attributes:
+        model (str): The DashScope model name being used.
+        encoding_type (str): The encoding type ("query" or "document").
+
+    Raises:
+        ValueError: If API key is not provided and not found in environment,
+            or if API returns an error response.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If network error or DashScope service error occurs.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``dashscope`` package: ``pip install dashscope``
+        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
+        - Network connectivity to DashScope API endpoints is required
+        - API usage may incur costs based on your DashScope subscription plan
+        - Sparse vectors have only non-zero dimensions stored as dict
+        - Output is sorted by indices (keys) in ascending order
+
+        **Parameter Guidelines:**
+
+        - Use ``encoding_type="query"`` for search queries and
+          ``encoding_type="document"`` for indexed content to optimize
+          asymmetric retrieval tasks.
+        - For detailed API specifications, refer to:
+          https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api
+
+    Examples:
+        >>> # Basic usage for query embedding
+        >>> from zvec.extension import QwenSparseEmbedding
+        >>> import os
+        >>> os.environ["DASHSCOPE_API_KEY"] = "your-api-key"
+        >>>
+        >>> query_emb = QwenSparseEmbedding(dimension=1024, encoding_type="query")
+        >>> query_vec = query_emb.embed("machine learning")
+        >>> type(query_vec)
+        <class 'dict'>
+        >>> len(query_vec)  # Only non-zero dimensions
+        156
+
+        >>> # Document embedding
+        >>> doc_emb = QwenSparseEmbedding(dimension=1024, encoding_type="document")
+        >>> doc_vec = doc_emb.embed("Machine learning is a subset of AI")
+        >>> isinstance(doc_vec, dict)
+        True
+
+        >>> # Asymmetric retrieval example
+        >>> query_vec = query_emb.embed("what causes aging fast")
+        >>> doc_vec = doc_emb.embed(
+        ...     "UV-A light causes tanning, skin aging, and cataracts..."
+        ... )
+        >>>
+        >>> # Calculate similarity (dot product for sparse vectors)
+        >>> similarity = sum(
+        ...     query_vec.get(k, 0) * doc_vec.get(k, 0)
+        ...     for k in set(query_vec) | set(doc_vec)
+        ... )
+
+        >>> # Output is sorted by indices
+        >>> list(query_vec.items())[:5]  # First 5 dimensions (by index)
+        [(10, 0.45), (23, 0.87), (56, 0.32), (89, 1.12), (120, 0.65)]
+
+        >>> # Hybrid retrieval (combining dense + sparse)
+        >>> from zvec.extension import QwenDenseEmbedding
+        >>> dense_emb = QwenDenseEmbedding(dimension=1024)
+        >>> sparse_emb = QwenSparseEmbedding(dimension=1024)
+        >>>
+        >>> query = "deep learning neural networks"
+        >>> dense_vec = dense_emb.embed(query)   # [0.1, -0.3, 0.5, ...]
+        >>> sparse_vec = sparse_emb.embed(query)  # {12: 0.8, 45: 1.2, ...}
+
+        >>> # Error handling
+        >>> try:
+        ...     sparse_emb.embed("")  # Empty string
+        ... except ValueError as e:
+        ...     print(f"Error: {e}")
+        Error: Input text cannot be empty or whitespace only
+
+    See Also:
+        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
+        - ``QwenDenseEmbedding``: Dense embedding using Qwen API
+        - ``DefaultSparseEmbedding``: Sparse embedding with SPLADE model
+    """
+
+    def __init__(
+        self,
+        dimension: int,
+        model: str = "text-embedding-v4",
+        api_key: Optional[str] = None,
+        **kwargs,
+    ):
+        """Initialize the Qwen sparse embedding function.
+
+        Args:
+            dimension (int): Target embedding dimension.
+            model (str): DashScope model name. Defaults to "text-embedding-v4".
+            api_key (Optional[str]): API key or None to use environment variable.
+            **kwargs: Additional DashScope API parameters. Supported options:
+                - ``encoding_type`` (Literal["query", "document"]): Encoding type.
+                  * ``"query"``: Optimize for search queries (default).
+                  * ``"document"``: Optimize for indexed documents.
+                  This distinction is important for asymmetric retrieval tasks.
+
+        Raises:
+            ValueError: If API key is not provided and not in environment.
+        """
+        # Initialize base class for API connection
+        QwenFunctionBase.__init__(self, model=model, api_key=api_key)
+
+        self._dimension = dimension
+        self._extra_params = kwargs
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for model-specific customization."""
+        return self._extra_params
+
+    def __call__(self, input: TEXT) -> SparseVectorType:
+        """Make the embedding function callable."""
+        return self.embed(input)
+
+    @lru_cache(maxsize=10)
+    def embed(self, input: TEXT) -> SparseVectorType:
+        """Generate sparse embedding vector for the input text.
+
+        This method calls the DashScope TextEmbedding API with sparse output type
+        to convert input text into a sparse vector representation. The result is
+        a dictionary where keys are dimension indices and values are importance
+        weights (only non-zero values included).
+
+        The embedding is optimized based on the ``encoding_type`` specified during
+        initialization: "query" for search queries or "document" for indexed content.
+
+        Args:
+            input (TEXT): Input text string to embed. Must be non-empty after
+                stripping whitespace. Maximum length depends on the model used
+                (typically 2048-8192 tokens).
+
+        Returns:
+            SparseVectorType: A dictionary mapping dimension index to weight.
+                Only non-zero dimensions are included. The dictionary is sorted
+                by indices (keys) in ascending order for consistent output.
+                Example: ``{10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.5}``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty/whitespace-only, or if the API returns
+                an error or malformed response.
+            RuntimeError: If network connectivity issues or DashScope service
+                errors occur.
+
+        Examples:
+            >>> emb = QwenSparseEmbedding(dimension=1024, encoding_type="query")
+            >>> sparse_vec = emb.embed("machine learning")
+            >>> isinstance(sparse_vec, dict)
+            True
+            >>>
+            >>> # Verify sorted output
+            >>> keys = list(sparse_vec.keys())
+            >>> keys == sorted(keys)
+            True
+
+            >>> # Error: empty input
+            >>> emb.embed("   ")
+            ValueError: Input text cannot be empty or whitespace only
+
+            >>> # Error: non-string input
+            >>> emb.embed(123)
+            TypeError: Expected 'input' to be str, got int
+
+        Note:
+            - This method is cached (maxsize=10). Identical inputs return cached results.
+            - The cache is based on exact string match (case-sensitive).
+            - Output dictionary is always sorted by indices for consistency.
+        """
+        if not isinstance(input, TEXT):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        # Call API with sparse output type
+        output = self._call_text_embedding_api(
+            input=input,
+            dimension=self._dimension,
+            output_type="sparse",
+            text_type=self.extra_params.get("encoding_type", "query"),
+        )
+
+        embeddings = output.get("embeddings")
+        if not isinstance(embeddings, list):
+            raise ValueError(
+                "Invalid API response: 'embeddings' field is missing or not a list"
+            )
+
+        if len(embeddings) != 1:
+            raise ValueError(
+                f"Expected exactly 1 embedding in response, got {len(embeddings)}"
+            )
+
+        first_emb = embeddings[0]
+        if not isinstance(first_emb, dict):
+            raise ValueError("Invalid API response: embedding item is not a dictionary")
+
+        sparse_embedding = first_emb.get("sparse_embedding")
+        if not isinstance(sparse_embedding, list):
+            raise ValueError(
+                "Invalid API response: 'sparse_embedding' field is missing or not a list"
+            )
+
+        # Parse sparse embedding: convert array of {index, value, token} to dict
+        sparse_dict = {}
+        for item in sparse_embedding:
+            if not isinstance(item, dict):
+                raise ValueError(
+                    "Invalid API response: sparse_embedding item is not a dictionary"
+                )
+
+            index = item.get("index")
+            value = item.get("value")
+
+            if index is None or value is None:
+                raise ValueError(
+                    "Invalid API response: sparse_embedding item missing 'index' or 'value'"
+                )
+
+            # Convert to int and float, filter positive values
+            idx = int(index)
+            val = float(value)
+            if val > 0:
+                sparse_dict[idx] = val
+
+        # Sort by indices (keys) to ensure consistent ordering
+        return dict(sorted(sparse_dict.items()))
diff --git a/python/zvec/extension/qwen_function.py b/python/zvec/extension/qwen_function.py
new file mode 100644
index 00000000..b15ee4b1
--- /dev/null
+++ b/python/zvec/extension/qwen_function.py
@@ -0,0 +1,186 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import os
+from http import HTTPStatus
+from typing import Optional
+
+from ..common.constants import TEXT
+from ..tool import require_module
+
+
+class QwenFunctionBase:
+    """Base class for Qwen (DashScope) functions.
+
+    This base class provides common functionality for calling DashScope APIs
+    and handling responses. It supports embeddings (dense and sparse) and
+    re-ranking operations.
+
+    This class is not meant to be used directly. Use concrete implementations:
+    - ``QwenDenseEmbedding`` for dense embeddings
+    - ``QwenSparseEmbedding`` for sparse embeddings
+    - ``QwenReRanker`` for semantic re-ranking
+
+    Args:
+        model (str): DashScope model identifier.
+        api_key (Optional[str]): DashScope API authentication key.
+
+    Note:
+        - This is an internal base class for code reuse across Qwen features
+        - Subclasses should inherit from appropriate Protocol/ABC
+        - Provides unified API connection and response handling
+    """
+
+    def __init__(
+        self,
+        model: str,
+        api_key: Optional[str] = None,
+    ):
+        """Initialize the base Qwen embedding functionality.
+
+        Args:
+            model (str): DashScope model name.
+            api_key (Optional[str]): API key or None to use environment variable.
+
+        Raises:
+            ValueError: If API key is not provided and not in environment.
+        """
+        self._model = model
+        self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY")
+        if not self._api_key:
+            raise ValueError(
+                "DashScope API key is required. Please provide 'api_key' parameter "
+                "or set the 'DASHSCOPE_API_KEY' environment variable."
+            )
+
+    @property
+    def model(self) -> str:
+        """str: The DashScope embedding model name currently in use."""
+        return self._model
+
+    def _get_connection(self):
+        """Establish connection to DashScope API.
+
+        Returns:
+            module: The dashscope module with API key configured.
+
+        Raises:
+            ImportError: If dashscope package is not installed.
+        """
+        dashscope = require_module("dashscope")
+        dashscope.api_key = self._api_key
+        return dashscope
+
+    def _call_text_embedding_api(
+        self,
+        input: TEXT,
+        dimension: int,
+        output_type: str,
+        text_type: Optional[str] = None,
+    ) -> dict:
+        """Call DashScope TextEmbedding API.
+
+        Args:
+            input (TEXT): Input text to embed.
+            dimension (int): Target embedding dimension.
+            output_type (str): Output type ("dense" or "sparse").
+            text_type (Optional[str]): Text type ("query" or "document").
+
+        Returns:
+            dict: API response output field.
+
+        Raises:
+            RuntimeError: If API call fails.
+            ValueError: If API returns error response.
+        """
+        try:
+            # Prepare API call parameters
+            call_params = {
+                "model": self.model,
+                "input": input,
+                "dimension": dimension,
+                "output_type": output_type,
+            }
+
+            # Add optional text_type parameter if provided
+            if text_type is not None:
+                call_params["text_type"] = text_type
+
+            resp = self._get_connection().TextEmbedding.call(**call_params)
+        except Exception as e:
+            raise RuntimeError(f"Failed to call DashScope API: {e!s}") from e
+
+        if resp.status_code != HTTPStatus.OK:
+            error_msg = getattr(resp, "message", "Unknown error")
+            error_code = getattr(resp, "code", "N/A")
+            raise ValueError(
+                f"DashScope API error: [Code={error_code}, "
+                f"Status={resp.status_code}] {error_msg}"
+            )
+
+        output = getattr(resp, "output", None)
+        if not isinstance(output, dict):
+            raise ValueError(
+                "Invalid API response: missing or malformed 'output' field"
+            )
+
+        return output
+
+    def _call_rerank_api(
+        self,
+        query: str,
+        documents: list[str],
+        top_n: int,
+    ) -> dict:
+        """Call DashScope TextReRank API.
+
+        Args:
+            query (str): Query text for semantic matching.
+            documents (list[str]): List of document texts to re-rank.
+            top_n (int): Maximum number of documents to return.
+
+        Returns:
+            dict: API response output field containing re-ranked results.
+
+        Raises:
+            RuntimeError: If API call fails.
+            ValueError: If API returns error response.
+        """
+        try:
+            resp = self._get_connection().TextReRank.call(
+                model=self.model,
+                query=query,
+                documents=documents,
+                top_n=top_n,
+                return_documents=False,
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to call DashScope API: {e!s}") from e
+
+        if resp.status_code != HTTPStatus.OK:
+            error_msg = getattr(resp, "message", "Unknown error")
+            error_code = getattr(resp, "code", "N/A")
+            raise ValueError(
+                f"DashScope API error: [Code={error_code}, "
+                f"Status={resp.status_code}] {error_msg}"
+            )
+
+        output = getattr(resp, "output", None)
+        if not isinstance(output, dict):
+            raise ValueError(
+                "Invalid API response: missing or malformed 'output' field"
+            )
+
+        return output
diff --git a/python/zvec/extension/qwen_rerank_function.py b/python/zvec/extension/qwen_rerank_function.py
new file mode 100644
index 00000000..9b4a66b3
--- /dev/null
+++ b/python/zvec/extension/qwen_rerank_function.py
@@ -0,0 +1,162 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from typing import Optional
+
+from ..model.doc import Doc
+from .qwen_function import QwenFunctionBase
+from .rerank_function import RerankFunction
+
+
+class QwenReRanker(QwenFunctionBase, RerankFunction):
+    """Re-ranker using Qwen (DashScope) cross-encoder API for semantic re-ranking.
+
+    This re-ranker leverages DashScope's TextReRank service to perform
+    cross-encoder style re-ranking. It sends query and document pairs to the
+    API and receives relevance scores based on deep semantic understanding.
+
+    The re-ranker is suitable for single-vector or multi-vector search scenarios
+    where semantic relevance to a specific query is required.
+
+    Args:
+        query (str): Query text for semantic re-ranking. **Required**.
+        topn (int, optional): Maximum number of documents to return after re-ranking.
+            Defaults to 10.
+        rerank_field (str): Document field name to use as re-ranking input text.
+            **Required** (e.g., "content", "title", "body").
+        model (str, optional): DashScope re-ranking model identifier.
+            Defaults to ``"gte-rerank-v2"``.
+        api_key (Optional[str], optional): DashScope API authentication key.
+            If not provided, reads from ``DASHSCOPE_API_KEY`` environment variable.
+
+    Raises:
+        ValueError: If ``query`` is empty/None, ``rerank_field`` is None,
+            or API key is not available.
+
+    Note:
+        - Requires ``dashscope`` Python package installed
+        - Documents without valid content in ``rerank_field`` are skipped
+        - API rate limits and quotas apply per DashScope subscription
+
+    Example:
+        >>> reranker = QwenReRanker(
+        ...     query="machine learning algorithms",
+        ...     topn=5,
+        ...     rerank_field="content",
+        ...     model="gte-rerank-v2",
+        ...     api_key="your-api-key"
+        ... )
+        >>> # Use in collection.query(reranker=reranker)
+    """
+
+    def __init__(
+        self,
+        query: Optional[str] = None,
+        topn: int = 10,
+        rerank_field: Optional[str] = None,
+        model: str = "gte-rerank-v2",
+        api_key: Optional[str] = None,
+    ):
+        """Initialize QwenReRanker with query and configuration.
+
+        Args:
+            query (Optional[str]): Query text for semantic matching. Required.
+            topn (int): Number of top results to return.
+            rerank_field (Optional[str]): Document field for re-ranking input.
+            model (str): DashScope model name.
+            api_key (Optional[str]): API key or None to use environment variable.
+
+        Raises:
+            ValueError: If query is empty or API key is unavailable.
+        """
+        QwenFunctionBase.__init__(self, model=model, api_key=api_key)
+        RerankFunction.__init__(self, topn=topn, rerank_field=rerank_field)
+
+        if not query:
+            raise ValueError("Query is required for QwenReRanker")
+        self._query = query
+
+    @property
+    def query(self) -> str:
+        """str: Query text used for semantic re-ranking."""
+        return self._query
+
+    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
+        """Re-rank documents using Qwen's TextReRank API.
+
+        Sends document texts to DashScope TextReRank service along with the query.
+        Returns documents sorted by relevance scores from the cross-encoder model.
+
+        Args:
+            query_results (dict[str, list[Doc]]): Mapping from vector field names
+                to lists of retrieved documents. Documents from all fields are
+                deduplicated and re-ranked together.
+
+        Returns:
+            list[Doc]: Re-ranked documents (up to ``topn``) with updated ``score``
+                fields containing relevance scores from the API.
+
+        Raises:
+            ValueError: If no valid documents are found or API call fails.
+
+        Note:
+            - Duplicate documents (same ID) across fields are processed once
+            - Documents with empty/missing ``rerank_field`` content are skipped
+            - Returned scores are relevance scores from the cross-encoder model
+        """
+        if not query_results:
+            return []
+
+        # Collect and deduplicate documents
+        id_to_doc: dict[str, Doc] = {}
+        doc_ids: list[str] = []
+        contents: list[str] = []
+
+        for _, query_result in query_results.items():
+            for doc in query_result:
+                doc_id = doc.id
+                if doc_id in id_to_doc:
+                    continue
+
+                # Extract text content from specified field
+                field_value = doc.field(self.rerank_field)
+                rank_content = str(field_value).strip() if field_value else ""
+                if not rank_content:
+                    continue
+
+                id_to_doc[doc_id] = doc
+                doc_ids.append(doc_id)
+                contents.append(rank_content)
+
+        if not contents:
+            raise ValueError("No documents to rerank")
+
+        # Call DashScope TextReRank API
+        output = self._call_rerank_api(
+            query=self.query,
+            documents=contents,
+            top_n=self.topn,
+        )
+
+        # Build result list with updated scores
+        results: list[Doc] = []
+        for item in output["results"]:
+            idx = item["index"]
+            doc_id = doc_ids[idx]
+            doc = id_to_doc[doc_id]
+            new_doc = doc._replace(score=item["relevance_score"])
+            results.append(new_doc)
+
+        return results
diff --git a/python/zvec/extension/rerank.py b/python/zvec/extension/rerank.py
deleted file mode 100644
index 021f6ed4..00000000
--- a/python/zvec/extension/rerank.py
+++ /dev/null
@@ -1,343 +0,0 @@
-# Copyright 2025-present the zvec project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import annotations
-
-import heapq
-import math
-import os
-from abc import ABC, abstractmethod
-from collections import defaultdict
-from http import HTTPStatus
-from typing import Optional
-
-from ..model.doc import Doc
-from ..tool import require_module
-from ..typing import MetricType
-
-
-class ReRanker(ABC):
-    """Abstract base class for re-ranking search results.
-
-    Re-rankers refine the output of one or more vector queries by applying
-    a secondary scoring strategy. They are used in the ``query()`` method of
-    ``Collection`` via the ``reranker`` parameter.
-
-    Args:
-        query (Optional[str], optional): Query text used for re-ranking.
-            Required for LLM-based re-rankers. Defaults to None.
-        topn (int, optional): Number of top documents to return after re-ranking.
-            Defaults to 10.
-        rerank_field (Optional[str], optional): Field name used as input for
-            re-ranking (e.g., document title or body). Defaults to None.
-
-    Note:
-        Subclasses must implement the ``rerank()`` method.
-    """
-
-    def __init__(
-        self,
-        query: Optional[str] = None,
-        topn: int = 10,
-        rerank_field: Optional[str] = None,
-    ):
-        self._query = query
-        self._topn = topn
-        self._rerank_field = rerank_field
-
-    @property
-    def topn(self) -> int:
-        """int: Number of top documents to return after re-ranking."""
-        return self._topn
-
-    @property
-    def query(self) -> str:
-        """str: Query text used for re-ranking."""
-        return self._query
-
-    @property
-    def rerank_field(self) -> Optional[str]:
-        """Optional[str]: Field name used as re-ranking input."""
-        return self._rerank_field
-
-    @abstractmethod
-    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
-        """Re-rank documents from one or more vector queries.
-
-        Args:
-            query_results (dict[str, list[Doc]]): Mapping from vector field name
-                to list of retrieved documents (sorted by relevance).
-
-        Returns:
-            list[Doc]: Re-ranked list of documents (length ≤ ``topn``),
-                with updated ``score`` fields.
-        """
-        raise NotImplementedError
-
-
-class RrfReRanker(ReRanker):
-    """Re-ranker using Reciprocal Rank Fusion (RRF).
-
-    RRF combines results from multiple queries without requiring relevance scores.
-    It assigns higher weight to documents that appear early in multiple result lists.
-
-    The RRF score for a document at rank ``r`` is: ``1 / (k + r + 1)``,
-    where ``k`` is the rank constant.
-
-    Args:
-        query (Optional[str], optional): Ignored by RRF. Defaults to None.
-        topn (int, optional): Number of top documents to return. Defaults to 10.
-        rerank_field (Optional[str], optional): Ignored by RRF. Defaults to None.
-        rank_constant (int, optional): Smoothing constant ``k`` in RRF formula.
-            Larger values reduce the impact of early ranks. Defaults to 60.
-    """
-
-    def __init__(
-        self,
-        query: Optional[str] = None,
-        topn: int = 10,
-        rerank_field: Optional[str] = None,
-        rank_constant: int = 60,
-    ):
-        super().__init__(query, topn, rerank_field)
-        self._rank_constant = rank_constant
-
-    @property
-    def rank_constant(self) -> int:
-        return self._rank_constant
-
-    def _rrf_score(self, rank: int):
-        return 1.0 / (self._rank_constant + rank + 1)
-
-    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
-        """Apply Reciprocal Rank Fusion to combine multiple query results.
-
-        Args:
-            query_results (dict[str, list[Doc]]): Results from one or more vector queries.
-
-        Returns:
-            list[Doc]: Re-ranked documents with RRF scores in the ``score`` field.
-        """
-        rrf_scores: dict[str, float] = defaultdict(float)
-        id_to_doc: dict[str, Doc] = {}
-
-        for _, query_result in query_results.items():
-            for rank, doc in enumerate(query_result):
-                doc_id = doc.id
-                rrf_score = self._rrf_score(rank)
-                rrf_scores[doc_id] += rrf_score
-                if doc_id not in id_to_doc:
-                    id_to_doc[doc_id] = doc
-
-        top_docs = heapq.nlargest(self.topn, rrf_scores.items(), key=lambda x: x[1])
-        results = []
-        for doc_id, rrf_score in top_docs:
-            doc = id_to_doc[doc_id]
-            new_doc = doc._replace(score=rrf_score)
-            results.append(new_doc)
-        return results
-
-
-class WeightedReRanker(ReRanker):
-    """Re-ranker that combines scores from multiple vector fields using weights.
-
-    Each vector field's relevance score is normalized based on its metric type,
-    then scaled by a user-provided weight. Final scores are summed across fields.
-
-    Args:
-        query (Optional[str], optional): Ignored. Defaults to None.
-        topn (int, optional): Number of top documents to return. Defaults to 10.
-        rerank_field (Optional[str], optional): Ignored. Defaults to None.
-        metric (MetricType, optional): Distance metric used for score normalization.
-            Defaults to ``MetricType.L2``.
-        weights (Optional[dict[str, float]], optional): Weight per vector field.
-            Fields not listed use weight 1.0. Defaults to None.
-
-    Note:
-        Supported metrics: L2, IP, COSINE. Scores are normalized to [0, 1].
-    """
-
-    def __init__(
-        self,
-        query: Optional[str] = None,
-        topn: int = 10,
-        rerank_field: Optional[str] = None,
-        metric: MetricType = MetricType.L2,
-        weights: Optional[dict[str, float]] = None,
-    ):
-        super().__init__(query, topn, rerank_field)
-        self._weights = weights
-        self._metric = metric
-
-    @property
-    def weights(self) -> dict[str, float]:
-        """dict[str, float]: Weight mapping for vector fields."""
-        return self._weights
-
-    @property
-    def metric(self) -> MetricType:
-        """MetricType: Distance metric used for score normalization."""
-        return self._metric
-
-    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
-        """Combine scores from multiple vector fields using weighted sum.
-
-        Args:
-            query_results (dict[str, list[Doc]]): Results per vector field.
-
-        Returns:
-            list[Doc]: Re-ranked documents with combined scores in ``score`` field.
-        """
-        weighted_scores: dict[str, float] = defaultdict(float)
-        id_to_doc: dict[str, Doc] = {}
-
-        for vector_name, query_result in query_results.items():
-            for _, doc in enumerate(query_result):
-                doc_id = doc.id
-                weighted_score = self._normalize_score(
-                    doc.score, self.metric
-                ) * self.weights.get(vector_name, 1.0)
-                weighted_scores[doc_id] += weighted_score
-                if doc_id not in id_to_doc:
-                    id_to_doc[doc_id] = doc
-
-        top_docs = heapq.nlargest(
-            self.topn, weighted_scores.items(), key=lambda x: x[1]
-        )
-        results = []
-        for doc_id, weighted_score in top_docs:
-            doc = id_to_doc[doc_id]
-            new_doc = doc._replace(score=weighted_score)
-            results.append(new_doc)
-        return results
-
-    def _normalize_score(self, score: float, metric: MetricType) -> float:
-        if metric == MetricType.L2:
-            return 1.0 - 2 * math.atan(score) / math.pi
-        if metric == MetricType.IP:
-            return 0.5 + math.atan(score) / math.pi
-        if metric == MetricType.COSINE:
-            return 1.0 - score / 2.0
-        raise ValueError("Unsupported metric type")
-
-
-class QwenReRanker(ReRanker):
-    """Re-ranker using Qwen (DashScope) LLM-based re-ranking API.
-
-    This re-ranker sends documents to the DashScope TextReRank service for
-    cross-encoder style re-ranking based on semantic relevance to the query.
-
-    Args:
-        query (str): Query text for semantic re-ranking. **Required**.
-        topn (int, optional): Number of top documents to return. Defaults to 10.
-        rerank_field (str): Field name containing document text for re-ranking.
-            **Required**.
-        model (str, optional): DashScope re-ranking model name.
-            Defaults to ``"gte-rerank-v2"``.
-        api_key (Optional[str], optional): DashScope API key. If not provided,
-            reads from ``DASHSCOPE_API_KEY`` environment variable.
-
-    Raises:
-        ValueError: If ``query`` is missing, ``rerank_field`` is missing,
-            or API key is not provided.
-
-    Note:
-        Requires the ``dashscope`` Python package.
-        Documents without content in ``rerank_field`` are skipped.
-    """
-
-    def __init__(
-        self,
-        query: Optional[str] = None,
-        topn: int = 10,
-        rerank_field: Optional[str] = None,
-        model: str = "gte-rerank-v2",
-        api_key: Optional[str] = None,
-    ):
-        super().__init__(query, topn, rerank_field)
-        if not query:
-            raise ValueError("Query is required for reranking")
-        self._model = model
-        self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY")
-        if not self._api_key:
-            raise ValueError("DashScope API key is required")
-
-    @property
-    def model(self) -> str:
-        """str: DashScope re-ranking model name."""
-        return self._model
-
-    def _connection(self):
-        dashscope = require_module("dashscope")
-        dashscope.api_key = self._api_key
-        return dashscope
-
-    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
-        """Re-rank documents using Qwen's TextReRank API.
-
-        Args:
-            query_results (dict[str, list[Doc]]): Results from vector search.
-
-        Returns:
-            list[Doc]: Re-ranked documents with relevance scores from Qwen.
-
-        Raises:
-            ValueError: If API call fails or no valid documents are found.
-        """
-        if not query_results:
-            return []
-
-        id_to_doc: dict[str, Doc] = {}
-        doc_ids = []
-        contents = []
-
-        for _, query_result in query_results.items():
-            for doc in query_result:
-                doc_id = doc.id
-                if doc_id in id_to_doc:
-                    continue
-
-                field_value = doc.field(self.rerank_field)
-                rank_content = str(field_value).strip() if field_value else ""
-                if not rank_content:
-                    continue
-
-                id_to_doc[doc_id] = doc
-                doc_ids.append(doc_id)
-                contents.append(rank_content)
-
-        if not contents:
-            raise ValueError("No documents to rerank")
-
-        resp = self._connection().TextReRank.call(
-            model=self.model,
-            query=self.query,
-            documents=list(contents),
-            top_n=self.topn,
-            return_documents=False,
-        )
-
-        if resp.status_code != HTTPStatus.OK:
-            raise ValueError(
-                f"QwenReranker failed with status {resp.status_code}: {resp.message}"
-            )
-
-        results = []
-        for item in resp.output.results:
-            idx = item.index
-            doc_id = doc_ids[idx]
-            doc = id_to_doc[doc_id]
-            new_doc = doc._replace(score=item.relevance_score)
-            results.append(new_doc)
-
-        return results
diff --git a/python/zvec/extension/rerank_function.py b/python/zvec/extension/rerank_function.py
new file mode 100644
index 00000000..c558a2bc
--- /dev/null
+++ b/python/zvec/extension/rerank_function.py
@@ -0,0 +1,69 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from ..model.doc import Doc
+
+
+class RerankFunction(ABC):
+    """Abstract base class for re-ranking search results.
+
+    Re-rankers refine the output of one or more vector queries by applying
+    a secondary scoring strategy. They are used in the ``query()`` method of
+    ``Collection`` via the ``reranker`` parameter.
+
+    Args:
+        topn (int, optional): Number of top documents to return after re-ranking.
+            Defaults to 10.
+        rerank_field (Optional[str], optional): Field name used as input for
+            re-ranking (e.g., document title or body). Defaults to None.
+
+    Note:
+        Subclasses must implement the ``rerank()`` method.
+    """
+
+    def __init__(
+        self,
+        topn: int = 10,
+        rerank_field: Optional[str] = None,
+    ):
+        self._topn = topn
+        self._rerank_field = rerank_field
+
+    @property
+    def topn(self) -> int:
+        """int: Number of top documents to return after re-ranking."""
+        return self._topn
+
+    @property
+    def rerank_field(self) -> Optional[str]:
+        """Optional[str]: Field name used as re-ranking input."""
+        return self._rerank_field
+
+    @abstractmethod
+    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
+        """Re-rank documents from one or more vector queries.
+
+        Args:
+            query_results (dict[str, list[Doc]]): Mapping from vector field name
+                to list of retrieved documents (sorted by relevance).
+
+        Returns:
+            list[Doc]: Re-ranked list of documents (length ≤ ``topn``),
+                with updated ``score`` fields.
+        """
+        ...
diff --git a/python/zvec/extension/sentence_transformer_embedding_function.py b/python/zvec/extension/sentence_transformer_embedding_function.py
new file mode 100644
index 00000000..032f02e0
--- /dev/null
+++ b/python/zvec/extension/sentence_transformer_embedding_function.py
@@ -0,0 +1,839 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from typing import ClassVar, Literal, Optional
+
+import numpy as np
+
+from ..common.constants import TEXT, DenseVectorType, SparseVectorType
+from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
+from .sentence_transformer_function import SentenceTransformerFunctionBase
+
+
+class DefaultLocalDenseEmbedding(
+    SentenceTransformerFunctionBase, DenseEmbeddingFunction[TEXT]
+):
+    """Default local dense embedding using all-MiniLM-L6-v2 model.
+
+    This is the default implementation for dense text embedding that uses the
+    ``all-MiniLM-L6-v2`` model from Hugging Face by default. This model provides
+    a good balance between speed and quality for general-purpose text embedding.
+
+    The class provides text-to-vector dense embedding capabilities using the
+    sentence-transformers library. It supports models from Hugging Face Hub and
+    ModelScope, runs locally without API calls, and supports CPU/GPU acceleration.
+
+    The model produces 384-dimensional embeddings and is optimized for semantic
+    similarity tasks. It runs locally without requiring API keys.
+
+    Args:
+        model_source (Literal["huggingface", "modelscope"], optional): Model source.
+            - ``"huggingface"``: Use Hugging Face Hub (default, for international users)
+            - ``"modelscope"``: Use ModelScope (recommended for users in China)
+            Defaults to ``"huggingface"``.
+        device (Optional[str], optional): Device to run the model on.
+            Options: ``"cpu"``, ``"cuda"``, ``"mps"`` (for Apple Silicon), or ``None``
+            for automatic detection. Defaults to ``None``.
+        normalize_embeddings (bool, optional): Whether to normalize embeddings to
+            unit length (L2 normalization). Useful for cosine similarity.
+            Defaults to ``True``.
+        batch_size (int, optional): Batch size for encoding. Defaults to ``32``.
+        **kwargs: Additional parameters for future extension.
+
+    Attributes:
+        dimension (int): Always 384 for both models.
+        model_name (str): "all-MiniLM-L6-v2" (HF) or "iic/nlp_gte_sentence-embedding_chinese-small" (MS).
+        model_source (str): The model source being used.
+        device (str): The device the model is running on.
+
+    Raises:
+        ValueError: If the model cannot be loaded or input is invalid.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If model inference fails.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``sentence-transformers`` package:
+          ``pip install sentence-transformers``
+        - For ModelScope, also requires: ``pip install modelscope``
+        - First run downloads the model (~50-80MB) from chosen source
+        - Hugging Face cache: ``~/.cache/torch/sentence_transformers/``
+        - ModelScope cache: ``~/.cache/modelscope/hub/``
+        - No API keys or network required after initial download
+        - Inference speed: ~1000 sentences/sec on CPU, ~10000 on GPU
+
+        **For users in China:**
+
+        If you encounter Hugging Face access issues, use ModelScope instead:
+
+        .. code-block:: python
+
+            # Recommended for users in China
+            emb = DefaultLocalDenseEmbedding(model_source="modelscope")
+
+        Alternatively, use Hugging Face mirror:
+
+        .. code-block:: bash
+
+            export HF_ENDPOINT=https://hf-mirror.com
+            # Then use default Hugging Face mode
+
+    Examples:
+        >>> # Basic usage with Hugging Face (default)
+        >>> from zvec.extension import DefaultLocalDenseEmbedding
+        >>>
+        >>> emb_func = DefaultLocalDenseEmbedding()
+        >>> vector = emb_func.embed("Hello, world!")
+        >>> len(vector)
+        384
+        >>> isinstance(vector, list)
+        True
+
+        >>> # Recommended for users in China (uses ModelScope)
+        >>> emb_func = DefaultLocalDenseEmbedding(model_source="modelscope")
+        >>> vector = emb_func.embed("你好，世界！")  # Works well with Chinese text
+        >>> len(vector)
+        384
+
+        >>> # Alternative for China users: Use Hugging Face mirror
+        >>> import os
+        >>> os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+        >>> emb_func = DefaultLocalDenseEmbedding()  # Uses HF mirror
+        >>> vector = emb_func.embed("Hello, world!")
+
+        >>> # Using GPU for faster inference
+        >>> emb_func = DefaultLocalDenseEmbedding(device="cuda")
+        >>> vector = emb_func("Machine learning is fascinating")
+        >>> # Normalized vector has unit length
+        >>> import numpy as np
+        >>> np.linalg.norm(vector)
+        1.0
+
+        >>> # Batch processing
+        >>> texts = ["First text", "Second text", "Third text"]
+        >>> vectors = [emb_func.embed(text) for text in texts]
+        >>> len(vectors)
+        3
+        >>> all(len(v) == 384 for v in vectors)
+        True
+
+        >>> # Semantic similarity
+        >>> v1 = emb_func.embed("The cat sits on the mat")
+        >>> v2 = emb_func.embed("A feline rests on a rug")
+        >>> v3 = emb_func.embed("Python programming")
+        >>> similarity_high = np.dot(v1, v2)  # Similar sentences
+        >>> similarity_low = np.dot(v1, v3)   # Different topics
+        >>> similarity_high > similarity_low
+        True
+
+        >>> # Error handling
+        >>> try:
+        ...     emb_func.embed("")  # Empty string
+        ... except ValueError as e:
+        ...     print(f"Error: {e}")
+        Error: Input text cannot be empty or whitespace only
+
+    See Also:
+        - ``DenseEmbeddingFunction``: Base class for dense embeddings
+        - ``DefaultLocalSparseEmbedding``: Sparse embedding with SPLADE
+        - ``QwenDenseEmbedding``: Alternative using Qwen API
+    """
+
+    def __init__(
+        self,
+        model_source: Literal["huggingface", "modelscope"] = "huggingface",
+        device: Optional[str] = None,
+        normalize_embeddings: bool = True,
+        batch_size: int = 32,
+        **kwargs,
+    ):
+        """Initialize with all-MiniLM-L6-v2 model.
+
+        Args:
+            model_source (Literal["huggingface", "modelscope"]): Model source.
+                Defaults to "huggingface".
+            device (Optional[str]): Target device ("cpu", "cuda", "mps", or None).
+                Defaults to None (automatic detection).
+            normalize_embeddings (bool): Whether to L2-normalize output vectors.
+                Defaults to True.
+            batch_size (int): Batch size for encoding. Defaults to 32.
+            **kwargs: Additional parameters for future extension.
+
+        Raises:
+            ImportError: If sentence-transformers or modelscope is not installed.
+            ValueError: If model cannot be loaded.
+        """
+        # Use different models based on source
+        if model_source == "modelscope":
+            # Use Chinese-optimized model for ModelScope (better for Chinese text)
+            model_name = "iic/nlp_gte_sentence-embedding_chinese-small"
+        else:
+            model_name = "all-MiniLM-L6-v2"
+
+        # Initialize base class for model loading
+        SentenceTransformerFunctionBase.__init__(
+            self, model_name=model_name, model_source=model_source, device=device
+        )
+
+        self._normalize_embeddings = normalize_embeddings
+        self._batch_size = batch_size
+
+        # Load model and get dimension
+        model = self._get_model()
+        self._dimension = model.get_sentence_embedding_dimension()
+
+        # Store extra parameters
+        self._extra_params = kwargs
+
+    @property
+    def dimension(self) -> int:
+        """int: The expected dimensionality of the embedding vector."""
+        return self._dimension
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for model-specific customization."""
+        return self._extra_params
+
+    def __call__(self, input: str) -> DenseVectorType:
+        """Make the embedding function callable."""
+        return self.embed(input)
+
+    def embed(self, input: str) -> DenseVectorType:
+        """Generate dense embedding vector for the input text.
+
+        This method uses the Sentence Transformer model to convert input text
+        into a dense vector representation. The model runs locally without
+        requiring API calls.
+
+        Args:
+            input (str): Input text string to embed. Must be non-empty after
+                stripping whitespace. Maximum length depends on the model used
+                (typically 128-512 tokens for most models).
+
+        Returns:
+            DenseVectorType: A list of floats representing the embedding vector.
+                Length equals ``self.dimension``. If ``normalize_embeddings=True``,
+                the vector has unit length. Example:
+                ``[0.123, -0.456, 0.789, ...]``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty or whitespace-only.
+            RuntimeError: If model inference fails.
+
+        Examples:
+            >>> emb = DefaultLocalDenseEmbedding()
+            >>> vector = emb.embed("Natural language processing")
+            >>> len(vector)
+            384
+            >>> isinstance(vector[0], float)
+            True
+
+            >>> # Normalized vectors have unit length
+            >>> import numpy as np
+            >>> emb = DefaultLocalDenseEmbedding(normalize_embeddings=True)
+            >>> vector = emb.embed("Test sentence")
+            >>> np.linalg.norm(vector)
+            1.0
+
+            >>> # Error: empty input
+            >>> emb.embed("   ")
+            ValueError: Input text cannot be empty or whitespace only
+
+            >>> # Error: non-string input
+            >>> emb.embed(123)
+            TypeError: Expected 'input' to be str, got int
+
+            >>> # Semantic similarity example
+            >>> v1 = emb.embed("The cat sits on the mat")
+            >>> v2 = emb.embed("A feline rests on a rug")
+            >>> similarity = np.dot(v1, v2)  # High similarity due to semantic meaning
+            >>> similarity > 0.7
+            True
+
+        Note:
+            - First call may be slower due to model loading
+            - Subsequent calls are much faster as the model stays in memory
+            - For batch processing, consider encoding multiple texts together
+              (though this method handles single texts only)
+            - GPU acceleration provides 5-10x speedup over CPU
+        """
+        if not isinstance(input, str):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        try:
+            model = self._get_model()
+            embedding = model.encode(
+                input,
+                convert_to_numpy=True,
+                normalize_embeddings=self._normalize_embeddings,
+                batch_size=self._batch_size,
+            )
+
+            # Convert numpy array to list
+            if isinstance(embedding, np.ndarray):
+                embedding_list = embedding.tolist()
+            else:
+                embedding_list = list(embedding)
+
+            # Validate dimension
+            if len(embedding_list) != self.dimension:
+                raise ValueError(
+                    f"Dimension mismatch: expected {self.dimension}, "
+                    f"got {len(embedding_list)}"
+                )
+
+            return embedding_list
+
+        except Exception as e:
+            if isinstance(e, (TypeError, ValueError)):
+                raise
+            raise RuntimeError(f"Failed to generate embedding: {e!s}") from e
+
+
+class DefaultLocalSparseEmbedding(
+    SentenceTransformerFunctionBase, SparseEmbeddingFunction[TEXT]
+):
+    """Default local sparse embedding using SPLADE model.
+
+    This class provides sparse vector embedding using the SPLADE (SParse Lexical
+    AnD Expansion) model. SPLADE generates sparse, interpretable representations
+    where each dimension corresponds to a vocabulary term with learned importance
+    weights. It's ideal for lexical matching, BM25-style retrieval, and hybrid
+    search scenarios.
+
+    The default model is ``naver/splade-cocondenser-ensembledistil``, which is
+    publicly available without authentication. It produces sparse vectors with
+    thousands of dimensions but only hundreds of non-zero values, making them
+    efficient for storage and retrieval while maintaining strong lexical matching.
+
+    **Model Caching:**
+
+    This class uses class-level caching to share the SPLADE model across all instances
+    with the same configuration (model_source, device). This significantly reduces
+    memory usage when creating multiple instances for different encoding types
+    (query vs document).
+
+    **Cache Management:**
+
+    The class provides methods to manage the model cache:
+
+    - ``clear_cache()``: Clear all cached models to free memory
+    - ``get_cache_info()``: Get information about cached models
+    - ``remove_from_cache(model_source, device)``: Remove a specific model from cache
+
+    .. note::
+        **Why not use splade-v3?**
+
+        The newer ``naver/splade-v3`` model is gated (requires access approval).
+        We use ``naver/splade-cocondenser-ensembledistil`` instead.
+
+        **To use splade-v3 (if you have access):**
+
+        1. Request access at https://huggingface.co/naver/splade-v3
+        2. Get your Hugging Face token from https://huggingface.co/settings/tokens
+        3. Set environment variable:
+
+           .. code-block:: bash
+
+               export HF_TOKEN="your_huggingface_token"
+
+        4. Or login programmatically:
+
+           .. code-block:: python
+
+               from huggingface_hub import login
+               login(token="your_huggingface_token")
+
+        5. To use a custom SPLADE model, you can subclass this class and override
+           the model_name in ``__init__``, or create your own implementation
+           inheriting from ``SentenceTransformerFunctionBase`` and
+           ``SparseEmbeddingFunction``.
+
+    Args:
+        model_source (Literal["huggingface", "modelscope"], optional): Model source.
+            Defaults to ``"huggingface"``. ModelScope support may vary for SPLADE models.
+        device (Optional[str], optional): Device to run the model on.
+            Options: ``"cpu"``, ``"cuda"``, ``"mps"`` (for Apple Silicon), or ``None``
+            for automatic detection. Defaults to ``None``.
+        encoding_type (Literal["query", "document"], optional): Encoding type.
+            - ``"query"``: Optimize for search queries (default)
+            - ``"document"``: Optimize for indexed documents
+        **kwargs: Additional parameters (currently unused, for future extension).
+
+    Attributes:
+        model_name (str): Model identifier.
+        model_source (str): The model source being used.
+        device (str): The device the model is running on.
+
+    Raises:
+        ValueError: If the model cannot be loaded or input is invalid.
+        TypeError: If input to ``embed()`` is not a string.
+        RuntimeError: If model inference fails.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires the ``sentence-transformers`` package:
+          ``pip install sentence-transformers``
+        - First run downloads the model (~100MB) from Hugging Face
+        - Cache location: ``~/.cache/torch/sentence_transformers/``
+        - No API keys or authentication required
+        - Sparse vectors have ~30k dimensions but only ~100-200 non-zero values
+        - Best combined with dense embeddings for hybrid retrieval
+
+        **SPLADE vs Dense Embeddings:**
+
+        - **Dense**: Continuous semantic vectors, good for semantic similarity
+        - **Sparse**: Lexical keyword-based, interpretable, good for exact matching
+        - **Hybrid**: Combine both for best retrieval performance
+
+    Examples:
+        >>> # Memory-efficient: both instances share the same model (~200MB)
+        >>> from zvec.extension import DefaultLocalSparseEmbedding
+        >>>
+        >>> # Query embedding
+        >>> query_emb = DefaultLocalSparseEmbedding(encoding_type="query")
+        >>> query_vec = query_emb.embed("machine learning algorithms")
+        >>> type(query_vec)
+        <class 'dict'>
+        >>> len(query_vec)  # Only non-zero dimensions
+        156
+
+        >>> # Document embedding (shares model with query_emb)
+        >>> doc_emb = DefaultLocalSparseEmbedding(encoding_type="document")
+        >>> doc_vec = doc_emb.embed("Machine learning is a subset of AI")
+        >>> # Total memory: ~200MB (not 400MB) thanks to model caching
+
+        >>> # Asymmetric retrieval example
+        >>> query_vec = query_emb.embed("what causes aging fast")
+        >>> doc_vec = doc_emb.embed(
+        ...     "UV-A light causes tanning, skin aging, and cataracts..."
+        ... )
+        >>>
+        >>> # Calculate similarity (dot product for sparse vectors)
+        >>> similarity = sum(
+        ...     query_vec.get(k, 0) * doc_vec.get(k, 0)
+        ...     for k in set(query_vec) | set(doc_vec)
+        ... )
+
+        >>> # Batch processing
+        >>> queries = ["query 1", "query 2", "query 3"]
+        >>> query_vecs = [query_emb.embed(q) for q in queries]
+        >>>
+        >>> documents = ["doc 1", "doc 2", "doc 3"]
+        >>> doc_vecs = [doc_emb.embed(d) for d in documents]
+
+        >>> # Inspecting sparse dimensions (output is sorted by indices)
+        >>> query_vec = query_emb.embed("machine learning")
+        >>> list(query_vec.items())[:5]  # First 5 dimensions (by index)
+        [(10, 0.45), (23, 0.87), (56, 0.32), (89, 1.12), (120, 0.65)]
+        >>>
+        >>> # Sort by weight to find most important terms
+        >>> sorted_by_weight = sorted(query_vec.items(), key=lambda x: x[1], reverse=True)
+        >>> top_5 = sorted_by_weight[:5]  # Top 5 most important terms
+        >>> top_5
+        [(1023, 1.45), (245, 1.23), (8901, 0.98), (5678, 0.87), (12034, 0.76)]
+
+        >>> # Using GPU for faster inference
+        >>> sparse_emb = DefaultLocalSparseEmbedding(device="cuda")
+        >>> vector = sparse_emb.embed("natural language processing")
+
+        >>> # Hybrid retrieval example (combining dense + sparse)
+        >>> from zvec.extension import DefaultDenseEmbedding
+        >>> dense_emb = DefaultDenseEmbedding()
+        >>> sparse_emb = DefaultLocalSparseEmbedding()
+        >>>
+        >>> query = "deep learning neural networks"
+        >>> dense_vec = dense_emb.embed(query)   # [0.1, -0.3, 0.5, ...]
+        >>> sparse_vec = sparse_emb.embed(query)  # {12: 0.8, 45: 1.2, ...}
+
+        >>> # Error handling
+        >>> try:
+        ...     sparse_emb.embed("")  # Empty string
+        ... except ValueError as e:
+        ...     print(f"Error: {e}")
+        Error: Input text cannot be empty or whitespace only
+
+        >>> # Cache management
+        >>> # Check cache status
+        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
+        >>> print(f"Cached models: {info['cached_models']}")
+        Cached models: 1
+        >>>
+        >>> # Clear cache to free memory
+        >>> DefaultLocalSparseEmbedding.clear_cache()
+        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
+        >>> print(f"Cached models: {info['cached_models']}")
+        Cached models: 0
+        >>>
+        >>> # Remove specific model from cache
+        >>> query_emb = DefaultLocalSparseEmbedding()  # Creates CPU model
+        >>> cuda_emb = DefaultLocalSparseEmbedding(device="cuda")  # Creates CUDA model
+        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
+        >>> print(f"Cached models: {info['cached_models']}")
+        Cached models: 2
+        >>>
+        >>> # Remove only CPU model
+        >>> removed = DefaultLocalSparseEmbedding.remove_from_cache(device=None)
+        >>> print(f"Removed: {removed}")
+        True
+        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
+        >>> print(f"Cached models: {info['cached_models']}")
+        Cached models: 1
+
+    See Also:
+        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
+        - ``DefaultDenseEmbedding``: Dense embedding with all-MiniLM-L6-v2
+        - ``QwenDenseEmbedding``: Alternative using Qwen API
+
+    References:
+        - SPLADE Paper: https://arxiv.org/abs/2109.10086
+        - Model: https://huggingface.co/naver/splade-cocondenser-ensembledistil
+    """
+
+    # Class-level model cache: {(model_name, model_source, device): model}
+    # Shared across all DefaultLocalSparseEmbedding instances to save memory
+    _model_cache: ClassVar[dict] = {}
+
+    @classmethod
+    def clear_cache(cls) -> None:
+        """Clear all cached SPLADE models from memory.
+
+        This is useful for:
+        - Freeing memory when models are no longer needed
+        - Forcing a fresh model reload
+        - Testing and debugging
+                Examples:
+            >>> # Clear cache to free memory
+            >>> DefaultLocalSparseEmbedding.clear_cache()
+
+            >>> # Or in tests to ensure fresh model loading
+            >>> def test_something():
+            ...     DefaultLocalSparseEmbedding.clear_cache()
+            ...     emb = DefaultLocalSparseEmbedding()
+            ...     # Test with fresh model
+        """
+        cls._model_cache.clear()
+
+    @classmethod
+    def get_cache_info(cls) -> dict:
+        """Get information about currently cached models.
+
+        Returns:
+            dict: Dictionary with cache statistics:
+                - cached_models (int): Number of cached model instances
+                - cache_keys (list): List of cache keys (model_name, model_source, device)
+
+        Examples:
+            >>> info = DefaultLocalSparseEmbedding.get_cache_info()
+            >>> print(f"Cached models: {info['cached_models']}")
+            Cached models: 2
+            >>> print(f"Cache keys: {info['cache_keys']}")
+            Cache keys: [('naver/splade-cocondenser-ensembledistil', 'huggingface', None),
+                        ('naver/splade-cocondenser-ensembledistil', 'huggingface', 'cuda')]
+        """
+        return {
+            "cached_models": len(cls._model_cache),
+            "cache_keys": list(cls._model_cache.keys()),
+        }
+
+    @classmethod
+    def remove_from_cache(
+        cls, model_source: str = "huggingface", device: Optional[str] = None
+    ) -> bool:
+        """Remove a specific model from cache.
+
+        Args:
+            model_source (str): Model source ("huggingface" or "modelscope").
+                Defaults to "huggingface".
+            device (Optional[str]): Device identifier. Defaults to None.
+
+        Returns:
+            bool: True if model was found and removed, False otherwise.
+
+        Examples:
+            >>> # Remove CPU model from cache
+            >>> removed = DefaultLocalSparseEmbedding.remove_from_cache()
+            >>> print(f"Removed: {removed}")
+            True
+
+            >>> # Remove CUDA model from cache
+            >>> removed = DefaultLocalSparseEmbedding.remove_from_cache(device="cuda")
+            >>> print(f"Removed: {removed}")
+            True
+        """
+        model_name = "naver/splade-cocondenser-ensembledistil"
+        cache_key = (model_name, model_source, device)
+
+        if cache_key in cls._model_cache:
+            del cls._model_cache[cache_key]
+            return True
+        return False
+
+    def __init__(
+        self,
+        model_source: Literal["huggingface", "modelscope"] = "huggingface",
+        device: Optional[str] = None,
+        encoding_type: Literal["query", "document"] = "query",
+        **kwargs,
+    ):
+        """Initialize with SPLADE model.
+
+        Args:
+            model_source (Literal["huggingface", "modelscope"]): Model source.
+                Defaults to "huggingface".
+            device (Optional[str]): Target device ("cpu", "cuda", "mps", or None).
+                Defaults to None (automatic detection).
+            encoding_type (Literal["query", "document"]): Encoding type for embeddings.
+                - "query": Optimize for search queries (default)
+                - "document": Optimize for indexed documents
+                This distinction is important for asymmetric retrieval tasks.
+            **kwargs: Additional parameters (reserved for future use).
+
+        Raises:
+            ImportError: If sentence-transformers is not installed.
+            ValueError: If model cannot be loaded.
+
+        Note:
+            Multiple instances with the same (model_source, device) configuration
+            will share the same underlying model to save memory. Different
+            instances can use different encoding_type settings while sharing
+            the model.
+
+            **Model Selection:**
+
+            Uses ``naver/splade-cocondenser-ensembledistil`` instead of the newer
+            ``naver/splade-v3`` because splade-v3 is a gated model requiring
+            Hugging Face authentication. The cocondenser-ensembledistil variant:
+
+            - Does not require authentication or API tokens
+            - Is immediately available for all users
+            - Provides comparable retrieval performance (~2% difference)
+            - Avoids "Access to model is restricted" errors
+
+            If you need splade-v3 and have obtained access, you can subclass
+            this class and override the model_name parameter.
+
+        Examples:
+            >>> # Both instances share the same model (saves memory)
+            >>> query_emb = DefaultLocalSparseEmbedding(encoding_type="query")
+            >>> doc_emb = DefaultLocalSparseEmbedding(encoding_type="document")
+            >>> # Only one model is loaded in memory
+        """
+        # Use publicly available SPLADE model (no gated access required)
+        # Note: naver/splade-v3 requires authentication, so we use the
+        # cocondenser-ensembledistil variant which is publicly accessible
+        model_name = "naver/splade-cocondenser-ensembledistil"
+
+        # Initialize base class for model loading
+        SentenceTransformerFunctionBase.__init__(
+            self, model_name=model_name, model_source=model_source, device=device
+        )
+
+        self._encoding_type = encoding_type
+        self._extra_params = kwargs
+
+        # Create cache key for this model configuration
+        self._cache_key = (model_name, model_source, device)
+
+        # Load model to ensure it's available (will use cache if exists)
+        self._get_model()
+
+    @property
+    def extra_params(self) -> dict:
+        """dict: Extra parameters for model-specific customization."""
+        return self._extra_params
+
+    def __call__(self, input: str) -> SparseVectorType:
+        """Make the embedding function callable."""
+        return self.embed(input)
+
+    def embed(self, input: str) -> SparseVectorType:
+        """Generate sparse embedding vector for the input text.
+
+        This method uses the SPLADE model to convert input text into a sparse
+        vector representation. The result is a dictionary where keys are dimension
+        indices and values are importance weights (only non-zero values included).
+
+        The embedding is optimized based on the ``encoding_type`` specified during
+        initialization: "query" for search queries or "document" for indexed content.
+
+        Args:
+            input (str): Input text string to embed. Must be non-empty after
+                stripping whitespace.
+
+        Returns:
+            SparseVectorType: A dictionary mapping dimension index to weight.
+                Only non-zero dimensions are included. The dictionary is sorted
+                by indices (keys) in ascending order for consistent output.
+                Example: ``{10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.5}``
+
+        Raises:
+            TypeError: If ``input`` is not a string.
+            ValueError: If input is empty or whitespace-only.
+            RuntimeError: If model inference fails.
+
+        Examples:
+            >>> # Query embedding
+            >>> query_emb = DefaultLocalSparseEmbedding(encoding_type="query")
+            >>> query_vec = query_emb.embed("machine learning")
+            >>> isinstance(query_vec, dict)
+            True
+
+        Note:
+            - First call may be slower due to model loading
+            - Subsequent calls are much faster as the model stays in memory
+            - GPU acceleration provides significant speedup
+            - Sparse vectors are memory-efficient (only store non-zero values)
+        """
+        if not isinstance(input, str):
+            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")
+
+        input = input.strip()
+        if not input:
+            raise ValueError("Input text cannot be empty or whitespace only")
+
+        try:
+            model = self._get_model()
+
+            # Use appropriate encoding method based on type
+            if self._encoding_type == "document" and hasattr(model, "encode_document"):
+                # Use document encoding
+                sparse_matrix = model.encode_document([input])
+            elif hasattr(model, "encode_query"):
+                # Use query encoding (default)
+                sparse_matrix = model.encode_query([input])
+            else:
+                # Fallback: manual implementation for older sentence-transformers
+                return self._manual_sparse_encode(input)
+
+            # Convert sparse matrix to dictionary
+            # SPLADE returns shape [1, vocab_size] for single input
+
+            # Check if it's a sparse matrix (duck typing - has toarray method)
+            if hasattr(sparse_matrix, "toarray"):
+                # Sparse matrix (CSR/CSC/etc.) - convert to dense array
+                sparse_array = sparse_matrix[0].toarray().flatten()
+                sparse_dict = {
+                    int(idx): float(val)
+                    for idx, val in enumerate(sparse_array)
+                    if val > 0
+                }
+            else:
+                # Dense array format (numpy array or similar)
+                if isinstance(sparse_matrix, np.ndarray):
+                    sparse_array = sparse_matrix[0]
+                else:
+                    sparse_array = sparse_matrix
+
+                sparse_dict = {
+                    int(idx): float(val)
+                    for idx, val in enumerate(sparse_array)
+                    if val > 0
+                }
+
+            # Sort by indices (keys) to ensure consistent ordering
+            return dict(sorted(sparse_dict.items()))
+
+        except Exception as e:
+            if isinstance(e, (TypeError, ValueError)):
+                raise
+            raise RuntimeError(f"Failed to generate sparse embedding: {e!s}") from e
+
+    def _manual_sparse_encode(self, input: str) -> SparseVectorType:
+        """Fallback manual SPLADE encoding for older sentence-transformers.
+
+        Args:
+            input (str): Input text to encode.
+
+        Returns:
+            SparseVectorType: Sparse vector as dictionary.
+        """
+        import torch
+
+        model = self._get_model()
+
+        # Tokenize input
+        features = model.tokenize([input])
+
+        # Move to correct device
+        features = {k: v.to(model.device) for k, v in features.items()}
+
+        # Forward pass with no gradient
+        with torch.no_grad():
+            embeddings = model.forward(features)
+
+            # Get logits from model output
+            # SPLADE models typically output 'token_embeddings'
+            if isinstance(embeddings, dict) and "token_embeddings" in embeddings:
+                logits = embeddings["token_embeddings"][0]  # First batch item
+            elif hasattr(embeddings, "token_embeddings"):
+                logits = embeddings.token_embeddings[0]
+            # Fallback: try to get first value
+            elif isinstance(embeddings, dict):
+                logits = next(iter(embeddings.values()))[0]
+            else:
+                logits = embeddings[0]
+
+            # Apply SPLADE activation: log(1 + relu(x))
+            relu_log = torch.log(1 + torch.relu(logits))
+
+            # Max pooling over token dimension (reduce to vocab size)
+            if relu_log.dim() > 1:
+                sparse_vec, _ = torch.max(relu_log, dim=0)
+            else:
+                sparse_vec = relu_log
+
+            # Convert to sparse dictionary (only non-zero values)
+            sparse_vec_np = sparse_vec.cpu().numpy()
+            sparse_dict = {
+                int(idx): float(val) for idx, val in enumerate(sparse_vec_np) if val > 0
+            }
+
+            # Sort by indices (keys) to ensure consistent ordering
+            return dict(sorted(sparse_dict.items()))
+
+    def _get_model(self):
+        """Load or retrieve the SPLADE model from class-level cache.
+
+        Returns:
+            SentenceTransformer: The loaded SPLADE model instance.
+
+        Raises:
+            ImportError: If required packages are not installed.
+            ValueError: If model cannot be loaded.
+
+        Note:
+            Models are cached at class level and shared across all instances
+            with the same (model_name, model_source, device) configuration.
+            This allows memory-efficient usage when creating multiple instances
+            with different encoding_type settings.
+        """
+        # Check class-level cache first
+        if self._cache_key in self._model_cache:
+            return self._model_cache[self._cache_key]
+
+        # Use parent class method to load model
+        model = super()._get_model()
+
+        # Cache the model at class level
+        self._model_cache[self._cache_key] = model
+
+        return model
diff --git a/python/zvec/extension/sentence_transformer_function.py b/python/zvec/extension/sentence_transformer_function.py
new file mode 100644
index 00000000..1ba1662a
--- /dev/null
+++ b/python/zvec/extension/sentence_transformer_function.py
@@ -0,0 +1,150 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from typing import Literal, Optional
+
+from ..tool import require_module
+
+
+class SentenceTransformerFunctionBase:
+    """Base class for Sentence Transformer functions (both dense and sparse).
+
+    This base class provides common functionality for loading and managing
+    sentence-transformers models from Hugging Face or ModelScope. It supports
+    both dense models (e.g., all-MiniLM-L6-v2) and sparse models (e.g., SPLADE).
+
+    This class is not meant to be used directly. Use concrete implementations:
+    - ``SentenceTransformerEmbeddingFunction`` for dense embeddings
+    - ``SentenceTransformerSparseEmbeddingFunction`` for sparse embeddings
+    - ``DefaultDenseEmbedding`` for default dense embeddings
+    - ``DefaultSparseEmbedding`` for default sparse embeddings
+
+    Args:
+        model_name (str): Model identifier or local path.
+        model_source (Literal["huggingface", "modelscope"]): Model source.
+        device (Optional[str]): Device to run the model on.
+
+    Note:
+        - This is an internal base class for code reuse
+        - Subclasses should inherit from appropriate Protocol (Dense/Sparse)
+        - Provides model loading and management functionality
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        model_source: Literal["huggingface", "modelscope"] = "huggingface",
+        device: Optional[str] = None,
+    ):
+        """Initialize the base Sentence Transformer functionality.
+
+        Args:
+            model_name (str): Model identifier or local path.
+            model_source (Literal["huggingface", "modelscope"]): Model source.
+            device (Optional[str]): Device to run the model on.
+
+        Raises:
+            ValueError: If model_source is invalid.
+        """
+        # Validate model_source
+        if model_source not in ("huggingface", "modelscope"):
+            raise ValueError(
+                f"Invalid model_source: '{model_source}'. "
+                "Must be 'huggingface' or 'modelscope'."
+            )
+
+        self._model_name = model_name
+        self._model_source = model_source
+        self._device = device
+        self._model = None
+
+    @property
+    def model_name(self) -> str:
+        """str: The Sentence Transformer model name currently in use."""
+        return self._model_name
+
+    @property
+    def model_source(self) -> str:
+        """str: The model source being used ("huggingface" or "modelscope")."""
+        return self._model_source
+
+    @property
+    def device(self) -> str:
+        """str: The device the model is running on."""
+        model = self._get_model()
+        if model is not None:
+            return str(model.device)
+        return self._device or "cpu"
+
+    def _get_model(self):
+        """Load or retrieve the Sentence Transformer model.
+
+        Returns:
+            SentenceTransformer or SparseEncoder: The loaded model instance.
+
+        Raises:
+            ImportError: If required packages are not installed.
+            ValueError: If model cannot be loaded.
+        """
+        # Return cached model if exists
+        if self._model is not None:
+            return self._model
+
+        # Load model
+        try:
+            sentence_transformers = require_module("sentence_transformers")
+
+            if self._model_source == "modelscope":
+                # Load from ModelScope
+                require_module("modelscope")
+                from modelscope.hub.snapshot_download import snapshot_download
+
+                # Download model to cache
+                model_dir = snapshot_download(self._model_name)
+
+                # Load from local path
+                self._model = sentence_transformers.SentenceTransformer(
+                    model_dir, device=self._device, trust_remote_code=True
+                )
+            else:
+                # Load from Hugging Face (default)
+                self._model = sentence_transformers.SentenceTransformer(
+                    self._model_name, device=self._device, trust_remote_code=True
+                )
+
+            return self._model
+
+        except ImportError as e:
+            if "modelscope" in str(e) and self._model_source == "modelscope":
+                raise ImportError(
+                    "ModelScope support requires the 'modelscope' package. "
+                    "Please install it with: pip install modelscope"
+                ) from e
+            raise
+        except Exception as e:
+            raise ValueError(
+                f"Failed to load Sentence Transformer model '{self._model_name}' "
+                f"from {self._model_source}: {e!s}"
+            ) from e
+
+    def _is_sparse_model(self) -> bool:
+        """Check if the loaded model is a sparse encoder (e.g., SPLADE).
+
+        Returns:
+            bool: True if model supports sparse encoding.
+        """
+        model = self._get_model()
+        # Check if model has sparse encoding methods
+        return hasattr(model, "encode_query") or hasattr(model, "encode_document")
diff --git a/python/zvec/extension/sentence_transformer_rerank_function.py b/python/zvec/extension/sentence_transformer_rerank_function.py
new file mode 100644
index 00000000..58c5838f
--- /dev/null
+++ b/python/zvec/extension/sentence_transformer_rerank_function.py
@@ -0,0 +1,384 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from typing import Literal, Optional
+
+from ..model.doc import Doc
+from ..tool import require_module
+from .rerank_function import RerankFunction
+from .sentence_transformer_function import SentenceTransformerFunctionBase
+
+
+class DefaultLocalReRanker(SentenceTransformerFunctionBase, RerankFunction):
+    """Re-ranker using Sentence Transformer cross-encoder models for semantic re-ranking.
+
+    This re-ranker leverages pre-trained cross-encoder models to perform deep semantic
+    re-ranking of search results. It runs locally without API calls, supports GPU
+    acceleration, and works with models from Hugging Face or ModelScope.
+
+    Cross-encoder models evaluate query-document pairs jointly, providing more
+    accurate relevance scores than bi-encoder (embedding-based) similarity.
+
+    Args:
+        query (str): Query text for semantic re-ranking. **Required**.
+        topn (int, optional): Maximum number of documents to return after re-ranking.
+            Defaults to 10.
+        rerank_field (Optional[str], optional): Document field name to use as
+            re-ranking input text. **Required** (e.g., "content", "title", "body").
+        model_name (str, optional): Cross-encoder model identifier or local path.
+            Defaults to ``"cross-encoder/ms-marco-MiniLM-L6-v2"`` (MS MARCO MiniLM).
+            Common options:
+            - ``"cross-encoder/ms-marco-MiniLM-L6-v2"``: Lightweight, fast (~80MB, recommended)
+            - ``"cross-encoder/ms-marco-MiniLM-L12-v2"``: Better accuracy (~120MB)
+            - ``"BAAI/bge-reranker-base"``: BGE Reranker Base (~280MB)
+            - ``"BAAI/bge-reranker-large"``: BGE Reranker Large (highest quality, ~560MB)
+        model_source (Literal["huggingface", "modelscope"], optional): Model source.
+            Defaults to ``"huggingface"``.
+            - ``"huggingface"``: Load from Hugging Face Hub
+            - ``"modelscope"``: Load from ModelScope (recommended for users in China)
+        device (Optional[str], optional): Device to run the model on.
+            Options: ``"cpu"``, ``"cuda"``, ``"mps"`` (for Apple Silicon), or ``None``
+            for automatic detection. Defaults to ``None``.
+        batch_size (int, optional): Batch size for processing query-document pairs.
+            Larger values speed up processing but use more memory. Defaults to ``32``.
+
+    Attributes:
+        query (str): The query text used for re-ranking.
+        topn (int): Maximum number of documents to return.
+        rerank_field (Optional[str]): Field name used for re-ranking input.
+        model_name (str): The cross-encoder model being used.
+        model_source (str): The model source ("huggingface" or "modelscope").
+        device (str): The device the model is running on.
+
+    Raises:
+        ValueError: If ``query`` is empty/None, ``rerank_field`` is None,
+            or model cannot be loaded.
+        TypeError: If input types are invalid.
+        RuntimeError: If model inference fails.
+
+    Note:
+        - Requires Python 3.10, 3.11, or 3.12
+        - Requires ``sentence-transformers`` package: ``pip install sentence-transformers``
+        - For ModelScope support, also requires: ``pip install modelscope``
+        - First run downloads the model (~80-560MB depending on model) from chosen source
+        - No API keys or network required after initial download
+        - Cross-encoders are slower than bi-encoders but more accurate
+        - GPU acceleration provides significant speedup (5-10x)
+
+        **MS MARCO MiniLM-L6-v2 Model (Default):**
+
+        The default model ``cross-encoder/ms-marco-MiniLM-L6-v2`` is a lightweight and
+        efficient cross-encoder trained on MS MARCO dataset. It provides:
+
+        - Fast inference speed (suitable for real-time applications)
+        - Small model size (~80MB, quick to download)
+        - Good balance between speed and accuracy
+        - Trained on 500K+ query-document pairs
+        - Public availability without authentication
+
+        **For users in China:**
+
+        If you encounter Hugging Face access issues, use ModelScope instead:
+
+        .. code-block:: python
+
+            # Recommended for users in China
+            reranker = SentenceTransformerReRanker(
+                query="机器学习算法",
+                rerank_field="content",
+                model_source="modelscope"
+            )
+
+        Alternatively, use Hugging Face mirror:
+
+        .. code-block:: bash
+
+            export HF_ENDPOINT=https://hf-mirror.com
+
+    Examples:
+        >>> # Basic usage with default MS MARCO MiniLM model
+        >>> from zvec.extension import SentenceTransformerReRanker
+        >>>
+        >>> reranker = SentenceTransformerReRanker(
+        ...     query="machine learning algorithms",
+        ...     topn=5,
+        ...     rerank_field="content"
+        ... )
+        >>>
+        >>> # Use in collection.query()
+        >>> results = collection.query(
+        ...     data={"vector_field": query_vector},
+        ...     reranker=reranker,
+        ...     topk=20
+        ... )
+
+        >>> # Using ModelScope for users in China
+        >>> reranker = SentenceTransformerReRanker(
+        ...     query="深度学习",
+        ...     topn=10,
+        ...     rerank_field="content",
+        ...     model_source="modelscope"
+        ... )
+
+        >>> # Using larger model for better quality
+        >>> reranker = SentenceTransformerReRanker(
+        ...     query="neural networks",
+        ...     topn=5,
+        ...     rerank_field="content",
+        ...     model_name="BAAI/bge-reranker-large",
+        ...     device="cuda",
+        ...     batch_size=64
+        ... )
+
+        >>> # Direct rerank call (for testing)
+        >>> query_results = {
+        ...     "vector1": [
+        ...         Doc(id="1", score=0.9, fields={"content": "Machine learning is..."}),
+        ...         Doc(id="2", score=0.8, fields={"content": "Deep learning is..."}),
+        ...     ]
+        ... }
+        >>> reranked = reranker.rerank(query_results)
+        >>> for doc in reranked:
+        ...     print(f"ID: {doc.id}, Score: {doc.score:.4f}")
+        ID: 2, Score: 0.9234
+        ID: 1, Score: 0.8567
+
+    See Also:
+        - ``RerankFunction``: Abstract base class for re-rankers
+        - ``QwenReRanker``: Re-ranker using Qwen API
+        - ``RrfReRanker``: Multi-vector re-ranker using RRF
+        - ``WeightedReRanker``: Multi-vector re-ranker using weighted scores
+
+    References:
+        - MS MARCO Cross-Encoder: https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2
+        - BGE Reranker: https://huggingface.co/BAAI/bge-reranker-base
+        - Cross-Encoder vs Bi-Encoder: https://www.sbert.net/examples/applications/cross-encoder/README.html
+    """
+
+    def __init__(
+        self,
+        query: Optional[str] = None,
+        topn: int = 10,
+        rerank_field: Optional[str] = None,
+        model_name: str = "cross-encoder/ms-marco-MiniLM-L6-v2",
+        model_source: Literal["huggingface", "modelscope"] = "huggingface",
+        device: Optional[str] = None,
+        batch_size: int = 32,
+    ):
+        """Initialize SentenceTransformerReRanker with query and configuration.
+
+        Args:
+            query (Optional[str]): Query text for semantic matching. Required.
+            topn (int): Number of top results to return.
+            rerank_field (Optional[str]): Document field for re-ranking input.
+            model_name (str): Cross-encoder model identifier.
+            model_source (Literal["huggingface", "modelscope"]): Model source.
+            device (Optional[str]): Target device ("cpu", "cuda", "mps", or None).
+            batch_size (int): Batch size for processing query-document pairs.
+
+        Raises:
+            ValueError: If query is empty or model cannot be loaded.
+        """
+        # Initialize base class for model loading
+        SentenceTransformerFunctionBase.__init__(
+            self, model_name=model_name, model_source=model_source, device=device
+        )
+
+        # Initialize rerank function
+        RerankFunction.__init__(self, topn=topn, rerank_field=rerank_field)
+
+        # Validate query
+        if not query:
+            raise ValueError("Query is required for DefaultLocalReRanker")
+        self._query = query
+        self._batch_size = batch_size
+
+        # Load and validate cross-encoder model
+        model = self._get_model()
+        if not hasattr(model, "predict"):
+            raise ValueError(
+                f"Model '{model_name}' does not appear to be a cross-encoder model. "
+                "Cross-encoder models should have a 'predict' method."
+            )
+        self._model = model
+
+    def _get_model(self):
+        """Load or retrieve the CrossEncoder model.
+
+        This overrides the base class method to load CrossEncoder instead of
+        SentenceTransformer, as reranking requires cross-encoder models.
+
+        Returns:
+            CrossEncoder: The loaded cross-encoder model instance.
+
+        Raises:
+            ImportError: If required packages are not installed.
+            ValueError: If model cannot be loaded.
+        """
+        # Return cached model if exists
+        if self._model is not None:
+            return self._model
+
+        # Load cross-encoder model
+        try:
+            sentence_transformers = require_module("sentence_transformers")
+
+            if self._model_source == "modelscope":
+                # Load from ModelScope
+                require_module("modelscope")
+                from modelscope.hub.snapshot_download import snapshot_download
+
+                # Download model to cache
+                model_dir = snapshot_download(self._model_name)
+
+                # Load CrossEncoder from local path
+                model = sentence_transformers.CrossEncoder(
+                    model_dir, device=self._device
+                )
+            else:
+                # Load CrossEncoder from Hugging Face (default)
+                model = sentence_transformers.CrossEncoder(
+                    self._model_name, device=self._device
+                )
+
+            return model
+
+        except ImportError as e:
+            if "modelscope" in str(e) and self._model_source == "modelscope":
+                raise ImportError(
+                    "ModelScope support requires the 'modelscope' package. "
+                    "Please install it with: pip install modelscope"
+                ) from e
+            raise
+        except Exception as e:
+            raise ValueError(
+                f"Failed to load CrossEncoder model '{self._model_name}' "
+                f"from {self._model_source}: {e!s}"
+            ) from e
+
+    @property
+    def query(self) -> str:
+        """str: Query text used for semantic re-ranking."""
+        return self._query
+
+    @property
+    def batch_size(self) -> int:
+        """int: Batch size for processing query-document pairs."""
+        return self._batch_size
+
+    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
+        """Re-rank documents using Sentence Transformer cross-encoder model.
+
+        Evaluates each query-document pair using the cross-encoder model to compute
+        relevance scores. Documents are then sorted by these scores and the top-k
+        results are returned.
+
+        Args:
+            query_results (dict[str, list[Doc]]): Mapping from vector field names
+                to lists of retrieved documents. Documents from all fields are
+                deduplicated and re-ranked together.
+
+        Returns:
+            list[Doc]: Re-ranked documents (up to ``topn``) with updated ``score``
+                fields containing relevance scores from the cross-encoder model.
+
+        Raises:
+            ValueError: If no valid documents are found or model inference fails.
+
+        Note:
+            - Duplicate documents (same ID) across fields are processed once
+            - Documents with empty/missing ``rerank_field`` content are skipped
+            - Returned scores are logits from the cross-encoder model
+            - Higher scores indicate higher relevance
+            - Processing time is O(n) where n is the number of documents
+
+        Examples:
+            >>> reranker = SentenceTransformerReRanker(
+            ...     query="machine learning",
+            ...     topn=3,
+            ...     rerank_field="content"
+            ... )
+            >>> query_results = {
+            ...     "vector1": [
+            ...         Doc(id="1", score=0.9, fields={"content": "ML basics"}),
+            ...         Doc(id="2", score=0.8, fields={"content": "DL tutorial"}),
+            ...     ]
+            ... }
+            >>> reranked = reranker.rerank(query_results)
+            >>> len(reranked) <= 3
+            True
+        """
+        if not query_results:
+            return []
+
+        # Collect and deduplicate documents
+        id_to_doc: dict[str, Doc] = {}
+        doc_ids: list[str] = []
+        contents: list[str] = []
+
+        for _, query_result in query_results.items():
+            for doc in query_result:
+                doc_id = doc.id
+                if doc_id in id_to_doc:
+                    continue
+
+                # Extract text content from specified field
+                field_value = doc.field(self.rerank_field)
+                rank_content = str(field_value).strip() if field_value else ""
+                if not rank_content:
+                    continue
+
+                id_to_doc[doc_id] = doc
+                doc_ids.append(doc_id)
+                contents.append(rank_content)
+
+        if not contents:
+            raise ValueError("No documents to rerank")
+
+        try:
+            # Use standard cross-encoder predict method
+            pairs = [[self.query, content] for content in contents]
+            scores = self._model.predict(
+                pairs,
+                batch_size=self.batch_size,
+                show_progress_bar=False,
+                convert_to_numpy=True,
+            )
+
+            # Convert to float list if needed
+            if hasattr(scores, "tolist"):
+                scores = scores.tolist()
+            else:
+                scores = [float(s) for s in scores]
+
+        except Exception as e:
+            raise RuntimeError(f"Failed to compute rerank scores: {e!s}") from e
+
+        # Create scored documents
+        scored_docs = [
+            (doc_ids[i], id_to_doc[doc_ids[i]], scores[i]) for i in range(len(doc_ids))
+        ]
+
+        # Sort by score (descending) and take top-k
+        scored_docs.sort(key=lambda x: x[2], reverse=True)
+        top_scored_docs = scored_docs[: self.topn]
+
+        # Build result list with updated scores
+        results: list[Doc] = []
+        for _, doc, score in top_scored_docs:
+            new_doc = doc._replace(score=score)
+            results.append(new_doc)
+
+        return results
diff --git a/python/zvec/tool/util.py b/python/zvec/tool/util.py
index a836876c..409a4d5b 100644
--- a/python/zvec/tool/util.py
+++ b/python/zvec/tool/util.py
@@ -59,5 +59,5 @@ def require_module(module: str, mitigation: Optional[str] = None) -> Any:
             else:
                 msg += f"please pip install '{top_level}'."
         else:
-            msg += f"Please pip install '{package}."
+            msg += f"Please pip install '{package}'."
         raise ImportError(msg) from e
diff --git a/src/core/algorithm/flat/flat_searcher.h b/src/core/algorithm/flat/flat_searcher.h
index 78dcb1d3..207f38f5 100644
--- a/src/core/algorithm/flat/flat_searcher.h
+++ b/src/core/algorithm/flat/flat_searcher.h
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
-
+#include <unordered_map>
 #include <zvec/ailego/container/params.h>
 #include <zvec/core/framework/index_searcher.h>
 #include "flat_distance_matrix.h"
@@ -163,7 +163,7 @@ class FlatSearcher : public IndexSearcher {
  private:
   //! Members
   const uint64_t *keys_{nullptr};
-  std::map<key_t, node_id_t> key_id_mapping_;
+  std::unordered_map<key_t, node_id_t> key_id_mapping_;
   uint32_t magic_{IndexContext::GenerateMagic()};
   uint32_t read_block_size_{FLAT_DEFAULT_READ_BLOCK_SIZE};
   bool column_major_order_{false};
diff --git a/src/core/algorithm/flat/flat_streamer.cc b/src/core/algorithm/flat/flat_streamer.cc
index a721cf5b..8969efc1 100644
--- a/src/core/algorithm/flat/flat_streamer.cc
+++ b/src/core/algorithm/flat/flat_streamer.cc
@@ -376,7 +376,7 @@ int FlatStreamer<BATCH_SIZE>::search_bf_by_p_keys_impl(
       if (!filter.is_valid() || !filter(key)) {
         dist_t dist = 0;
         IndexStorage::MemoryBlock block;
-        entity_.get_vector_by_key(key, block);
+        if (entity_.get_vector_by_key(key, block) != 0) continue;
         entity_.row_major_distance(query, block.data(), 1, &dist);
         heap->emplace(key, dist);
       }
@@ -418,7 +418,7 @@ int FlatStreamer<BATCH_SIZE>::group_by_search_impl(
       if (!bf_context->filter().is_valid() || !bf_context->filter()(key)) {
         dist_t dist = 0;
         IndexStorage::MemoryBlock block;
-        entity_.get_vector_by_key(key, block);
+        if (entity_.get_vector_by_key(key, block) != 0) continue;
         entity_.row_major_distance(query, block.data(), 1, &dist);
 
         std::string group_id = group_by(key);
@@ -466,7 +466,7 @@ int FlatStreamer<BATCH_SIZE>::group_by_search_p_keys_impl(
       if (!bf_context->filter().is_valid() || !bf_context->filter()(key)) {
         dist_t dist = 0;
         IndexStorage::MemoryBlock block;
-        entity_.get_vector_by_key(key, block);
+        if (entity_.get_vector_by_key(key, block) != 0) continue;
         entity_.row_major_distance(query, block.data(), 1, &dist);
 
         std::string group_id = group_by(key);
diff --git a/src/core/algorithm/flat/flat_streamer_context.h b/src/core/algorithm/flat/flat_streamer_context.h
index 24cfd9e5..1626880d 100644
--- a/src/core/algorithm/flat/flat_streamer_context.h
+++ b/src/core/algorithm/flat/flat_streamer_context.h
@@ -122,7 +122,7 @@ class FlatStreamerContext : public IndexStreamer::Context {
         owner_->entity().get_vector_by_key(key, block);
         results_[idx].emplace_back(key, score, key, block);
       } else {
-        results_[idx].emplace_back(key, score);
+        results_[idx].emplace_back(key, score, key);
       }
     }
   }
diff --git a/src/core/mixed_reducer/mixed_streamer_reducer.cc b/src/core/mixed_reducer/mixed_streamer_reducer.cc
index b5e241bb..bb84e3d6 100644
--- a/src/core/mixed_reducer/mixed_streamer_reducer.cc
+++ b/src/core/mixed_reducer/mixed_streamer_reducer.cc
@@ -239,6 +239,9 @@ int MixedStreamerReducer::read_vec(size_t source_streamer_index,
   bool need_revert = (target_streamer_->meta().reformer_name() !=
                           streamer->meta().reformer_name() &&
                       reformer != nullptr);
+  if (target_builder_ && reformer) {
+    need_revert = true;
+  }
 
   IndexProvider::Pointer provider = streamer->create_provider();
   IndexProvider::Iterator::Pointer iterator = provider->create_iterator();
@@ -366,7 +369,7 @@ void MixedStreamerReducer::add_vec_with_builder(int *result) {
     std::string out_vector_buffer = std::string(
         static_cast<const char *>(vector),
         original_query_meta_.dimension() * original_query_meta_.unit_size());
-    PushToDocCache(target_streamer_query_meta, (uint32_t)vector_item.pkey_,
+    PushToDocCache(original_query_meta_, (uint32_t)vector_item.pkey_,
                    out_vector_buffer);
   }
 
@@ -509,8 +512,6 @@ void MixedStreamerReducer::PushToDocCache(const IndexQueryMeta &meta,
 }
 
 int MixedStreamerReducer::IndexBuild() {
-  const bool need_convert = !is_target_and_source_same_reformer_ &&
-                            target_streamer_reformer_ != nullptr;
   IndexHolder::Pointer target_holder;
   if (original_query_meta_.data_type() == core::IndexMeta::DataType::DT_FP16) {
     auto holder = std::make_shared<
@@ -563,7 +564,7 @@ int MixedStreamerReducer::IndexBuild() {
     LOG_ERROR("data_type is not support");
     return core::IndexError_Runtime;
   }
-  if (target_builder_converter_ && need_convert) {
+  if (target_builder_converter_) {
     core::IndexConverter::TrainAndTransform(target_builder_converter_,
                                             target_holder);
     target_holder = target_builder_converter_->result();
diff --git a/src/core/quantizer/cosine_converter.cc b/src/core/quantizer/cosine_converter.cc
index e8deaca8..dda76b01 100644
--- a/src/core/quantizer/cosine_converter.cc
+++ b/src/core/quantizer/cosine_converter.cc
@@ -64,8 +64,8 @@ class CosineConverterHolder : public IndexHolder {
 
     //! Retrieve pointer of data
     const void *data(void) const override {
-      return type_ == IndexMeta::DataType::DT_FP32 ? normalize_buffer_.data()
-                                                   : buffer_.data();
+      return type_ == original_type_ ? normalize_buffer_.data()
+                                     : buffer_.data();
     }
 
     //! Test if the iterator is valid
@@ -325,7 +325,7 @@ class CosineConverter : public IndexConverter {
 
   //! Transform the data
   int transform(IndexHolder::Pointer holder) override {
-    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||
+    if (holder->data_type() != original_type_ ||
         holder->dimension() != meta_.dimension() - ExtraDimension(dst_type_)) {
       return IndexError_Mismatch;
     }
diff --git a/src/db/index/column/vector_column/combined_vector_column_indexer.cc b/src/db/index/column/vector_column/combined_vector_column_indexer.cc
index f1385b01..70c71d07 100644
--- a/src/db/index/column/vector_column/combined_vector_column_indexer.cc
+++ b/src/db/index/column/vector_column/combined_vector_column_indexer.cc
@@ -40,22 +40,53 @@ CombinedVectorColumnIndexer::CombinedVectorColumnIndexer(
     }
   }
 
+  int block_offset = 0;
+  for (size_t i = 0; i < indexers_.size(); ++i) {
+    auto &block_meta = blocks_[i];
+    block_offsets_.push_back(block_offset);
+    block_offset += block_meta.doc_count_;
+  }
+
   min_doc_id_ = segment_meta.min_doc_id();
 }
 
-
 Result<IndexResults::Ptr> CombinedVectorColumnIndexer::Search(
     const vector_column_params::VectorData &vector_data,
     const vector_column_params::QueryParams &query_params) {
   core::IndexDocumentList doc_list;
   std::vector<std::string> reverted_vector_list;
   std::vector<std::string> reverted_sparse_values_list;
-  int block_offset = 0;
+
+  // query_params.bf_pks is segment level, here we need to convert it to block
+  // level
+  std::vector<std::vector<uint64_t>> block_bf_pks(indexers_.size());
+
+  if (!query_params.bf_pks.empty()) {
+    // dispatcher pks to corresponding block_bf_pks
+    for (auto &pk : query_params.bf_pks[0]) {
+      for (size_t i = 0; i < block_offsets_.size(); ++i) {
+        if (pk >= block_offsets_[i] &&
+            pk < block_offsets_[i] + blocks_[i].doc_count_) {
+          block_bf_pks[i].push_back(
+              static_cast<uint64_t>(pk - block_offsets_[i]));
+          break;
+        }
+      }
+    }
+  }
 
   auto q_params = query_params.query_params;
   for (size_t i = 0; i < indexers_.size(); ++i) {
-    auto &block_meta = blocks_[i];
+    if (!query_params.bf_pks.empty() && block_bf_pks[i].empty()) {
+      LOG_DEBUG(
+          "query_params has bf_pks, but block_bf_pks[%zu] is empty, just skip "
+          "this indexer",
+          i);
+      continue;
+    }
     zvec::Result<zvec::IndexResults::Ptr> result{nullptr};
+    float scale_factor{};
+    bool need_refine{false};
     if (q_params && q_params->is_using_refiner()) {
       if (normal_indexers_.size() != indexers_.size()) {
         return tl::make_unexpected(Status::InvalidArgument(
@@ -63,7 +94,6 @@ Result<IndexResults::Ptr> CombinedVectorColumnIndexer::Search(
             "] not match indexers size[", indexers_.size(), "]"));
       }
       // query_params of HNSW doesn't have scale_factor
-      float scale_factor{};
       if (q_params->type() == IndexType::FLAT) {
         scale_factor = std::dynamic_pointer_cast<FlatQueryParams>(q_params)
                            ->scale_factor();
@@ -71,29 +101,34 @@ Result<IndexResults::Ptr> CombinedVectorColumnIndexer::Search(
         scale_factor =
             std::dynamic_pointer_cast<IVFQueryParams>(q_params)->scale_factor();
       }
-      vector_column_params::QueryParams modified_query_params{
-          query_params.data_type,
-          query_params.dimension,
-          query_params.topk,
-          query_params.filter,
-          query_params.fetch_vector,
-          query_params.query_params,
-          query_params.group_by
-              ? std::make_unique<vector_column_params::GroupByParams>(
-                    query_params.group_by->group_topk,
-                    query_params.group_by->group_count,
-                    query_params.group_by->group_by)
-              : nullptr,
-          query_params.bf_pks,
-          std::shared_ptr<vector_column_params::RefinerParam>(
-              new vector_column_params::RefinerParam{scale_factor,
-                                                     normal_indexers_[i]}),
-          query_params.extra_params};
-      result = indexers_[i]->Search(vector_data, modified_query_params);
-    } else {
-      result = indexers_[i]->Search(vector_data, query_params);
+      need_refine = true;
     }
 
+    vector_column_params::QueryParams modified_query_params{
+        query_params.data_type,
+        query_params.dimension,
+        query_params.topk,
+        query_params.filter,
+        query_params.fetch_vector,
+        query_params.query_params,
+        query_params.group_by
+            ? std::make_unique<vector_column_params::GroupByParams>(
+                  query_params.group_by->group_topk,
+                  query_params.group_by->group_count,
+                  query_params.group_by->group_by)
+            : nullptr,
+        {},
+        need_refine ? std::shared_ptr<vector_column_params::RefinerParam>(
+                          new vector_column_params::RefinerParam{
+                              scale_factor, normal_indexers_[i]})
+                    : nullptr,
+        query_params.extra_params};
+
+    if (!query_params.bf_pks.empty()) {
+      modified_query_params.bf_pks.emplace_back(block_bf_pks[i]);
+    }
+
+    result = indexers_[i]->Search(vector_data, modified_query_params);
     if (!result) {
       return tl::make_unexpected(result.error());
     }
@@ -105,10 +140,9 @@ Result<IndexResults::Ptr> CombinedVectorColumnIndexer::Search(
     const auto &sub_docs = vector_index_results->docs();
     for (size_t j = 0; j < sub_docs.size(); ++j) {
       auto doc = sub_docs[j];
-      doc.set_index(block_offset + sub_docs[j].index());
+      doc.set_key(block_offsets_[i] + sub_docs[j].key());
       doc_list.emplace_back(std::move(doc));
     }
-    block_offset += block_meta.doc_count_;
 
     auto &&temp_vector_list = vector_index_results->reverted_vector_list();
     reverted_vector_list.insert(
diff --git a/src/db/index/column/vector_column/combined_vector_column_indexer.h b/src/db/index/column/vector_column/combined_vector_column_indexer.h
index 2e723c19..b0b0589f 100644
--- a/src/db/index/column/vector_column/combined_vector_column_indexer.h
+++ b/src/db/index/column/vector_column/combined_vector_column_indexer.h
@@ -52,6 +52,7 @@ class CombinedVectorColumnIndexer {
   std::vector<VectorColumnIndexer::Ptr> indexers_;
   std::vector<VectorColumnIndexer::Ptr> normal_indexers_;
   std::vector<BlockMeta> blocks_;
+  std::vector<uint32_t> block_offsets_;
   MetricType metric_type_{MetricType::UNDEFINED};
   bool is_quantized_{false};
   uint64_t min_doc_id_{0};
diff --git a/tests/ailego/parallel/thread_queue_test.cc b/tests/ailego/parallel/thread_queue_test.cc
index 6a18b4ee..a7000181 100644
--- a/tests/ailego/parallel/thread_queue_test.cc
+++ b/tests/ailego/parallel/thread_queue_test.cc
@@ -103,7 +103,7 @@ TEST(ThreadQueue, MultiThreadWithHighPriority) {
   }
 
   // Wait for all tasks to complete
-  std::this_thread::sleep_for(std::chrono::seconds(1));
+  std::this_thread::sleep_for(std::chrono::seconds(3));
 
   EXPECT_EQ(count, 1000);
   EXPECT_EQ(high_priority_count, 1000);
diff --git a/tests/core/algorithm/cluster/opt_kmeans_cluster_test.cc b/tests/core/algorithm/cluster/opt_kmeans_cluster_test.cc
index b195b48d..d9197f1a 100644
--- a/tests/core/algorithm/cluster/opt_kmeans_cluster_test.cc
+++ b/tests/core/algorithm/cluster/opt_kmeans_cluster_test.cc
@@ -514,7 +514,7 @@ TEST(OptKmeansCluster, IN4Correctness) {
   EXPECT_EQ(centroids1.size(), centroids2.size());
   for (size_t i = 0; i < centroids1.size(); ++i) {
     EXPECT_EQ(centroids1[i].follows(), centroids2[i].follows());
-    EXPECT_EQ(centroids1[i].score(), centroids2[i].score());
+    EXPECT_DOUBLE_EQ(centroids1[i].score(), centroids2[i].score());
   }
 }
 
diff --git a/tests/core/algorithm/flat/flat_searcher_test.cpp b/tests/core/algorithm/flat/flat_searcher_test.cpp
index 573cb739..9536b1cd 100644
--- a/tests/core/algorithm/flat/flat_searcher_test.cpp
+++ b/tests/core/algorithm/flat/flat_searcher_test.cpp
@@ -1346,7 +1346,7 @@ TEST(FlatProvider, Provider_FP32) {
     const float *features1 = (const float *)provider1->get_vector(it1->key());
     const float *features2 = (const float *)provider2->get_vector(it2->key());
     for (size_t idx = 0; idx < dim; idx++) {
-      ASSERT_EQ(*features1, *features2);
+      ASSERT_FLOAT_EQ(*features1, *features2);
       features1++;
       features2++;
     }
diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_test.cpp b/tests/core/algorithm/flat/flat_streamer_buffer_test.cpp
index 62b25e23..e9988692 100644
--- a/tests/core/algorithm/flat/flat_streamer_buffer_test.cpp
+++ b/tests/core/algorithm/flat/flat_streamer_buffer_test.cpp
@@ -104,7 +104,7 @@ TEST_F(FlatStreamerTest, TestLinearSearch) {
     ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
     const float *data = (float *)block.data();
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(data[j], i);
+      ASSERT_FLOAT_EQ(data[j], i);
     }
     ASSERT_EQ(i, result1[0].key());
 
@@ -150,7 +150,7 @@ TEST_F(FlatStreamerTest, TestLinearSearch) {
     ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
     const float *data = (float *)block.data();
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(data[j], i);
+      ASSERT_FLOAT_EQ(data[j], i);
     }
     ASSERT_EQ(i, result1[0].key());
 
@@ -226,7 +226,7 @@ TEST_F(FlatStreamerTest, TestLinearSearchMMap) {
     ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
     const float *data = (float *)block.data();
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(data[j], i);
+      ASSERT_FLOAT_EQ(data[j], i);
     }
     ASSERT_EQ(i, result1[0].key());
 
@@ -320,7 +320,7 @@ TEST_F(FlatStreamerTest, TestBufferStorage) {
     EXPECT_EQ(topk, result1.size());
     for (size_t j = 0; j < dim; ++j) {
       const float *data = (float *)provider->get_vector(result1[0].key());
-      EXPECT_EQ(data[j], i);
+      EXPECT_FLOAT_EQ(data[j], i);
     }
     EXPECT_EQ(i, result1[0].key());
 
diff --git a/tests/core/algorithm/flat/flat_streamer_test.cc b/tests/core/algorithm/flat/flat_streamer_test.cc
index 022c1063..ff64ce17 100644
--- a/tests/core/algorithm/flat/flat_streamer_test.cc
+++ b/tests/core/algorithm/flat/flat_streamer_test.cc
@@ -93,7 +93,7 @@ TEST_F(FlatStreamerTest, TestAddVector) {
     streamer->add_impl(i, vec.data(), qmeta, ctx);
     const float *data = (float *)provider->get_vector(i);
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(data[j], i);
+      ASSERT_FLOAT_EQ(data[j], i);
     }
   }
 
@@ -141,7 +141,7 @@ TEST_F(FlatStreamerTest, TestLinearSearch) {
     ASSERT_EQ(topk, result1.size());
     for (size_t j = 0; j < dim; ++j) {
       const float *data = (float *)provider->get_vector(result1[0].key());
-      ASSERT_EQ(data[j], i);
+      ASSERT_FLOAT_EQ(data[j], i);
     }
     ASSERT_EQ(i, result1[0].key());
 
@@ -376,7 +376,7 @@ TEST_F(FlatStreamerTest, TestOpenClose) {
     while (iter->is_valid()) {
       float *data = (float *)provider->get_vector(cur);
       for (size_t d = 0; d < dim; ++d) {
-        ASSERT_EQ((float)cur, data[d]);
+        ASSERT_FLOAT_EQ((float)cur, data[d]);
       }
       iter->next();
       cur += 2;
@@ -463,7 +463,7 @@ TEST_F(FlatStreamerTest, TestForceFlush) {
     while (iter->is_valid()) {
       float *data = (float *)provider->get_vector(cur);
       for (size_t d = 0; d < dim; ++d) {
-        ASSERT_EQ((float)cur, data[d]);
+        ASSERT_FLOAT_EQ((float)cur, data[d]);
       }
       iter->next();
       cur++;
@@ -501,7 +501,7 @@ TEST_F(FlatStreamerTest, TestForceFlush) {
     const float *data = (const float *)provider->get_vector(i);
     ASSERT_NE(data, nullptr);
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(i, data[j]);
+      ASSERT_FLOAT_EQ(i, data[j]);
     }
   }
 }
@@ -556,7 +556,7 @@ TEST_F(FlatStreamerTest, TestMultiThread) {
   while (iter->is_valid()) {
     float *data = (float *)iter->data();
     for (size_t d = 0; d < dim; ++d) {
-      ASSERT_EQ((float)iter->key(), data[d]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
     }
     total++;
     min = std::min(min, iter->key());
@@ -716,7 +716,7 @@ TEST_F(FlatStreamerTest, TestConcurrentAddAndSearch) {
   while (iter->is_valid()) {
     float *data = (float *)iter->data();
     for (size_t d = 0; d < dim; ++d) {
-      ASSERT_EQ((float)iter->key(), data[d]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
     }
     total++;
     min = std::min(min, iter->key());
@@ -847,8 +847,8 @@ TEST_F(FlatStreamerTest, TestMaxIndexSize) {
       writeCnt1 * 128 * 4 + writeCnt1 * 8 + writeCnt1 * 28 / 32;
   LOG_INFO("increment1: %lu, expect_size: %lu", increment1, expect_size);
 
-  ASSERT_GT(expect_size, increment1 * 0.8f);
-  ASSERT_LT(expect_size, increment1 * 1.2f);
+  ASSERT_GT(expect_size, increment1 * 0.75f);
+  ASSERT_LT(expect_size, increment1 * 1.25f);
 
   streamer->flush(0UL);
   streamer.reset();
diff --git a/tests/core/algorithm/flat_sparse/flat_sparse_streamer_test.cc b/tests/core/algorithm/flat_sparse/flat_sparse_streamer_test.cc
index 73d85eb3..cad6a4d3 100644
--- a/tests/core/algorithm/flat_sparse/flat_sparse_streamer_test.cc
+++ b/tests/core/algorithm/flat_sparse/flat_sparse_streamer_test.cc
@@ -387,7 +387,7 @@ TEST_F(FlatSparseStreamerTest, TestCreateIterator) {
       float *sparse_data = (float *)iter->sparse_data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < sparse_dim_count; ++d) {
-        ASSERT_EQ((float)cur, sparse_data[d]);
+        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);
       }
       iter->next();
       cur++;
@@ -487,7 +487,7 @@ TEST_F(FlatSparseStreamerTest, TestOpenAndClose) {
       float *sparse_data = (float *)iter->sparse_data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < sparse_dim_count; ++d) {
-        ASSERT_EQ((float)cur, sparse_data[d]);
+        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);
       }
       iter->next();
       cur += 2;
@@ -589,7 +589,7 @@ TEST_F(FlatSparseStreamerTest, TestForceFlush) {
 
       const float *data = reinterpret_cast<const float *>(iter->sparse_data());
       for (size_t j = 0; j < sparse_dim_count; ++j) {
-        ASSERT_EQ((float)cur, data[j]);
+        ASSERT_FLOAT_EQ((float)cur, data[j]);
       }
 
       iter->next();
@@ -710,7 +710,7 @@ TEST_F(FlatSparseStreamerTest, TestMultiThread) {
 
     const float *data = reinterpret_cast<const float *>(iter->sparse_data());
     for (size_t j = 0; j < sparse_dim_count; ++j) {
-      ASSERT_EQ((float)iter->key(), data[j]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);
     }
     total++;
     min = std::min(min, iter->key());
@@ -915,7 +915,7 @@ TEST_F(FlatSparseStreamerTest, TestConcurrentAddAndSearch) {
 
     const float *data = reinterpret_cast<const float *>(iter->sparse_data());
     for (size_t j = 0; j < sparse_dim_count; ++j) {
-      ASSERT_EQ((float)iter->key(), data[j]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);
     }
     total++;
     min = std::min(min, iter->key());
diff --git a/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cpp b/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cpp
index a3dda598..bd96789a 100644
--- a/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cpp
+++ b/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cpp
@@ -229,7 +229,7 @@ TEST_F(HnswStreamerTest, TestHnswSearchMMap) {
     ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
     const float *data = (float *)block.data();
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(data[j], i);
+      ASSERT_FLOAT_EQ(data[j], i);
     }
     ASSERT_EQ(i, result1[0].key());
 
diff --git a/tests/core/algorithm/hnsw/hnsw_streamer_test.cc b/tests/core/algorithm/hnsw/hnsw_streamer_test.cc
index d39f1c07..d1619e49 100644
--- a/tests/core/algorithm/hnsw/hnsw_streamer_test.cc
+++ b/tests/core/algorithm/hnsw/hnsw_streamer_test.cc
@@ -576,7 +576,7 @@ TEST_F(HnswStreamerTest, TestOpenClose) {
       float *data = (float *)iter->data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < dim; ++d) {
-        ASSERT_EQ((float)cur, data[d]);
+        ASSERT_FLOAT_EQ((float)cur, data[d]);
       }
       iter->next();
       cur += 2;
@@ -657,7 +657,7 @@ TEST_F(HnswStreamerTest, TestCreateIterator) {
       float *data = (float *)iter->data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < dim; ++d) {
-        ASSERT_EQ((float)cur, data[d]);
+        ASSERT_FLOAT_EQ((float)cur, data[d]);
       }
       iter->next();
       cur++;
@@ -689,7 +689,7 @@ TEST_F(HnswStreamerTest, TestCreateIterator) {
     const float *data = (const float *)provider->get_vector(i);
     ASSERT_NE(data, nullptr);
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(i, data[j]);
+      ASSERT_FLOAT_EQ(i, data[j]);
     }
   }
 }
@@ -730,7 +730,7 @@ TEST_F(HnswStreamerTest, TestForceFlush) {
       float *data = (float *)iter->data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < dim; ++d) {
-        ASSERT_EQ((float)cur, data[d]);
+        ASSERT_FLOAT_EQ((float)cur, data[d]);
       }
       iter->next();
       cur++;
@@ -768,7 +768,7 @@ TEST_F(HnswStreamerTest, TestForceFlush) {
     const float *data = (const float *)provider->get_vector(i);
     ASSERT_NE(data, nullptr);
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(i, data[j]);
+      ASSERT_FLOAT_EQ(i, data[j]);
     }
   }
 }
@@ -830,7 +830,7 @@ TEST_F(HnswStreamerTest, TestKnnMultiThread) {
   while (iter->is_valid()) {
     float *data = (float *)iter->data();
     for (size_t d = 0; d < dim; ++d) {
-      ASSERT_EQ((float)iter->key(), data[d]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
     }
     total++;
     min = std::min(min, iter->key());
@@ -1008,7 +1008,7 @@ TEST_F(HnswStreamerTest, TestKnnConcurrentAddAndSearch) {
   while (iter->is_valid()) {
     float *data = (float *)iter->data();
     for (size_t d = 0; d < dim; ++d) {
-      ASSERT_EQ((float)iter->key(), data[d]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
     }
     total++;
     min = std::min(min, iter->key());
@@ -1584,7 +1584,7 @@ TEST_F(HnswStreamerTest, TestCheckDuplicateAndGetVector) {
     const float *data = (const float *)provider->get_vector(i);
     ASSERT_NE(data, nullptr);
     for (size_t j = 0; j < dim; ++j) {
-      ASSERT_EQ(i, data[j]);
+      ASSERT_FLOAT_EQ(i, data[j]);
     }
   }
 
@@ -2275,7 +2275,7 @@ TEST_F(HnswStreamerTest, TestFetchVector) {
     ASSERT_NE(vector, nullptr);
 
     float vector_value = *(float *)(vector);
-    ASSERT_EQ(vector_value, i);
+    ASSERT_FLOAT_EQ(vector_value, i);
   }
 
   auto linearCtx = streamer->create_context();
@@ -2310,7 +2310,7 @@ TEST_F(HnswStreamerTest, TestFetchVector) {
 
     ASSERT_NE(knnResult[0].vector(), nullptr);
     float vector_value = *((float *)(knnResult[0].vector()));
-    ASSERT_EQ(vector_value, i);
+    ASSERT_FLOAT_EQ(vector_value, i);
   }
   std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
   std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
diff --git a/tests/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_test.cc b/tests/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_test.cpp
similarity index 100%
rename from tests/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_test.cc
rename to tests/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_test.cpp
diff --git a/tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_test.cc b/tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_test.cc
index 0b3275e3..9192fdb1 100644
--- a/tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_test.cc
+++ b/tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_test.cc
@@ -494,7 +494,7 @@ TEST_F(HnswSparseStreamerTest, TestOpenClose) {
       float *sparse_data = (float *)iter->sparse_data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < sparse_dim_count; ++d) {
-        ASSERT_EQ((float)cur, sparse_data[d]);
+        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);
       }
       iter->next();
       cur += 2;
@@ -587,7 +587,7 @@ TEST_F(HnswSparseStreamerTest, TestCreateIterator) {
       float *sparse_data = (float *)iter->sparse_data();
       ASSERT_EQ(cur, iter->key());
       for (size_t d = 0; d < sparse_dim_count; ++d) {
-        ASSERT_EQ((float)cur, sparse_data[d]);
+        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);
       }
       iter->next();
       cur++;
@@ -678,7 +678,7 @@ TEST_F(HnswSparseStreamerTest, TestForceFlush) {
 
       const float *data = reinterpret_cast<const float *>(iter->sparse_data());
       for (size_t j = 0; j < sparse_dim_count; ++j) {
-        ASSERT_EQ((float)cur, data[j]);
+        ASSERT_FLOAT_EQ((float)cur, data[j]);
       }
 
       iter->next();
@@ -1017,7 +1017,7 @@ TEST_F(HnswSparseStreamerTest, TestKnnConcurrentAddAndSearch) {
 
     const float *data = reinterpret_cast<const float *>(iter->sparse_data());
     for (size_t j = 0; j < sparse_dim_count; ++j) {
-      ASSERT_EQ((float)iter->key(), data[j]);
+      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);
     }
     total++;
     min = std::min(min, iter->key());
diff --git a/tests/core/algorithm/ivf/ivf_searcher_test.cc b/tests/core/algorithm/ivf/ivf_searcher_test.cc
index 0ce94ced..75d5df1c 100644
--- a/tests/core/algorithm/ivf/ivf_searcher_test.cc
+++ b/tests/core/algorithm/ivf/ivf_searcher_test.cc
@@ -282,7 +282,7 @@ TEST_F(IVFSearcherTest, TestSimple) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -297,7 +297,7 @@ TEST_F(IVFSearcherTest, TestSimple) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)32 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -312,7 +312,7 @@ TEST_F(IVFSearcherTest, TestSimple) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -320,6 +320,97 @@ TEST_F(IVFSearcherTest, TestSimple) {
   EXPECT_EQ(0, ret);
 }
 
+TEST_F(IVFSearcherTest, TestSimpleCosine) {
+  IVFBuilder builder;
+  //    index_meta_.set_major_order(IndexMeta::MO_ROW);
+  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "1");
+  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");
+
+  Params converter_params;
+  auto converter = IndexFactory::CreateConverter("CosineNormalizeConverter");
+  ASSERT_TRUE(converter != nullptr);
+  auto original_index_meta = index_meta_;
+  original_index_meta.set_metric("Cosine", 0, Params());
+  EXPECT_EQ(0, converter->init(original_index_meta, converter_params));
+  IndexMeta index_meta = converter->meta();
+  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
+  ASSERT_TRUE(reformer != nullptr);
+  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));
+
+  int ret = builder.init(index_meta, params_);
+  EXPECT_EQ(0, ret);
+  prepare_index_holder(0, 33);
+  converter->transform(holder_);
+  auto holder = converter->result();
+
+  EXPECT_EQ(0, builder.train(threads_, holder));
+  EXPECT_EQ(0, builder.build(threads_, holder));
+  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
+  EXPECT_EQ(0, dumper->create(index_path_));
+
+  ret = builder.dump(dumper);
+  EXPECT_EQ((size_t)33, builder.stats().built_count());
+  EXPECT_EQ((size_t)33, builder.stats().dumped_count());
+  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
+  EXPECT_EQ(0, dumper->close());
+
+  IVFSearcher searcher;
+  Params params;
+  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
+  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);
+
+  ret = searcher.init(params);
+  EXPECT_EQ(0, ret);
+
+  IndexStorage::Pointer container =
+      IndexFactory::CreateStorage("MMapFileReadStorage");
+  EXPECT_TRUE(!!container);
+
+  Params container_params;
+  container_params.set("proxima.mmap_file.container.memory_warmup", true);
+  container->init(container_params);
+  ret = container->open(index_path_, false);
+  EXPECT_EQ(0, ret);
+
+  ret = searcher.load(container, IndexMetric::Pointer());
+  EXPECT_EQ(0, ret);
+
+  std::vector<float> query;
+  for (size_t i = 0; i < dimension_; ++i) {
+    query.push_back(32.0f + i);
+  }
+
+  size_t qnum = 33;
+  std::vector<float> query1;
+  for (size_t i = 0; i < dimension_ * qnum; ++i) {
+    query1.push_back(i / dimension_);
+  }
+  auto context = searcher.create_context();
+  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);
+
+  // single bf search
+  {
+    size_t topk = 33;
+    context->set_topk(topk);
+
+    std::string new_vec;
+    IndexQueryMeta new_meta;
+    ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta));
+
+    ret = searcher.search_bf_impl(new_vec.data(), new_meta, context);
+    EXPECT_EQ(0, ret);
+
+    const IndexDocumentList &result = context->result(0);
+    EXPECT_EQ((size_t)topk, result.size());
+    for (size_t i = 0; i < 1; ++i) {
+      // ASSERT_EQ(29, result[i].key());
+      EXPECT_NEAR(0, result[i].score(), 1e-2);
+    }
+  }
+  ret = searcher.unload();
+  EXPECT_EQ(0, ret);
+}
+
 TEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {
   IVFBuilder builder;
   //    index_meta_.set_major_order(IndexMeta::MO_ROW);
@@ -389,7 +480,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       ASSERT_EQ((uint64_t)(total - 1) - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -404,7 +495,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -419,7 +510,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -434,7 +525,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -513,7 +604,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFilter) {
     EXPECT_EQ((size_t)1, result.size());
     for (size_t i = 0; i < 1; ++i) {
       EXPECT_EQ((uint64_t)0, result[i].key());
-      EXPECT_EQ((float)999 * 999 * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
     }
   }
 
@@ -528,7 +619,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFilter) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)1, result.size());
       EXPECT_EQ((uint64_t)0, result[0].key());
-      EXPECT_EQ((float)q * q * dimension_, result[0].score());
+      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
     }
   }
 
@@ -543,7 +634,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFilter) {
     EXPECT_EQ((size_t)1, result.size());
     for (size_t i = 0; i < 1; ++i) {
       EXPECT_EQ((uint64_t)0, result[i].key());
-      EXPECT_EQ((float)999 * 999 * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
     }
   }
 
@@ -558,7 +649,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFilter) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)1, result.size());
       EXPECT_EQ((uint64_t)0, result[0].key());
-      EXPECT_EQ((float)q * q * dimension_, result[0].score());
+      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
     }
   }
 
@@ -634,7 +725,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -649,7 +740,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -664,7 +755,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -679,7 +770,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -759,7 +850,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithFilter) {
     EXPECT_EQ((size_t)1, result.size());
     for (size_t i = 0; i < 1; ++i) {
       EXPECT_EQ((uint64_t)0, result[i].key());
-      EXPECT_EQ((float)999 * 999 * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
     }
   }
 
@@ -774,7 +865,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithFilter) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)1, result.size());
       EXPECT_EQ((uint64_t)0, result[0].key());
-      EXPECT_EQ((float)q * q * dimension_, result[0].score());
+      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
     }
   }
 
@@ -789,7 +880,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithFilter) {
     EXPECT_EQ((size_t)1, result.size());
     for (size_t i = 0; i < 1; ++i) {
       EXPECT_EQ((uint64_t)0, result[i].key());
-      EXPECT_EQ((float)999 * 999 * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
     }
   }
 
@@ -804,7 +895,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWithFilter) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)1, result.size());
       EXPECT_EQ((uint64_t)0, result[0].key());
-      EXPECT_EQ((float)q * q * dimension_, result[0].score());
+      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
     }
   }
 
@@ -886,7 +977,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWith1LevelAndBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -901,7 +992,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWith1LevelAndBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -916,7 +1007,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWith1LevelAndBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -931,7 +1022,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFloatWith1LevelAndBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1013,7 +1104,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWith1LevelAndBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -1028,7 +1119,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWith1LevelAndBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1043,7 +1134,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWith1LevelAndBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -1058,7 +1149,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWith1LevelAndBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1137,7 +1228,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorInt8WithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)127 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -1152,7 +1243,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorInt8WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1167,7 +1258,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorInt8WithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)127 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -1182,7 +1273,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorInt8WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1261,7 +1352,7 @@ TEST_F(IVFSearcherTest, TestRowMajorInt8WithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)127 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -1276,7 +1367,7 @@ TEST_F(IVFSearcherTest, TestRowMajorInt8WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1291,7 +1382,7 @@ TEST_F(IVFSearcherTest, TestRowMajorInt8WithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)127 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -1306,7 +1397,7 @@ TEST_F(IVFSearcherTest, TestRowMajorInt8WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1387,7 +1478,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorBinaryWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)256 - i, result[i].key());
-      EXPECT_EQ((float)i, result[i].score());
+      EXPECT_FLOAT_EQ((float)i, result[i].score());
     }
   }
 
@@ -1402,7 +1493,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorBinaryWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1417,7 +1508,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorBinaryWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)256 - i, result[i].key());
-      EXPECT_EQ((float)i, result[i].score());
+      EXPECT_FLOAT_EQ((float)i, result[i].score());
     }
   }
 
@@ -1432,7 +1523,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorBinaryWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1513,7 +1604,7 @@ TEST_F(IVFSearcherTest, TestRowMajorBinaryWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)256 - i, result[i].key());
-      EXPECT_EQ((float)i, result[i].score());
+      EXPECT_FLOAT_EQ((float)i, result[i].score());
     }
   }
 
@@ -1528,7 +1619,7 @@ TEST_F(IVFSearcherTest, TestRowMajorBinaryWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1543,7 +1634,7 @@ TEST_F(IVFSearcherTest, TestRowMajorBinaryWithBuildMemory) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)256 - i, result[i].key());
-      EXPECT_EQ((float)i, result[i].score());
+      EXPECT_FLOAT_EQ((float)i, result[i].score());
     }
   }
 
@@ -1558,7 +1649,7 @@ TEST_F(IVFSearcherTest, TestRowMajorBinaryWithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1770,7 +1861,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFp16WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1802,7 +1893,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFp16WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1909,7 +2000,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFp16WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -1941,7 +2032,7 @@ TEST_F(IVFSearcherTest, TestRowMajorFp16WithBuildMemory) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2019,7 +2110,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithHnswGraphType) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2034,7 +2125,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithHnswGraphType) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2049,7 +2140,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithHnswGraphType) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2064,7 +2155,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithHnswGraphType) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2143,7 +2234,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithSsgGraphType) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2158,7 +2249,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithSsgGraphType) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2173,7 +2264,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithSsgGraphType) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2188,7 +2279,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithSsgGraphType) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2265,7 +2356,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithInt8Converter) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2280,7 +2371,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithInt8Converter) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2295,7 +2386,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithInt8Converter) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_EQ((uint64_t)999 - i, result[i].key());
-      EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2310,7 +2401,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithInt8Converter) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2406,7 +2497,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFloat16Quantizer) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2438,7 +2529,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFloat16Quantizer) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2535,7 +2626,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithConverterAndQuantizer) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       ASSERT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2567,7 +2658,7 @@ TEST_F(IVFSearcherTest, TestColumnMajorFloatWithConverterAndQuantizer) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       EXPECT_EQ((uint64_t)q, result[0].key());
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2664,7 +2755,7 @@ TEST_F(IVFSearcherTest, TestQuantizedPerCentroid) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       ASSERT_NEAR((uint64_t)(total - 1) - q, result[0].key(), 100);
-      // EXPECT_EQ((float)0, result[0].score());
+      // EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -2679,7 +2770,7 @@ TEST_F(IVFSearcherTest, TestQuantizedPerCentroid) {
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
       EXPECT_NEAR((uint64_t)total - i - 1, result[i].key(), 100);
-      // EXPECT_EQ((float)i * i * dimension_, result[i].score());
+      // EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
     }
   }
 
@@ -2694,7 +2785,7 @@ TEST_F(IVFSearcherTest, TestQuantizedPerCentroid) {
       const IndexDocumentList &result = context->result(q);
       EXPECT_EQ((size_t)topk, result.size());
       ASSERT_NEAR((uint64_t)(total - 1) - q, result[0].key(), 100);
-      // EXPECT_EQ((float)0, result[0].score());
+      // EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -3383,7 +3474,7 @@ TEST_F(IVFSearcherTest, TestSameValue) {
 
     for (size_t q = 0; q < qnum; ++q) {
       const IndexDocumentList &result = context->result(q);
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
@@ -3397,7 +3488,7 @@ TEST_F(IVFSearcherTest, TestSameValue) {
     const IndexDocumentList &result = context->result(0);
     EXPECT_EQ((size_t)topk, result.size());
     for (size_t i = 0; i < topk; ++i) {
-      EXPECT_EQ((float)0, result[i].score());
+      EXPECT_FLOAT_EQ((float)0, result[i].score());
     }
   }
 
@@ -3410,7 +3501,7 @@ TEST_F(IVFSearcherTest, TestSameValue) {
 
     for (size_t q = 0; q < qnum; ++q) {
       const IndexDocumentList &result = context->result(q);
-      EXPECT_EQ((float)0, result[0].score());
+      EXPECT_FLOAT_EQ((float)0, result[0].score());
     }
   }
 
diff --git a/tests/core/metric/quantized_integer_metric_test.cc b/tests/core/metric/quantized_integer_metric_test.cc
index d0deac84..30e8c256 100644
--- a/tests/core/metric/quantized_integer_metric_test.cc
+++ b/tests/core/metric/quantized_integer_metric_test.cc
@@ -192,7 +192,7 @@ TEST(QuantizedIntegerMetric, TestInt8SquaredEuclidean) {
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
     // printf("%f %f\n", v1, v2);
-    ASSERT_NEAR(v1, v2, 1e-2 * (DIMENSION + 1));
+    ASSERT_NEAR(v1, v2, 0.1 * (DIMENSION + 1));
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -394,7 +394,7 @@ TEST(QuantizedIntegerMetric, TestInt4SquaredEuclidean) {
         ailego::Distance::SquaredEuclidean(mf, vec.data(), holder->dimension());
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
-    ASSERT_NEAR(v1, v2, 0.17 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -516,7 +516,7 @@ void TestDistanceMatrixInt4(const std::string &metric_name) {
   matrix_compute(&matrix2[0], &query2[0], meta2.dimension(), &result2[0]);
 
   for (size_t i = 0; i < batch_size * query_size; ++i) {
-    EXPECT_NEAR(result1[i], result2[i], 1e-4);
+    EXPECT_NEAR(result1[i], result2[i], 1e-2 * dimension);
     EXPECT_TRUE(IsAlmostEqual(result1[i], result2[i], 1e4));
   }
 }
@@ -597,7 +597,7 @@ TEST(QuantizedIntegerMetric, TestInt8InnerProduct) {
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
     // printf("%f %f\n", v1, v2);
-    ASSERT_NEAR(v1, v2, 1e-2 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -682,7 +682,7 @@ TEST(QuantizedIntegerMetric, TestInt4InnerProduct) {
                                                    holder->dimension());
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
-    ASSERT_NEAR(v1, v2, 0.15 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -771,7 +771,7 @@ TEST(QuantizedIntegerMetric, TestInt8MipsSquaredEuclidean) {
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
     // printf("%f %f\n", v1, v2);
-    ASSERT_NEAR(v1, v2, 1e-2 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -856,7 +856,7 @@ TEST(QuantizedIntegerMetric, TestInt4MipsSquaredEuclidean) {
                                                       holder->dimension(), 0.0);
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
-    ASSERT_NEAR(v1, v2, 0.15 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -956,7 +956,7 @@ TEST(QuantizedIntegerMetric, TestInt8NormalizedCosine) {
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
     // printf("%f %f\n", v1, v2);
-    ASSERT_NEAR(v1, v2, 1e-2 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -1061,7 +1061,7 @@ TEST(QuantizedIntegerMetric, TestInt8Cosine) {
     compute_batch(reinterpret_cast<const void **>(&mi), qi, 1,
                   holder2->dimension(), &v2);
     // printf("%f %f\n", v1, v2);
-    ASSERT_NEAR(v1, v2, 1e-2 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
@@ -1136,7 +1136,7 @@ TEST(QuantizedIntegerMetric, TestInt4NormalizedCosine) {
         normalized_mf.data(), normalized_vec.data(), holder->dimension());
     float v2;
     compute(mi, qi, holder2->dimension(), &v2);
-    ASSERT_NEAR(v1, v2, 0.15 * DIMENSION);
+    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);
 
     std::string out2;
     ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
diff --git a/tests/db/index/segment/segment_test.cc b/tests/db/index/segment/segment_test.cc
index 5db3f0be..6ca6fffe 100644
--- a/tests/db/index/segment/segment_test.cc
+++ b/tests/db/index/segment/segment_test.cc
@@ -1170,6 +1170,66 @@ TEST_P(SegmentTest, CombinedVectorColumnIndexerWithQuantVectorIndex) {
   ASSERT_EQ(count, 10);
 }
 
+TEST_P(SegmentTest, CombinedVectorColumnIndexerQueryWithPks) {
+  options.max_buffer_size_ = 10 * 1024;
+
+  auto tmp_schema = test::TestHelper::CreateSchemaWithVectorIndex(
+      false, "demo", std::make_shared<HnswIndexParams>(MetricType::IP));
+
+  auto segment = test::TestHelper::CreateSegmentWithDoc(
+      col_path, *tmp_schema, 0, 0, id_map, delete_store, version_manager,
+      options, 0, 0);
+  ASSERT_TRUE(segment != nullptr);
+
+
+  uint64_t MAX_DOC = 1000;
+  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, MAX_DOC);
+
+  auto combined_indexer = segment->get_combined_vector_indexer("dense_fp32");
+  ASSERT_TRUE(combined_indexer != nullptr);
+
+  Doc verify_doc = test::TestHelper::CreateDoc(999, *schema);
+  std::vector<std::vector<uint64_t>> bf_pks = {
+      {10, 20, 30, 40, 50, 60, 70, 80, 90, 999}};
+  // query
+  auto dense_fp32_field = schema->get_field("dense_fp32");
+  auto query_vector = verify_doc.get<std::vector<float>>("dense_fp32").value();
+  auto query = vector_column_params::VectorData{
+      vector_column_params::DenseVector{.data = query_vector.data()}};
+  auto query_params = vector_column_params::QueryParams{
+      .data_type = dense_fp32_field->data_type(),
+      .dimension = dense_fp32_field->dimension(),
+      .topk = 10,
+      .filter = nullptr,
+      .fetch_vector = false,
+      .query_params = std::make_shared<zvec::QueryParams>(IndexType::HNSW),
+      .group_by = nullptr,
+      .bf_pks = bf_pks,
+      .refiner_param = nullptr,
+      .extra_params = {}};
+
+  auto results = combined_indexer->Search(query, query_params);
+  ASSERT_TRUE(results.has_value());
+
+  auto vector_results =
+      dynamic_cast<VectorIndexResults *>(results.value().get());
+  ASSERT_TRUE(vector_results);
+  ASSERT_EQ(vector_results->count(), 10);
+
+  int count = 0;
+  std::vector<uint64_t> result_doc_ids;
+  auto iter = vector_results->create_iterator();
+  while (iter->valid()) {
+    count++;
+    result_doc_ids.push_back(iter->doc_id());
+    iter->next();
+  }
+  ASSERT_EQ(count, 10);
+  // need reverse result_doc_ids
+  std::reverse(result_doc_ids.begin(), result_doc_ids.end());
+  ASSERT_EQ(result_doc_ids, bf_pks[0]);
+}
+
 
 TEST_P(SegmentTest, ConcurrentInsertOperations) {
   auto segment = test::TestHelper::CreateSegmentWithDoc(
diff --git a/thirdparty/antlr/antlr4.patch b/thirdparty/antlr/antlr4.patch
index 1156a4f7..81dbe1c6 100644
--- a/thirdparty/antlr/antlr4.patch
+++ b/thirdparty/antlr/antlr4.patch
@@ -1,9 +1,25 @@
 diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt
-index 390078151..de657dcc4 100644
+index 390078151..213258ac8 100644
 --- a/runtime/Cpp/CMakeLists.txt
 +++ b/runtime/Cpp/CMakeLists.txt
-@@ -39,10 +39,10 @@ if(CMAKE_VERSION VERSION_EQUAL "3.3.0" OR
-   CMAKE_POLICY(SET CMP0054 OLD)
+@@ -28,21 +28,21 @@ project(LIBANTLR4)
+ if(CMAKE_VERSION VERSION_EQUAL "3.0.0" OR
+    CMAKE_VERSION VERSION_GREATER "3.0.0")
+   CMAKE_POLICY(SET CMP0026 NEW)
+-  CMAKE_POLICY(SET CMP0054 OLD)
+-  CMAKE_POLICY(SET CMP0045 OLD)
+-  CMAKE_POLICY(SET CMP0042 OLD)
++  CMAKE_POLICY(SET CMP0054 NEW)
++  CMAKE_POLICY(SET CMP0045 NEW)
++  CMAKE_POLICY(SET CMP0042 NEW)
+ endif()
+ 
+ if(CMAKE_VERSION VERSION_EQUAL "3.3.0" OR
+    CMAKE_VERSION VERSION_GREATER "3.3.0")
+-  CMAKE_POLICY(SET CMP0059 OLD)
+-  CMAKE_POLICY(SET CMP0054 OLD)
++  CMAKE_POLICY(SET CMP0059 NEW)
++  CMAKE_POLICY(SET CMP0054 NEW)
  endif()
  
 -if(CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -18,7 +34,7 @@ index 390078151..de657dcc4 100644
    find_library(COREFOUNDATION_LIBRARY CoreFoundation)
  endif()
 diff --git a/runtime/Cpp/runtime/CMakeLists.txt b/runtime/Cpp/runtime/CMakeLists.txt
-index 2c5e7376f..bcc0134dc 100644
+index 2c5e7376f..ae992f9cc 100644
 --- a/runtime/Cpp/runtime/CMakeLists.txt
 +++ b/runtime/Cpp/runtime/CMakeLists.txt
 @@ -25,7 +25,7 @@ file(GLOB libantlrcpp_SRC
diff --git a/thirdparty/rocksdb/CMakeLists.txt b/thirdparty/rocksdb/CMakeLists.txt
index 8458d338..d081e05c 100644
--- a/thirdparty/rocksdb/CMakeLists.txt
+++ b/thirdparty/rocksdb/CMakeLists.txt
@@ -8,6 +8,7 @@ set(WITH_TOOLS OFF CACHE BOOL "build with tools" FORCE)
 set(WITH_LZ4 ON CACHE BOOL "build with lz4" FORCE)
 set(USE_RTTI ON CACHE BOOL "build with RTTI" FORCE)
 set(FAIL_ON_WARNINGS OFF CACHE BOOL "build with no Werror" FORCE)
+set(PORTABLE ON CACHE BOOL "build a portable lib" FORCE)
 
 set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})

From 2a70a1728483291679799d637273c1786a76b0b7 Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Sat, 28 Feb 2026 17:44:32 +0800
Subject: [PATCH 24/25] fix

---
 .github/workflows/android_build.yml | 97 +++++++++++++----------------
 1 file changed, 45 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 82667254..864c73d4 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -18,9 +18,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        # abi: [arm64-v8a, armeabi-v7a, x86_64]
-        abi: [x86_64]
-        api: [21]
+        abi: [arm64-v8a]
+        api: [30]   # arm64 emulator 建议 >= 30，21 常见拉不到/不稳定
 
     steps:
       - name: Checkout
@@ -45,17 +44,16 @@ jobs:
       - name: Setup Java 17
         uses: actions/setup-java@v4
         with:
-            distribution: temurin
-            java-version: '17'
+          distribution: temurin
+          java-version: '17'
 
-      - name: Setup Android NDK
+      - name: Setup Android SDK
         uses: android-actions/setup-android@v3
 
       - name: Install NDK (side by side)
         shell: bash
         run: |
-            # yes | sdkmanager --licenses
-            sdkmanager "ndk;26.1.10909125"
+          sdkmanager "ndk;26.1.10909125"
 
       - name: Cache host protoc build
         uses: actions/cache@v3
@@ -70,11 +68,10 @@ jobs:
         run: |
           git submodule update --init
           if [ ! -d "build-host" ]; then
-            # Setup ccache for host build
             export CCACHE_BASEDIR="$GITHUB_WORKSPACE"
             export CCACHE_NOHASHDIR=1
             export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros
-            
+
             cmake -S . -B build-host -G Ninja \
               -DCMAKE_C_COMPILER_LAUNCHER=ccache \
               -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@@ -97,11 +94,10 @@ jobs:
           export ANDROID_SDK_ROOT="$ANDROID_HOME"
           export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/26.1.10909125"
 
-          # Setup ccache
           export CCACHE_BASEDIR="$GITHUB_WORKSPACE"
           export CCACHE_NOHASHDIR=1
           export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros
-          
+
           if [ ! -d "build-android-${{ matrix.abi }}" ]; then
             cmake -S . -B build-android-${{ matrix.abi }} -G Ninja \
               -DCMAKE_BUILD_TYPE=Release \
@@ -119,7 +115,6 @@ jobs:
             echo "Using cached Android build directory"
           fi
 
-
       - name: Cache examples build
         uses: actions/cache@v3
         with:
@@ -129,67 +124,65 @@ jobs:
       - name: Build examples
         shell: bash
         run: |
+          export ANDROID_SDK_ROOT="$ANDROID_HOME"
+          export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/26.1.10909125"
+
           if [ ! -d "examples/c++/build-android-examples-${{ matrix.abi }}" ]; then
             cmake -S examples/c++ -B examples/c++/build-android-examples-${{ matrix.abi }} -G Ninja \
-               -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
-               -DANDROID_ABI=${{ matrix.abi }} \
-               -DANDROID_PLATFORM=android-${{ matrix.api }} \
-               -DANDROID_STL=c++_static \
-               -DCMAKE_BUILD_TYPE=Release \
-               -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
-               -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
-               -DCMAKE_C_COMPILER_LAUNCHER=ccache \
-               -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+              -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
+              -DANDROID_ABI=${{ matrix.abi }} \
+              -DANDROID_PLATFORM=android-${{ matrix.api }} \
+              -DANDROID_STL=c++_static \
+              -DCMAKE_BUILD_TYPE=Release \
+              -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
+              -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
+              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
             cmake --build examples/c++/build-android-examples-${{ matrix.abi }} --parallel
           else
             echo "Using cached examples build"
           fi
 
-      - name: Install ADB and setup Android emulator
+      # 注意：emulator-runner 的 arch 需要 "arm64"（不是 arm64-v8a）
+      - name: Run on Android emulator (arm64) and verify
         uses: reactivecircus/android-emulator-runner@v2
         with:
           api-level: ${{ matrix.api }}
-          arch: ${{ matrix.abi }}
-          # target: google_apis
-          # emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim
-          # disable-animations: true
+          arch: arm64
+          target: google_apis
+          emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -netdelay none -netspeed full
+          disable-animations: true
           script: |
-            # Wait for device to be ready
+            set -euxo pipefail
+
             adb wait-for-device
-            
-            # Check file sizes before pushing
+
+            echo "Device ABI:"
+            adb shell getprop ro.product.cpu.abi
+            adb shell getprop ro.product.cpu.abilist
+
             echo "Checking binary sizes:"
             ls -lah examples/c++/build-android-examples-${{ matrix.abi }}/
-            
-            # Check device architecture
-            echo "Device architecture info:"
-            adb shell 'getprop ro.product.cpu.abi'
-            adb shell 'getprop ro.product.cpu.abilist'
-            
+
             # Push executables to device
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/core-example /data/local/tmp/
             adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/
-            
-            # Make executables executable
-            adb shell 'chmod 755 /data/local/tmp/ailego-example'
-            adb shell 'chmod 755 /data/local/tmp/core-example'
-            adb shell 'chmod 755 /data/local/tmp/db-example'
-            
-            # Verify file integrity
+
+            adb shell chmod 755 /data/local/tmp/ailego-example
+            adb shell chmod 755 /data/local/tmp/core-example
+            adb shell chmod 755 /data/local/tmp/db-example
+
             echo "File info on device:"
-            adb shell 'ls -la /data/local/tmp/ailego-example'
-            adb shell 'ls -la /data/local/tmp/core-example'
-            adb shell 'ls -la /data/local/tmp/db-example'
-            
+            adb shell ls -la /data/local/tmp/ailego-example
+            adb shell ls -la /data/local/tmp/core-example
+            adb shell ls -la /data/local/tmp/db-example
+
             echo "Running ailego example:"
             adb shell 'cd /data/local/tmp && ./ailego-example'
-            echo "Exit code: $?"
-            
+
             echo "Running core example:"
             adb shell 'cd /data/local/tmp && ./core-example'
-            echo "Exit code: $?"
-            
+
             echo "Running db example:"
             adb shell 'cd /data/local/tmp && ./db-example'
-            echo "Exit code: $?"

From d8a23b15fd0e3420e3c4af732471c4f4117fbd2d Mon Sep 17 00:00:00 2001
From: "xufeihong.xfh" <xufeihong.xfh@alibaba-inc.com>
Date: Mon, 2 Mar 2026 18:59:48 +0800
Subject: [PATCH 25/25] fix

---
 .github/workflows/android_build.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/android_build.yml b/.github/workflows/android_build.yml
index 864c73d4..5796c642 100644
--- a/.github/workflows/android_build.yml
+++ b/.github/workflows/android_build.yml
@@ -106,6 +106,8 @@ jobs:
               -DANDROID_PLATFORM=android-${{ matrix.api }} \
               -DANDROID_STL=c++_static \
               -DBUILD_PYTHON_BINDINGS=OFF \
+              -DENABLE_ARMV8A=ON \
+              -DENABLE_NATIVE=OFF \
               -DBUILD_TOOLS=OFF \
               -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
               -DCMAKE_C_COMPILER_LAUNCHER=ccache \
@@ -143,7 +145,6 @@ jobs:
             echo "Using cached examples build"
           fi
 
-      # 注意：emulator-runner 的 arch 需要 "arm64"（不是 arm64-v8a）
       - name: Run on Android emulator (arm64) and verify
         uses: reactivecircus/android-emulator-runner@v2
         with: