From 0e2e224b8a4611544ecbc40831a62e18d161cc8d Mon Sep 17 00:00:00 2001 From: Ian Miller Date: Wed, 28 Jan 2026 19:22:38 +0000 Subject: [PATCH] feat(PGVector): add support for IVFFlat vector index for ANN search in PGVector Signed-off-by: Ian Miller --- .../providers/vector_io/remote_pgvector.mdx | 36 +++- .../distributions/ci-tests/config.yaml | 5 + .../ci-tests/run-with-postgres-store.yaml | 5 + .../distributions/open-benchmark/config.yaml | 5 + .../distributions/starter-gpu/config.yaml | 5 + .../starter-gpu/run-with-postgres-store.yaml | 5 + .../distributions/starter/config.yaml | 5 + .../starter/run-with-postgres-store.yaml | 5 + .../providers/registry/vector_io.py | 21 ++ .../remote/vector_io/pgvector/config.py | 76 ++++++- .../remote/vector_io/pgvector/pgvector.py | 203 ++++++++++++++++-- tests/unit/providers/vector_io/conftest.py | 8 +- 12 files changed, 341 insertions(+), 38 deletions(-) diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx index b48c6ce159..d8cea49a8e 100644 --- a/docs/docs/providers/vector_io/remote_pgvector.mdx +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -82,15 +82,29 @@ description: | ``` 2. Pull pgvector image with that tag you want: + + Via Podman: ```bash podman pull pgvector/pgvector:0.8.1-pg18-trixie ``` + Via Docker: + ```bash + docker pull pgvector/pgvector:0.8.1-pg18-trixie + ``` + 3. 
Run container with PGVector: + + Via Podman: ```bash podman run -d --name pgvector -e POSTGRES_PASSWORD=password -e POSTGRES_USER=user -e POSTGRES_DB=testvectordb -p 5432:5432 -v pgvector_data:/var/lib/postgresql pgvector/pgvector:0.8.1-pg18-trixie ``` + Via Docker: + ```bash + docker run -d --name pgvector -e POSTGRES_PASSWORD=password -e POSTGRES_USER=user -e POSTGRES_DB=testvectordb -p 5432:5432 -v pgvector_data:/var/lib/postgresql pgvector/pgvector:0.8.1-pg18-trixie + ``` + ## Documentation See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. sidebar_label: Remote - Pgvector @@ -184,15 +198,29 @@ export PGVECTOR_PASSWORD=password ``` 2. Pull pgvector image with that tag you want: + +Via Podman: ```bash podman pull pgvector/pgvector:0.8.1-pg18-trixie ``` +Via Docker: +```bash +docker pull pgvector/pgvector:0.8.1-pg18-trixie +``` + 3. Run container with PGVector: + +Via Podman: ```bash podman run -d --name pgvector -e POSTGRES_PASSWORD=password -e POSTGRES_USER=user -e POSTGRES_DB=testvectordb -p 5432:5432 -v pgvector_data:/var/lib/postgresql pgvector/pgvector:0.8.1-pg18-trixie ``` +Via Docker: +```bash +docker run -d --name pgvector -e POSTGRES_PASSWORD=password -e POSTGRES_USER=user -e POSTGRES_DB=testvectordb -p 5432:5432 -v pgvector_data:/var/lib/postgresql pgvector/pgvector:0.8.1-pg18-trixie +``` + ## Documentation See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
@@ -207,8 +235,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | | `distance_metric` | `Literal[COSINE, L2, L1, INNER_PRODUCT] \| None` | No | COSINE | PGVector distance metric used for vector search in PGVectorIndex | -| `hnsw_m` | `int \| None` | No | 16 | PGVector's HNSW index parameter - maximum number of edges each vertex has to its neighboring vertices in the graph | -| `hnsw_ef_construction` | `int \| None` | No | 64 | PGVector's HNSW index parameter - size of the dynamic candidate list used for graph construction | +| `vector_index` | `PGVectorHNSWVectorIndex \| PGVectorIVFFlatVectorIndex \| None` | No | type=<PGVectorIndexType.HNSW: 'HNSW'> m=16 ef_construction=64 | PGVector vector index used for Approximate Nearest Neighbor (ANN) search | | `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration @@ -219,6 +246,11 @@ port: ${env.PGVECTOR_PORT:=5432} db: ${env.PGVECTOR_DB} user: ${env.PGVECTOR_USER} password: ${env.PGVECTOR_PASSWORD} +distance_metric: COSINE +vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml index c28e2ccf9a..17adac06b1 100644 --- a/src/llama_stack/distributions/ci-tests/config.yaml +++ b/src/llama_stack/distributions/ci-tests/config.yaml @@ -127,6 +127,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml 
b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 4b7f1e0e0f..aba0421930 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -127,6 +127,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/open-benchmark/config.yaml b/src/llama_stack/distributions/open-benchmark/config.yaml index f9f9157f53..3706623bca 100644 --- a/src/llama_stack/distributions/open-benchmark/config.yaml +++ b/src/llama_stack/distributions/open-benchmark/config.yaml @@ -57,6 +57,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/starter-gpu/config.yaml b/src/llama_stack/distributions/starter-gpu/config.yaml index 8a282ba7ad..58bd838c7b 100644 --- a/src/llama_stack/distributions/starter-gpu/config.yaml +++ b/src/llama_stack/distributions/starter-gpu/config.yaml @@ -127,6 +127,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 9dbbb69ae7..f51f408168 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -127,6 
+127,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml index 619e38551d..b9827e5afd 100644 --- a/src/llama_stack/distributions/starter/config.yaml +++ b/src/llama_stack/distributions/starter/config.yaml @@ -127,6 +127,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 7357fc4b11..457ce17a1f 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -127,6 +127,11 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} + distance_metric: COSINE + vector_index: + type: HNSW + m: 16 + ef_construction: 64 persistence: namespace: vector_io::pgvector backend: kv_default diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py index 81df213d5a..4383e2e938 100644 --- a/src/llama_stack/providers/registry/vector_io.py +++ b/src/llama_stack/providers/registry/vector_io.py @@ -480,11 +480,20 @@ def available_providers() -> list[ProviderSpec]: ``` 2. Pull pgvector image with that tag you want: + +Via Podman: ```bash podman pull pgvector/pgvector:0.8.1-pg18-trixie ``` +Via Docker: +```bash +docker pull pgvector/pgvector:0.8.1-pg18-trixie +``` + 3. 
Run container with PGVector: + +Via Podman: ```bash podman run -d \ --name pgvector \ @@ -496,6 +505,18 @@ def available_providers() -> list[ProviderSpec]: pgvector/pgvector:0.8.1-pg18-trixie ``` +Via Docker: +```bash +docker run -d \ + --name pgvector \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_USER=user \ + -e POSTGRES_DB=testvectordb \ + -p 5432:5432 \ + -v pgvector_data:/var/lib/postgresql \ + pgvector/pgvector:0.8.1-pg18-trixie +``` + ## Documentation See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. """, diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index 08811a590d..c26e8edf76 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -4,14 +4,70 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Literal +from enum import StrEnum +from typing import Annotated, Any, Literal, Self -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack_api import json_schema_type +class PGVectorIndexType(StrEnum): + """Supported pgvector vector index types in Llama Stack.""" + + HNSW = "HNSW" + IVFFlat = "IVFFlat" + + +class PGVectorHNSWVectorIndex(BaseModel): + """Configuration for PGVector HNSW (Hierarchical Navigable Small Worlds) vector index.
+ https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw + """ + + type: Literal[PGVectorIndexType.HNSW] = PGVectorIndexType.HNSW + m: int | None = Field( + gt=0, + default=16, + description="PGVector's HNSW index parameter - maximum number of edges each vertex has to its neighboring vertices in the graph", + ) + ef_construction: int | None = Field( + gt=0, + default=64, + description="PGVector's HNSW index parameter - size of the dynamic candidate list used for graph construction", + ) + + +class PGVectorIVFFlatVectorIndex(BaseModel): + """Configuration for PGVector IVFFlat (Inverted File with Flat Compression) vector index. + https://github.com/pgvector/pgvector?tab=readme-ov-file#ivfflat + """ + + type: Literal[PGVectorIndexType.IVFFlat] = PGVectorIndexType.IVFFlat + lists: int | None = Field( + gt=0, default=100, description="PGVector's IVFFlat index parameter - number of lists index divides vectors into" + ) + probes: int | None = Field( + gt=0, + default=10, + description="PGVector's IVFFlat index parameter - number of lists index searches through during ANN search", + ) + + @model_validator(mode="after") + def validate_probes(self) -> Self: + if self.probes >= self.lists: + raise ValueError( + "probes parameter for PGVector IVFFlat index can't be greater than or equal to the number of lists in the index to allow ANN search." 
+ ) + return self + + +PGVectorIndexConfig = Annotated[ + PGVectorHNSWVectorIndex | PGVectorIVFFlatVectorIndex, + Field(discriminator="type"), +] + + @json_schema_type class PGVectorVectorIOConfig(BaseModel): host: str | None = Field(default="localhost") @@ -22,15 +78,9 @@ class PGVectorVectorIOConfig(BaseModel): distance_metric: Literal["COSINE", "L2", "L1", "INNER_PRODUCT"] | None = Field( default="COSINE", description="PGVector distance metric used for vector search in PGVectorIndex" ) - hnsw_m: int | None = Field( - gt=0, - default=16, - description="PGVector's HNSW index parameter - maximum number of edges each vertex has to its neighboring vertices in the graph", - ) - hnsw_ef_construction: int | None = Field( - gt=0, - default=64, - description="PGVector's HNSW index parameter - size of the dynamic candidate list used for graph construction", + vector_index: PGVectorIndexConfig | None = Field( + default_factory=PGVectorHNSWVectorIndex, + description="PGVector vector index used for Approximate Nearest Neighbor (ANN) search", ) persistence: KVStoreReference | None = Field( description="Config for KV store backend (SQLite only for now)", default=None @@ -53,6 +103,10 @@ def sample_run_config( "db": db, "user": user, "password": password, + "distance_metric": "COSINE", + "vector_index": PGVectorHNSWVectorIndex(m=16, ef_construction=64).model_dump( + mode="json", exclude_none=True + ), "persistence": KVStoreReference( backend="kv_default", namespace="vector_io::pgvector", diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index e11e43e59b..01ec8f8409 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -37,7 +37,7 @@ ) from llama_stack_api.internal.kvstore import KVStore -from .config import PGVectorVectorIOConfig +from .config import PGVectorIndexConfig, PGVectorIndexType, 
PGVectorVectorIOConfig log = get_logger(name=__name__, category="vector_io::pgvector") @@ -132,8 +132,7 @@ def __init__( dimension: int, conn: psycopg2.extensions.connection, distance_metric: str, - hnsw_m: int, - hnsw_ef_construction: int, + vector_index: PGVectorIndexConfig, kvstore: KVStore | None = None, ): self.vector_store = vector_store @@ -142,8 +141,7 @@ def __init__( self.kvstore = kvstore self.check_distance_metric_availability(distance_metric) self.distance_metric = distance_metric - self.hnsw_m = hnsw_m - self.hnsw_ef_construction = hnsw_ef_construction + self.vector_index = vector_index self.table_name = None async def initialize(self) -> None: @@ -167,15 +165,17 @@ async def initialize(self) -> None: """ ) - # Create HNSW (Hierarchical Navigable Small Worlds) index on embedding column to allow efficient and performant vector search in pgvector - # HNSW finds the approximate nearest neighbors by only calculating distance metric for vectors it visits during graph traversal instead of processing all vectors - index_operator_class = self.get_pgvector_index_operator_class() - cur.execute( - f""" - CREATE INDEX IF NOT EXISTS {self.table_name}_hnsw_idx - ON {self.table_name} USING hnsw(embedding {index_operator_class}) WITH (m = {self.hnsw_m}, ef_construction = {self.hnsw_ef_construction}); - """ - ) + if self.vector_index.type == PGVectorIndexType.HNSW: + await self.create_hnsw_vector_index(cur) + + # Create the index only after the table has some data (https://github.com/pgvector/pgvector?tab=readme-ov-file#ivfflat) + elif ( + self.vector_index.type == PGVectorIndexType.IVFFlat + and not await self.check_conflicting_vector_index_exists(cur) + ): + log.info( + f"Creation of {PGVectorIndexType.IVFFlat} vector index in vector_store: {self.vector_store.identifier} was deferred. It will be created when the table has some data." 
+ ) # Create GIN index for full-text search performance cur.execute( @@ -219,6 +219,10 @@ async def add_chunks(self, chunks: list[EmbeddedChunk]): with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: execute_values(cur, query, values, template="(%s, %s, %s::vector, %s, to_tsvector('english', %s))") + # Create the IVFFlat index only after the table has some data (https://github.com/pgvector/pgvector?tab=readme-ov-file#ivfflat) + if self.vector_index.type == PGVectorIndexType.IVFFlat: + await self.create_ivfflat_vector_index(cur) + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs vector similarity search using PostgreSQL's search function. Default distance metric is COSINE. @@ -234,6 +238,14 @@ async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) pgvector_search_function = self.get_pgvector_search_function() with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # Specify the number of probes to allow PGVector to use Index Scan using IVFFlat index if it was configured (https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options-1) + if self.vector_index.type == PGVectorIndexType.IVFFlat: + cur.execute( + f""" + SET ivfflat.probes = {self.vector_index.probes}; + """ + ) + cur.execute( f""" SELECT document, embedding {pgvector_search_function} %s::vector AS distance @@ -393,6 +405,160 @@ def check_distance_metric_availability(self, distance_metric: str) -> None: f"Supported metrics are: {', '.join(supported_metrics)}" ) + async def create_hnsw_vector_index(self, cur: cursor) -> None: + """Create PGVector HNSW vector index for Approximate Nearest Neighbor (ANN) search + + Args: + cur: PostgreSQL cursor + + Raises: + RuntimeError: If the error occurred when creating vector index in PGVector + """ + + # prevents from creating index for the table that already has conflicting index (HNSW or IVFFlat) + if await 
self.check_conflicting_vector_index_exists(cur): + return + + try: + index_operator_class = self.get_pgvector_index_operator_class() + + # Create HNSW (Hierarchical Navigable Small Worlds) index on embedding column to allow efficient and performant vector search in pgvector + # HNSW finds the approximate nearest neighbors by only calculating distance metric for vectors it visits during graph traversal instead of processing all vectors + cur.execute( + f""" + CREATE INDEX IF NOT EXISTS {self.table_name}_hnsw_idx + ON {self.table_name} USING hnsw(embedding {index_operator_class}) WITH (m = {self.vector_index.m}, ef_construction = {self.vector_index.ef_construction}); + """ + ) + log.info( + f"{PGVectorIndexType.HNSW} vector index was created with parameters m = {self.vector_index.m}, ef_construction = {self.vector_index.ef_construction} for vector_store: {self.vector_store.identifier}." + ) + + except psycopg2.Error as e: + raise RuntimeError( + f"Failed to create {PGVectorIndexType.HNSW} vector index for vector_store: {self.vector_store.identifier}: {e}" + ) from e + + async def create_ivfflat_vector_index(self, cur: cursor) -> None: + """Create PGVector IVFFlat vector index for Approximate Nearest Neighbor (ANN) search + + Args: + cur: PostgreSQL cursor + + Raises: + RuntimeError: If the error occurred when creating vector index in PGVector + """ + + # prevents from creating index for the table that already has conflicting index (HNSW or IVFFlat) + if await self.check_conflicting_vector_index_exists(cur): + return + + # don't create index too early as it decreases a performance (https://github.com/pgvector/pgvector?tab=readme-ov-file#ivfflat) + # create IVFFLAT index only if vector store has rows >= lists * 1000 + if await self.fetch_number_of_records(cur) < self.vector_index.lists * 1000: + log.info( + f"IVFFlat index wasn't created for vector_store {self.vector_store.identifier} because table doesn't have enough records." 
+ ) + return + + try: + index_operator_class = self.get_pgvector_index_operator_class() + + # Create Inverted File with Flat Compression (IVFFlat) index on embedding column to allow efficient and performant vector search in pgvector + # IVFFlat index divides vectors into lists, and then searches a subset of those lists that are closest to the query vector + # Index should be created only after the table has some data (https://github.com/pgvector/pgvector?tab=readme-ov-file#ivfflat) + cur.execute( + f""" + CREATE INDEX IF NOT EXISTS {self.table_name}_ivfflat_idx + ON {self.table_name} USING ivfflat(embedding {index_operator_class}) WITH (lists = {self.vector_index.lists}); + """ + ) + log.info( + f"{PGVectorIndexType.IVFFlat} vector index was created with parameter lists = {self.vector_index.lists} for vector_store: {self.vector_store.identifier}." + ) + + except psycopg2.Error as e: + raise RuntimeError( + f"Failed to create {PGVectorIndexType.IVFFlat} vector index for vector_store: {self.vector_store.identifier}: {e}" + ) from e + + async def check_conflicting_vector_index_exists(self, cur: cursor) -> bool: + """Check if vector index of any type has already been created for the table to prevent the conflict + + Args: + cur: PostgreSQL cursor + + Returns: + True if exists, otherwise False + + Raises: + RuntimeError: If the error occurred when checking vector index exists in PGVector + """ + try: + log.info( + f"Checking vector_store: {self.vector_store.identifier} for conflicting vector index in PGVector..." 
+ ) + cur.execute( + """ + SELECT indexname FROM pg_indexes + WHERE (indexname LIKE %s OR indexname LIKE %s) AND tablename = %s; + """, + ( + "%hnsw%", + "%ivfflat%", + self.table_name, + ), + ) + result = cur.fetchone() + + if result: + log.warning( + f"Conflicting vector index {result[0]} already exists in vector_store: {self.vector_store.identifier}" + ) + log.warning( + f"vector_store: {self.vector_store.identifier} will continue to use vector index {result[0]} to preserve performance." + ) + return True + + log.info(f"vector_store: {self.vector_store.identifier} currently doesn't have conflicting vector index") + log.info(f"Proceeding with creation of vector index for {self.vector_store.identifier}") + return False + + except psycopg2.Error as e: + raise RuntimeError(f"Failed to check if vector index exists in PGVector: {e}") from e + + async def fetch_number_of_records(self, cur: cursor) -> int: + """Returns number of records in a vector store + + Args: + cur: PostgreSQL cursor + + Returns: + number of records in a vector store + + Raises: + RuntimeError: If the error occurred when fetching a number of records in a vector store in PGVector + """ + try: + log.info(f"Fetching number of records in vector_store: {self.vector_store.identifier}...") + cur.execute( + f""" + SELECT COUNT(DISTINCT id) + FROM {self.table_name}; + """ + ) + result = cur.fetchone() + + if result: + log.info(f"vector_store: {self.vector_store.identifier} has {result[0]} records.") + return result[0] + + log.info(f"vector_store: {self.vector_store.identifier} currently doesn't have any records.") + return 0 + + except psycopg2.Error as e: + raise RuntimeError(f"Failed to check if vector store has records in PGVector: {e}") from e + class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( @@ -452,8 +618,7 @@ async def initialize(self) -> None: conn=self.conn, kvstore=self.kvstore, distance_metric=self.config.distance_metric, - 
hnsw_m=self.config.hnsw_m, - hnsw_ef_construction=self.config.hnsw_ef_construction, + vector_index=self.config.vector_index, ) await pgvector_index.initialize() index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) @@ -485,8 +650,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: conn=self.conn, kvstore=self.kvstore, distance_metric=self.config.distance_metric, - hnsw_m=self.config.hnsw_m, - hnsw_ef_construction=self.config.hnsw_ef_construction, + vector_index=self.config.vector_index, ) await pgvector_index.initialize() index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) @@ -537,8 +701,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store.embedding_dimension, self.conn, distance_metric=self.config.distance_metric, - hnsw_m=self.config.hnsw_m, - hnsw_ef_construction=self.config.hnsw_ef_construction, + vector_index=self.config.vector_index, ) await index.initialize() self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index cc313a6a0b..4b2d2fc911 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -17,7 +17,7 @@ from llama_stack.providers.inline.vector_io.qdrant.config import QdrantVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter -from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig +from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorHNSWVectorIndex, PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter from 
llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantIndex, QdrantVectorIOAdapter from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore @@ -252,8 +252,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection): embedding_dimension, connection, distance_metric="COSINE", - hnsw_m=16, - hnsw_ef_construction=64, + vector_index=PGVectorHNSWVectorIndex(m=16, ef_construction=64), ) index._test_chunks = [] original_add_chunks = index.add_chunks @@ -284,8 +283,7 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd user="test_user", password="test_password", distance_metric="COSINE", - hnsw_m=16, - hnsw_ef_construction=64, + vector_index=PGVectorHNSWVectorIndex(m=16, ef_construction=64), persistence=unique_kvstore_config, )