From 59162e53799483f6381606e50c06b0e4dc3f3322 Mon Sep 17 00:00:00 2001 From: peopleig Date: Tue, 14 Apr 2026 01:58:37 +0530 Subject: [PATCH] Update Python client to support batch insert and search --- client/python/USAGE.md | 61 ++++++++++ client/python/examples/batch_insert_usage.py | 39 ++++++ client/python/examples/search_query_usage.py | 69 +++++++++++ client/python/tests/test_client.py | 111 +++++++++++++++++- client/python/vortexdb/__init__.py | 3 + client/python/vortexdb/client.py | 110 +++++++++++++++-- client/python/vortexdb/grpc/vector_db_pb2.py | 22 ++-- .../vortexdb/grpc/vector_db_pb2_grpc.py | 88 +++++++++++++- client/python/vortexdb/models.py | 18 ++- client/python/vortexdb/protoutils.py | 65 ++++++++++ 10 files changed, 566 insertions(+), 20 deletions(-) create mode 100644 client/python/examples/batch_insert_usage.py create mode 100644 client/python/examples/search_query_usage.py diff --git a/client/python/USAGE.md b/client/python/USAGE.md index 082bfd0..db47a5d 100644 --- a/client/python/USAGE.md +++ b/client/python/USAGE.md @@ -39,6 +39,12 @@ The client supports usage as a context manager, which automatically closes the u Example available in: ```examples/context_manager_usage.py``` +### Batch Insertion and Search Support + +The client now supports batch insertion and batch search queries. 
+Methods of usage and examples available in: +```examples/batch_insert_usage.py``` & ```examples/search_query_usage.py``` + --- ## Client API @@ -78,6 +84,22 @@ Raises --- +#### **Batch Insert** + +Insert multiple vectors with payloads in a single request +``` +batch_insert(*, items: list[tuple[DenseVector, Payload]]) -> list[str] +``` + +Returns +- List of `point_id` (UUID string) + +Raises +- `TypeError` if input structure is invalid +- gRPC-mapped errors (see Error Handling) + +--- + #### **Get** Fetch a point by its ID @@ -112,6 +134,35 @@ Raises --- +#### **Batch Search** + +Search for nearest neighbours for multiple queries in a single request +``` +batch_search( + *, + queries, + similarity: Similarity | None = None, + limit: int | None = None, +) -> list[list[str]] +``` + +Returns +- List of `point_id` lists, one inner list per query + +Raises +- `TypeError` for invalid query formats +- `ValueError` if required parameters are missing + +Supported Input Formats: +The `queries` parameter is flexible and supports multiple formats: +- List of `SearchQuery` objects +- List of `(DenseVector, Similarity, Limit)` tuples +- List of `(DenseVector, Similarity)` tuples with a global `Limit` +- List of `(DenseVector, Limit)` tuples with a global `Similarity` +- List of `DenseVector` with global `Similarity` and `Limit` + +--- + #### **Delete** Delete a point by its ID @@ -177,6 +228,19 @@ All fields are directly accessible: --- +### `SearchQuery` + +``` +SearchQuery( + vector: DenseVector, + similarity: Similarity, + limit: int, +) +``` +Structured representation of a search request + +--- + ### `Similarity` Enum representing distance functions: diff --git a/client/python/examples/batch_insert_usage.py b/client/python/examples/batch_insert_usage.py new file mode 100644 index 0000000..3c3aa6c --- /dev/null +++ b/client/python/examples/batch_insert_usage.py @@ -0,0 +1,39 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Payload, to_dense_vectors + + +def main(): + db = VortexDB( + grpc_url="localhost:50051", + 
api_key="my-secret-password", + ) + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + p1 = Payload.text("hello world") + p2 = Payload.image("/img/a.png") + p3 = Payload.text("foo bar") + + items = [ + (vectors[0], p1), + (vectors[1], p2), + (vectors[2], p3), + ] + + # Batch Insert + point_ids = db.batch_insert(items=items) + print("Inserted ids:\n", point_ids) + + for pid in point_ids: + db.delete(point_id=pid) + + db.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/client/python/examples/search_query_usage.py b/client/python/examples/search_query_usage.py new file mode 100644 index 0000000..097fa56 --- /dev/null +++ b/client/python/examples/search_query_usage.py @@ -0,0 +1,69 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Similarity, SearchQuery, to_dense_vectors + + +def main(): + db = VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) + + raw_vectors = [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + vectors = to_dense_vectors(raw_vectors) + + q = SearchQuery( + vector=vectors[0], + similarity=Similarity.COSINE, + limit=3, + ) + res = db.search(query=q) + print("Single SearchQuery:\n", res) + + # List of SearchQuery + queries = [ + SearchQuery(vectors[0], Similarity.HAMMING, 3), + SearchQuery(vectors[1], Similarity.EUCLIDEAN, 2), + q, + ] + res = db.batch_search(queries=queries) + print("\nBatch SearchQuery:\n", res) + + # List of vectors with global Similarity and Limit + res = db.batch_search( + queries=vectors, + similarity=Similarity.COSINE, + limit=3, + ) + print("\nList of DenseVectors:\n", res) + + # List of tuple (DenseVector, Similarity) with global Limit + queries = [ + (vectors[0], Similarity.COSINE), + (vectors[1], Similarity.MANHATTAN), + ] + res = db.batch_search( + queries=queries, + limit=3, + ) + print("\nList of (DenseVector, Similarity):\n", res) + + # List of tuple 
(DenseVector, Limit) with global Similarity + queries = [ + (vectors[0], 2), + (vectors[1], 4), + ] + res = db.batch_search( + queries=queries, + similarity=Similarity.COSINE, + ) + print("\nList of (DenseVector, Limit):\n", res) + + db.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/client/python/tests/test_client.py b/client/python/tests/test_client.py index a752320..67f72c6 100644 --- a/client/python/tests/test_client.py +++ b/client/python/tests/test_client.py @@ -5,6 +5,7 @@ from vortexdb.connection import GRPCConnection from vortexdb.models import DenseVector, Payload, Similarity, ContentType, Point from vortexdb.exceptions import InvalidArgumentError +from vortexdb.models import SearchQuery @@ -45,7 +46,6 @@ def test_insert_success(client, mock_connection): assert point_id == "point-123" - def test_insert_rejects_invalid_vector(client): with pytest.raises(TypeError): client.insert( @@ -54,6 +54,41 @@ def test_insert_rejects_invalid_vector(client): ) +# Batch Insert + +def test_batch_insert_success(client, mock_connection): + response = Mock() + response.ids = [ + Mock(id=Mock(value="p1")), + Mock(id=Mock(value="p2")), + ] + mock_connection.call.return_value = response + items = [ + (DenseVector([1, 2, 3]), Payload.text("a")), + (DenseVector([4, 5, 6]), Payload.text("b")), + ] + result = client.batch_insert(items=items) + assert result == ["p1", "p2"] + +def test_batch_insert_invalid_items_type(client): + with pytest.raises(TypeError): + client.batch_insert(items="not-a-list") + +def test_batch_insert_invalid_tuple_structure(client): + items = [ + (DenseVector([1, 2, 3]),), # only one element + ] + with pytest.raises(TypeError): + client.batch_insert(items=items) + +def test_batch_insert_invalid_vector(client): + items = [ + ([1, 2, 3], Payload.text("a")), # not DenseVector + ] + with pytest.raises(TypeError): + client.batch_insert(items=items) + + # Get def test_get_point_success(client, mock_connection): @@ -118,6 
+153,80 @@ def test_search_invalid_vector(client): ) +# Batch Search + +def test_batch_search_full_tuple(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + Mock(result_point_ids=[Mock(id=Mock(value="p2"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE, 2), + (DenseVector([4, 5, 6]), Similarity.EUCLIDEAN, 1), + ] + result = client.batch_search(queries=queries) + assert result == [["p1"], ["p2"]] + +def test_batch_search_vectors_with_global_params(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [DenseVector([1, 2, 3])] + result = client.batch_search( + queries=queries, + similarity=Similarity.MANHATTAN, + limit=2, + ) + assert result == [["p1"]] + +def test_batch_search_vector_similarity_with_global_limit(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + result = client.batch_search( + queries=queries, + limit=2, + ) + assert result == [["p1"]] + +def test_batch_search_searchquery_objects(client, mock_connection): + mock_connection.call.return_value = Mock( + results=[ + Mock(result_point_ids=[Mock(id=Mock(value="p1"))]), + ] + ) + queries = [ + SearchQuery(DenseVector([1, 2, 3]), Similarity.COSINE, 2), + ] + result = client.batch_search(queries=queries) + assert result == [["p1"]] + +def test_batch_search_missing_globals_for_vector(client): + queries = [DenseVector([1, 2, 3])] + with pytest.raises(ValueError): + client.batch_search(queries=queries) + +def test_batch_search_missing_limit(client): + queries = [ + (DenseVector([1, 2, 3]), Similarity.COSINE), + ] + with pytest.raises(ValueError): + client.batch_search(queries=queries) + +def test_batch_search_invalid_format(client): + queries = 
["invalid"] + with pytest.raises(TypeError): + client.batch_search(queries=queries) + # Close def test_close_closes_connection(client, mock_connection): diff --git a/client/python/vortexdb/__init__.py b/client/python/vortexdb/__init__.py index 62c100f..08ec797 100644 --- a/client/python/vortexdb/__init__.py +++ b/client/python/vortexdb/__init__.py @@ -6,6 +6,8 @@ Payload, Point, Similarity, + SearchQuery, + to_dense_vectors, ) from vortexdb.exceptions import ( VortexDBError, @@ -23,6 +25,7 @@ "Payload", "Point", "Similarity", + "SearchQuery", "VortexDBError", "AuthenticationError", "NotFoundError", diff --git a/client/python/vortexdb/client.py b/client/python/vortexdb/client.py index 38e0553..9ad7885 100644 --- a/client/python/vortexdb/client.py +++ b/client/python/vortexdb/client.py @@ -7,6 +7,7 @@ Payload, Point, Similarity, + SearchQuery, ) from vortexdb import protoutils as proto @@ -56,6 +57,19 @@ def insert(self, *, vector: DenseVector, payload: Payload) -> str: return response.id.value + def batch_insert(self, *, items: list[tuple[DenseVector, Payload]]) -> list[str]: + """ + Insert multiple vectors. + Returns: list of point_id (str) + """ + request = proto.build_batch_insert_request(items=items) + + response = self._conn.call( + self._conn.stub.InsertVectorsBatch, + request, + ) + return [pid.id.value for pid in response.ids] + def get(self, *, point_id: str) -> Point | None: """ Retrieve a point by ID. @@ -87,32 +101,108 @@ def delete(self, *, point_id: str) -> None: def search( self, *, - vector: DenseVector, - similarity: Similarity, - limit: int, + vector: DenseVector | None = None, + similarity: Similarity | None = None, + limit: int | None = None, + query: SearchQuery | None = None, ) -> List[str]: """ Search for nearest neighbors. + Accepts: + - vector + similarity + limit + - SearchQuery object via `query` Returns: List of point IDs """ - if not isinstance(vector, DenseVector): - raise TypeError( - "vector must be a DenseVector. 
" - "Use: DenseVector([1.0, 2.0, 3.0])" - ) + # SearchQuery support + if query is not None: + if not isinstance(query, SearchQuery): + raise TypeError("query must be a SearchQuery") + vector = query.vector + similarity = query.similarity + limit = query.limit + + else: + if not isinstance(vector, DenseVector): + raise TypeError( + "vector must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + if not isinstance(similarity, Similarity): + raise TypeError("similarity must be Similarity enum") + if not isinstance(limit, int): + raise TypeError("limit must be int") request = proto.build_search_request( vector=vector, similarity=similarity, limit=limit, ) - response = self._conn.call( self._conn.stub.SearchPoints, request, ) - return [pid.id.value for pid in response.result_point_ids] + + def batch_search( + self, + *, + queries, + similarity: Similarity | None = None, + limit: int | None = None, + ) -> List[List[str]]: + """ + Flexible batch search. + + Accepts: + - List[SearchQuery] + - List[(DenseVector, Similarity, int)] + - List[(DenseVector, Similarity)] + global limit + - List[(DenseVector, int)] + global similarity + - List[DenseVector] + global similarity + limit + """ + + normalized = [] + + for i, q in enumerate(queries): + if hasattr(q, "vector") and hasattr(q, "similarity") and hasattr(q, "limit"): + normalized.append((q.vector, q.similarity, q.limit)) + continue + + if isinstance(q, DenseVector): + if similarity is None or limit is None: + raise ValueError( + f"queries[{i}] requires global similarity and limit" + ) + normalized.append((q, similarity, limit)) + continue + + if isinstance(q, (list, tuple)): + if len(q) == 3: + normalized.append(q) + continue + if len(q) == 2: + a, b = q + + if isinstance(a, DenseVector) and isinstance(b, Similarity): + if limit is None: + raise ValueError(f"queries[{i}] missing global limit") + normalized.append((a, b, limit)) + continue + + if isinstance(a, DenseVector) and isinstance(b, int): + if similarity 
is None: + raise ValueError(f"queries[{i}] missing global similarity") + normalized.append((a, similarity, b)) + continue + + raise TypeError(f"Invalid query format at index {i}") + + request = proto.build_batch_search_request(queries=normalized) + response = self._conn.call(self._conn.stub.SearchPointsBatch,request) + return [ + [pid.id.value for pid in result.result_point_ids] + for result in response.results + ] def close(self) -> None: """ diff --git a/client/python/vortexdb/grpc/vector_db_pb2.py b/client/python/vortexdb/grpc/vector_db_pb2.py index 2b8cbb8..af08cd2 100644 --- a/client/python/vortexdb/grpc/vector_db_pb2.py +++ b/client/python/vortexdb/grpc/vector_db_pb2.py @@ -25,17 +25,17 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0fvector-db.proto\x12\x08vectordb\x1a\x1bgoogle/protobuf/empty.proto\"\x15\n\x04UUID\x12\r\n\x05value\x18\x01 \x01(\t\"`\n\x13InsertVectorRequest\x12%\n\x06vector\x18\x01 \x01(\x0b\x32\x15.vectordb.DenseVector\x12\"\n\x07payload\x18\x02 \x01(\x0b\x32\x11.vectordb.Payload\"u\n\rSearchRequest\x12+\n\x0cquery_vector\x18\x01 \x01(\x0b\x32\x15.vectordb.DenseVector\x12(\n\nsimilarity\x18\x02 \x01(\x0e\x32\x14.vectordb.Similarity\x12\r\n\x05limit\x18\x03 \x01(\x04\"=\n\x0eSearchResponse\x12+\n\x10result_point_ids\x18\x01 \x03(\x0b\x32\x11.vectordb.PointID\"\x1d\n\x0b\x44\x65nseVector\x12\x0e\n\x06values\x18\x01 \x03(\x02\"q\n\x05Point\x12\x1d\n\x02id\x18\x01 \x01(\x0b\x32\x11.vectordb.PointID\x12\"\n\x07payload\x18\x02 \x01(\x0b\x32\x11.vectordb.Payload\x12%\n\x06vector\x18\x03 \x01(\x0b\x32\x15.vectordb.DenseVector\"%\n\x07PointID\x12\x1a\n\x02id\x18\x01 \x01(\x0b\x32\x0e.vectordb.UUID\"G\n\x07Payload\x12+\n\x0c\x63ontent_type\x18\x01 \x01(\x0e\x32\x15.vectordb.ContentType\x12\x0f\n\x07\x63ontent\x18\x02 
\x01(\t*C\n\nSimilarity\x12\r\n\tEuclidean\x10\x00\x12\r\n\tManhattan\x10\x01\x12\x0b\n\x07Hamming\x10\x02\x12\n\n\x06\x43osine\x10\x03*\"\n\x0b\x43ontentType\x12\t\n\x05Image\x10\x00\x12\x08\n\x04Text\x10\x01\x32\x81\x02\n\x08VectorDB\x12\x42\n\x0cInsertVector\x12\x1d.vectordb.InsertVectorRequest\x1a\x11.vectordb.PointID\"\x00\x12:\n\x0b\x44\x65letePoint\x12\x11.vectordb.PointID\x1a\x16.google.protobuf.Empty\"\x00\x12\x30\n\x08GetPoint\x12\x11.vectordb.PointID\x1a\x0f.vectordb.Point\"\x00\x12\x43\n\x0cSearchPoints\x12\x17.vectordb.SearchRequest\x1a\x18.vectordb.SearchResponse\"\x00\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0fvector-db.proto\x12\x08vectordb\x1a\x1bgoogle/protobuf/empty.proto\"\x15\n\x04UUID\x12\r\n\x05value\x18\x01 \x01(\t\"`\n\x13InsertVectorRequest\x12%\n\x06vector\x18\x01 \x01(\x0b\x32\x15.vectordb.DenseVector\x12\"\n\x07payload\x18\x02 \x01(\x0b\x32\x11.vectordb.Payload\"u\n\rSearchRequest\x12+\n\x0cquery_vector\x18\x01 \x01(\x0b\x32\x15.vectordb.DenseVector\x12(\n\nsimilarity\x18\x02 \x01(\x0e\x32\x14.vectordb.Similarity\x12\r\n\x05limit\x18\x03 \x01(\x04\"=\n\x0eSearchResponse\x12+\n\x10result_point_ids\x18\x01 \x03(\x0b\x32\x11.vectordb.PointID\"\x1d\n\x0b\x44\x65nseVector\x12\x0e\n\x06values\x18\x01 \x03(\x02\"q\n\x05Point\x12\x1d\n\x02id\x18\x01 \x01(\x0b\x32\x11.vectordb.PointID\x12\"\n\x07payload\x18\x02 \x01(\x0b\x32\x11.vectordb.Payload\x12%\n\x06vector\x18\x03 \x01(\x0b\x32\x15.vectordb.DenseVector\"%\n\x07PointID\x12\x1a\n\x02id\x18\x01 \x01(\x0b\x32\x0e.vectordb.UUID\"G\n\x07Payload\x12+\n\x0c\x63ontent_type\x18\x01 \x01(\x0e\x32\x15.vectordb.ContentType\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\"K\n\x19InsertVectorsBatchRequest\x12.\n\x07vectors\x18\x01 \x03(\x0b\x32\x1d.vectordb.InsertVectorRequest\"<\n\x1aInsertVectorsBatchResponse\x12\x1e\n\x03ids\x18\x01 \x03(\x0b\x32\x11.vectordb.PointID\"D\n\x18SearchPointsBatchRequest\x12(\n\x07queries\x18\x01 
\x03(\x0b\x32\x17.vectordb.SearchRequest\"F\n\x19SearchPointsBatchResponse\x12)\n\x07results\x18\x01 \x03(\x0b\x32\x18.vectordb.SearchResponse*C\n\nSimilarity\x12\r\n\tEuclidean\x10\x00\x12\r\n\tManhattan\x10\x01\x12\x0b\n\x07Hamming\x10\x02\x12\n\n\x06\x43osine\x10\x03*\"\n\x0b\x43ontentType\x12\t\n\x05Image\x10\x00\x12\x08\n\x04Text\x10\x01\x32\xc4\x03\n\x08VectorDB\x12\x42\n\x0cInsertVector\x12\x1d.vectordb.InsertVectorRequest\x1a\x11.vectordb.PointID\"\x00\x12:\n\x0b\x44\x65letePoint\x12\x11.vectordb.PointID\x1a\x16.google.protobuf.Empty\"\x00\x12\x30\n\x08GetPoint\x12\x11.vectordb.PointID\x1a\x0f.vectordb.Point\"\x00\x12\x43\n\x0cSearchPoints\x12\x17.vectordb.SearchRequest\x1a\x18.vectordb.SearchResponse\"\x00\x12\x61\n\x12InsertVectorsBatch\x12#.vectordb.InsertVectorsBatchRequest\x1a$.vectordb.InsertVectorsBatchResponse\"\x00\x12^\n\x11SearchPointsBatch\x12\".vectordb.SearchPointsBatchRequest\x1a#.vectordb.SearchPointsBatchResponse\"\x00\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'vector_db_pb2', _globals) if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None - _globals['_SIMILARITY']._serialized_start=619 - _globals['_SIMILARITY']._serialized_end=686 - _globals['_CONTENTTYPE']._serialized_start=688 - _globals['_CONTENTTYPE']._serialized_end=722 + _globals['_SIMILARITY']._serialized_start=900 + _globals['_SIMILARITY']._serialized_end=967 + _globals['_CONTENTTYPE']._serialized_start=969 + _globals['_CONTENTTYPE']._serialized_end=1003 _globals['_UUID']._serialized_start=58 _globals['_UUID']._serialized_end=79 _globals['_INSERTVECTORREQUEST']._serialized_start=81 @@ -52,6 +52,14 @@ _globals['_POINTID']._serialized_end=544 _globals['_PAYLOAD']._serialized_start=546 _globals['_PAYLOAD']._serialized_end=617 - _globals['_VECTORDB']._serialized_start=725 - _globals['_VECTORDB']._serialized_end=982 + 
_globals['_INSERTVECTORSBATCHREQUEST']._serialized_start=619 + _globals['_INSERTVECTORSBATCHREQUEST']._serialized_end=694 + _globals['_INSERTVECTORSBATCHRESPONSE']._serialized_start=696 + _globals['_INSERTVECTORSBATCHRESPONSE']._serialized_end=756 + _globals['_SEARCHPOINTSBATCHREQUEST']._serialized_start=758 + _globals['_SEARCHPOINTSBATCHREQUEST']._serialized_end=826 + _globals['_SEARCHPOINTSBATCHRESPONSE']._serialized_start=828 + _globals['_SEARCHPOINTSBATCHRESPONSE']._serialized_end=898 + _globals['_VECTORDB']._serialized_start=1006 + _globals['_VECTORDB']._serialized_end=1458 # @@protoc_insertion_point(module_scope) diff --git a/client/python/vortexdb/grpc/vector_db_pb2_grpc.py b/client/python/vortexdb/grpc/vector_db_pb2_grpc.py index edc3c8f..fdbde34 100644 --- a/client/python/vortexdb/grpc/vector_db_pb2_grpc.py +++ b/client/python/vortexdb/grpc/vector_db_pb2_grpc.py @@ -4,7 +4,7 @@ import warnings from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -from vortexdb.grpc import vector_db_pb2 as vector__db__pb2 +from . 
import vector_db_pb2 as vector__db__pb2 GRPC_GENERATED_VERSION = '1.76.0' GRPC_VERSION = grpc.__version__ @@ -55,6 +55,16 @@ def __init__(self, channel): request_serializer=vector__db__pb2.SearchRequest.SerializeToString, response_deserializer=vector__db__pb2.SearchResponse.FromString, _registered_method=True) + self.InsertVectorsBatch = channel.unary_unary( + '/vectordb.VectorDB/InsertVectorsBatch', + request_serializer=vector__db__pb2.InsertVectorsBatchRequest.SerializeToString, + response_deserializer=vector__db__pb2.InsertVectorsBatchResponse.FromString, + _registered_method=True) + self.SearchPointsBatch = channel.unary_unary( + '/vectordb.VectorDB/SearchPointsBatch', + request_serializer=vector__db__pb2.SearchPointsBatchRequest.SerializeToString, + response_deserializer=vector__db__pb2.SearchPointsBatchResponse.FromString, + _registered_method=True) class VectorDBServicer(object): @@ -88,6 +98,18 @@ def SearchPoints(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def InsertVectorsBatch(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SearchPointsBatch(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_VectorDBServicer_to_server(servicer, server): rpc_method_handlers = { @@ -111,6 +133,16 @@ def add_VectorDBServicer_to_server(servicer, server): request_deserializer=vector__db__pb2.SearchRequest.FromString, response_serializer=vector__db__pb2.SearchResponse.SerializeToString, ), + 'InsertVectorsBatch': grpc.unary_unary_rpc_method_handler( + servicer.InsertVectorsBatch, + 
request_deserializer=vector__db__pb2.InsertVectorsBatchRequest.FromString, + response_serializer=vector__db__pb2.InsertVectorsBatchResponse.SerializeToString, + ), + 'SearchPointsBatch': grpc.unary_unary_rpc_method_handler( + servicer.SearchPointsBatch, + request_deserializer=vector__db__pb2.SearchPointsBatchRequest.FromString, + response_serializer=vector__db__pb2.SearchPointsBatchResponse.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'vectordb.VectorDB', rpc_method_handlers) @@ -229,3 +261,57 @@ def SearchPoints(request, timeout, metadata, _registered_method=True) + + @staticmethod + def InsertVectorsBatch(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/vectordb.VectorDB/InsertVectorsBatch', + vector__db__pb2.InsertVectorsBatchRequest.SerializeToString, + vector__db__pb2.InsertVectorsBatchResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def SearchPointsBatch(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/vectordb.VectorDB/SearchPointsBatch', + vector__db__pb2.SearchPointsBatchRequest.SerializeToString, + vector__db__pb2.SearchPointsBatchResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) diff --git a/client/python/vortexdb/models.py b/client/python/vortexdb/models.py index f2cbe19..0bfab5f 100644 --- a/client/python/vortexdb/models.py +++ b/client/python/vortexdb/models.py @@ -70,7 
+70,9 @@ def to_proto(self) -> vector_db_pb2.DenseVector: def to_list(self) -> list[float]: return list(self.values) - +# & Helper Function for Batch of DenseVectors +def to_dense_vectors(arr): + return [DenseVector(x) for x in arr] @dataclass(frozen=True) @@ -129,3 +131,17 @@ def pretty(self) -> str: f" payload_type = {self.payload.content_type.name},\n" f" payload = '{self.payload.content}'" ) + +# I added this because using tuples will get messy if we increase fields in a search query +@dataclass(frozen=True) +class SearchQuery: + vector: DenseVector + similarity: Similarity + limit: int + + def to_proto(self) -> vector_db_pb2.SearchRequest: + return vector_db_pb2.SearchRequest( + query_vector=self.vector.to_proto(), + similarity=self.similarity.to_proto(), + limit=self.limit, + ) \ No newline at end of file diff --git a/client/python/vortexdb/protoutils.py b/client/python/vortexdb/protoutils.py index 8562b18..f6fdc47 100644 --- a/client/python/vortexdb/protoutils.py +++ b/client/python/vortexdb/protoutils.py @@ -11,6 +11,36 @@ def build_insert_request( payload=payload.to_proto(), ) +def build_batch_insert_request( + *, + items: list[tuple[DenseVector, Payload]], +) -> vector_db_pb2.InsertVectorsBatchRequest: + if not isinstance(items, (list,tuple)): + raise TypeError("Items must be a list of (DenseVector, Payload) tuples") + + if not items: + raise ValueError("Items cannot be empty") + + requests = [] + + for i, pair in enumerate(items): + if not isinstance(pair, (list,tuple)) or len(pair)!=2: + raise TypeError(f"items[{i}] must be a tuple of (DenseVector, Payload)") + + vector, payload = pair + if not isinstance(vector, DenseVector): + raise TypeError( + f"items[{i}][0] must be a DenseVector" + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + + if not isinstance(payload, Payload): + raise TypeError(f"items[{i}][1] must be Payload") + + requests.append(build_insert_request(vector=vector, payload=payload)) + + return vector_db_pb2.InsertVectorsBatchRequest(vectors = 
requests) + def build_point_id_request(point_id: str) -> vector_db_pb2.PointID: return vector_db_pb2.PointID( id=vector_db_pb2.UUID(value=point_id) @@ -27,3 +57,38 @@ def build_search_request( similarity=similarity.to_proto(), limit=limit, ) + +def build_batch_search_request( + *, + queries: list[tuple[DenseVector, Similarity, int]], +) -> vector_db_pb2.SearchPointsBatchRequest: + if not isinstance(queries, (list,tuple)): + raise TypeError("Queries must be a list of (DenseVector, Similarity, Limit (int)) tuples") + + if not queries: + raise ValueError("Queries cannot be empty") + + requests = [] + + for i, trio in enumerate(queries): + if not isinstance(trio, (list,tuple)) or len(trio)!=3: + raise TypeError(f"queries[{i}] must be a tuple of (DenseVector, Similarity, Limit(int))") + + vector, similarity, limit = trio + if not isinstance(vector, DenseVector): + raise TypeError( + f"queries[{i}][0] must be a DenseVector" + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + if not isinstance(similarity, Similarity): + raise TypeError(f"queries[{i}][1] must be Similarity") + if not isinstance(limit, int): + raise TypeError(f"queries[{i}][2] must be an integer value") + + requests.append(vector_db_pb2.SearchRequest( + query_vector=vector.to_proto(), + similarity=similarity.to_proto(), + limit=limit, + )) + + return vector_db_pb2.SearchPointsBatchRequest(queries=requests) \ No newline at end of file