From fb5d117f9a3165da84dcce93d5bb9831d7d9f827 Mon Sep 17 00:00:00 2001 From: g97iulio1609 Date: Sat, 28 Feb 2026 05:55:37 +0100 Subject: [PATCH 1/2] perf: use deque for iterator object cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both _ObjectIterator and _ObjectAIterator consume cached objects front-to-back via list.pop(0), which is O(n) per removal. With the default ITERATOR_CACHE_SIZE (typically 100), each batch drain is O(n²). Switch to collections.deque with popleft() for O(1) front removal. --- weaviate/collections/iterator.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/weaviate/collections/iterator.py b/weaviate/collections/iterator.py index 6952d7be9..f095e95b9 100644 --- a/weaviate/collections/iterator.py +++ b/weaviate/collections/iterator.py @@ -1,3 +1,4 @@ +from collections import deque from dataclasses import dataclass from typing import ( Any, @@ -54,14 +55,14 @@ def __init__( self.__query = query self.__inputs = inputs - self.__iter_object_cache: List[Object[TProperties, TReferences]] = [] + self.__iter_object_cache: deque[Object[TProperties, TReferences]] = deque() self.__iter_object_last_uuid: Optional[UUID] = _parse_after(self.__inputs.after) self.__iter_cache_size = cache_size or ITERATOR_CACHE_SIZE def __iter__( self, ) -> Iterator[Object[TProperties, TReferences]]: - self.__iter_object_cache = [] + self.__iter_object_cache = deque() self.__iter_object_last_uuid = _parse_after(self.__inputs.after) return self @@ -75,11 +76,11 @@ def __next__(self) -> Object[TProperties, TReferences]: return_properties=self.__inputs.return_properties, return_references=self.__inputs.return_references, ) - self.__iter_object_cache = res.objects # type: ignore + self.__iter_object_cache = deque(res.objects) # type: ignore if len(self.__iter_object_cache) == 0: raise StopIteration - ret_object = self.__iter_object_cache.pop(0) + ret_object = self.__iter_object_cache.popleft() self.__iter_object_last_uuid = ret_object.uuid assert ( self.__iter_object_last_uuid is not None @@ -100,14 +101,14 @@ def __init__( self.__query = query self.__inputs = inputs - self.__iter_object_cache: List[Object[TProperties, TReferences]] = [] + self.__iter_object_cache: deque[Object[TProperties, TReferences]] = deque() self.__iter_object_last_uuid: Optional[UUID] = _parse_after(self.__inputs.after) self.__iter_cache_size = cache_size or ITERATOR_CACHE_SIZE def __aiter__( self, ) -> AsyncIterator[Object[TProperties, TReferences]]: - self.__iter_object_cache = [] + self.__iter_object_cache = deque() self.__iter_object_last_uuid = _parse_after(self.__inputs.after) return self @@ -123,11 +124,11 @@ async def __anext__( return_properties=self.__inputs.return_properties, return_references=self.__inputs.return_references, ) - self.__iter_object_cache = res.objects # type: ignore + self.__iter_object_cache = deque(res.objects) # type: ignore if len(self.__iter_object_cache) == 0: raise StopAsyncIteration - ret_object = self.__iter_object_cache.pop(0) + ret_object = self.__iter_object_cache.popleft() self.__iter_object_last_uuid = ret_object.uuid assert ( self.__iter_object_last_uuid is not None From aaa105a865a1bfecaeca588bdad041ee448e8bcf Mon Sep 17 00:00:00 2001 From: g97iulio1609 Date: Mon, 2 Mar 2026 07:28:23 +0100 Subject: [PATCH 2/2] fix: remove unused List import Fixes linting issue flagged by ruff (F401: unused import). --- weaviate/collections/iterator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaviate/collections/iterator.py b/weaviate/collections/iterator.py index f095e95b9..326000f9a 100644 --- a/weaviate/collections/iterator.py +++ b/weaviate/collections/iterator.py @@ -7,7 +7,6 @@ Generic, Iterable, Iterator, - List, Optional, ) from uuid import UUID