From 366e3029005f375f0f8330ac265f6c34bafb33ed Mon Sep 17 00:00:00 2001 From: dorren002 <63645716+dorren002@users.noreply.github.com> Date: Wed, 5 Feb 2025 12:54:11 +0800 Subject: [PATCH] enable milvus pagination query (#422) --- lazyllm/tools/rag/milvus_store.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lazyllm/tools/rag/milvus_store.py b/lazyllm/tools/rag/milvus_store.py index f0e1a5da..0c84379a 100644 --- a/lazyllm/tools/rag/milvus_store.py +++ b/lazyllm/tools/rag/milvus_store.py @@ -14,6 +14,7 @@ from lazyllm.common import override, obj2str, str2obj MILVUS_UPSERT_BATCH_SIZE = 500 +MILVUS_PAGINATION_OFFSET = 1000 class MilvusStore(StoreBase): # we define these variables as members so that pymilvus is not imported until MilvusStore is instantiated. @@ -246,8 +247,25 @@ def _gen_field_key(self, k: str) -> str: def _load_all_nodes_to(self, store: StoreBase) -> None: uid2node = {} for group_name in self._client.list_collections(): - results = self._client.query(collection_name=group_name, - filter=f'{self._primary_key} != ""') + collection_desc = self._client.describe_collection(collection_name=group_name) + field_names = [field.get("name") for field in collection_desc.get('fields', [])] + + iterator = self._client.query_iterator( + collection_name=group_name, + batch_size=MILVUS_PAGINATION_OFFSET, + filter=f'{self._primary_key} != ""', + output_fields=field_names + ) + + results = [] + while True: + result = iterator.next() + + if not result: + iterator.close() + break + results += result + for result in results: node = self._deserialize_node_partial(result) node._group = group_name