Refactor pydantic 2 (#465)

* fmt and default bh * update pydantic 2 * update readme * refactor batch handler * update type hints
michaelfeil · Nov 16, 2024 · 8ac0b3c · 8ac0b3c
1 parent f59df4f
commit 8ac0b3c
Show file tree

Hide file tree

Showing 8 changed files with 164 additions and 190 deletions.
diff --git a/README.md b/README.md
@@ -42,6 +42,8 @@ Infinity is a high-throughput, low-latency REST API for serving text-embeddings,
   <a href="https://embeddedllm.com/blog/"><img src="https://avatars.githubusercontent.com/u/148834374" alt="Logo JamAI" width="50"/></a>
   <a href="https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct#infinity_emb"><img src="https://avatars.githubusercontent.com/u/1961952" alt="Logo Alibaba Group" width="50"/></a>
   <a href="https://github.com/bentoml/BentoInfinity/"><img src="https://avatars.githubusercontent.com/u/49176046" alt="Logo BentoML" width="50"/></a>
+  <a href="https://x.com/bo_wangbo/status/1766371909086724481"><img src="https://avatars.githubusercontent.com/u/60539444" alt="Logo JinaAi" width="50"/></a>
+  <a href="https://github.com/dwarvesf/llm-hosting"><img src="https://avatars.githubusercontent.com/u/10388449" alt="Logo Dwarves Foundation" width="50"/></a>
 </p> 
 
 ### Latest News 🔥
@@ -477,6 +479,7 @@ https://github.com/michaelfeil/infinity/tree/main/libs/client_infinity/infinity_
 - [gpt_server - gpt_server is an open-source framework designed for production-level deployment of LLMs (Large Language Models) or Embeddings.](https://github.com/shell-nlp/gpt_server)
 - [KubeAI: Kubernetes AI Operator for inferencing](https://github.com/substratusai/kubeai)
 - [LangChain](https://python.langchain.com/docs/integrations/text_embedding/infinity)
+- [Batched, a modification of the batching algorithm in Infinity](https://github.com/mixedbread-ai/batched)
 
 ## Documentation
 View the docs at [https://michaelfeil.github.io/infinity](https://michaelfeil.github.io/infinity) on how to get started.

diff --git a/docs/benchmarks/simple_app.py b/docs/benchmarks/simple_app.py
@@ -6,11 +6,10 @@
 from fastapi import FastAPI, responses
 
 from infinity_emb.fastapi_schemas.pymodels import (
-    OpenAIEmbeddingInput,
+    MultiModalOpenAIEmbedding,
     OpenAIEmbeddingResult,
 )
 from embed import BatchedInference
-from infinity_emb.fastapi_schemas.convert import list_embeddings_to_response
 from infinity_emb import AsyncEmbeddingEngine, EngineArgs
 import asyncio
 import numpy as np
@@ -100,7 +99,7 @@ async def lifespan(app: FastAPI):
         response_model=OpenAIEmbeddingResult,
         response_class=responses.ORJSONResponse,
     )
-    async def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
+    async def embed(request: MultiModalOpenAIEmbedding) -> OpenAIEmbeddingResult:
         """the goal of this code is to write an as simple as possible server
         that can we rebuild by any other p
         """
@@ -109,17 +108,16 @@ async def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
 
         encoded = await encode_infinity(sentences)
         # response parsing
-        response = list_embeddings_to_response(
+        return OpenAIEmbeddingResult.to_embeddings_response(
             encoded, MODEL_NAME, sum(len(t) for t in sentences)
         )
-        return OpenAIEmbeddingResult(**response)
 else:
     @app.post(
         "/embeddings",
         response_model=OpenAIEmbeddingResult,
         response_class=responses.ORJSONResponse,
     )
-    def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
+    def embed(request: MultiModalOpenAIEmbedding) -> OpenAIEmbeddingResult:
         """the goal of this code is to write an as simple as possible server
         that can we rebuild by any other p
         """
@@ -134,10 +132,9 @@ def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
             encoded = encode_sentence_transformer(sentences)
 
         # response parsing
-        response = list_embeddings_to_response(
+        return OpenAIEmbeddingResult.to_embeddings_response(
             encoded, MODEL_NAME, sum(len(t) for t in sentences)
         )
-        return OpenAIEmbeddingResult(**response)
 
 if __name__ == "__main__":
     import uvicorn

diff --git a/libs/embed_package/embed/_infer.py b/libs/embed_package/embed/_infer.py
@@ -87,7 +87,7 @@ def embed(
     def image_embed(
         self,
         *,
-        images: list[str],
+        images: list[Union[str, bytes]],
         model_id: ModelIndex = 0,
     ) -> Future[tuple[list[list[float]], int]]:
         """Embed images with a model.
@@ -105,7 +105,7 @@ def image_embed(
     def audio_embed(
         self,
         *,
-        audios: list[str],
+        audios: list[Union[str, bytes]],
         model_id: ModelIndex = 0,
     ) -> Future[tuple[list[list[float]], int]]:
         """Embed audios with a model.

diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py
diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: MIT
 # Copyright (c) 2023-now michaelfeil
+# IMPORT of this file requires pydantic 2.x
 
 from __future__ import annotations
 
@@ -10,6 +11,29 @@
 
 import numpy as np
 
+
+from infinity_emb._optional_imports import CHECK_PYDANTIC
+from infinity_emb.primitives import EmbeddingEncodingFormat, Modality
+
+CHECK_PYDANTIC.mark_required()
+# pydantic 2.x is strictly needed starting v0.0.70
+from pydantic import (  # noqa
+    BaseModel,
+    Discriminator,
+    Field,
+    RootModel,
+    Tag,
+    conlist,
+)
+
+from .data_uri import DataURI  # noqa
+from .pydantic_v2 import (  # noqa
+    INPUT_STRING,
+    ITEMS_LIMIT,
+    ITEMS_LIMIT_SMALL,
+    HttpUrl,
+)
+
 if TYPE_CHECKING:
     from infinity_emb.args import EngineArgs
     from infinity_emb.primitives import (
@@ -18,70 +42,7 @@
         RerankReturnType,
     )
 
-from infinity_emb._optional_imports import CHECK_PYDANTIC
-from infinity_emb.primitives import EmbeddingEncodingFormat, Modality
-
-# potential backwards compatibility to pydantic 1.X
-# pydantic 2.x is preferred by not strictly needed
-if CHECK_PYDANTIC.is_available:
-    from pydantic import BaseModel, Field, conlist
-
-    try:
-        from pydantic import (
-            BaseModel,
-            Discriminator,
-            Field,
-            RootModel,
-            Tag,
-        )
-
-        from .data_uri import DataURI
-        from .pydantic_v2 import (
-            INPUT_STRING,
-            ITEMS_LIMIT,
-            ITEMS_LIMIT_SMALL,
-            HttpUrl,
-        )
-    except ImportError:
-        from pydantic import constr
-
-        INPUT_STRING = constr(max_length=8192 * 15, strip_whitespace=True)  # type: ignore
-        ITEMS_LIMIT = {
-            "min_items": 1,
-            "max_items": 2048,
-        }
-        ITEMS_LIMIT_SMALL = {
-            "min_items": 1,
-            "max_items": 32,
-        }
-        HttpUrl = str  # type: ignore
-        DataURI = str  # type: ignore
-    DataURIorURL = Union[Annotated[DataURI, str], HttpUrl]
-
-else:
-
-    class BaseModel:  # type: ignore[no-redef]
-        pass
-
-    class RootModel:  # type: ignore
-        pass
-
-    class Tag:  # type: ignore
-        pass
-
-    class HttpUrl:  # type: ignore
-        pass
-
-    class DataURI:  # type: ignore
-        pass
-
-    def Field(*args, **kwargs):  # type: ignore
-        pass
-
-    def conlist():  # type: ignore
-        pass
-
-    DataURIorURL = None  # type: ignore
+DataURIorURL = Union[Annotated[DataURI, str], HttpUrl]
 
 
 class _Usage(BaseModel):