Skip to content

Commit

Permalink
Refactor pydantic 2 (#465)
Browse files Browse the repository at this point in the history
* fmt and default bh

* update pydantic 2

* update readme

* refactor batch handler

* update type hints
  • Loading branch information
michaelfeil authored Nov 16, 2024
1 parent f59df4f commit 8ac0b3c
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 190 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ Infinity is a high-throughput, low-latency REST API for serving text-embeddings,
<a href="https://embeddedllm.com/blog/"><img src="https://avatars.githubusercontent.com/u/148834374" alt="Logo JamAI" width="50"/></a>
<a href="https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct#infinity_emb"><img src="https://avatars.githubusercontent.com/u/1961952" alt="Logo Alibaba Group" width="50"/></a>
<a href="https://github.com/bentoml/BentoInfinity/"><img src="https://avatars.githubusercontent.com/u/49176046" alt="Logo BentoML" width="50"/></a>
<a href="https://x.com/bo_wangbo/status/1766371909086724481"><img src="https://avatars.githubusercontent.com/u/60539444" alt="Logo JinaAi" width="50"/></a>
<a href="https://github.com/dwarvesf/llm-hosting"><img src="https://avatars.githubusercontent.com/u/10388449" alt="Logo Dwarves Foundation" width="50"/></a>
</p>

### Latest News 🔥
Expand Down Expand Up @@ -477,6 +479,7 @@ https://github.com/michaelfeil/infinity/tree/main/libs/client_infinity/infinity_
- [gpt_server - gpt_server is an open-source framework designed for production-level deployment of LLMs (Large Language Models) or Embeddings.](https://github.com/shell-nlp/gpt_server)
- [KubeAI: Kubernetes AI Operator for inferencing](https://github.com/substratusai/kubeai)
- [LangChain](https://python.langchain.com/docs/integrations/text_embedding/infinity)
- [Batched, a modification of the batching algorithm in Infinity](https://github.com/mixedbread-ai/batched)

## Documentation
View the docs at [https://michaelfeil.github.io/infinity](https://michaelfeil.github.io/infinity) on how to get started.
Expand Down
13 changes: 5 additions & 8 deletions docs/benchmarks/simple_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
from fastapi import FastAPI, responses

from infinity_emb.fastapi_schemas.pymodels import (
OpenAIEmbeddingInput,
MultiModalOpenAIEmbedding,
OpenAIEmbeddingResult,
)
from embed import BatchedInference
from infinity_emb.fastapi_schemas.convert import list_embeddings_to_response
from infinity_emb import AsyncEmbeddingEngine, EngineArgs
import asyncio
import numpy as np
Expand Down Expand Up @@ -100,7 +99,7 @@ async def lifespan(app: FastAPI):
response_model=OpenAIEmbeddingResult,
response_class=responses.ORJSONResponse,
)
async def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
async def embed(request: MultiModalOpenAIEmbedding) -> OpenAIEmbeddingResult:
"""the goal of this code is to write an as simple as possible server
that can we rebuild by any other p
"""
Expand All @@ -109,17 +108,16 @@ async def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:

encoded = await encode_infinity(sentences)
# response parsing
response = list_embeddings_to_response(
return OpenAIEmbeddingResult.to_embeddings_response(
encoded, MODEL_NAME, sum(len(t) for t in sentences)
)
return OpenAIEmbeddingResult(**response)
else:
@app.post(
"/embeddings",
response_model=OpenAIEmbeddingResult,
response_class=responses.ORJSONResponse,
)
def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
def embed(request: MultiModalOpenAIEmbedding) -> OpenAIEmbeddingResult:
"""the goal of this code is to write an as simple as possible server
that can we rebuild by any other p
"""
Expand All @@ -134,10 +132,9 @@ def embed(request: OpenAIEmbeddingInput) -> OpenAIEmbeddingResult:
encoded = encode_sentence_transformer(sentences)

# response parsing
response = list_embeddings_to_response(
return OpenAIEmbeddingResult.to_embeddings_response(
encoded, MODEL_NAME, sum(len(t) for t in sentences)
)
return OpenAIEmbeddingResult(**response)

if __name__ == "__main__":
import uvicorn
Expand Down
4 changes: 2 additions & 2 deletions libs/embed_package/embed/_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def embed(
def image_embed(
self,
*,
images: list[str],
images: list[Union[str, bytes]],
model_id: ModelIndex = 0,
) -> Future[tuple[list[list[float]], int]]:
"""Embed images with a model.
Expand All @@ -105,7 +105,7 @@ def image_embed(
def audio_embed(
self,
*,
audios: list[str],
audios: list[Union[str, bytes]],
model_id: ModelIndex = 0,
) -> Future[tuple[list[list[float]], int]]:
"""Embed audios with a model.
Expand Down
8 changes: 0 additions & 8 deletions libs/infinity_emb/infinity_emb/fastapi_schemas/convert.py

This file was deleted.

89 changes: 25 additions & 64 deletions libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2023-now michaelfeil
# IMPORT of this file requires pydantic 2.x

from __future__ import annotations

Expand All @@ -10,6 +11,29 @@

import numpy as np


from infinity_emb._optional_imports import CHECK_PYDANTIC
from infinity_emb.primitives import EmbeddingEncodingFormat, Modality

CHECK_PYDANTIC.mark_required()
# pydantic 2.x is strictly needed starting v0.0.70
from pydantic import ( # noqa
BaseModel,
Discriminator,
Field,
RootModel,
Tag,
conlist,
)

from .data_uri import DataURI # noqa
from .pydantic_v2 import ( # noqa
INPUT_STRING,
ITEMS_LIMIT,
ITEMS_LIMIT_SMALL,
HttpUrl,
)

if TYPE_CHECKING:
from infinity_emb.args import EngineArgs
from infinity_emb.primitives import (
Expand All @@ -18,70 +42,7 @@
RerankReturnType,
)

from infinity_emb._optional_imports import CHECK_PYDANTIC
from infinity_emb.primitives import EmbeddingEncodingFormat, Modality

# potential backwards compatibility to pydantic 1.X
# pydantic 2.x is preferred by not strictly needed
if CHECK_PYDANTIC.is_available:
from pydantic import BaseModel, Field, conlist

try:
from pydantic import (
BaseModel,
Discriminator,
Field,
RootModel,
Tag,
)

from .data_uri import DataURI
from .pydantic_v2 import (
INPUT_STRING,
ITEMS_LIMIT,
ITEMS_LIMIT_SMALL,
HttpUrl,
)
except ImportError:
from pydantic import constr

INPUT_STRING = constr(max_length=8192 * 15, strip_whitespace=True) # type: ignore
ITEMS_LIMIT = {
"min_items": 1,
"max_items": 2048,
}
ITEMS_LIMIT_SMALL = {
"min_items": 1,
"max_items": 32,
}
HttpUrl = str # type: ignore
DataURI = str # type: ignore
DataURIorURL = Union[Annotated[DataURI, str], HttpUrl]

else:

class BaseModel: # type: ignore[no-redef]
pass

class RootModel: # type: ignore
pass

class Tag: # type: ignore
pass

class HttpUrl: # type: ignore
pass

class DataURI: # type: ignore
pass

def Field(*args, **kwargs): # type: ignore
pass

def conlist(): # type: ignore
pass

DataURIorURL = None # type: ignore
DataURIorURL = Union[Annotated[DataURI, str], HttpUrl]


class _Usage(BaseModel):
Expand Down
Loading

0 comments on commit 8ac0b3c

Please sign in to comment.