✨Deepsparse Backend implementation #29

Open
wants to merge 24 commits into base: main
Changes from 4 commits

Commits (24)
c3abc8d  WIP (Aug 22, 2024)
a6d9a05  ✅ Tests are fixed (Aug 22, 2024)
d116c0c  📌 deepsparse is added to dependencies (Aug 22, 2024)
c000dbf  ✨ deepsparse backend integration is added (Aug 22, 2024)
52e1d3b  deepsparse package limitations are applied (Aug 26, 2024)
7218795  ⚰️ removed `pytest.mark.asyncio()` due to pytest-asyncio module (Aug 27, 2024)
a5357ca  📝 fixed class example (Aug 27, 2024)
68381a5  🧵 rollback `pytest.mark.asyncio` fixtures (Aug 28, 2024)
5acb3a8  ✨ Deepsparse Backend integration first implementation (Aug 28, 2024)
45e07d0  code quality is provided (Aug 28, 2024)
1753469  Merge branch 'main' into parfeniukink/features/deepsparse-backend (Aug 28, 2024)
1f1e038  fit Deepsparse Backend to work with new Backend abstraction (Aug 28, 2024)
ce1c3ba  🔧 `GUIDELLM__LLM_MODEL` shared across all the backends (Aug 29, 2024)
8e88bae  Test emulated data source constant -> settings value (Aug 29, 2024)
75e708b  💄 mdformat is happy (Aug 29, 2024)
3c03961  Merge branch 'main' into parfeniukink/features/deepsparse-backend (Aug 29, 2024)
913253f  ✅ Tests are fixed according to a new Backend base implementation (Aug 29, 2024)
e376ed9  🔨 tox tests include `deepsparse` dependency (Aug 30, 2024)
3a2c6c1  🏷️ Type annotations are added (Aug 30, 2024)
74a6dfd  🐛 Assert with config values instead of constants (Aug 30, 2024)
1a53951  📌 .[deepsparse] dependency is skipped if Python>3.11 (Aug 30, 2024)
39ffcb3  🚚 DeepsparseBackend is moved to a another module (Aug 30, 2024)
29e38e4  ✅ Deepsparse tests are ignored if Python>=3.12 (Aug 30, 2024)
4b3b4b5  💚 Linters are happy (Aug 30, 2024)
4 changes: 3 additions & 1 deletion DEVELOPING.md
@@ -254,12 +254,14 @@ The project configuration entrypoint is represented by lazy-loaded `settigns` si
The project is fully configurable with environment variables. All the default values and

```py
class NestedIntoLogging(BaseModel):
class Nested(BaseModel):
nested: str = "default value"

class LoggingSettings(BaseModel):
# ...

disabled: bool = False
nested: Nested = Nested()


class Settings(BaseSettings):
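For reference, a minimal runnable sketch of how such nested settings map onto environment variables; it assumes pydantic-settings with `env_prefix="GUIDELLM__"` and `env_nested_delimiter="__"`, which the `GUIDELLM__LOGGING__DISABLED`-style variables used elsewhere in this PR suggest:

```py
# Hypothetical sketch (not part of the diff): overriding a nested setting via an
# environment variable, assuming pydantic-settings with env_prefix="GUIDELLM__"
# and env_nested_delimiter="__".
import os

from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class Nested(BaseModel):
    nested: str = "default value"


class LoggingSettings(BaseModel):
    disabled: bool = False
    nested: Nested = Nested()


class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix="GUIDELLM__", env_nested_delimiter="__"
    )
    logging: LoggingSettings = LoggingSettings()


os.environ["GUIDELLM__LOGGING__NESTED__NESTED"] = "overridden"
print(Settings().logging.nested.nested)  # -> "overridden"
```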
2 changes: 1 addition & 1 deletion src/guidellm/backend/__init__.py
@@ -1,5 +1,5 @@
from .base import Backend, BackendEngine, GenerativeResponse
from .deepsparse import DeepsparseBackend
from .deepsparse.backend import DeepsparseBackend
from .openai import OpenAIBackend

__all__ = [
4 changes: 0 additions & 4 deletions src/guidellm/backend/deepsparse/__init__.py
@@ -22,7 +22,3 @@
"Please try `pip install -e '.[deepsparse]'`"
),
)

from .backend import DeepsparseBackend

__all__ = ["DeepsparseBackend"]
59 changes: 46 additions & 13 deletions src/guidellm/backend/deepsparse/backend.py
@@ -1,10 +1,12 @@
from typing import Any, AsyncGenerator, List, Optional
import os
from typing import Any, AsyncGenerator, Dict, List, Optional

from deepsparse import Pipeline
from deepsparse import Pipeline, TextGeneration
mgoin (Member) commented:

Does this mean deepsparse is a dep of guidellm? We should keep it optional at most IMO, so could this be in a try catch with an informational message to install?

parfeniukink (Contributor, Author) replied:

@mgoin Could you please check this file? Is this the kind of validation you are talking about?

parfeniukink (Contributor, Author) replied:

Since `backend/deepsparse/__init__.py` exists, the directory is treated as a module, so this validation runs before you reach `deepsparse/backend.py`.

Also, the usual imports look like `from guidellm.backend import Backend, BackendEngine`, but if you need deepsparse, `from guidellm.backend.deepsparse import DeepsparseBackend` is what triggers this validation.

Also, deepsparse is an optional dependency; at least it is listed in the optional extras section.
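For context, a minimal sketch of what that guard in `src/guidellm/backend/deepsparse/__init__.py` could look like; the install hint is taken from the diff above, while the exact check, exception type, and Python version bound are assumptions:

```py
# Hypothetical sketch of the optional-import guard in
# src/guidellm/backend/deepsparse/__init__.py. The install hint comes from the
# diff above; the version bound and exception type are assumptions.
import sys
from importlib.util import find_spec

if sys.version_info >= (3, 12) or find_spec("deepsparse") is None:
    raise ModuleNotFoundError(
        "The deepsparse package is not available in this environment. "
        "Please try `pip install -e '.[deepsparse]'`"
    )
```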

from loguru import logger
from transformers import AutoTokenizer

from guidellm.backend import Backend, GenerativeResponse
from guidellm.config import settings
from guidellm.core import TextGenerationRequest


@@ -15,11 +17,30 @@ class DeepsparseBackend(Backend):
"""

def __init__(self, model: Optional[str] = None, **request_args):
self.request_args = request_args
self.pipeline: Pipeline = Pipeline.create(
task="sentiment-analysis",
model_path=model or self.default_model,
)
self._request_args: Dict[str, Any] = request_args
self.model: str = self._get_model(model)
self.pipeline: Pipeline = TextGeneration(model=self.model)

def _get_model(self, model_from_cli: Optional[str] = None) -> str:
"""Provides the model by the next priority list:
1. from function argument (comes from CLI)
1. from environment variable
2. `self.default_model` from `self.available_models`
"""

if model_from_cli is not None:
return model_from_cli
elif settings.deepsprase.model is not None:
logger.info(
"Using Deepsparse model from environment variable: {}".format(
settings.deepsprase.model
)
)
return settings.deepsprase.model

else:
logger.info(f"Using default Deepsparse model: {self.default_model}")
return self.default_model

async def make_request(
self, request: TextGenerationRequest
@@ -38,8 +59,23 @@ async def make_request(
)

token_count = 0
for response in self.pipeline.generations:
if not (token := response.text):
request_args = {
**self._request_args,
"streaming": True,
"max_new_tokens": request.output_token_count,
}

if not (output := self.pipeline(prompt=request.prompt, **request_args)):
yield GenerativeResponse(
type_="final",
prompt=request.prompt,
prompt_token_count=request.prompt_token_count,
output_token_count=token_count,
)
return

for generation in output.generations:
if not (token := generation.text):
yield GenerativeResponse(
type_="final",
prompt=request.prompt,
@@ -66,10 +102,7 @@ def available_models(self) -> List[str]:
"""

# WARNING: The default model from the documentation is defined here

return [
"zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none"
]
return ["hf:mgoin/TinyStories-33M-quant-deepsparse"]

def model_tokenizer(self, model: str) -> Optional[Any]:
"""
3 changes: 2 additions & 1 deletion src/guidellm/config.py
@@ -112,7 +112,7 @@ class DeepsparseSettings(BaseModel):
Deepsparse settings for the Python API library
"""

model: str = "zoo:mpt-7b-dolly_mpt_pretrain-pruned50_quantized"
model: Optional[str] = None


class ReportGenerationSettings(BaseModel):
@@ -135,6 +135,7 @@ class Settings(BaseSettings):
```sh
export GUIDELLM__LOGGING__DISABLED=true
export GUIDELLM__OPENAI__API_KEY=******
export GUIDELLM__DEEPSPARSE__MODEL=******
```
"""

2 changes: 2 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -9,6 +9,7 @@
load_text,
load_text_lines,
parse_text_objects,
random_strings,
split_lines_by_punctuation,
split_text,
)
@@ -30,6 +31,7 @@
"load_text",
"load_text_lines",
"load_transformers_dataset",
"random_strings",
"parse_text_objects",
"resolve_transformers_dataset",
"resolve_transformers_dataset_column",
32 changes: 31 additions & 1 deletion src/guidellm/utils/text.py
@@ -1,8 +1,10 @@
import csv
import json
import random
import re
import string
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Generator, List, Optional, Tuple, Union
from urllib.parse import urlparse

import ftfy
Expand All @@ -23,6 +25,7 @@
"parse_text_objects",
"split_lines_by_punctuation",
"split_text",
"random_strings",
]


@@ -453,3 +456,30 @@ def load_text_lines(

# extract the lines from the data
return [row[filter_] for row in data] if filter_ else [str(row) for row in data]


def random_strings(
min: int, max: int, n: int = 0, dataset: Optional[str] = None
) -> Generator[str, None, None]:
"""Yield random strings.

:param min: the minimum number of output characters
:param max: the maximum number of output characters
:param n: the number of outputs; if `0`, yield indefinitely
:param dataset: the set of characters allowed in the output
"""

characters: str = dataset or string.printable

if n < 0:
raise ValueError("'n' must be >= '0'")
elif n == 0:
while True:
yield "".join(
(random.choice(characters) for _ in range(random.randint(min, max)))
)
else:
for _ in range(n):
yield "".join(
(random.choice(characters) for _ in range(random.randint(min, max)))
)
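A quick usage sketch for the helper above (illustrative values only):

```py
# Illustrative usage: three random strings, each 5-10 printable characters long.
from guidellm.utils import random_strings

print(list(random_strings(min=5, max=10, n=3)))
```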
155 changes: 155 additions & 0 deletions tests/unit/backend/test_deepsparse_backend.py
@@ -0,0 +1,155 @@
from typing import Any, Dict, Generator, Optional, Type

import pytest
from pydantic import BaseModel

from guidellm.backend import Backend, DeepsparseBackend
from guidellm.config import reload_settings
from guidellm.core import TextGenerationRequest
from guidellm.utils import random_strings


class TestDeepsparseTextGeneration(BaseModel):
"""The representation of a deepsparse data structure."""

text: str


class TestTextGenerationPipeline:
"""Deepsparse TextGeneration test interface.

By default this class generates 10 text responses.

This class includes additional development information
for a better testing experience.

The `__call__` method mocks the result object that comes from
`deepsparse.pipeline.Pipeline()`, so everything is encapsulated right here.

:param self._generations: dynamic representation of the responses generated
by the deepsparse interface.
"""

def __init__(self):
self._generations: list[TestDeepsparseTextGeneration] = []
self._prompt: Optional[str] = None
self._max_new_tokens: Optional[int] = None

def __call__(
self, *_, prompt: str, max_new_tokens: Optional[int] = None, **kwargs
) -> Any:
"""Mocks the result from `deepsparse.pipeline.Pipeline()()`.
Set reserved request arguments on call
"""

self._prompt = prompt
self._max_new_tokens = max_new_tokens

return self

@property
def generations(self) -> Generator[TestDeepsparseTextGeneration, None, None]:
for text in random_strings(
min=10, max=50, n=self._max_new_tokens if self._max_new_tokens else 10
):
generation = TestDeepsparseTextGeneration(text=text)
self._generations.append(generation)
yield generation


@pytest.fixture(autouse=True)
def mock_deepsparse_pipeline(mocker):
return mocker.patch(
"deepsparse.Pipeline.create",
return_value=TestTextGenerationPipeline(),
)


@pytest.mark.smoke()
@pytest.mark.parametrize(
"create_payload",
[
{},
{"model": "test/custom_llm"},
],
)
def test_backend_creation(create_payload: Dict):
"""Test the "Deepspaarse Backend" class
with defaults and custom input parameters.
"""

backends: list[DeepsparseBackend] = [
Backend.create("deepsparse", **create_payload),
DeepsparseBackend(**create_payload),
]

for backend in backends:
assert getattr(backend, "pipeline")
assert getattr(backend, "model") == (
custom_model
if (custom_model := create_payload.get("model"))
else getattr(backend, "default_model")
)


@pytest.mark.smoke()
def test_backend_model_from_env(mocker):
mocker.patch.dict(
"os.environ",
{"GUIDELLM__DEEPSPRASE__MODEL": "test_backend_model_from_env"},
)

reload_settings()

backends: list[DeepsparseBackend] = [
Backend.create("deepsparse"),
DeepsparseBackend(),
]

for backend in backends:
assert getattr(backend, "model") == "test_backend_model_from_env"


@pytest.mark.smoke()
@pytest.mark.parametrize(
"text_generation_request_create_payload",
[
{"prompt": "Test prompt"},
{"prompt": "Test prompt", "output_token_count": 20},
],
)
@pytest.mark.asyncio()
async def test_make_request(text_generation_request_create_payload: Dict):
backend = DeepsparseBackend()

output_tokens: list[str] = []
async for response in backend.make_request(
request=TextGenerationRequest(**text_generation_request_create_payload)
):
if response.add_token:
output_tokens.append(response.add_token)
assert "".join(output_tokens) == "".join(
(generation.text for generation in backend.pipeline._generations)
)

if max_tokens := text_generation_request_create_payload.get("output_token_count"):
assert len(backend.pipeline._generations) == max_tokens


@pytest.mark.smoke()
@pytest.mark.parametrize(
"text_generation_request_create_payload,error",
[
({"prompt": "Test prompt", "output_token_count": -1}, ValueError),
],
)
@pytest.mark.asyncio()
async def test_make_request_invalid_request_payload(
text_generation_request_create_payload: Dict, error: Type[Exception]
):
backend = DeepsparseBackend()
with pytest.raises(error):
async for _ in backend.make_request(
request=TextGenerationRequest(**text_generation_request_create_payload)
):
pass
6 changes: 2 additions & 4 deletions tests/unit/executor/test_base.py
@@ -5,9 +5,7 @@

from guidellm.backend import Backend
from guidellm.config import settings
from guidellm.core import (
TextGenerationBenchmarkReport,
)
from guidellm.core import TextGenerationBenchmarkReport
from guidellm.executor.base import Executor, ExecutorResult
from guidellm.executor.profile_generator import ProfileGenerator
from guidellm.request import RequestGenerator
@@ -250,7 +248,6 @@ async def test_executor_run_throughput(mock_scheduler):


@pytest.mark.smoke()
@pytest.mark.asyncio()
@pytest.mark.parametrize(
("mode", "rate"),
[
@@ -260,6 +257,7 @@ async def test_executor_run_throughput(mock_scheduler):
("poisson", [10, 20, 30]),
],
)
@pytest.mark.asyncio()
async def test_executor_run_constant_poisson(mock_scheduler, mode, rate):
num_requests = 15

Expand Down