Skip to content

Commit

Permalink
Add trustyai drift test (#24)
Browse files Browse the repository at this point in the history
* Add tenacity library and update openshift-python-wrapper

* Add test for drift metrics and related functions and fixtures

* Add constants.py for trustyai/

* Add common TrustyAI fixtures

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add types-requests dependency

* Fix pre-commit issues

* Add missing type hints

* Add types-requests to .pre-commit

* Fix formatting

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix mypy issue

* Fix mypy issues

* Minor improvements in different functions

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Filter modelmesh pods by label instead of by name

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add basic test for lm-eval (#35)

* update dockerfile

* Lock file maintenance (#36)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>

* Add tenacity library and update openshift-python-wrapper

* Change name of TrustyAIService fixture

* Add create_ns function and fixture to create ns with modelmesh enabled

* Make sure user workload monitoring configmaps have correct data

* Make sure user workload monitoring configmaps have correct data

* Replace logging with simple_logger

* Add name to get_logger

* Fix some errors

* Use name variable instead of literal

* Remove constant used only once

* Rename function to create_ocp_token

* Fix typing inconsistencies

* Move trustyai route outside of util function

* Remove namespace variable used only once

* Move inference route outside of util function

* Log inference request response as error

* Add TODO to move mlserver image to a better place

* Add contextmanager to update_configmap_data

* Encapsulate TrustyAIService requests functions in a class

* Improve logic to handle http requests for trustyai service

* Add tenacity library and update openshift-python-wrapper

* Add types-requests dependency

* Add types-requests to .pre-commit

* Fix formatting

* Lock file maintenance (#36)

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>

* Reuse MODELMESH_SERVING constant

* Fix typing on labels arg of create_ns

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: rnetser <[email protected]>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
  • Loading branch information
4 people authored Nov 22, 2024
1 parent b62eeb4 commit 828571c
Show file tree
Hide file tree
Showing 13 changed files with 712 additions and 42 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,5 @@ repos:
rev: v1.13.0
hooks:
- id: mypy
additional_dependencies: ["types-PyYAML"]
additional_dependencies: ["types-PyYAML", "types-requests"]
exclude: ^(docs/|.*/test_.*|.*conftest.py)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ dependencies = [
"pytest-progress",
"python-simple-logger",
"pyyaml",
"tenacity",
"types-requests>=2.32.0.20241016",
]

[project.urls]
Expand Down
8 changes: 3 additions & 5 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from ocp_resources.namespace import Namespace
from ocp_resources.resource import get_client

from tests.utils import create_ns


@pytest.fixture(scope="session")
def admin_client() -> DynamicClient:
Expand All @@ -11,9 +13,5 @@ def admin_client() -> DynamicClient:

@pytest.fixture(scope="class")
def model_namespace(request, admin_client: DynamicClient) -> Namespace:
with Namespace(
client=admin_client,
name=request.param["name"],
) as ns:
ns.wait_for_status(status=Namespace.Status.ACTIVE, timeout=120)
with create_ns(client=admin_client, name=request.param["name"]) as ns:
yield ns
105 changes: 99 additions & 6 deletions tests/trustyai/conftest.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,111 @@
import subprocess

import pytest
import yaml
from kubernetes.dynamic import DynamicClient
from ocp_resources.config_map import ConfigMap
from ocp_resources.deployment import Deployment
from ocp_resources.namespace import Namespace
from ocp_resources.pod import Pod
from ocp_resources.secret import Secret
from ocp_resources.service import Service
from ocp_resources.service_account import ServiceAccount
from ocp_resources.trustyai_service import TrustyAIService

from tests.trustyai.constants import TRUSTYAI_SERVICE, MODELMESH_SERVING
from tests.trustyai.utils import update_configmap_data
from tests.utils import create_ns

MINIO: str = "minio"
OPENDATAHUB_IO: str = "opendatahub.io"


@pytest.fixture(scope="class")
def minio_pod(admin_client: DynamicClient, model_namespace: Namespace) -> Pod:
def trustyai_service_with_pvc_storage(
    admin_client: DynamicClient,
    ns_with_modelmesh_enabled: Namespace,
    modelmesh_serviceaccount: ServiceAccount,
    cluster_monitoring_config: ConfigMap,
    user_workload_monitoring_config: ConfigMap,
) -> TrustyAIService:
    """Yield a TrustyAIService configured with PVC storage.

    The service is created in the modelmesh-enabled namespace. Before it is
    yielded, the fixture waits for the Deployment named ``TRUSTYAI_SERVICE``
    in that namespace to exist and to have its replicas.

    ``modelmesh_serviceaccount``, ``cluster_monitoring_config`` and
    ``user_workload_monitoring_config`` are not used in the body; they are
    requested only so that pytest sets them up before this fixture.
    """
    with TrustyAIService(
        client=admin_client,
        name=TRUSTYAI_SERVICE,
        namespace=ns_with_modelmesh_enabled.name,
        storage={"format": "PVC", "folder": "/inputs", "size": "1Gi"},
        data={"filename": "data.csv", "format": "CSV"},
        metrics={"schedule": "5s"},
    ) as trustyai_service:
        # Pass the admin client explicitly, like every other resource built in
        # these fixtures, instead of relying on an implicitly created client.
        trustyai_deployment = Deployment(
            client=admin_client,
            namespace=ns_with_modelmesh_enabled.name,
            name=TRUSTYAI_SERVICE,
            wait_for_resource=True,
        )
        trustyai_deployment.wait_for_replicas()
        yield trustyai_service


@pytest.fixture(scope="class")
def ns_with_modelmesh_enabled(request, admin_client: DynamicClient):
    """Yield a namespace created with the ``modelmesh-enabled: "true"`` label.

    The namespace name comes from the fixture parameter
    (``request.param["name"]``), so the fixture must be parametrized.
    Creation and cleanup are delegated to the ``create_ns`` context manager.
    """
    modelmesh_labels = {"modelmesh-enabled": "true"}
    namespace_name = request.param["name"]
    with create_ns(client=admin_client, name=namespace_name, labels=modelmesh_labels) as modelmesh_ns:
        yield modelmesh_ns


@pytest.fixture(scope="class")
def openshift_token(ns_with_modelmesh_enabled):
    """Return the stripped, decoded output of ``oc whoami -t``.

    The namespace name is passed to ``oc`` as a trailing positional argument.
    NOTE(review): confirm that ``oc whoami`` actually uses that argument.

    Raises:
        subprocess.CalledProcessError: if ``oc`` exits with a non-zero status.
    """
    whoami_cmd = ["oc", "whoami", "-t", ns_with_modelmesh_enabled.name]
    raw_output = subprocess.check_output(whoami_cmd)
    return raw_output.decode().strip()


@pytest.fixture(scope="class")
def modelmesh_serviceaccount(admin_client: DynamicClient, ns_with_modelmesh_enabled: Namespace) -> ServiceAccount:
    """Yield a ServiceAccount named ``<MODELMESH_SERVING>-sa`` in the modelmesh-enabled namespace."""
    sa_name = f"{MODELMESH_SERVING}-sa"
    target_namespace = ns_with_modelmesh_enabled.name
    with ServiceAccount(client=admin_client, name=sa_name, namespace=target_namespace) as service_account:
        yield service_account


@pytest.fixture(scope="session")
def cluster_monitoring_config(admin_client: DynamicClient) -> ConfigMap:
    """Yield the ``cluster-monitoring-config`` ConfigMap holding the test data.

    If the ConfigMap already exists in ``openshift-monitoring``, its data is
    updated through ``update_configmap_data`` for the duration of the session.
    Otherwise the ConfigMap is created with that data.

    Exactly one of the two branches runs. Without the ``else``, teardown of
    the "already exists" branch would fall through to the creation branch,
    try to create a ConfigMap that is already there, and yield a second time,
    which pytest rejects for a yield-fixture.
    """
    name = "cluster-monitoring-config"
    namespace = "openshift-monitoring"
    data = {"config.yaml": yaml.dump({"enableUserWorkload": "true"})}
    cm = ConfigMap(client=admin_client, name=name, namespace=namespace)
    if cm.exists:
        # The resource is often already present (e.g. after exploratory
        # testing); update it in place instead of failing on creation.
        with update_configmap_data(configmap=cm, data=data) as updated_cm:
            yield updated_cm
    else:
        with ConfigMap(
            client=admin_client,
            name=name,
            namespace=namespace,
            data=data,
        ) as created_cm:
            yield created_cm


@pytest.fixture(scope="session")
def user_workload_monitoring_config(admin_client: DynamicClient) -> ConfigMap:
    """Yield the ``user-workload-monitoring-config`` ConfigMap holding the test data.

    If the ConfigMap already exists in ``openshift-user-workload-monitoring``,
    its data is updated through ``update_configmap_data`` for the duration of
    the session. Otherwise the ConfigMap is created with that data.

    Exactly one of the two branches runs. Without the ``else``, teardown of
    the "already exists" branch would fall through to the creation branch,
    try to create a ConfigMap that is already there, and yield a second time,
    which pytest rejects for a yield-fixture.
    """
    name = "user-workload-monitoring-config"
    namespace = "openshift-user-workload-monitoring"
    data = {"config.yaml": yaml.dump({"prometheus": {"logLevel": "debug", "retention": "15d"}})}
    cm = ConfigMap(client=admin_client, name=name, namespace=namespace)
    if cm.exists:
        # The resource is often already present (e.g. after exploratory
        # testing); update it in place instead of failing on creation.
        with update_configmap_data(configmap=cm, data=data) as updated_cm:
            yield updated_cm
    else:
        with ConfigMap(
            client=admin_client,
            name=name,
            namespace=namespace,
            data=data,
        ) as created_cm:
            yield created_cm


@pytest.fixture(scope="class")
def minio_pod(admin_client: DynamicClient, ns_with_modelmesh_enabled: Namespace) -> Pod:
with Pod(
client=admin_client,
name=MINIO,
namespace=model_namespace.name,
namespace=ns_with_modelmesh_enabled.name,
containers=[
{
"args": [
Expand Down Expand Up @@ -43,10 +134,11 @@ def minio_pod(admin_client: DynamicClient, model_namespace: Namespace) -> Pod:


@pytest.fixture(scope="class")
def minio_service(admin_client: DynamicClient, model_namespace: Namespace) -> Service:
def minio_service(admin_client: DynamicClient, ns_with_modelmesh_enabled: Namespace) -> Service:
with Service(
client=admin_client,
name=MINIO,
namespace=model_namespace.name,
namespace=ns_with_modelmesh_enabled.name,
ports=[
{
"name": "minio-client-port",
Expand All @@ -64,11 +156,12 @@ def minio_service(admin_client: DynamicClient, model_namespace: Namespace) -> Se

@pytest.fixture(scope="class")
def minio_data_connection(
admin_client: DynamicClient, model_namespace: Namespace, minio_pod: Pod, minio_service: Service
admin_client: DynamicClient, ns_with_modelmesh_enabled: Namespace, minio_pod: Pod, minio_service: Service
) -> Secret:
with Secret(
client=admin_client,
name="aws-connection-minio-data-connection",
namespace=model_namespace.name,
namespace=ns_with_modelmesh_enabled.name,
data_dict={
"AWS_ACCESS_KEY_ID": "VEhFQUNDRVNTS0VZ",
"AWS_DEFAULT_REGION": "dXMtc291dGg=",
Expand Down
5 changes: 5 additions & 0 deletions tests/trustyai/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Timeout values; going by the names, the unit is seconds (1 min == 60).
TIMEOUT_1MIN: int = 60
TIMEOUT_5MIN: int = 5 * TIMEOUT_1MIN

# Name used for the TrustyAIService resource and its Deployment.
TRUSTYAI_SERVICE: str = "trustyai-service"
# Prefix used to build modelmesh-related resource names (service account, runtime deployment).
MODELMESH_SERVING: str = "modelmesh-serving"
98 changes: 98 additions & 0 deletions tests/trustyai/drift/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import pytest
from kubernetes.dynamic import DynamicClient
from ocp_resources.deployment import Deployment
from ocp_resources.inference_service import InferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.serving_runtime import ServingRuntime
from ocp_resources.trustyai_service import TrustyAIService

from tests.trustyai.constants import MODELMESH_SERVING
from tests.trustyai.drift.utils import wait_for_modelmesh_pods_registered_by_trustyai

# Container name and built-in adapter server type of the MLServer serving runtime.
MLSERVER: str = "mlserver"
# Name given to the ServingRuntime resource.
MLSERVER_RUNTIME_NAME: str = f"{MLSERVER}-1.x"
# Container image for the runtime, pinned by digest.
MLSERVER_QUAY_IMAGE: str = "quay.io/aaguirre/mlserver@sha256:8884d989b3063a47bf0e6c20c1c0ff253662121a977fe5b74b54e682839360d4" # TODO: Move this image to a better place
# Model format names referenced by the runtime and the inference service.
XGBOOST: str = "xgboost"
SKLEARN: str = "sklearn"


@pytest.fixture(scope="class")
def mlserver_runtime(
    admin_client: DynamicClient, minio_data_connection: Secret, ns_with_modelmesh_enabled: Namespace
) -> ServingRuntime:
    """Yield a multi-model MLServer ServingRuntime in the modelmesh-enabled namespace.

    ``minio_data_connection`` is not used in the body; it is requested only so
    that pytest sets it up before this fixture.
    """
    # Every supported format is declared with autoselect enabled.
    model_formats = [
        {"name": format_name, "version": format_version, "autoselect": "true"}
        for format_name, format_version in ((SKLEARN, "0"), (XGBOOST, "1"), ("lightgbm", "3"))
    ]

    env_vars = [
        {"name": env_name, "value": env_value}
        for env_name, env_value in (
            ("MLSERVER_MODELS_DIR", "/models/_mlserver_models/"),
            ("MLSERVER_GRPC_PORT", "8001"),
            ("MLSERVER_HTTP_PORT", "8002"),
            ("MLSERVER_LOAD_MODELS_AT_STARTUP", "false"),
            ("MLSERVER_MODEL_NAME", "dummy-model-fixme"),
            ("MLSERVER_HOST", "127.0.0.1"),
            ("MLSERVER_GRPC_MAX_MESSAGE_LENGTH", "-1"),
        )
    ]

    mlserver_container = {
        "name": MLSERVER,
        "image": MLSERVER_QUAY_IMAGE,
        "env": env_vars,
        "resources": {
            "requests": {"cpu": "500m", "memory": "1Gi"},
            "limits": {"cpu": "5", "memory": "1Gi"},
        },
    }

    adapter_settings = {
        "serverType": MLSERVER,
        "runtimeManagementPort": 8001,
        "memBufferBytes": 134217728,
        "modelLoadingTimeoutMillis": 90000,
    }

    with ServingRuntime(
        client=admin_client,
        name=MLSERVER_RUNTIME_NAME,
        namespace=ns_with_modelmesh_enabled.name,
        containers=[mlserver_container],
        supported_model_formats=model_formats,
        multi_model=True,
        protocol_versions=["grpc-v2"],
        grpc_endpoint="port:8085",
        grpc_data_endpoint="port:8001",
        built_in_adapter=adapter_settings,
        annotations={"enable-route": "true"},
        label={"name": f"{MODELMESH_SERVING}-{MLSERVER_RUNTIME_NAME}-SR"},
    ) as serving_runtime:
        yield serving_runtime


@pytest.fixture(scope="class")
def gaussian_credit_model(
    admin_client: DynamicClient,
    ns_with_modelmesh_enabled: Namespace,
    minio_data_connection: Secret,
    mlserver_runtime: ServingRuntime,
    trustyai_service_with_pvc_storage: TrustyAIService,
) -> InferenceService:
    """Yield the ``gaussian-credit-model`` InferenceService deployed in ModelMesh mode.

    Before yielding, the fixture waits for the runtime's modelmesh Deployment
    to have its replicas and then calls
    ``wait_for_modelmesh_pods_registered_by_trustyai`` for the namespace.

    ``trustyai_service_with_pvc_storage`` is not used in the body; it is
    requested only so that pytest sets it up before this fixture.
    """
    model_name = "gaussian-credit-model"
    target_namespace = ns_with_modelmesh_enabled.name

    # The storage path uses the model name with dashes replaced by underscores.
    model_path = f"{SKLEARN}/{model_name.replace('-', '_')}.json"
    predictor_spec = {
        "model": {
            "modelFormat": {"name": XGBOOST},
            "runtime": mlserver_runtime.name,
            "storage": {"key": minio_data_connection.name, "path": model_path},
        }
    }
    deployment_mode_annotation = {f"{InferenceService.ApiGroup.SERVING_KSERVE_IO}/deploymentMode": "ModelMesh"}

    with InferenceService(
        client=admin_client,
        name=model_name,
        namespace=target_namespace,
        predictor=predictor_spec,
        annotations=deployment_mode_annotation,
    ) as isvc:
        runtime_deployment = Deployment(
            client=admin_client,
            namespace=target_namespace,
            name=f"{MODELMESH_SERVING}-{mlserver_runtime.name}",
            wait_for_resource=True,
        )
        runtime_deployment.wait_for_replicas()
        wait_for_modelmesh_pods_registered_by_trustyai(client=admin_client, namespace=target_namespace)
        yield isvc
41 changes: 41 additions & 0 deletions tests/trustyai/drift/model_data/data_batches/0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"inputs": [
{
"name": "credit_inputs",
"shape": [5, 4],
"datatype": "FP64",
"data": [
[
50.76899726547295,
491.57388556551217,
11.464223840747968,
28.29901088554935
],
[
47.80397343448882,
515.4934209257955,
13.060710933476372,
23.710220802886678
],
[
56.731646594370886,
441.00792486531225,
10.616678496549381,
19.040822238191925
],
[
38.35835814888362,
540.3557103621482,
11.206492946288046,
15.561855179575819
],
[
47.422935471130046,
529.5980108579066,
10.625654599760802,
16.264208531245814
]
]
}
]
}
44 changes: 44 additions & 0 deletions tests/trustyai/drift/model_data/data_batches/5.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"inputs": [
{
"name": "credit_inputs",
"shape": [
5,
4
],
"datatype": "FP64",
"data": [
[
47.10141440274886,
511.93953118897673,
10.30958871700256,
28.462273005138734
],
[
55.018917461242545,
529.5655000531016,
10.657507738326363,
20.254038773880144
],
[
43.36735715813185,
459.45376201509356,
11.974670802162198,
16.815021767153233
],
[
48.20533085890888,
484.26866990656447,
9.765379302729444,
20.95457742333733
],
[
44.52555863555142,
448.30189185655723,
12.468831395634185,
30.501275682394212
]
]
}
]
}
Loading

0 comments on commit 828571c

Please sign in to comment.