Skip to content

Commit

Permalink
fix(ansible): Upgrade deps in ansible install (#6146)
Browse files Browse the repository at this point in the history
* update to mlserver 1.6.1

* upgrade strimzi operator

* upgrade kafka to 3.8

* smoke test fixes

* stress test fixes

* remove kraft feature gate

* add kafka strimzi (raft only)

* upgrade prometheus

* upgrade otel collector to 0.114.1

* upgrade jaeger to 1.62.0

* upgrade grafana to 11.3.1

* install prometheus adapter 0.12.0

* remove/add extra line

* changes to kafka per review comment

* remove seldon from prometheus adapter name

* add hpa configmap
  • Loading branch information
sakoush authored Dec 13, 2024
1 parent 373df43 commit 9cb6b63
Show file tree
Hide file tree
Showing 23 changed files with 127 additions and 89 deletions.
4 changes: 2 additions & 2 deletions ansible/README.dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ custom_image_config:
- components:
- mlserver
image:
tag: 1.6.0
tag: 1.6.1
custom_servers_values:
mlserver:
Expand Down Expand Up @@ -346,7 +346,7 @@ custom_image_config:
- components:
- mlserver
image:
tag: 1.6.0
tag: 1.6.1
custom_components_values:
kafka:
Expand Down
4 changes: 2 additions & 2 deletions ansible/roles/grafana/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
grafana_chart_version: 8.4.1
grafana_app_version: 11.1.3
grafana_chart_version: 8.6.4
grafana_app_version: 11.3.1

grafana_preloaded_dashboards:
- name: mms
Expand Down
2 changes: 1 addition & 1 deletion ansible/roles/jaeger/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
jaeger_namespace: observability

jaeger_version: v1.53.0
jaeger_version: v1.62.0
jaeger_yaml: "https://github.com/jaegertracing/jaeger-operator/releases/download/{{ jaeger_version }}/jaeger-operator.yaml"

jaeger_wait_for_deployments: true
2 changes: 1 addition & 1 deletion ansible/roles/opentelemetry/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
opentelemetry_namespace: opentelemetry-operator-system

opentelemetry_version: v0.92.0
opentelemetry_version: v0.114.1
opentelemetry_yaml: "https://github.com/open-telemetry/opentelemetry-operator/releases/download/{{ opentelemetry_version }}/opentelemetry-operator.yaml"

opentelemetry_wait_for_deployments: true
6 changes: 5 additions & 1 deletion ansible/roles/prometheus/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
---
seldon_monitoring_namespace: "seldon-monitoring"
seldon_monitoring_prometheus_operator_values: "{{ lookup('file', 'prometheus-operator-values.yaml') | from_yaml }}"
seldon_monitoring_prometheus_operator_chart_version: "8.3.6"
seldon_monitoring_prometheus_adapter_values: "{{ lookup('file', 'prometheus-adapter-values.yaml') | from_yaml }}"
seldon_monitoring_prometheus_operator_chart_version: "10.0.4"
seldon_monitoring_prometheus_adapter_chart_version: "4.11.0"

seldon_prometheus_adapter_config_rate: "5m"
2 changes: 2 additions & 0 deletions ansible/roles/prometheus/files/prometheus-adapter-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
prometheus:
url: http://seldon-monitoring-prometheus
19 changes: 19 additions & 0 deletions ansible/roles/prometheus/tasks/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,22 @@
chart_ref: "kube-prometheus"
chart_version: "{{ seldon_monitoring_prometheus_operator_chart_version }}"
values: "{{ seldon_monitoring_prometheus_operator_values }}"


- name: Install Prometheus Adapter
kubernetes.core.helm:
name: prometheus-adapter
release_namespace: "{{ seldon_monitoring_namespace }}"
chart_repo_url: "https://prometheus-community.github.io/helm-charts"
chart_ref: "prometheus-adapter"
chart_version: "{{ seldon_monitoring_prometheus_adapter_chart_version }}"
values: "{{ seldon_monitoring_prometheus_adapter_values }}"

- name: Create Seldon HPA ConfigMap
kubernetes.core.k8s:
state: present
namespace: "{{ seldon_monitoring_namespace }}"
template: "templates/seldon-prometheus-adapter-configmap.j2"

- name: "Rollout Prometheus Adapter"
shell: "kubectl rollout restart deployment prometheus-adapter -n {{ seldon_monitoring_namespace }}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-adapter
data:
config.yaml: |-
"rules":
-
"seriesQuery": |
{__name__="seldon_model_infer_total",namespace!=""}
"resources":
"overrides":
"model": {group: "mlops.seldon.io", resource: "model"}
"server": {group: "mlops.seldon.io", resource: "server"}
"pod": {resource: "pod"}
"namespace": {resource: "namespace"}
"name":
"matches": "seldon_model_infer_total"
"as": "infer_rps"
"metricsQuery": |
sum by (<<.GroupBy>>) (
rate (
<<.Series>>{<<.LabelMatchers>>}[{{ seldon_prometheus_adapter_config_rate }}]
)
)
4 changes: 2 additions & 2 deletions ansible/roles/strimzi/defaults/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
strimzi_kafka_operator_namespace: "strimzi-system"
strimzi_kafka_operator_version: "0.33.2"
strimzi_kafka_operator_version: "0.44.0"
strimzi_kafka_operator_values: "{{ lookup('template', 'strimzi-operator-values.yaml.j2') | from_yaml }}"
strimzi_kafka_operator_wait_timeout: "300s"

strimzi_kafka_operator_feature_gates: "+UseKRaft,+UseStrimziPodSets"
strimzi_kafka_operator_feature_gates: ""
3 changes: 1 addition & 2 deletions docs-gb/kubernetes/kafka.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ You can enable `featureGates` during Helm installation via:
helm upgrade --install strimzi-kafka-operator \
strimzi/strimzi-kafka-operator \
--namespace seldon-mesh --create-namespace \
--set featureGates='+UseKRaft\,+UseStrimziPodSets'
```

{% hint style="warning" %}
Expand Down Expand Up @@ -89,5 +88,5 @@ ansible-playbook playbooks/setup-ecosystem.yaml -e full_install=no -e install_ka

## Notes
- You can check [kafka-examples](https://github.com/strimzi/strimzi-kafka-operator/tree/main/examples/kafka) for more details.
- As we are using [KRaft](https://kafka.apache.org/documentation/#kraft), use Kafka version 3.3 or above.
- As we are using [KRaft](https://kafka.apache.org/documentation/#kraft), use Kafka version 3.4 or above.
- For security settings check [here](../getting-started/kubernetes-installation/security.md#kafka).
2 changes: 1 addition & 1 deletion k8s/helm-charts/seldon-core-v2-setup/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ serverConfig:
pullPolicy: IfNotPresent
registry: docker.io
repository: seldonio/mlserver
tag: 1.6.0
tag: 1.6.1
serverCapabilities: "mlserver,alibi-detect,alibi-explain,huggingface,lightgbm,mlflow,python,sklearn,spark-mlib,xgboost"
modelVolumeStorage: 1Gi
resources:
Expand Down
2 changes: 1 addition & 1 deletion k8s/helm-charts/seldon-core-v2-setup/values.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ serverConfig:
pullPolicy: IfNotPresent
registry: docker.io
repository: seldonio/mlserver
tag: 1.6.0
tag: 1.6.1
serverCapabilities: "mlserver,alibi-detect,alibi-explain,huggingface,lightgbm,mlflow,python,sklearn,spark-mlib,xgboost"
modelVolumeStorage: 1Gi
resources:
Expand Down
2 changes: 1 addition & 1 deletion k8s/yaml/components.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ spec:
value: "false"
- name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH
value: "1048576000"
image: 'docker.io/seldonio/mlserver:1.6.0'
image: 'docker.io/seldonio/mlserver:1.6.1'
imagePullPolicy: 'IfNotPresent'
lifecycle:
preStop:
Expand Down
1 change: 0 additions & 1 deletion kafka/strimzi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ You can enable `featureGates` during Helm installation via:
helm upgrade --install strimzi-kafka-operator \
strimzi/strimzi-kafka-operator \
--namespace seldon-mesh --create-namespace \
--set featureGates='+UseKRaft\,+UseStrimziPodSets'
```

```{warning}
Expand Down
40 changes: 4 additions & 36 deletions kafka/strimzi/templates/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ apiVersion: kafka.strimzi.io/v1beta2
kind: Kafka
metadata:
name: {{ .Values.cluster.name }}
annotations:
strimzi.io/node-pools: enabled
strimzi.io/kraft: enabled
spec:
entityOperator:
userOperator: {}
kafka:
version: {{ .Values.cluster.version }}
metadataVersion: {{ .Values.cluster.metadataVersion }}
replicas: {{ .Values.broker.replicas }}
listeners:
{{- if .Values.broker.plaintext.enabled }}
Expand All @@ -29,53 +33,17 @@ spec:
livenessProbe:
initialDelaySeconds: {{ .Values.broker.liveness.initialDelaySeconds }}
timeoutSeconds: {{ .Values.broker.liveness.timeoutSeconds }}
resources:
requests:
cpu: '{{ .Values.broker.resources.cpu }}'
memory: '{{ .Values.broker.resources.memory }}'
limits:
memory: '{{ .Values.broker.resources.memory }}'
config:
auto.create.topics.enable: {{ .Values.topic.autoCreate }}
offsets.topic.replication.factor: {{ .Values.topic.offsetReplicationFactor }}
transaction.state.log.replication.factor: {{ .Values.topic.txStateReplicationFactor }}
transaction.state.log.min.isr: {{ .Values.topic.txStateMinISR }}
default.replication.factor: {{ .Values.topic.defaultReplicationFactor }}
min.insync.replicas: {{ .Values.topic.minISR }}
inter.broker.protocol.version: {{ .Values.broker.interBrokerProtocolVersion }}
message.max.bytes: {{ .Values.broker.messageMaxBytes }}
template:
pod:
tmpDirSizeLimit: {{ .Values.broker.tmpDirSizeLimit }}
storage:
type: jbod
volumes:
- id: 0
type: persistent-claim
size: {{ .Values.broker.pvcSize }}
deleteClaim: false
metricsConfig:
type: jmxPrometheusExporter
valueFrom:
configMapKeyRef:
name: kafka-metrics
key: kafka-metrics-config.yml
# zookeeper settings should not be used in case of STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft (raft)
# to enable raft run:
# `kubectl set env deployment/strimzi-cluster-operator STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft -n kafka`
# which is the default with ansible install
zookeeper:
replicas: {{ .Values.zookeeper.replicas }}
readinessProbe:
initialDelaySeconds: {{ .Values.zookeeper.readiness.initialDelaySeconds }}
timeoutSeconds: {{ .Values.zookeeper.readiness.timeoutSeconds }}
livenessProbe:
initialDelaySeconds: {{ .Values.zookeeper.liveness.initialDelaySeconds }}
timeoutSeconds: {{ .Values.zookeeper.liveness.timeoutSeconds }}
storage:
type: persistent-claim
size: {{ .Values.zookeeper.pvcSize }}
deleteClaim: false
kafkaExporter:
topicRegex: ".*"
groupRegex: ".*"
32 changes: 32 additions & 0 deletions kafka/strimzi/templates/pool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaNodePool
metadata:
name: kafka
labels:
strimzi.io/cluster: {{ .Values.cluster.name }}
spec:
replicas: {{ .Values.broker.replicas }}
roles:
- broker
- controller

resources:
requests:
cpu: '{{ .Values.broker.resources.cpu }}'
memory: '{{ .Values.broker.resources.memory }}'
limits:
memory: '{{ .Values.broker.resources.memory }}'
template:
pod:
tmpDirSizeLimit: {{ .Values.broker.tmpDirSizeLimit }}
storage:
type: jbod
volumes:
- id: 0
type: ephemeral
sizeLimit: {{ .Values.broker.kraftMetadataSizeLimit }}
kraftMetadata: shared
- id: 1
type: persistent-claim
size: {{ .Values.broker.pvcSize }}
deleteClaim: false
19 changes: 4 additions & 15 deletions kafka/strimzi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

cluster:
name: "seldon"
version: "3.3.1"
version: "3.8.0"
metadataVersion: "3.8.0"

metrics:
enabled: true
Expand Down Expand Up @@ -30,10 +31,10 @@ broker:
initialDelaySeconds: 15
timeoutSeconds: 5

interBrokerProtocolVersion: "3.3"

tmpDirSizeLimit: 100Mi

kraftMetadataSizeLimit: 1Gi

pvcSize: 100Gi

messageMaxBytes: 1000000000
Expand All @@ -50,15 +51,3 @@ topic:
defaultReplicationFactor: 1
minISR: 1

zookeeper:
replicas: 1

readiness:
initialDelaySeconds: 15
timeoutSeconds: 5

liveness:
initialDelaySeconds: 15
timeoutSeconds: 5

pvcSize: 100Gi
2 changes: 1 addition & 1 deletion operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ IMG ?= ${DOCKERHUB_USERNAME}/seldonv2-controller:${CUSTOM_IMAGE_TAG}
IMG_CLI ?= ${DOCKERHUB_USERNAME}/seldon-cli:${CUSTOM_IMAGE_TAG}
AGENT_IMG ?= ${DOCKERHUB_USERNAME}/seldon-agent:${CUSTOM_IMAGE_TAG}
RCLONE_IMG ?= ${DOCKERHUB_USERNAME}/seldon-rclone:${CUSTOM_IMAGE_TAG}
MLSERVER_IMG ?= seldonio/mlserver:1.6.0
MLSERVER_IMG ?= seldonio/mlserver:1.6.1
TRITON_IMG ?= nvcr.io/nvidia/tritonserver:23.03-py3
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.22
Expand Down
2 changes: 1 addition & 1 deletion operator/config/serverconfigs/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ images:
newTag: latest
- name: mlserver
newName: seldonio/mlserver
newTag: 1.6.0
newTag: 1.6.1
- name: rclone
newName: seldonio/seldon-rclone
newTag: latest
Expand Down
19 changes: 10 additions & 9 deletions samples/smoke-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ function load() {
else
if [ $1 == "model" ]
then
seldon model load -f $2
SELDON_FORCE_CONTROL_PLANE=true seldon model load -f $2
elif [ $1 == "pipeline" ]
then
seldon pipeline load -f $2
SELDON_FORCE_CONTROL_PLANE=true seldon pipeline load -f $2
elif [ $1 == "experiment" ]
then
seldon experiment start -f $2
SELDON_FORCE_CONTROL_PLANE=true seldon experiment start -f $2
fi
fi
}
Expand All @@ -47,13 +47,13 @@ function unload() {
else
if [ $1 == "model" ]
then
seldon model unload $2
SELDON_FORCE_CONTROL_PLANE=true seldon model unload $2
elif [ $1 == "pipeline" ]
then
seldon pipeline unload $2
SELDON_FORCE_CONTROL_PLANE=true seldon pipeline unload $2
elif [ $1 == "experiment" ]
then
seldon experiment stop $2
SELDON_FORCE_CONTROL_PLANE=true seldon experiment stop $2
fi
fi
}
Expand All @@ -74,10 +74,10 @@ function status() {
else
if [ $1 == "model" ]
then
seldon model status $2 -w ModelAvailable | jq -M .
seldon model status $2 -w ModelAvailable -t 10 | jq -M .
elif [ $1 == "pipeline" ]
then
seldon pipeline status $2 -w PipelineReady | jq -M .
seldon pipeline status $2 -w PipelineReady -t 10 | jq -M .
elif [ $1 == "experiment" ]
then
seldon experiment status $2 -w | jq -M .
Expand Down Expand Up @@ -189,16 +189,17 @@ unload pipeline trigger-joins ./pipelines/trigger-joins.yaml
unload model mul10 ./models/mul10.yaml
unload model add10 ./models/add10.yaml

sleep $sleepTime

# MLServer
sleep $sleepTime
load model ./models/sklearn-iris-gs.yaml
status model iris
seldon model infer iris '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
seldon model infer iris --inference-mode grpc \
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M .
unload model iris ./models/sklearn-iris-gs.yaml

sleep $sleepTime

# Experiments
load model ./models/sklearn1.yaml
Expand Down
Loading

0 comments on commit 9cb6b63

Please sign in to comment.