Skip to content

Commit

Permalink
add dws multiclusters example folder
Browse files Browse the repository at this point in the history
  • Loading branch information
leroyjb committed Jan 24, 2025
1 parent ae7bfc7 commit 1fb4ebf
Show file tree
Hide file tree
Showing 8 changed files with 623 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Create Clusters

```
./create-clusters.sh
```

# Install Kueue

```
./deploy-multikueue.sh
```

## Validate installation

```
kubectl get clusterqueues dws-cluster-queue -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}CQ - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}"
kubectl get admissionchecks sample-dws-multikueue -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}AC - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}"
kubectl get multikueuecluster multikueue-dws-worker-asia -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}MC-ASIA - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}"
kubectl get multikueuecluster multikueue-dws-worker-us -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}MC-US - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}"
kubectl get multikueuecluster multikueue-dws-worker-eu -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}MC-EU - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}"
```

Expected output:

```
CQ - Active: True Reason: Ready Message: Can admit new workloads
AC - Active: True Reason: Active Message: The admission check is active
MC-ASIA - Active: True Reason: Active Message: Connected
MC-US - Active: True Reason: Active Message: Connected
MC-EU - Active: True Reason: Active Message: Connected
```

# Launch job



```
kubectl create -f job-multi-dws-autopilot.yaml
```

## Get the status of the job

```
kubectl get workloads.kueue.x-k8s.io -o jsonpath='{.items[0].status.admissionChecks}'
```

The message in the output shows where the job is scheduled.

Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash

# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

echo 'Create GKE Autopilot clusters'

# Component versions. Neither is referenced by the remaining steps of this
# script.
KUEUE_VERSION=v0.8.1
JOBSET_VERSION=v0.6.0

# regions[i] is the region cluster i is created in and kubeconfigs[i] is the
# kubectl context name that cluster ends up with. Index 0 is the manager
# cluster; every other index is a worker.
regions=("europe-west4" "asia-southeast1" "us-east4" "europe-west4")
kubeconfigs=("manager-europe-west4" "worker-asia-southeast1" "worker-us-east4" "worker-eu-west4")

# Cluster name prefixes for the manager and the worker clusters.
PREFIX_MANAGER="man"
PREFIX_WORKER="w"

# Use the project of the active gcloud configuration. Stop early with a clear
# message when none is configured instead of failing later inside gcloud.
PROJECT_ID=$(gcloud config get-value project)
if [[ -z "${PROJECT_ID}" ]]; then
  echo "No active gcloud project; run 'gcloud config set project <PROJECT_ID>' first." >&2
  exit 1
fi
# Only the commented-out operation lookup further down uses PROJECT_NUMBER.
PROJECT_NUMBER=$(gcloud projects describe "${PROJECT_ID}" --format="value(projectNumber)")

# Upper bound on get-credentials attempts per cluster (15 seconds apart), so a
# cluster that never appears cannot hang the script forever. Override through
# the environment when provisioning is expected to take longer.
MAX_CREDENTIAL_ATTEMPTS="${MAX_CREDENTIAL_ATTEMPTS:-120}"

#######################################
# Print the GKE cluster name for an index into the regions array.
# Globals:   regions, PREFIX_MANAGER, PREFIX_WORKER (read)
# Arguments: $1 - index into regions
# Outputs:   the cluster name on stdout
#######################################
cluster_name_for() {
  local index=$1
  local prefix="${PREFIX_WORKER}"
  # The first region hosts the manager cluster; all others host workers.
  if ((index == 0)); then
    prefix="${PREFIX_MANAGER}"
  fi
  printf '%s-%s\n' "${prefix}" "${regions[index]}"
}

# Request the creation of every cluster. --async makes gcloud return without
# waiting for the operation, so the requests are not serialized.
for i in "${!regions[@]}"; do
  region="${regions[$i]}"
  echo "$region"
  cluster_name=$(cluster_name_for "$i")

  gcloud container clusters create-auto "$cluster_name" \
    --project "$PROJECT_ID" \
    --region "$region" \
    --release-channel "regular" \
    --async
done

# Fetch credentials for every cluster, retrying while that fails, then rename
# the generated kubectl context to the matching entry of kubeconfigs.
for i in "${!regions[@]}"; do
  region="${regions[$i]}"
  cluster_name=$(cluster_name_for "$i")

  # Alternative kept for reference: wait on the creation operation itself.
  # opId=$(gcloud container operations list --filter "TARGET=https://container.googleapis.com/v1/projects/$PROJECT_NUMBER/locations/$region/clusters/$cluster_name" --format="value(name)")
  # gcloud container operations wait "$opId" --project "$PROJECT_ID" --region "$region"

  # errexit does not apply to the condition of an `until` loop, so a failing
  # get-credentials call just triggers another attempt.
  attempt=1
  until gcloud -q container clusters get-credentials "$cluster_name" \
    --project "$PROJECT_ID" \
    --region "$region"; do
    if ((attempt >= MAX_CREDENTIAL_ATTEMPTS)); then
      echo "Giving up on cluster ${cluster_name} after ${attempt} attempts." >&2
      exit 1
    fi
    echo "GKE Cluster is provisioning. Retrying in 15 seconds..."
    sleep 15
    attempt=$((attempt + 1))
  done

  configname="${kubeconfigs[$i]}"
  kubectl config rename-context "gke_${PROJECT_ID}_${region}_${cluster_name}" "$configname"
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
#!/bin/bash

# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Strict mode: exit on command failure (-e), on unset variables (-u), and on
# a failure anywhere in a pipeline (pipefail).
set -euo pipefail

# Namespace and name of the service account the kubeconfig authenticates as.
NAMESPACE="kueue-system"
MULTIKUEUE_SA="multikueue-sa"

# Output path for the generated kubeconfig: first argument, or "kubeconfig".
KUBECONFIG_OUT="${1:-kubeconfig}"

# Create a restricted MultiKueue service account, cluster role and cluster
# role binding. The role covers batch Jobs, JobSets, Kueue Workloads and the
# Kubeflow job kinds listed below, each with read-only access to its status
# subresource (Workload status can additionally be patched and updated).
#
# The manifest is YAML, so the nesting below is significant: without it the
# name/namespace fields no longer belong to `metadata` and the rule fields no
# longer belong to their list items.
kubectl apply -f - <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ${MULTIKUEUE_SA}
  namespace: ${NAMESPACE}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: ${MULTIKUEUE_SA}-role
rules:
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - jobs/status
  verbs:
  - get
- apiGroups:
  - jobset.x-k8s.io
  resources:
  - jobsets
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - jobset.x-k8s.io
  resources:
  - jobsets/status
  verbs:
  - get
- apiGroups:
  - kueue.x-k8s.io
  resources:
  - workloads
  verbs:
  - create
  - delete
  - get
  - list
  - watch
  - update
- apiGroups:
  - kueue.x-k8s.io
  resources:
  - workloads/status
  verbs:
  - get
  - patch
  - update
- apiGroups:
  - kubeflow.org
  resources:
  - tfjobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - tfjobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - paddlejobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - paddlejobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - pytorchjobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - pytorchjobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - xgboostjobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - xgboostjobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs/status
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: ${MULTIKUEUE_SA}-crb
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ${MULTIKUEUE_SA}-role
subjects:
- kind: ServiceAccount
  name: ${MULTIKUEUE_SA}
  namespace: ${NAMESPACE}
EOF

# Get or create a secret bound to the new service account.
# The lookup yields an empty string when the service account lists no secret;
# in that case a service-account-token secret named after the account is
# created and used instead.
SA_SECRET_NAME=$(kubectl get -n "${NAMESPACE}" "sa/${MULTIKUEUE_SA}" -o "jsonpath={.secrets[0]..name}")
if [[ -z "${SA_SECRET_NAME}" ]]; then
  # The here-document body and its EOF terminator must start at column 0;
  # the indentation inside the body is the YAML nesting.
  kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
type: kubernetes.io/service-account-token
metadata:
  name: ${MULTIKUEUE_SA}
  namespace: ${NAMESPACE}
  annotations:
    kubernetes.io/service-account.name: "${MULTIKUEUE_SA}"
EOF

  SA_SECRET_NAME=${MULTIKUEUE_SA}
fi

# Note: the service account token is stored base64-encoded in the secret but
# must be plaintext in the kubeconfig. The CA certificate is kept
# base64-encoded because it is written to certificate-authority-data.
#
# A token secret that was only just created may not have its data filled in
# yet, so poll for a short while instead of silently accepting empty values.
SA_TOKEN=""
CA_CERT=""
for attempt in {1..30}; do
  SA_TOKEN=$(kubectl get -n "${NAMESPACE}" "secrets/${SA_SECRET_NAME}" -o "jsonpath={.data['token']}" | base64 -d)
  CA_CERT=$(kubectl get -n "${NAMESPACE}" "secrets/${SA_SECRET_NAME}" -o "jsonpath={.data['ca\.crt']}")
  if [[ -n "${SA_TOKEN}" && -n "${CA_CERT}" ]]; then
    break
  fi
  echo "Waiting for secret ${NAMESPACE}/${SA_SECRET_NAME} to be populated (attempt ${attempt}/30)" >&2
  sleep 1
done
if [[ -z "${SA_TOKEN}" || -z "${CA_CERT}" ]]; then
  echo "Secret ${NAMESPACE}/${SA_SECRET_NAME} has no token or ca.crt data" >&2
  exit 1
fi

# Resolve the cluster name and API server address behind the current context.
# Each filter closes with `")]`; a stray `}` after the quote would leave the
# JSONPath template with unbalanced braces.
CURRENT_CONTEXT=$(kubectl config current-context)
CURRENT_CLUSTER=$(kubectl config view -o jsonpath="{.contexts[?(@.name == \"${CURRENT_CONTEXT}\")].context.cluster}")
CURRENT_CLUSTER_ADDR=$(kubectl config view -o jsonpath="{.clusters[?(@.name == \"${CURRENT_CLUSTER}\")].cluster.server}")

# Create the kubeconfig file.
# It describes one cluster, one user (authenticated with the service account
# token) and one context that is also the current context. The file embeds a
# bearer token, so it is written under a restrictive umask; the subshell keeps
# that umask from affecting the rest of the script. The umask only matters
# when the file is newly created - an existing file keeps its mode.
echo "Writing kubeconfig in ${KUBECONFIG_OUT}"
(
  umask 077
  cat >"${KUBECONFIG_OUT}" <<EOF
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: ${CA_CERT}
    server: ${CURRENT_CLUSTER_ADDR}
  name: ${CURRENT_CLUSTER}
contexts:
- context:
    cluster: ${CURRENT_CLUSTER}
    user: ${CURRENT_CLUSTER}-${MULTIKUEUE_SA}
  name: ${CURRENT_CONTEXT}
current-context: ${CURRENT_CONTEXT}
kind: Config
preferences: {}
users:
- name: ${CURRENT_CLUSTER}-${MULTIKUEUE_SA}
  user:
    token: ${SA_TOKEN}
EOF
)
Loading

0 comments on commit 1fb4ebf

Please sign in to comment.