-
Notifications
You must be signed in to change notification settings - Fork 206
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add dws multiclusters example folder
- Loading branch information
Showing
8 changed files
with
623 additions
and
0 deletions.
There are no files selected for viewing
48 changes: 48 additions & 0 deletions
48
tutorials-and-examples/workflow-orchestration/dws-multiclusters-example/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Create Clusters | ||
|
||
``` | ||
./create-clusters.sh | ||
``` | ||
|
||
# Install Kueue | ||
|
||
``` | ||
./deploy-multikueue.sh | ||
``` | ||
|
||
## Validate installation | ||
|
||
``` | ||
kubectl get clusterqueues dws-cluster-queue -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}CQ - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}" | ||
kubectl get admissionchecks sample-dws-multikueue -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}AC - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}" | ||
kubectl get multikueuecluster multikueue-dws-worker-asia -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}MC-ASIA - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}" | ||
kubectl get multikueuecluster multikueue-dws-worker-us -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}MC-US - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}" | ||
kubectl get multikueuecluster multikueue-dws-worker-eu -o jsonpath="{range .status.conditions[?(@.type == \"Active\")]}MC-EU - Active: {@.status} Reason: {@.reason} Message: {@.message}{'\n'}{end}" | ||
``` | ||
|
||
Expected output: | ||
|
||
``` | ||
CQ - Active: True Reason: Ready Message: Can admit new workloads | ||
AC - Active: True Reason: Active Message: The admission check is active | ||
MC-ASIA - Active: True Reason: Active Message: Connected | ||
MC-US - Active: True Reason: Active Message: Connected | ||
MC-EU - Active: True Reason: Active Message: Connected | ||
``` | ||
|
||
# Launch job | ||
|
||
|
||
|
||
``` | ||
kubectl create -f job-multi-dws-autopilot.yaml | ||
``` | ||
|
||
## Get the status of the job | ||
|
||
``` | ||
kubectl get workloads.kueue.x-k8s.io -o jsonpath='{.items[0].status.admissionChecks}' | ||
``` | ||
|
||
The `message` field in the output shows which cluster the job was scheduled on. | ||
|
70 changes: 70 additions & 0 deletions
70
tutorials-and-examples/workflow-orchestration/dws-multiclusters-example/create-clusters.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/bin/bash | ||
|
||
# Copyright 2024 The Kubernetes Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
set -o errexit
set -o nounset
set -o pipefail

# Creates one manager and three worker GKE Autopilot clusters in the current
# gcloud project, then fetches credentials for each cluster and renames its
# kubectl context to the matching entry of the kubeconfigs array.
#
# Optional environment:
#   CLUSTER_READY_MAX_ATTEMPTS - how many times to try fetching credentials
#                                for a cluster before giving up (default 120,
#                                i.e. about 30 minutes at 15 seconds a try).

echo 'Create GKE Autopilot clusters'

# regions[i] is where cluster i lives; kubeconfigs[i] is the context name it
# ends up with. Index 0 is the manager, the remaining entries are workers.
# The manager and the EU worker intentionally share europe-west4.
regions=("europe-west4" "asia-southeast1" "us-east4" "europe-west4")
kubeconfigs=("manager-europe-west4" "worker-asia-southeast1" "worker-us-east4" "worker-eu-west4")
PREFIX_MANAGER="man"
PREFIX_WORKER="w"
CLUSTER_READY_MAX_ATTEMPTS=${CLUSTER_READY_MAX_ATTEMPTS:-120}

PROJECT_ID=$(gcloud config get-value project)
if [[ -z "$PROJECT_ID" ]]; then
  # Fail early with a clear message instead of letting every later gcloud
  # call fail on an empty --project value.
  echo "No default gcloud project is set; run 'gcloud config set project <PROJECT_ID>' first." >&2
  exit 1
fi

# Prints the name of the cluster at index $1 of the regions array:
# "<manager prefix>-<region>" for index 0, "<worker prefix>-<region>" otherwise.
cluster_name_for() {
  local -r index=$1
  local prefix=$PREFIX_WORKER
  if ((index == 0)); then
    prefix=$PREFIX_MANAGER
  fi
  printf '%s-%s\n' "$prefix" "${regions[$index]}"
}

# Start all cluster creations without waiting (--async) so they provision in
# parallel.
for i in "${!regions[@]}"; do
  region="${regions[$i]}"
  echo "$region"
  cluster_name=$(cluster_name_for "$i")

  gcloud container clusters create-auto "$cluster_name" \
    --project "$PROJECT_ID" \
    --region "$region" \
    --release-channel "regular" \
    --async
done

# Poll each cluster until its credentials can be fetched, then give its
# kubectl context the short name from kubeconfigs.
for i in "${!regions[@]}"; do
  region="${regions[$i]}"
  cluster_name=$(cluster_name_for "$i")

  attempt=1
  # A command used as the `until` condition never triggers errexit, so no
  # `set +e` / `set -e` bracket is needed around the retry loop.
  until gcloud -q container clusters get-credentials "$cluster_name" \
    --project "$PROJECT_ID" \
    --region "$region"; do
    if ((attempt >= CLUSTER_READY_MAX_ATTEMPTS)); then
      echo "Giving up on cluster $cluster_name after $attempt attempts." >&2
      exit 1
    fi
    attempt=$((attempt + 1))
    echo "GKE Cluster is provisioning. Retrying in 15 seconds..."
    sleep 15
  done

  configname="${kubeconfigs[$i]}"
  kubectl config rename-context "gke_${PROJECT_ID}_${region}_${cluster_name}" "$configname"
done
232 changes: 232 additions & 0 deletions
232
...examples/workflow-orchestration/dws-multiclusters-example/create-multikueue-kubeconfig.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
#!/bin/bash | ||
|
||
# Copyright 2024 The Kubernetes Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
set -o errexit
set -o nounset
set -o pipefail

# Usage: create-multikueue-kubeconfig.sh [output-file]
#
# Operates on the cluster selected by the current kubectl context.
#   $1 - path of the kubeconfig file to write (default: "kubeconfig")
KUBECONFIG_OUT=${1:-kubeconfig}
MULTIKUEUE_SA=multikueue-sa
NAMESPACE=kueue-system

# Create a restricted MultiKueue role, service account and role binding.
# The ClusterRole grants create/delete/get/list/watch on each supported job
# kind, read access to its status subresource, and additionally update on
# Kueue workloads plus get/patch/update on their status.
kubectl apply -f - <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ${MULTIKUEUE_SA}
  namespace: ${NAMESPACE}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: ${MULTIKUEUE_SA}-role
rules:
- apiGroups:
  - batch
  resources:
  - jobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - jobs/status
  verbs:
  - get
- apiGroups:
  - jobset.x-k8s.io
  resources:
  - jobsets
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - jobset.x-k8s.io
  resources:
  - jobsets/status
  verbs:
  - get
- apiGroups:
  - kueue.x-k8s.io
  resources:
  - workloads
  verbs:
  - create
  - delete
  - get
  - list
  - watch
  - update
- apiGroups:
  - kueue.x-k8s.io
  resources:
  - workloads/status
  verbs:
  - get
  - patch
  - update
- apiGroups:
  - kubeflow.org
  resources:
  - tfjobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - tfjobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - paddlejobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - paddlejobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - pytorchjobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - pytorchjobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - xgboostjobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - xgboostjobs/status
  verbs:
  - get
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs
  verbs:
  - create
  - delete
  - get
  - list
  - watch
- apiGroups:
  - kubeflow.org
  resources:
  - mpijobs/status
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: ${MULTIKUEUE_SA}-crb
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ${MULTIKUEUE_SA}-role
subjects:
- kind: ServiceAccount
  name: ${MULTIKUEUE_SA}
  namespace: ${NAMESPACE}
EOF
|
||
# Get or create a secret bound to the new service account.
# If the service account does not list a secret, create a
# kubernetes.io/service-account-token secret named after the account and
# use that one.
SA_SECRET_NAME=$(kubectl get -n "${NAMESPACE}" "sa/${MULTIKUEUE_SA}" -o "jsonpath={.secrets[0]..name}")
if [[ -z "${SA_SECRET_NAME}" ]]; then
  kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
type: kubernetes.io/service-account-token
metadata:
  name: ${MULTIKUEUE_SA}
  namespace: ${NAMESPACE}
  annotations:
    kubernetes.io/service-account.name: "${MULTIKUEUE_SA}"
EOF

  SA_SECRET_NAME=${MULTIKUEUE_SA}
fi
|
||
# Note: service account token is stored base64-encoded in the secret but must
# be plaintext in kubeconfig. The CA certificate stays base64-encoded because
# it is written to the certificate-authority-data field.
SA_TOKEN=$(kubectl get -n "${NAMESPACE}" "secrets/${SA_SECRET_NAME}" -o "jsonpath={.data['token']}" | base64 -d)
CA_CERT=$(kubectl get -n "${NAMESPACE}" "secrets/${SA_SECRET_NAME}" -o "jsonpath={.data['ca\.crt']}")
if [[ -z "${SA_TOKEN}" || -z "${CA_CERT}" ]]; then
  # Stop here rather than write a kubeconfig that cannot authenticate.
  echo "Secret ${NAMESPACE}/${SA_SECRET_NAME} has no token or CA certificate; re-run once it is populated." >&2
  exit 1
fi

# Extract the cluster name and API server address from the current context.
CURRENT_CONTEXT=$(kubectl config current-context)
CURRENT_CLUSTER=$(kubectl config view -o jsonpath="{.contexts[?(@.name == \"${CURRENT_CONTEXT}\")].context.cluster}")
CURRENT_CLUSTER_ADDR=$(kubectl config view -o jsonpath="{.clusters[?(@.name == \"${CURRENT_CLUSTER}\")].cluster.server}")
if [[ -z "${CURRENT_CLUSTER}" || -z "${CURRENT_CLUSTER_ADDR}" ]]; then
  echo "Could not resolve the cluster or server address for context ${CURRENT_CONTEXT}." >&2
  exit 1
fi
|
||
# Create the Kubeconfig file: a single cluster, user and context, with the
# context named after the current one and the service account token as the
# user credential.
echo "Writing kubeconfig in ${KUBECONFIG_OUT}"
(
  # The file embeds a bearer token, so a newly created file is made readable
  # by its owner only. The umask change is confined to this subshell.
  umask 077
  cat >"${KUBECONFIG_OUT}" <<EOF
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: ${CA_CERT}
    server: ${CURRENT_CLUSTER_ADDR}
  name: ${CURRENT_CLUSTER}
contexts:
- context:
    cluster: ${CURRENT_CLUSTER}
    user: ${CURRENT_CLUSTER}-${MULTIKUEUE_SA}
  name: ${CURRENT_CONTEXT}
current-context: ${CURRENT_CONTEXT}
kind: Config
preferences: {}
users:
- name: ${CURRENT_CLUSTER}-${MULTIKUEUE_SA}
  user:
    token: ${SA_TOKEN}
EOF
)
Oops, something went wrong.