From 7e37f432cffc877a9b9e63f9699b19b4d3491125 Mon Sep 17 00:00:00 2001 From: Farhad Sharabiani Date: Wed, 15 Jan 2025 19:54:56 +0000 Subject: [PATCH] A3Ultra blueprint updated to fix some issues --- src/xpk/core/blueprint/blueprint_generator.py | 12 +++++++++--- src/xpk/core/tests/data/a3_mega.yaml | 1 + src/xpk/core/tests/data/a3_ultra.yaml | 9 +++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/xpk/core/blueprint/blueprint_generator.py b/src/xpk/core/blueprint/blueprint_generator.py index a5576b47..39b12c13 100644 --- a/src/xpk/core/blueprint/blueprint_generator.py +++ b/src/xpk/core/blueprint/blueprint_generator.py @@ -195,7 +195,7 @@ def generate_a3_mega_blueprint( "config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl', "config_template_vars": {"num_chips": f"{num_chips}"}, }, - "jobset": {"install": True}, + "jobset": {"install": True, "version": "v0.7.2"}, }, ) @@ -482,7 +482,13 @@ def generate_a3_ultra_blueprint( use=[net_0_id], settings={ "release_channel": "RAPID", - "min_master_version": "1.31.4-gke.1072000", + "version_prefix": "1.31.", + "maintenance_exclusions": [{ + "name": "no-minor-or-node-upgrades-indefinite", + "start_time": "2024-12-01T00:00:00Z", + "end_time": "2025-12-22T00:00:00Z", + "exclusion_scope": "NO_MINOR_OR_NODE_UPGRADES", + }], "prefix_with_deployment_name": False, "name_suffix": cluster_name, "system_node_pool_machine_type": system_node_pool_machine_type, @@ -567,7 +573,7 @@ def generate_a3_ultra_blueprint( "config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl', "config_template_vars": {"num_chips": f"{num_chips}"}, }, - "jobset": {"install": True, "version": "v0.7.1"}, + "jobset": {"install": True, "version": "v0.7.2"}, "apply_manifests": [ {"source": nccl_installer_path}, {"source": mlgru_disable_path}, diff --git a/src/xpk/core/tests/data/a3_mega.yaml b/src/xpk/core/tests/data/a3_mega.yaml index c3a50eb2..a13779e5 100644 --- a/src/xpk/core/tests/data/a3_mega.yaml +++ b/src/xpk/core/tests/data/a3_mega.yaml @@ -101,6 +101,7 @@ deployment_groups: config_template_vars: {num_chips: "16"} jobset: install: true + version: v0.7.2 - !DeploymentModule id: workload_configmap diff --git a/src/xpk/core/tests/data/a3_ultra.yaml b/src/xpk/core/tests/data/a3_ultra.yaml index 8258cfc9..6983e769 100644 --- a/src/xpk/core/tests/data/a3_ultra.yaml +++ b/src/xpk/core/tests/data/a3_ultra.yaml @@ -86,7 +86,12 @@ deployment_groups: use: [gke-a3-ultra-net-0] settings: release_channel: "RAPID" - min_master_version: "1.31.4-gke.1072000" + version_prefix: "1.31." + maintenance_exclusions: + - name: no-minor-or-node-upgrades-indefinite + start_time: "2024-12-01T00:00:00Z" + end_time: "2025-12-22T00:00:00Z" + exclusion_scope: NO_MINOR_OR_NODE_UPGRADES prefix_with_deployment_name: false name_suffix: gke-a3-ultra system_node_pool_machine_type: "e2-standard-16" @@ -139,7 +144,7 @@ deployment_groups: num_chips: "16" jobset: install: true - version: v0.7.1 + version: v0.7.2 apply_manifests: - source: $(ghpc_stage("xpk-gke-a3-ultra"))/nccl-installer.yaml - source: $(ghpc_stage("xpk-gke-a3-ultra"))/mlgru-disable.yaml