Skip to content

Commit

Permalink
A3Ultra blueprint updated to fix some issues (#330)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sharabiani authored Jan 15, 2025
1 parent 5941bef commit f90b64e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 5 deletions.
12 changes: 9 additions & 3 deletions src/xpk/core/blueprint/blueprint_generator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,7 +195,7 @@ def generate_a3_mega_blueprint(
"config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl',
"config_template_vars": {"num_chips": f"{num_chips}"},
},
"jobset": {"install": True},
"jobset": {"install": True, "version": "v0.7.2"},
},
)

Expand Down Expand Up @@ -482,7 +482,13 @@ def generate_a3_ultra_blueprint(
use=[net_0_id],
settings={
"release_channel": "RAPID",
"min_master_version": "1.31.4-gke.1072000",
"version_prefix": "1.31.",
"maintenance_exclusions": [{
"name": "no-minor-or-node-upgrades-indefinite",
"start_time": "2024-12-01T00:00:00Z",
"end_time": "2025-12-22T00:00:00Z",
"exclusion_scope": "NO_MINOR_OR_NODE_UPGRADES",
}],
"prefix_with_deployment_name": False,
"name_suffix": cluster_name,
"system_node_pool_machine_type": system_node_pool_machine_type,
Expand Down Expand Up @@ -567,7 +573,7 @@ def generate_a3_ultra_blueprint(
"config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl',
"config_template_vars": {"num_chips": f"{num_chips}"},
},
"jobset": {"install": True, "version": "v0.7.1"},
"jobset": {"install": True, "version": "v0.7.2"},
"apply_manifests": [
{"source": nccl_installer_path},
{"source": mlgru_disable_path},
Expand Down
1 change: 1 addition & 0 deletions src/xpk/core/tests/data/a3_mega.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,6 +101,7 @@ deployment_groups:
config_template_vars: {num_chips: "16"}
jobset:
install: true
version: v0.7.2

- !DeploymentModule
id: workload_configmap
Expand Down
9 changes: 7 additions & 2 deletions src/xpk/core/tests/data/a3_ultra.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -86,7 +86,12 @@ deployment_groups:
use: [gke-a3-ultra-net-0]
settings:
release_channel: "RAPID"
min_master_version: "1.31.4-gke.1072000"
version_prefix: "1.31."
maintenance_exclusions:
- name: no-minor-or-node-upgrades-indefinite
start_time: "2024-12-01T00:00:00Z"
end_time: "2025-12-22T00:00:00Z"
exclusion_scope: NO_MINOR_OR_NODE_UPGRADES
prefix_with_deployment_name: false
name_suffix: gke-a3-ultra
system_node_pool_machine_type: "e2-standard-16"
Expand Down Expand Up @@ -139,7 +144,7 @@ deployment_groups:
num_chips: "16"
jobset:
install: true
version: v0.7.1
version: v0.7.2
apply_manifests:
- source: $(ghpc_stage("xpk-gke-a3-ultra"))/nccl-installer.yaml
- source: $(ghpc_stage("xpk-gke-a3-ultra"))/mlgru-disable.yaml
Expand Down

0 comments on commit f90b64e

Please sign in to comment.