Skip to content

Commit

Permalink
Merge branch 'ppawl-cherry-pick' of https://github.com/AI-Hypercomput…
Browse files Browse the repository at this point in the history
…er/xpk into ppawl-cherry-pick
  • Loading branch information
pawloch00 committed Jan 29, 2025
2 parents 277964b + 5955407 commit 8bb0c30
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 3 deletions.
25 changes: 25 additions & 0 deletions .github/release.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# For more info, see:
# https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes#configuration-options


# Categories used by GitHub's automatically generated release notes.
# Each category lists the PR labels it collects. The label names must match
# the ones required by the `label-validation` job in
# .github/workflows/build_tests.yaml (release-features, release-improvements,
# release-bugfix); a misspelled label silently leaves its category empty.
changelog:
  categories:
    - title: New Features
      labels: [release-features]
    - title: Improvements
      labels: [release-improvements]
    - title: Bug fixes
      labels: [release-bugfix]
29 changes: 29 additions & 0 deletions .github/workflows/build_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,37 @@ env:
PW_CLUSTER_ARGUMENTS: "--network=${{secrets.NETWORK_NAME}} --subnetwork=${{secrets.SUBNETWORK_NAME}} --maintenance-window=23:50"

jobs:
# Gate job: requires at least one release-* label on the pull request and
# rejects pull requests labelled `do-not-merge`. Skipped for draft PRs.
label-validation:
  if: github.event.pull_request.draft == false
  runs-on: ubuntu-latest
  permissions:
    pull-requests: read
  steps:
    # Fails unless the PR carries at least one of the release labels.
    - id: check-labels
      uses: mheap/github-action-required-labels@v5
      with:
        mode: minimum
        count: 1
        labels: "release-improvements, release-bugfix, release-features"
        message: "This PR is being prevented from merging because it is not labeled. Please add a label to this PR. Accepted labels: release-improvements, release-bugfix, release-features"
    # Fails when the PR carries the `do-not-merge` label.
    - id: do-not-merge
      uses: mheap/github-action-required-labels@v5
      with:
        mode: exactly
        count: 0
        labels: "do-not-merge"
        add_comment: false
    # Prints the labels reported by the check-labels step, one per line.
    - id: print-labels
      env:
        # Pass the step output through the environment instead of expanding
        # `${{ }}` inside the script, so label text is never parsed as shell
        # code.
        PR_LABELS: ${{ steps.check-labels.outputs.labels }}
      run: |
        echo "Current PR labels:"
        for f in $(echo "$PR_LABELS" | sed "s/,/ /g")
        do
          echo "$f"
        done
run-unit-tests:
runs-on: [ubuntu-22.04]
needs:
- label-validation
concurrency: # We support one build or nightly test to run at a time currently.
group: build-test-cluster-group
cancel-in-progress: false
Expand Down
105 changes: 105 additions & 0 deletions .github/workflows/build_wheels.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

name: PyPi releases

# This workflow is started manually (workflow_dispatch) only.
on:
  workflow_dispatch:
    inputs:
      # Optional switch read by the publish-to-testpypi job; off by default.
      testpypi:
        type: boolean
        required: false
        default: false
      # Mandatory tag name; the build job uses it to pick the xpk revision.
      tag:
        type: string
        required: true


jobs:
# Builds the wheel and source tarball and uploads them as the
# `python-package-distributions` artifact consumed by the publish jobs.
build:
  name: Build distribution
  runs-on: ubuntu-latest
  # Build only for manually dispatched runs or when the run's ref is a tag.
  if: github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/')
  steps:
    - uses: actions/checkout@v4
      with:
        # Check out the requested tag directly. The earlier separate
        # "git clone" / "cd xpk" steps never ran (their condition misspelled
        # 'workflow_dispatch'), and a `cd` in its own step would not have
        # carried over to the build step anyway. Falls back to the run's ref
        # when no tag input is present.
        # NOTE(review): this checks out the current repository rather than
        # the hard-coded AI-Hypercomputer/xpk URL; confirm that is acceptable
        # for forks.
        ref: ${{ inputs.tag || github.ref }}
        persist-credentials: false
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.x"
    - name: Install pypa/build
      run: >-
        python3 -m
        pip install
        build
        --user
    - name: Build a binary wheel and a source tarball
      run: python3 -m build
    - name: Store the distribution packages
      uses: actions/upload-artifact@v4
      with:
        name: python-package-distributions
        path: dist/
# Publishes the built distributions to TestPyPI when the `testpypi` input is
# enabled.
publish-to-testpypi:
  # Compare the boolean input inside a single expression. The previous form,
  # `${{inputs.testpypi}} == true`, rendered to a non-empty string such as
  # "false == true", which is always truthy, so the job ran unconditionally.
  if: inputs.testpypi == true
  name: Publish Python distribution to TestPyPI
  needs:
    - build
  runs-on: ubuntu-latest
  environment:
    name: testpypi
    url: https://test.pypi.org/p/xpk-testing
  permissions:
    id-token: write  # required for trusted publishing
  steps:
    - name: Download all the dists
      uses: actions/download-artifact@v4
      with:
        name: python-package-distributions
        path: dist/
    - name: Publish distribution 📦 to TestPyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: https://test.pypi.org/legacy/
        verbose: true
# Publishes the built distributions to PyPI. Runs only when the workflow's
# ref is a tag, and only after the build job has produced the artifact.
publish-to-pypi:
  name: Publish Python distribution to PyPI
  if: startsWith(github.ref, 'refs/tags/')  # only publish to PyPI for tag refs
  needs:
    - build
  runs-on: ubuntu-latest
  environment:
    # Trusted publishing should be configured as described here:
    # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/#configuring-trusted-publishing
    name: pypi
    url: https://pypi.org/p/xpk  # PyPI project page for xpk
  permissions:
    id-token: write  # IMPORTANT: mandatory for trusted publishing

  steps:
    - name: Download all the dists
      uses: actions/download-artifact@v4
      with:
        name: python-package-distributions
        path: dist/
    - name: Publish distribution to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
name = "xpk"
dynamic = ["version"]
authors = [
{ name="Cloud TPU Team", email="cloud-tpu-eng@google.com" },
{ name="XPK team", email="xpk-code-reviewers@google.com" },
]
description = "xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE."
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from ..core.commands import run_command_for_value

XPK_VERSION = 'v0.4.1'
XPK_VERSION = 'v0.6.0'

from ..utils.console import xpk_exit, xpk_print

Expand Down
3 changes: 3 additions & 0 deletions src/xpk/commands/workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
ensure_pathways_workload_prerequisites,
get_pathways_proxy_args,
get_pathways_rm_args,
get_pathways_sidecar_container,
get_pathways_unified_query_link,
get_pathways_worker_args,
get_user_workload_for_pathways,
Expand Down Expand Up @@ -290,6 +291,7 @@
- mountPath: /tmp
name: shared-tmp
{storage_volume_mounts}
{pathways_sidecar_container}
nodeSelector:
{accelerator_label}
{machine_label}
Expand Down Expand Up @@ -553,6 +555,7 @@ def workload_create(args) -> None:
pathways_worker_args=get_pathways_worker_args(args),
pathways_proxy_args=get_pathways_proxy_args(args),
user_workload=get_user_workload_for_pathways(args, system, storages),
pathways_sidecar_container=get_pathways_sidecar_container(args),
resource_type=AcceleratorTypeToAcceleratorCharacteristics[
system.accelerator_type
].resource_type,
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
default_docker_image = 'python:3.10'
default_script_dir = os.getcwd()
# This is the version for XPK PyPI package
__version__ = '0.5.0'
__version__ = '0.6.0'
xpk_current_version = __version__

h100_device_type = 'h100-80gb-8'
Expand Down
34 changes: 34 additions & 0 deletions src/xpk/core/pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,40 @@ def get_pathways_proxy_args(args) -> str:
return ''


def get_pathways_sidecar_container(args) -> str:
"""Returns the YAML snippet for the remote Python sidecar container.

The snippet declares an `initContainers` entry named `remote-python-sidecar`
with `restartPolicy: Always`, which is how Kubernetes designates a sidecar
container. The container is privileged, mounts the `shared-tmp` volume at
/tmp, exposes container port 50051 and sets GRPC_SERVER_ADDRESS to
'0.0.0.0:50051'.
See https://kubernetes.io/docs/concepts/workloads/pods/sidecar-containers/
for more details.

Args:
args: user provided arguments for running the command. Reads
`args.use_pathways` and `args.remote_python_sidecar_image`.

Returns:
str: the snippet with the sidecar image filled in when `args.use_pathways`
is truthy and `args.remote_python_sidecar_image` is not None; an empty
string otherwise.
"""
yaml = """initContainers:
- name: remote-python-sidecar
image: {args.remote_python_sidecar_image}
imagePullPolicy: Always
securityContext:
privileged: true
volumeMounts:
- mountPath: /tmp # Shared volume mount with the main container.
name: shared-tmp
restartPolicy: Always
ports:
- containerPort: 50051
env:
- name: GRPC_SERVER_ADDRESS
value: '0.0.0.0:50051'"""
# Emit the sidecar only for Pathways workloads that supplied an image.
if args.use_pathways and args.remote_python_sidecar_image is not None:
return yaml.format(args=args)
else:
return ''


def add_pw_resource_flavors(args):
"""Add resource flavors required for Pathways enabled clusters."""
resource_flavor_yaml = """apiVersion: kueue.x-k8s.io/v1beta1
Expand Down
6 changes: 6 additions & 0 deletions src/xpk/parser/workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,12 @@ def add_shared_workload_create_optional_arguments(args_parsers):
' event or deletion request.Defaults to 30 seconds.'
),
)
custom_parser.add_argument(
'--remote-python-sidecar-image',
type=str,
default=None,
help='Remote Python sidecar server image.',
)
custom_parser.add_argument(
'--enable-debug-logs',
action='store_true',
Expand Down

0 comments on commit 8bb0c30

Please sign in to comment.