forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
aaef6b9
commit 7fb906f
Showing
5 changed files
with
252 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
# Builds the Zoom-enabled PyTorch wheel, repairs it with auditwheel, runs the
# zoom smoke/device tests, and publishes the wheel as the rolling "latest"
# release.
name: "Build Zoom wheel"

on:
  workflow_dispatch:
    inputs:
      force_debug_with_tmate:
        type: boolean
        description: 'Run the build with tmate session'
        required: false
        default: false
      debug_with_tmate:
        type: boolean
        description: 'Run the build with a tmate session ONLY in case of failure'
        required: false
        default: false
  pull_request:
  push:
    branches:
      - main

# One in-flight run per PR (or per commit for pushes); newer runs cancel older.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  build:

    strategy:
      fail-fast: false
      matrix:
        include:
          - name: "ubuntu-22.04"
            runs-on: "azure-cpubuilder-linux-scale"
            # runs-on: "mi300"
            # container: "rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0"

    runs-on: ${{ matrix.runs-on }}

    name: ${{ matrix.name }}

    env:
      CACHE_DIR: ${{ github.workspace }}/.container-cache
      # either the PR number or `branch-N` where N always increments
      CACHE_KEY: linux-build-test-cpp-asserts-manylinux-v2-${{ format('{0}-{1}', github.ref_name, github.run_number) }}

    defaults:
      run:
        shell: bash

    permissions:
      id-token: write
      # Needed by the release step to create/update the "latest" release.
      contents: write

    # NOTE(review): `matrix.container` is commented out in the matrix above, so
    # this expression evaluates to an empty string. Confirm the runner treats
    # that as "no container" (the Audit step uses `sudo`, which suggests the
    # job is expected to run directly on the host).
    container:
      image: ${{ matrix.container }}

    steps:
      - name: "Check out repository"
        # NOTE(review): the exact pinned patch version was lost in this copy of
        # the file; re-pin to the intended release tag or commit SHA.
        uses: actions/checkout@v4
        with:
          submodules: true

      # CACHE_KEY ends in the run number, so it is unique per run; the
      # restore-keys prefix is what lets an earlier run's cache be reused.
      - name: Enable cache
        uses: actions/cache/restore@v3
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ env.CACHE_KEY }}
          restore-keys: linux-build-test-cpp-

      - name: "Build PyTorch"
        id: build
        run: |
          curl -sSL https://raw.githubusercontent.com/mrodden/get-rocm/refs/heads/master/get-rocm.py -o get-rocm.py
          python3.11 get-rocm.py --rocm-version 6.2.3
          export CCACHE_DIR="${{ env.CACHE_DIR }}"
          export CMAKE_C_COMPILER_LAUNCHER=ccache
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export CCACHE_SLOPPINESS=include_file_ctime,include_file_mtime,time_macros
          python3.11 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt
          ./build.sh

      - name: "Audit"
        id: audit
        run: |
          # Non-interactive install: refresh the package index and pass -y so
          # apt never waits for (or aborts on) a confirmation prompt in CI.
          sudo apt-get update
          sudo apt-get install -y patchelf
          source venv/bin/activate
          pip install auditwheel
          auditwheel repair -w dist --plat manylinux_2_39_x86_64 dist/torch*

      - name: "Test"
        id: test
        run: |
          # Each `run` step starts a fresh shell, so re-activate the venv the
          # build used (as the Audit step does).
          # NOTE(review): assumes build.sh installs torch into this venv - confirm.
          source venv/bin/activate
          # smoke test
          python zoom_extension/examples/test.py
          # device tests
          PYTORCH_TEST_WITH_SLOW=1 TORCH_TEST_DEVICES=zoom_extension/test/pytorch_test_base.py ./test.sh
          cat zoom_test_errors.log
          cat zoom_unimplemented_operators.log

      - name: Save cache
        uses: actions/cache/save@v3
        if: ${{ !cancelled() }}
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ env.CACHE_KEY }}

      - name: Upload artifacts
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.name }}_artifact
          path: dist
          if-no-files-found: warn

      - name: Release current commit
        # Never publish the rolling "latest" release from pull-request runs:
        # that would replace the released wheel with unmerged code. Pushes to
        # main and manual dispatches still publish.
        if: ${{ !cancelled() && github.event_name != 'pull_request' }}
        # NOTE(review): the exact pinned patch version was lost in this copy of
        # the file; re-pin to the intended release tag or commit SHA.
        uses: ncipollo/release-action@v1
        with:
          artifacts: "dist/torch*.whl"
          token: "${{ secrets.GITHUB_TOKEN }}"
          tag: "latest"
          name: "latest"
          removeArtifacts: false
          allowUpdates: true
          replacesArtifacts: true
          makeLatest: true

      - name: "Setup tmate session"
        if: ${{ (failure() && inputs.debug_with_tmate) || inputs.force_debug_with_tmate }}
        # NOTE(review): the exact pinned version was lost in this copy of the
        # file; re-pin to the intended release tag or commit SHA.
        uses: mxschmitt/action-tmate@v3
        with:
          limit-access-to-actor: true
          install-dependencies: ${{ startsWith(matrix.runs-on, 'macos') || startsWith(matrix.runs-on, 'windows') }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/bin/bash
#
# Runs the zoom device tests from test/test_torch.py, captures all output in
# test.log, and writes two frequency summaries derived from that log:
#   - zoom_unimplemented_operators.log: aten operators reported as not
#     runnable on the 'zoom' backend
#   - zoom_test_errors.log: "...Error: <message>" lines, grouped by message
# Logs from a previous run are copied to *.bak before being overwritten.
#
# Note: the last command is an `echo`, so the script's exit status does not
# reflect whether any test failed.

log_file="test.log"
bak_file="test.log.bak"
output_file="zoom_unimplemented_operators.log"
bak_out="zoom_unimplemented_operators.log.bak"
error_file="zoom_test_errors.log"
bak_err="zoom_test_errors.log.bak"

# backup logs
[ -f "$log_file" ] && cp "$log_file" "$bak_file"
[ -f "$output_file" ] && cp "$output_file" "$bak_out"
[ -f "$error_file" ] && cp "$error_file" "$bak_err"

python test/test_torch.py --run-parallel 0 -k TestTorchDeviceTypePRIVATEUSEONE --verbose &> "$log_file"
#python test/test_ops.py -k TestCommonPRIVATEUSEONE
#python test/test_ops.py -k TestCommonPRIVATEUSEONE.test_compare_cpu --verbose &> $log_file
#python test/test_ops.py -k TestCommonPRIVATEUSEONE.test_numpy_ref --verbose &> $log_file

## Find Unimplemented Operator Errors from failing tests
# Pattern to search for
pattern="Could not run 'aten::[^']*' with arguments from the 'zoom' backend"

# Extract aten operators, count frequencies, sort by frequency (descending), and save to output file
grep -oP "$pattern" "$log_file" |
    sed -n "s/.*'aten::\([^']*\)'.*/\1/p" |
    sort |
    uniq -c |
    sort -rn |
    sed 's/^ *//; s/ /\t/' > "$output_file"

# Count total matches
total_matches=$(grep -cP "$pattern" "$log_file")

# Append total matches to the output file
echo -e "\nTotal unimplemented operator failures: $total_matches" >> "$output_file"
echo "A list of unimplemented operators has been saved to $output_file"

## Find errors from failing tests
# Extract error messages, count frequencies, sort by frequency (descending), and save to output file
# Pattern to search for
pattern="^.*Error: (?!test)(.+?)(?=\n|$)"

# Strip the leading "<ExceptionClass>Error: " prefix so identical messages are
# grouped together. -E is required: in sed's default (basic) regex syntax
# unescaped parentheses are literal characters, so without it the grouping
# expression never matches and the prefix is left in place.
grep -oP "$pattern" "$log_file" |
    sed -E 's/^(.*Error): //g' |
    awk '{print substr($0, 1, 100)}' | # Limit to first 100 characters
    sort |
    uniq -c |
    sort -rn |
    sed 's/^ *//; s/ /\t/' > "$error_file"

# Count total matches
total_matches=$(grep -cP "$pattern" "$log_file")

# Append total matches to the output file
echo -e "\nTotal test errors failures: $total_matches" >> "$error_file"
echo "A list of test errors has been saved to $error_file"

echo "Test logs have been saved to $log_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Smoke test for the zoom backend: allocate a small int64 tensor on device
# "zoom:0" and print it.
import torch.zoom

# Register "zoom" as the name of the PrivateUse1 backend before any device
# string mentions it.
torch.utils.rename_privateuse1_backend("zoom")

# TODO: figure this out
torch.utils.generate_methods_for_privateuse1_backend(unsupported_dtype=None)

# Uninitialized 5-element tensor on the first zoom device; printing it reads
# the tensor back for display.
probe = torch.empty(5, dtype=torch.int64, device="zoom:0")
print(probe)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import torch
import torch.zoom
from typing import ClassVar

# Register "zoom" as the name of the PrivateUse1 backend, then generate the
# backend's helper methods. No dtype is declared unsupported.
torch.utils.rename_privateuse1_backend('zoom')
torch.utils.generate_methods_for_privateuse1_backend(unsupported_dtype=None)
|
||
class ZoomTestBase(DeviceTypeTestBase):
    """Device-type test base that runs device-generic tests on zoom devices.

    NOTE(review): ``DeviceTypeTestBase`` is not imported in this file; it is
    presumably supplied by whatever loads this module through the
    ``TORCH_TEST_DEVICES`` environment variable - confirm against the loader.
    """

    device_type = 'privateuseone'
    # 'zoom:<index>' string; assigned in setUpClass.
    primary_device: ClassVar[str]

    @classmethod
    def get_primary_device(cls):
        """Return the device string recorded by setUpClass."""
        return cls.primary_device

    @classmethod
    def get_all_devices(cls):
        """Return every zoom device string, with the primary device first."""
        primary = cls.get_primary_device()
        primary_idx = int(primary.split(':')[1])

        devices = [primary]
        for idx in range(torch.zoom.device_count()):
            if idx != primary_idx:
                devices.append('zoom:{0}'.format(idx))
        return devices

    @classmethod
    def setUpClass(cls):
        """Initialize zoom and record the current device as the primary one."""
        # Force Zoom Init: allocating any tensor on the device is enough, the
        # tensor itself is not needed.
        torch.ones(1, device='zoom')
        # Acquires the current device as the primary (test) device
        current_idx = torch.zoom.current_device()
        cls.primary_device = f'zoom:{current_idx}'


# Exported under this name for the loader of this file.
TEST_CLASS = ZoomTestBase