Skip to content

Commit

Permalink
Debug
Browse files Browse the repository at this point in the history
  • Loading branch information
jiripetrlik committed Nov 25, 2024
1 parent 1965a8e commit bf7454a
Showing 1 changed file with 20 additions and 6 deletions.
26 changes: 20 additions & 6 deletions .github/workflows/additional_demo_notebook_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,23 @@ jobs:
# Remove login/logout cells, as KinD doesn't support authentication using token
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
# Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
sed -i "s/cluster_uri()/local_client_url()/g" hf_interactive.ipynb
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
# Set explicit namespace, as the SDK (currently) needs it to resolve local queues
sed -i "s/worker_cpu_requests=8,/worker_cpu_requests=1, namespace='default',/" hf_interactive.ipynb
# Decrease cluster resource parameters
sed -i "s/{'nvidia.com\/gpu':1}/{'nvidia.com\/gpu':0}/g" hf_interactive.ipynb
sed -i "s/worker_cpu_requests=8,/worker_cpu_requests='250m', namespace='default',/" hf_interactive.ipynb
sed -i "s/worker_cpu_limits=8,/worker_cpu_limits=1,/" hf_interactive.ipynb
sed -i "s/worker_memory_requests=16,/worker_memory_requests=4,/" hf_interactive.ipynb
sed -i "s/worker_memory_limits=8,/worker_memory_limits=4,/" hf_interactive.ipynb
sed -i "s/worker_memory_limits=16,/worker_memory_limits=4,/" hf_interactive.ipynb
cat hf_interactive.ipynb
# Run notebook
poetry run papermill hf_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
env:
GRPC_DNS_RESOLVER: "native"
working-directory: demo-notebooks/additional-demos

- name: Print CodeFlare operator logs
Expand Down Expand Up @@ -135,7 +140,6 @@ jobs:
verify-local_interactive:
# if: ${{ github.event.label.name == 'test-additional-notebooks' }}
# runs-on: ubuntu-20.04-4core
runs-on: ubuntu-20.04-4core

steps:
Expand Down Expand Up @@ -205,13 +209,18 @@ jobs:
# Remove login/logout cells, as KinD doesn't support authentication using token
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object and log in to desired user account")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
# Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
sed -i "s/cluster_uri()/local_client_url()/g" local_interactive.ipynb
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
# Set explicit namespace, as the SDK (currently) needs it to resolve local queues
sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" local_interactive.ipynb
sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" local_interactive.ipynb
cat local_interactive.ipynb
# Run notebook
poetry run papermill local_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
env:
GRPC_DNS_RESOLVER: "native"
working-directory: demo-notebooks/additional-demos

- name: Print CodeFlare operator logs
Expand Down Expand Up @@ -319,13 +328,18 @@ jobs:
# Remove login/logout cells, as KinD doesn't support authentication using token
jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
# Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
sed -i "s/cluster_uri()/local_client_url()/g" ray_job_client.ipynb
# Replace async logs with waiting for job to finish, async logs don't work properly in papermill
JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
# Set explicit namespace, as the SDK (currently) needs it to resolve local queues
sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" ray_job_client.ipynb
sed -i "s/worker_cpu_requests=1,/worker_cpu_requests='250m', namespace='default',/" ray_job_client.ipynb
cat ray_job_client.ipynb
# Run notebook
poetry run papermill ray_job_client.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
env:
GRPC_DNS_RESOLVER: "native"
working-directory: demo-notebooks/additional-demos

- name: Print CodeFlare operator logs
Expand Down

0 comments on commit bf7454a

Please sign in to comment.