diff --git a/examples/check_current_and_past_jobs.ipynb b/examples/check_current_and_past_jobs.ipynb index 2c7829e..e979095 100644 --- a/examples/check_current_and_past_jobs.ipynb +++ b/examples/check_current_and_past_jobs.ipynb @@ -1,926 +1,927 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "83abc862", - "metadata": {}, - "source": [ - "## Check currently running jobs\n", - "\n", - "The `sfapi_client` can easily be used to get your current jobs running on the system, or information about past jobs.\n", - "\n", - "First we'll import the required libraries, `AsyncClient` to handle the requests and the `Machine` enum to get the correct resource." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "c7dd56fa", - "metadata": {}, - "outputs": [], - "source": [ - "# import client library\n", - "from sfapi_client import AsyncClient\n", - "from sfapi_client.compute import Machine\n", - "\n", - "\n", - "# this will help display the outputs later\n", - "import json\n", - "# Print our results nicely to the notebook\n", - "print_json = lambda j: print(json.dumps(j, indent=4))\n", - "\n", - "# Change this to your username\n", - "user_name=\"elvis\"" - ] - }, - { - "cell_type": "markdown", - "id": "747de001", - "metadata": {}, - "source": [ - "The next lines of code will create a client which can be used to get an `Compute` object. This object can be used to look at the current and past job queues, list files and directories, and run simple commands on the system.\n", - "\n", - "In this example we will get the currently running jobs for the user `elvis`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c166ede5", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "async with AsyncClient() as client:\n", - " perlmutter = await client.compute(Machine.perlmutter)\n", - " # This selects just the jobs in the regular cpu partition on perlmutter\n", - " jobs = await perlmutter.jobs(user=user_name, partition='regular_milan_ss11')" - ] - }, - { - "cell_type": "markdown", - "id": "7f215e73", - "metadata": {}, - "source": [ - "All the jobs currently in the job queue are stored in the newly created `jobs` list. An easy way of viewing and this list is to print the objects in the list. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "63b5e715", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[\n", - " {\n", - " \"account\": \"ntrain\",\n", - " \"tres_per_node\": \"N/A\",\n", - " \"min_cpus\": \"1\",\n", - " \"min_tmp_disk\": \"0\",\n", - " \"end_time\": \"N/A\",\n", - " \"features\": \"cpu\",\n", - " \"group\": \"12345\",\n", - " \"over_subscribe\": \"NO\",\n", - " \"jobid\": \"8407414\",\n", - " \"name\": \"large_job.sh\",\n", - " \"comment\": \"(null)\",\n", - " \"time_limit\": \"12:00:00\",\n", - " \"min_memory\": \"0\",\n", - " \"req_nodes\": \"\",\n", - " \"command\": \"/global/homes/e/elvis/job_subs/large_job.sh\",\n", - " \"priority\": \"67684\",\n", - " \"qos\": \"regular_1\",\n", - " \"reason\": \"Priority\",\n", - " \"field_\": null,\n", - " \"st\": \"PD\",\n", - " \"user\": \"elvis\",\n", - " \"reservation\": \"(null)\",\n", - " \"wckey\": \"(null)\",\n", - " \"exc_nodes\": \"\",\n", - " \"nice\": \"0\",\n", - " \"s_c_t\": \"*:*:*\",\n", - " \"exec_host\": \"n/a\",\n", - " \"cpus\": \"25\",\n", - " \"nodes\": \"25\",\n", - " \"dependency\": \"(null)\",\n", - " \"array_job_id\": \"8407414\",\n", - " \"sockets_per_node\": \"*\",\n", - " \"cores_per_socket\": \"*\",\n", - " 
\"threads_per_core\": \"*\",\n", - " \"array_task_id\": \"N/A\",\n", - " \"time_left\": \"12:00:00\",\n", - " \"time\": \"0:00\",\n", - " \"nodelist\": \"\",\n", - " \"contiguous\": \"0\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"nodelist_reason_\": \"(Priority)\",\n", - " \"start_time\": \"N/A\",\n", - " \"state\": \"PENDING\",\n", - " \"uid\": \"12345\",\n", - " \"submit_time\": \"2023-05-02T18:10:51\",\n", - " \"licenses\": \"u1:1\",\n", - " \"core_spec\": \"N/A\",\n", - " \"schednodes\": \"(null)\",\n", - " \"work_dir\": \"/global/homes/e/elvis/job_subs\"\n", - " },\n", - " {\n", - " \"account\": \"ntrain\",\n", - " \"tres_per_node\": \"N/A\",\n", - " \"min_cpus\": \"1\",\n", - " \"min_tmp_disk\": \"0\",\n", - " \"end_time\": \"N/A\",\n", - " \"features\": \"cpu\",\n", - " \"group\": \"12345\",\n", - " \"over_subscribe\": \"NO\",\n", - " \"jobid\": \"8407432\",\n", - " \"name\": \"large_job.sh\",\n", - " \"comment\": \"(null)\",\n", - " \"time_limit\": \"12:00:00\",\n", - " \"min_memory\": \"0\",\n", - " \"req_nodes\": \"\",\n", - " \"command\": \"/global/homes/e/elvis/job_subs/large_job.sh\",\n", - " \"priority\": \"67683\",\n", - " \"qos\": \"regular_1\",\n", - " \"reason\": \"Priority\",\n", - " \"field_\": null,\n", - " \"st\": \"PD\",\n", - " \"user\": \"elvis\",\n", - " \"reservation\": \"(null)\",\n", - " \"wckey\": \"(null)\",\n", - " \"exc_nodes\": \"\",\n", - " \"nice\": \"0\",\n", - " \"s_c_t\": \"*:*:*\",\n", - " \"exec_host\": \"n/a\",\n", - " \"cpus\": \"25\",\n", - " \"nodes\": \"25\",\n", - " \"dependency\": \"(null)\",\n", - " \"array_job_id\": \"8407432\",\n", - " \"sockets_per_node\": \"*\",\n", - " \"cores_per_socket\": \"*\",\n", - " \"threads_per_core\": \"*\",\n", - " \"array_task_id\": \"N/A\",\n", - " \"time_left\": \"12:00:00\",\n", - " \"time\": \"0:00\",\n", - " \"nodelist\": \"\",\n", - " \"contiguous\": \"0\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"nodelist_reason_\": \"(Priority)\",\n", - " 
\"start_time\": \"N/A\",\n", - " \"state\": \"PENDING\",\n", - " \"uid\": \"12345\",\n", - " \"submit_time\": \"2023-05-02T18:11:30\",\n", - " \"licenses\": \"u1:1\",\n", - " \"core_spec\": \"N/A\",\n", - " \"schednodes\": \"(null)\",\n", - " \"work_dir\": \"/global/homes/e/elvis/job_subs\"\n", - " }\n", - "]\n" - ] - } - ], - "source": [ - "print_json([j.dict() for j in jobs])" - ] - }, - { - "cell_type": "markdown", - "id": "b9c272b3", - "metadata": {}, - "source": [ - "A single job can be pulled from the list for further inspection. This returns a `Squeue` object which is the output you would get from running the command `squeue` from slurm. This is useful for getting jobs which are currently pending or running." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "6ed12a56", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "AsyncJobSqueue(account='ntrain', tres_per_node='N/A', min_cpus='1', min_tmp_disk='0', end_time='N/A', features='cpu', group='12345', over_subscribe='NO', jobid='8407414', name='large_job.sh', comment='(null)', time_limit='12:00:00', min_memory='0', req_nodes='', command='/global/homes/e/elvis/job_subs/large_job.sh', priority='67684', qos='regular_1', reason='Priority', field_=None, st='PD', user='elvis', reservation='(null)', wckey='(null)', exc_nodes='', nice='0', s_c_t='*:*:*', exec_host='n/a', cpus='25', nodes='25', dependency='(null)', array_job_id='8407414', sockets_per_node='*', cores_per_socket='*', threads_per_core='*', array_task_id='N/A', time_left='12:00:00', time='0:00', nodelist='', contiguous='0', partition='regular_milan_ss11', nodelist_reason_='(Priority)', start_time='N/A', state=, uid='12345', submit_time='2023-05-02T18:10:51', licenses='u1:1', core_spec='N/A', schednodes='(null)', work_dir='/global/homes/e/elvis/job_subs', compute=AsyncCompute(name='perlmutter', full_name='Perlmutter', description='System is active', system_type='compute', notes=[], status=, 
updated_at=datetime.datetime(2023, 4, 28, 21, 19, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), client=))" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "jobs[0]" - ] - }, - { - "cell_type": "markdown", - "id": "29fd3988", - "metadata": {}, - "source": [ - "More specific information about the job can also be gotten from this object like the number of nodes or it's `jobid`." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "46059d7b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of nodes = 25\n", - "jobid = 8407414\n" - ] - } - ], - "source": [ - "print(f\"Number of nodes = {jobs[0].nodes}\")\n", - "print(f\"jobid = {jobs[0].jobid}\")" - ] - }, - { - "cell_type": "markdown", - "id": "86358832", - "metadata": {}, - "source": [ - "The information for a job can also be retrieved later based on the jobid. This can be useful for seeing if a job completed, failed, or was canceled." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b98307c8", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"account\": \"ntrain\",\n", - " \"admincomment\": \"\",\n", - " \"alloccpus\": \"25\",\n", - " \"allocnodes\": \"0\",\n", - " \"alloctres\": \"\",\n", - " \"associd\": \"206287\",\n", - " \"avecpu\": \"\",\n", - " \"avecpufreq\": \"\",\n", - " \"avediskread\": \"\",\n", - " \"avediskwrite\": \"\",\n", - " \"avepages\": \"\",\n", - " \"averss\": \"\",\n", - " \"avevmsize\": \"\",\n", - " \"blockid\": \"\",\n", - " \"cluster\": \"perlmutter\",\n", - " \"comment\": \"\",\n", - " \"constraints\": \"cpu\",\n", - " \"consumedenergy\": \"0\",\n", - " \"consumedenergyraw\": \"0\",\n", - " \"cputime\": \"00:00:00\",\n", - " \"cputimeraw\": \"0\",\n", - " \"dbindex\": \"64266939\",\n", - " \"derivedexitcode\": \"0:0\",\n", - " \"elapsed\": \"00:00:00\",\n", - " \"elapsedraw\": \"0\",\n", - " \"eligible\": \"2023-05-02T18:10:51\",\n", - " \"end\": \"Unknown\",\n", - " \"exitcode\": \"0:0\",\n", - " \"flags\": \"StartRecieved\",\n", - " \"gid\": \"12345\",\n", - " \"group\": \"elvis\",\n", - " \"jobid\": \"8407414\",\n", - " \"jobidraw\": \"8407414\",\n", - " \"jobname\": \"large_job.sh\",\n", - " \"layout\": \"\",\n", - " \"maxdiskread\": \"\",\n", - " \"maxdiskreadnode\": \"\",\n", - " \"maxdiskreadtask\": \"\",\n", - " \"maxdiskwrite\": \"\",\n", - " \"maxdiskwritenode\": \"\",\n", - " \"maxdiskwritetask\": \"\",\n", - " \"maxpages\": \"\",\n", - " \"maxpagesnode\": \"\",\n", - " \"maxpagestask\": \"\",\n", - " \"maxrss\": \"\",\n", - " \"maxrssnode\": \"\",\n", - " \"maxrsstask\": \"\",\n", - " \"maxvmsize\": \"\",\n", - " \"maxvmsizenode\": \"\",\n", - " \"maxvmsizetask\": \"\",\n", - " \"mcslabel\": \"\",\n", - " \"mincpu\": \"\",\n", - " \"mincpunode\": \"\",\n", - " \"mincputask\": \"\",\n", - " \"ncpus\": \"25\",\n", - " \"nnodes\": \"25\",\n", - " 
\"nodelist\": \"None assigned\",\n", - " \"ntasks\": \"\",\n", - " \"priority\": \"67679\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"qos\": \"regular_1\",\n", - " \"qosraw\": \"16\",\n", - " \"reason\": \"None\",\n", - " \"reqcpufreq\": \"Unknown\",\n", - " \"reqcpufreqmin\": \"Unknown\",\n", - " \"reqcpufreqmax\": \"Unknown\",\n", - " \"reqcpufreqgov\": \"Unknown\",\n", - " \"reqcpus\": \"25\",\n", - " \"reqmem\": \"12200050M\",\n", - " \"reqnodes\": \"25\",\n", - " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", - " \"reservation\": \"\",\n", - " \"reservationid\": \"\",\n", - " \"reserved\": null,\n", - " \"resvcpu\": null,\n", - " \"resvcpuraw\": null,\n", - " \"start\": \"Unknown\",\n", - " \"state\": \"PENDING\",\n", - " \"submit\": \"2023-05-02T18:10:51\",\n", - " \"suspended\": \"00:00:00\",\n", - " \"systemcpu\": \"00:00:00\",\n", - " \"systemcomment\": \"\",\n", - " \"timelimit\": \"12:00:00\",\n", - " \"timelimitraw\": \"720\",\n", - " \"totalcpu\": \"00:00:00\",\n", - " \"tresusageinave\": \"\",\n", - " \"tresusageinmax\": \"\",\n", - " \"tresusageinmaxnode\": \"\",\n", - " \"tresusageinmaxtask\": \"\",\n", - " \"tresusageinmin\": \"\",\n", - " \"tresusageinminnode\": \"\",\n", - " \"tresusageinmintask\": \"\",\n", - " \"tresusageintot\": \"\",\n", - " \"tresusageoutave\": \"\",\n", - " \"tresusageoutmax\": \"\",\n", - " \"tresusageoutmaxnode\": \"\",\n", - " \"tresusageoutmaxtask\": \"\",\n", - " \"tresusageoutmin\": \"\",\n", - " \"tresusageoutminnode\": \"\",\n", - " \"tresusageoutmintask\": \"\",\n", - " \"tresusageouttot\": \"\",\n", - " \"uid\": \"12345\",\n", - " \"user\": \"elvis\",\n", - " \"usercpu\": \"00:00:00\",\n", - " \"wckey\": \"\",\n", - " \"wckeyid\": \"0\",\n", - " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", - "}\n" - ] - } - ], - "source": [ - "async with AsyncClient() as client:\n", - " perlmutter = await client.compute(Machine.perlmutter)\n", - " # Get the running job based on it's job ID\n", 
- " job = await perlmutter.job(jobid=jobs[0].jobid)\n", - " # Print out the current status of the job\n", - "\n", - "print_json(job.dict())" - ] - }, - { - "cell_type": "markdown", - "id": "68d65541", - "metadata": {}, - "source": [ - "Multiple jobs can also be retrieved simultaniosly later on by creating a list of jobids. This can reduce the amount of calls needed to the Superfacility REST Api and get your results back" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5f854ed6", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[\n", - " {\n", - " \"account\": \"ntrain\",\n", - " \"admincomment\": \"\",\n", - " \"alloccpus\": \"25\",\n", - " \"allocnodes\": \"0\",\n", - " \"alloctres\": \"\",\n", - " \"associd\": \"206287\",\n", - " \"avecpu\": \"\",\n", - " \"avecpufreq\": \"\",\n", - " \"avediskread\": \"\",\n", - " \"avediskwrite\": \"\",\n", - " \"avepages\": \"\",\n", - " \"averss\": \"\",\n", - " \"avevmsize\": \"\",\n", - " \"blockid\": \"\",\n", - " \"cluster\": \"perlmutter\",\n", - " \"comment\": \"\",\n", - " \"constraints\": \"cpu\",\n", - " \"consumedenergy\": \"0\",\n", - " \"consumedenergyraw\": \"0\",\n", - " \"cputime\": \"00:00:00\",\n", - " \"cputimeraw\": \"0\",\n", - " \"dbindex\": \"64266939\",\n", - " \"derivedexitcode\": \"0:0\",\n", - " \"elapsed\": \"00:00:00\",\n", - " \"elapsedraw\": \"0\",\n", - " \"eligible\": \"2023-05-02T18:10:51\",\n", - " \"end\": \"Unknown\",\n", - " \"exitcode\": \"0:0\",\n", - " \"flags\": \"StartRecieved\",\n", - " \"gid\": \"12345\",\n", - " \"group\": \"elvis\",\n", - " \"jobid\": \"8407414\",\n", - " \"jobidraw\": \"8407414\",\n", - " \"jobname\": \"large_job.sh\",\n", - " \"layout\": \"\",\n", - " \"maxdiskread\": \"\",\n", - " \"maxdiskreadnode\": \"\",\n", - " \"maxdiskreadtask\": \"\",\n", - " \"maxdiskwrite\": \"\",\n", - " \"maxdiskwritenode\": \"\",\n", - " \"maxdiskwritetask\": \"\",\n", - " \"maxpages\": 
\"\",\n", - " \"maxpagesnode\": \"\",\n", - " \"maxpagestask\": \"\",\n", - " \"maxrss\": \"\",\n", - " \"maxrssnode\": \"\",\n", - " \"maxrsstask\": \"\",\n", - " \"maxvmsize\": \"\",\n", - " \"maxvmsizenode\": \"\",\n", - " \"maxvmsizetask\": \"\",\n", - " \"mcslabel\": \"\",\n", - " \"mincpu\": \"\",\n", - " \"mincpunode\": \"\",\n", - " \"mincputask\": \"\",\n", - " \"ncpus\": \"25\",\n", - " \"nnodes\": \"25\",\n", - " \"nodelist\": \"None assigned\",\n", - " \"ntasks\": \"\",\n", - " \"priority\": \"67679\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"qos\": \"regular_1\",\n", - " \"qosraw\": \"16\",\n", - " \"reason\": \"None\",\n", - " \"reqcpufreq\": \"Unknown\",\n", - " \"reqcpufreqmin\": \"Unknown\",\n", - " \"reqcpufreqmax\": \"Unknown\",\n", - " \"reqcpufreqgov\": \"Unknown\",\n", - " \"reqcpus\": \"25\",\n", - " \"reqmem\": \"12200050M\",\n", - " \"reqnodes\": \"25\",\n", - " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", - " \"reservation\": \"\",\n", - " \"reservationid\": \"\",\n", - " \"reserved\": null,\n", - " \"resvcpu\": null,\n", - " \"resvcpuraw\": null,\n", - " \"start\": \"Unknown\",\n", - " \"state\": \"PENDING\",\n", - " \"submit\": \"2023-05-02T18:10:51\",\n", - " \"suspended\": \"00:00:00\",\n", - " \"systemcpu\": \"00:00:00\",\n", - " \"systemcomment\": \"\",\n", - " \"timelimit\": \"12:00:00\",\n", - " \"timelimitraw\": \"720\",\n", - " \"totalcpu\": \"00:00:00\",\n", - " \"tresusageinave\": \"\",\n", - " \"tresusageinmax\": \"\",\n", - " \"tresusageinmaxnode\": \"\",\n", - " \"tresusageinmaxtask\": \"\",\n", - " \"tresusageinmin\": \"\",\n", - " \"tresusageinminnode\": \"\",\n", - " \"tresusageinmintask\": \"\",\n", - " \"tresusageintot\": \"\",\n", - " \"tresusageoutave\": \"\",\n", - " \"tresusageoutmax\": \"\",\n", - " \"tresusageoutmaxnode\": \"\",\n", - " \"tresusageoutmaxtask\": \"\",\n", - " \"tresusageoutmin\": \"\",\n", - " \"tresusageoutminnode\": \"\",\n", - " \"tresusageoutmintask\": \"\",\n", 
- " \"tresusageouttot\": \"\",\n", - " \"uid\": \"12345\",\n", - " \"user\": \"elvis\",\n", - " \"usercpu\": \"00:00:00\",\n", - " \"wckey\": \"\",\n", - " \"wckeyid\": \"0\",\n", - " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", - " },\n", - " {\n", - " \"account\": \"ntrain\",\n", - " \"admincomment\": \"\",\n", - " \"alloccpus\": \"25\",\n", - " \"allocnodes\": \"0\",\n", - " \"alloctres\": \"\",\n", - " \"associd\": \"206287\",\n", - " \"avecpu\": \"\",\n", - " \"avecpufreq\": \"\",\n", - " \"avediskread\": \"\",\n", - " \"avediskwrite\": \"\",\n", - " \"avepages\": \"\",\n", - " \"averss\": \"\",\n", - " \"avevmsize\": \"\",\n", - " \"blockid\": \"\",\n", - " \"cluster\": \"perlmutter\",\n", - " \"comment\": \"\",\n", - " \"constraints\": \"cpu\",\n", - " \"consumedenergy\": \"0\",\n", - " \"consumedenergyraw\": \"0\",\n", - " \"cputime\": \"00:00:00\",\n", - " \"cputimeraw\": \"0\",\n", - " \"dbindex\": \"64267039\",\n", - " \"derivedexitcode\": \"0:0\",\n", - " \"elapsed\": \"00:00:00\",\n", - " \"elapsedraw\": \"0\",\n", - " \"eligible\": \"2023-05-02T18:11:30\",\n", - " \"end\": \"Unknown\",\n", - " \"exitcode\": \"0:0\",\n", - " \"flags\": \"StartRecieved\",\n", - " \"gid\": \"12345\",\n", - " \"group\": \"elvis\",\n", - " \"jobid\": \"8407432\",\n", - " \"jobidraw\": \"8407432\",\n", - " \"jobname\": \"large_job.sh\",\n", - " \"layout\": \"\",\n", - " \"maxdiskread\": \"\",\n", - " \"maxdiskreadnode\": \"\",\n", - " \"maxdiskreadtask\": \"\",\n", - " \"maxdiskwrite\": \"\",\n", - " \"maxdiskwritenode\": \"\",\n", - " \"maxdiskwritetask\": \"\",\n", - " \"maxpages\": \"\",\n", - " \"maxpagesnode\": \"\",\n", - " \"maxpagestask\": \"\",\n", - " \"maxrss\": \"\",\n", - " \"maxrssnode\": \"\",\n", - " \"maxrsstask\": \"\",\n", - " \"maxvmsize\": \"\",\n", - " \"maxvmsizenode\": \"\",\n", - " \"maxvmsizetask\": \"\",\n", - " \"mcslabel\": \"\",\n", - " \"mincpu\": \"\",\n", - " \"mincpunode\": \"\",\n", - " \"mincputask\": \"\",\n", - " \"ncpus\": 
\"25\",\n", - " \"nnodes\": \"25\",\n", - " \"nodelist\": \"None assigned\",\n", - " \"ntasks\": \"\",\n", - " \"priority\": \"67679\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"qos\": \"regular_1\",\n", - " \"qosraw\": \"16\",\n", - " \"reason\": \"None\",\n", - " \"reqcpufreq\": \"Unknown\",\n", - " \"reqcpufreqmin\": \"Unknown\",\n", - " \"reqcpufreqmax\": \"Unknown\",\n", - " \"reqcpufreqgov\": \"Unknown\",\n", - " \"reqcpus\": \"25\",\n", - " \"reqmem\": \"12200050M\",\n", - " \"reqnodes\": \"25\",\n", - " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", - " \"reservation\": \"\",\n", - " \"reservationid\": \"\",\n", - " \"reserved\": null,\n", - " \"resvcpu\": null,\n", - " \"resvcpuraw\": null,\n", - " \"start\": \"Unknown\",\n", - " \"state\": \"PENDING\",\n", - " \"submit\": \"2023-05-02T18:11:30\",\n", - " \"suspended\": \"00:00:00\",\n", - " \"systemcpu\": \"00:00:00\",\n", - " \"systemcomment\": \"\",\n", - " \"timelimit\": \"12:00:00\",\n", - " \"timelimitraw\": \"720\",\n", - " \"totalcpu\": \"00:00:00\",\n", - " \"tresusageinave\": \"\",\n", - " \"tresusageinmax\": \"\",\n", - " \"tresusageinmaxnode\": \"\",\n", - " \"tresusageinmaxtask\": \"\",\n", - " \"tresusageinmin\": \"\",\n", - " \"tresusageinminnode\": \"\",\n", - " \"tresusageinmintask\": \"\",\n", - " \"tresusageintot\": \"\",\n", - " \"tresusageoutave\": \"\",\n", - " \"tresusageoutmax\": \"\",\n", - " \"tresusageoutmaxnode\": \"\",\n", - " \"tresusageoutmaxtask\": \"\",\n", - " \"tresusageoutmin\": \"\",\n", - " \"tresusageoutminnode\": \"\",\n", - " \"tresusageoutmintask\": \"\",\n", - " \"tresusageouttot\": \"\",\n", - " \"uid\": \"12345\",\n", - " \"user\": \"elvis\",\n", - " \"usercpu\": \"00:00:00\",\n", - " \"wckey\": \"\",\n", - " \"wckeyid\": \"0\",\n", - " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", - " }\n", - "]\n" - ] - } - ], - "source": [ - "async with AsyncClient() as client:\n", - " perlmutter = await 
client.compute(Machine.perlmutter)\n", - " # Get the running job based on it's job ID\n", - " # This time we'll get information from sacct\n", - " jobs = await perlmutter.jobs(jobids=[jobs[0].jobid, jobs[1].jobid], command='sacct')\n", - " # Print out the current status of the job\n", - "\n", - "print_json([j.dict() for j in jobs])" - ] - }, - { - "cell_type": "markdown", - "id": "e6903f6f", - "metadata": {}, - "source": [ - "Jobs can also be interacted with later based on their jobid. This includes canceling the jobs and seeing their final status." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "4a9c3e14", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[\n", - " {\n", - " \"account\": \"ntrain\",\n", - " \"admincomment\": \"{\\\"resizing\\\":0,\\\"features\\\":\\\"cpu\\\",\\\"arrayTaskId\\\":4294967294,\\\"qos\\\":\\\"regular_1\\\",\\\"arrayJobId\\\":0,\\\"jobAccount\\\":\\\"ntrain\\\",\\\"submitTime\\\":1683076251,\\\"partition\\\":\\\"regular_milan_ss11\\\",\\\"uid\\\":12345,\\\"cluster\\\":\\\"perlmutter\\\",\\\"argv\\\":[\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\\/large_job.sh\\\"],\\\"gresRequest\\\":\\\"cpu=25,mem=12200050M,node=25,billing=25\\\",\\\"licenses\\\":\\\"u1:1\\\",\\\"name\\\":\\\"large_job.sh\\\",\\\"stdinPath\\\":\\\"\\\\/dev\\\\/null\\\",\\\"timeLimit\\\":720,\\\"packJobId\\\":0,\\\"jobId\\\":8407414,\\\"allocNodes\\\":0,\\\"allocCpus\\\":0,\\\"workingDirectory\\\":\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\",\\\"restartCnt\\\":0,\\\"jobExitCode\\\":0,\\\"reboot\\\":0,\\\"startTime\\\":1683077035,\\\"priority\\\":67684,\\\"endTime\\\":1683077035,\\\"jobDerivedExitCode\\\":0,\\\"packJobOffset\\\":0}\",\n", - " \"alloccpus\": \"25\",\n", - " \"allocnodes\": \"0\",\n", - " \"alloctres\": \"\",\n", - " \"associd\": \"206287\",\n", - " \"avecpu\": \"\",\n", - " \"avecpufreq\": \"\",\n", - " \"avediskread\": \"\",\n", - " 
\"avediskwrite\": \"\",\n", - " \"avepages\": \"\",\n", - " \"averss\": \"\",\n", - " \"avevmsize\": \"\",\n", - " \"blockid\": \"\",\n", - " \"cluster\": \"perlmutter\",\n", - " \"comment\": \"\",\n", - " \"constraints\": \"cpu\",\n", - " \"consumedenergy\": \"0\",\n", - " \"consumedenergyraw\": \"0\",\n", - " \"cputime\": \"00:00:00\",\n", - " \"cputimeraw\": \"0\",\n", - " \"dbindex\": \"64266939\",\n", - " \"derivedexitcode\": \"0:0\",\n", - " \"elapsed\": \"00:00:00\",\n", - " \"elapsedraw\": \"0\",\n", - " \"eligible\": \"2023-05-02T18:10:51\",\n", - " \"end\": \"2023-05-02T18:23:55\",\n", - " \"exitcode\": \"0:0\",\n", - " \"flags\": \"StartRecieved\",\n", - " \"gid\": \"12345\",\n", - " \"group\": \"elvis\",\n", - " \"jobid\": \"8407414\",\n", - " \"jobidraw\": \"8407414\",\n", - " \"jobname\": \"large_job.sh\",\n", - " \"layout\": \"\",\n", - " \"maxdiskread\": \"\",\n", - " \"maxdiskreadnode\": \"\",\n", - " \"maxdiskreadtask\": \"\",\n", - " \"maxdiskwrite\": \"\",\n", - " \"maxdiskwritenode\": \"\",\n", - " \"maxdiskwritetask\": \"\",\n", - " \"maxpages\": \"\",\n", - " \"maxpagesnode\": \"\",\n", - " \"maxpagestask\": \"\",\n", - " \"maxrss\": \"\",\n", - " \"maxrssnode\": \"\",\n", - " \"maxrsstask\": \"\",\n", - " \"maxvmsize\": \"\",\n", - " \"maxvmsizenode\": \"\",\n", - " \"maxvmsizetask\": \"\",\n", - " \"mcslabel\": \"\",\n", - " \"mincpu\": \"\",\n", - " \"mincpunode\": \"\",\n", - " \"mincputask\": \"\",\n", - " \"ncpus\": \"25\",\n", - " \"nnodes\": \"25\",\n", - " \"nodelist\": \"None assigned\",\n", - " \"ntasks\": \"\",\n", - " \"priority\": \"67679\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"qos\": \"regular_1\",\n", - " \"qosraw\": \"16\",\n", - " \"reason\": \"None\",\n", - " \"reqcpufreq\": \"Unknown\",\n", - " \"reqcpufreqmin\": \"Unknown\",\n", - " \"reqcpufreqmax\": \"Unknown\",\n", - " \"reqcpufreqgov\": \"Unknown\",\n", - " \"reqcpus\": \"25\",\n", - " \"reqmem\": \"12200050M\",\n", - " \"reqnodes\": \"25\",\n", - 
" \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", - " \"reservation\": \"\",\n", - " \"reservationid\": \"\",\n", - " \"reserved\": null,\n", - " \"resvcpu\": null,\n", - " \"resvcpuraw\": null,\n", - " \"start\": \"None\",\n", - " \"state\": \"CANCELLED\",\n", - " \"submit\": \"2023-05-02T18:10:51\",\n", - " \"suspended\": \"00:00:00\",\n", - " \"systemcpu\": \"00:00:00\",\n", - " \"systemcomment\": \"\",\n", - " \"timelimit\": \"12:00:00\",\n", - " \"timelimitraw\": \"720\",\n", - " \"totalcpu\": \"00:00:00\",\n", - " \"tresusageinave\": \"\",\n", - " \"tresusageinmax\": \"\",\n", - " \"tresusageinmaxnode\": \"\",\n", - " \"tresusageinmaxtask\": \"\",\n", - " \"tresusageinmin\": \"\",\n", - " \"tresusageinminnode\": \"\",\n", - " \"tresusageinmintask\": \"\",\n", - " \"tresusageintot\": \"\",\n", - " \"tresusageoutave\": \"\",\n", - " \"tresusageoutmax\": \"\",\n", - " \"tresusageoutmaxnode\": \"\",\n", - " \"tresusageoutmaxtask\": \"\",\n", - " \"tresusageoutmin\": \"\",\n", - " \"tresusageoutminnode\": \"\",\n", - " \"tresusageoutmintask\": \"\",\n", - " \"tresusageouttot\": \"\",\n", - " \"uid\": \"12345\",\n", - " \"user\": \"elvis\",\n", - " \"usercpu\": \"00:00:00\",\n", - " \"wckey\": \"\",\n", - " \"wckeyid\": \"0\",\n", - " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", - " },\n", - " {\n", - " \"account\": \"ntrain\",\n", - " \"admincomment\": 
\"{\\\"resizing\\\":0,\\\"features\\\":\\\"cpu\\\",\\\"arrayTaskId\\\":4294967294,\\\"qos\\\":\\\"regular_1\\\",\\\"arrayJobId\\\":0,\\\"jobAccount\\\":\\\"ntrain\\\",\\\"submitTime\\\":1683076290,\\\"partition\\\":\\\"regular_milan_ss11\\\",\\\"uid\\\":12345,\\\"cluster\\\":\\\"perlmutter\\\",\\\"argv\\\":[\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\\/large_job.sh\\\"],\\\"gresRequest\\\":\\\"cpu=25,mem=12200050M,node=25,billing=25\\\",\\\"licenses\\\":\\\"u1:1\\\",\\\"name\\\":\\\"large_job.sh\\\",\\\"stdinPath\\\":\\\"\\\\/dev\\\\/null\\\",\\\"timeLimit\\\":720,\\\"packJobId\\\":0,\\\"jobId\\\":8407432,\\\"allocNodes\\\":0,\\\"allocCpus\\\":0,\\\"workingDirectory\\\":\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\",\\\"restartCnt\\\":0,\\\"jobExitCode\\\":0,\\\"reboot\\\":0,\\\"startTime\\\":1683077066,\\\"priority\\\":67683,\\\"endTime\\\":1683077066,\\\"jobDerivedExitCode\\\":0,\\\"packJobOffset\\\":0}\",\n", - " \"alloccpus\": \"25\",\n", - " \"allocnodes\": \"0\",\n", - " \"alloctres\": \"\",\n", - " \"associd\": \"206287\",\n", - " \"avecpu\": \"\",\n", - " \"avecpufreq\": \"\",\n", - " \"avediskread\": \"\",\n", - " \"avediskwrite\": \"\",\n", - " \"avepages\": \"\",\n", - " \"averss\": \"\",\n", - " \"avevmsize\": \"\",\n", - " \"blockid\": \"\",\n", - " \"cluster\": \"perlmutter\",\n", - " \"comment\": \"\",\n", - " \"constraints\": \"cpu\",\n", - " \"consumedenergy\": \"0\",\n", - " \"consumedenergyraw\": \"0\",\n", - " \"cputime\": \"00:00:00\",\n", - " \"cputimeraw\": \"0\",\n", - " \"dbindex\": \"64267039\",\n", - " \"derivedexitcode\": \"0:0\",\n", - " \"elapsed\": \"00:00:00\",\n", - " \"elapsedraw\": \"0\",\n", - " \"eligible\": \"2023-05-02T18:11:30\",\n", - " \"end\": \"2023-05-02T18:24:26\",\n", - " \"exitcode\": \"0:0\",\n", - " \"flags\": \"StartRecieved\",\n", - " \"gid\": \"12345\",\n", - " \"group\": \"elvis\",\n", - " \"jobid\": \"8407432\",\n", - " \"jobidraw\": \"8407432\",\n", - " \"jobname\": \"large_job.sh\",\n", - " 
\"layout\": \"\",\n", - " \"maxdiskread\": \"\",\n", - " \"maxdiskreadnode\": \"\",\n", - " \"maxdiskreadtask\": \"\",\n", - " \"maxdiskwrite\": \"\",\n", - " \"maxdiskwritenode\": \"\",\n", - " \"maxdiskwritetask\": \"\",\n", - " \"maxpages\": \"\",\n", - " \"maxpagesnode\": \"\",\n", - " \"maxpagestask\": \"\",\n", - " \"maxrss\": \"\",\n", - " \"maxrssnode\": \"\",\n", - " \"maxrsstask\": \"\",\n", - " \"maxvmsize\": \"\",\n", - " \"maxvmsizenode\": \"\",\n", - " \"maxvmsizetask\": \"\",\n", - " \"mcslabel\": \"\",\n", - " \"mincpu\": \"\",\n", - " \"mincpunode\": \"\",\n", - " \"mincputask\": \"\",\n", - " \"ncpus\": \"25\",\n", - " \"nnodes\": \"25\",\n", - " \"nodelist\": \"None assigned\",\n", - " \"ntasks\": \"\",\n", - " \"priority\": \"67679\",\n", - " \"partition\": \"regular_milan_ss11\",\n", - " \"qos\": \"regular_1\",\n", - " \"qosraw\": \"16\",\n", - " \"reason\": \"None\",\n", - " \"reqcpufreq\": \"Unknown\",\n", - " \"reqcpufreqmin\": \"Unknown\",\n", - " \"reqcpufreqmax\": \"Unknown\",\n", - " \"reqcpufreqgov\": \"Unknown\",\n", - " \"reqcpus\": \"25\",\n", - " \"reqmem\": \"12200050M\",\n", - " \"reqnodes\": \"25\",\n", - " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", - " \"reservation\": \"\",\n", - " \"reservationid\": \"\",\n", - " \"reserved\": null,\n", - " \"resvcpu\": null,\n", - " \"resvcpuraw\": null,\n", - " \"start\": \"None\",\n", - " \"state\": \"CANCELLED\",\n", - " \"submit\": \"2023-05-02T18:11:30\",\n", - " \"suspended\": \"00:00:00\",\n", - " \"systemcpu\": \"00:00:00\",\n", - " \"systemcomment\": \"\",\n", - " \"timelimit\": \"12:00:00\",\n", - " \"timelimitraw\": \"720\",\n", - " \"totalcpu\": \"00:00:00\",\n", - " \"tresusageinave\": \"\",\n", - " \"tresusageinmax\": \"\",\n", - " \"tresusageinmaxnode\": \"\",\n", - " \"tresusageinmaxtask\": \"\",\n", - " \"tresusageinmin\": \"\",\n", - " \"tresusageinminnode\": \"\",\n", - " \"tresusageinmintask\": \"\",\n", - " \"tresusageintot\": \"\",\n", - " 
\"tresusageoutave\": \"\",\n", - " \"tresusageoutmax\": \"\",\n", - " \"tresusageoutmaxnode\": \"\",\n", - " \"tresusageoutmaxtask\": \"\",\n", - " \"tresusageoutmin\": \"\",\n", - " \"tresusageoutminnode\": \"\",\n", - " \"tresusageoutmintask\": \"\",\n", - " \"tresusageouttot\": \"\",\n", - " \"uid\": \"12345\",\n", - " \"user\": \"elvis\",\n", - " \"usercpu\": \"00:00:00\",\n", - " \"wckey\": \"\",\n", - " \"wckeyid\": \"0\",\n", - " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", - " }\n", - "]\n" - ] - } - ], - "source": [ - "async with AsyncClient() as client:\n", - " perlmutter = await client.compute(Machine.perlmutter)\n", - " # Get the running job based on it's job ID\n", - " # This time we'll get information from sacct\n", - " jobs = await perlmutter.jobs(jobids=[jobs[0].jobid, jobs[1].jobid], command='sacct')\n", - " for job in jobs:\n", - " await job.cancel(wait=True)\n", - " await job.update()\n", - " \n", - "\n", - "print_json([j.dict() for j in jobs])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "cells": [ + { + "cell_type": "markdown", + "id": "83abc862", + "metadata": {}, + "source": [ + "## Check currently running jobs\n", + "\n", + "The `sfapi_client` can easily be used to get your current jobs running on the system, or information about past jobs.\n", + "\n", + "First we'll import the required libraries, `AsyncClient` to handle the requests and the `Machine` enum to get the correct resource." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c7dd56fa", + "metadata": {}, + "outputs": [], + "source": [ + "# import client library\n", + "from sfapi_client import AsyncClient\n", + "from sfapi_client.compute import Machine\n", + "\n", + "\n", + "# this will help display the outputs later\n", + "import json\n", + "# Print our results nicely to the notebook\n", + "def print_json(j):\n", + " return print(json.dumps(j, indent=4))\n", + "\n", + "# Change this to your username\n", + "user_name=\"elvis\"" + ] + }, + { + "cell_type": "markdown", + "id": "747de001", + "metadata": {}, + "source": [ + "The next lines of code will create a client which can be used to get a `Compute` object. This object can be used to look at the current and past job queues, list files and directories, and run simple commands on the system.\n", + "\n", + "In this example we will get the currently running jobs for the user `elvis`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c166ede5", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "async with AsyncClient() as client:\n", + " perlmutter = await client.compute(Machine.perlmutter)\n", + " # This selects just the jobs in the regular cpu partition on perlmutter\n", + " jobs = await perlmutter.jobs(user=user_name, partition='regular_milan_ss11')" + ] + }, + { + "cell_type": "markdown", + "id": "7f215e73", + "metadata": {}, + "source": [ + "All the jobs currently in the job queue are stored in the newly created `jobs` list. An easy way of viewing this list is to print the objects in the list. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "63b5e715", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"account\": \"ntrain\",\n", + " \"tres_per_node\": \"N/A\",\n", + " \"min_cpus\": \"1\",\n", + " \"min_tmp_disk\": \"0\",\n", + " \"end_time\": \"N/A\",\n", + " \"features\": \"cpu\",\n", + " \"group\": \"12345\",\n", + " \"over_subscribe\": \"NO\",\n", + " \"jobid\": \"8407414\",\n", + " \"name\": \"large_job.sh\",\n", + " \"comment\": \"(null)\",\n", + " \"time_limit\": \"12:00:00\",\n", + " \"min_memory\": \"0\",\n", + " \"req_nodes\": \"\",\n", + " \"command\": \"/global/homes/e/elvis/job_subs/large_job.sh\",\n", + " \"priority\": \"67684\",\n", + " \"qos\": \"regular_1\",\n", + " \"reason\": \"Priority\",\n", + " \"field_\": null,\n", + " \"st\": \"PD\",\n", + " \"user\": \"elvis\",\n", + " \"reservation\": \"(null)\",\n", + " \"wckey\": \"(null)\",\n", + " \"exc_nodes\": \"\",\n", + " \"nice\": \"0\",\n", + " \"s_c_t\": \"*:*:*\",\n", + " \"exec_host\": \"n/a\",\n", + " \"cpus\": \"25\",\n", + " \"nodes\": \"25\",\n", + " \"dependency\": \"(null)\",\n", + " \"array_job_id\": \"8407414\",\n", + " \"sockets_per_node\": \"*\",\n", + " \"cores_per_socket\": \"*\",\n", + " \"threads_per_core\": \"*\",\n", + " \"array_task_id\": \"N/A\",\n", + " \"time_left\": \"12:00:00\",\n", + " \"time\": \"0:00\",\n", + " \"nodelist\": \"\",\n", + " \"contiguous\": \"0\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"nodelist_reason_\": \"(Priority)\",\n", + " \"start_time\": \"N/A\",\n", + " \"state\": \"PENDING\",\n", + " \"uid\": \"12345\",\n", + " \"submit_time\": \"2023-05-02T18:10:51\",\n", + " \"licenses\": \"u1:1\",\n", + " \"core_spec\": \"N/A\",\n", + " \"schednodes\": \"(null)\",\n", + " \"work_dir\": \"/global/homes/e/elvis/job_subs\"\n", + " },\n", + " {\n", + " \"account\": \"ntrain\",\n", + " \"tres_per_node\": \"N/A\",\n", + " 
\"min_cpus\": \"1\",\n", + " \"min_tmp_disk\": \"0\",\n", + " \"end_time\": \"N/A\",\n", + " \"features\": \"cpu\",\n", + " \"group\": \"12345\",\n", + " \"over_subscribe\": \"NO\",\n", + " \"jobid\": \"8407432\",\n", + " \"name\": \"large_job.sh\",\n", + " \"comment\": \"(null)\",\n", + " \"time_limit\": \"12:00:00\",\n", + " \"min_memory\": \"0\",\n", + " \"req_nodes\": \"\",\n", + " \"command\": \"/global/homes/e/elvis/job_subs/large_job.sh\",\n", + " \"priority\": \"67683\",\n", + " \"qos\": \"regular_1\",\n", + " \"reason\": \"Priority\",\n", + " \"field_\": null,\n", + " \"st\": \"PD\",\n", + " \"user\": \"elvis\",\n", + " \"reservation\": \"(null)\",\n", + " \"wckey\": \"(null)\",\n", + " \"exc_nodes\": \"\",\n", + " \"nice\": \"0\",\n", + " \"s_c_t\": \"*:*:*\",\n", + " \"exec_host\": \"n/a\",\n", + " \"cpus\": \"25\",\n", + " \"nodes\": \"25\",\n", + " \"dependency\": \"(null)\",\n", + " \"array_job_id\": \"8407432\",\n", + " \"sockets_per_node\": \"*\",\n", + " \"cores_per_socket\": \"*\",\n", + " \"threads_per_core\": \"*\",\n", + " \"array_task_id\": \"N/A\",\n", + " \"time_left\": \"12:00:00\",\n", + " \"time\": \"0:00\",\n", + " \"nodelist\": \"\",\n", + " \"contiguous\": \"0\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"nodelist_reason_\": \"(Priority)\",\n", + " \"start_time\": \"N/A\",\n", + " \"state\": \"PENDING\",\n", + " \"uid\": \"12345\",\n", + " \"submit_time\": \"2023-05-02T18:11:30\",\n", + " \"licenses\": \"u1:1\",\n", + " \"core_spec\": \"N/A\",\n", + " \"schednodes\": \"(null)\",\n", + " \"work_dir\": \"/global/homes/e/elvis/job_subs\"\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "print_json([j.dict() for j in jobs])" + ] + }, + { + "cell_type": "markdown", + "id": "b9c272b3", + "metadata": {}, + "source": [ + "A single job can be pulled from the list for further inspection. This returns a `Squeue` object which is the output you would get from running the command `squeue` from slurm. 
This is useful for getting jobs which are currently pending or running." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6ed12a56", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AsyncJobSqueue(account='ntrain', tres_per_node='N/A', min_cpus='1', min_tmp_disk='0', end_time='N/A', features='cpu', group='12345', over_subscribe='NO', jobid='8407414', name='large_job.sh', comment='(null)', time_limit='12:00:00', min_memory='0', req_nodes='', command='/global/homes/e/elvis/job_subs/large_job.sh', priority='67684', qos='regular_1', reason='Priority', field_=None, st='PD', user='elvis', reservation='(null)', wckey='(null)', exc_nodes='', nice='0', s_c_t='*:*:*', exec_host='n/a', cpus='25', nodes='25', dependency='(null)', array_job_id='8407414', sockets_per_node='*', cores_per_socket='*', threads_per_core='*', array_task_id='N/A', time_left='12:00:00', time='0:00', nodelist='', contiguous='0', partition='regular_milan_ss11', nodelist_reason_='(Priority)', start_time='N/A', state=, uid='12345', submit_time='2023-05-02T18:10:51', licenses='u1:1', core_spec='N/A', schednodes='(null)', work_dir='/global/homes/e/elvis/job_subs', compute=AsyncCompute(name='perlmutter', full_name='Perlmutter', description='System is active', system_type='compute', notes=[], status=, updated_at=datetime.datetime(2023, 4, 28, 21, 19, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), client=))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs[0]" + ] + }, + { + "cell_type": "markdown", + "id": "29fd3988", + "metadata": {}, + "source": [ + "More specific information about the job can also be gotten from this object like the number of nodes or it's `jobid`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "46059d7b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of nodes = 25\n", + "jobid = 8407414\n" + ] + } + ], + "source": [ + "print(f\"Number of nodes = {jobs[0].nodes}\")\n", + "print(f\"jobid = {jobs[0].jobid}\")" + ] + }, + { + "cell_type": "markdown", + "id": "86358832", + "metadata": {}, + "source": [ + "The information for a job can also be retrieved later based on the jobid. This can be useful for seeing if a job completed, failed, or was canceled." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b98307c8", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"account\": \"ntrain\",\n", + " \"admincomment\": \"\",\n", + " \"alloccpus\": \"25\",\n", + " \"allocnodes\": \"0\",\n", + " \"alloctres\": \"\",\n", + " \"associd\": \"206287\",\n", + " \"avecpu\": \"\",\n", + " \"avecpufreq\": \"\",\n", + " \"avediskread\": \"\",\n", + " \"avediskwrite\": \"\",\n", + " \"avepages\": \"\",\n", + " \"averss\": \"\",\n", + " \"avevmsize\": \"\",\n", + " \"blockid\": \"\",\n", + " \"cluster\": \"perlmutter\",\n", + " \"comment\": \"\",\n", + " \"constraints\": \"cpu\",\n", + " \"consumedenergy\": \"0\",\n", + " \"consumedenergyraw\": \"0\",\n", + " \"cputime\": \"00:00:00\",\n", + " \"cputimeraw\": \"0\",\n", + " \"dbindex\": \"64266939\",\n", + " \"derivedexitcode\": \"0:0\",\n", + " \"elapsed\": \"00:00:00\",\n", + " \"elapsedraw\": \"0\",\n", + " \"eligible\": \"2023-05-02T18:10:51\",\n", + " \"end\": \"Unknown\",\n", + " \"exitcode\": \"0:0\",\n", + " \"flags\": \"StartRecieved\",\n", + " \"gid\": \"12345\",\n", + " \"group\": \"elvis\",\n", + " \"jobid\": \"8407414\",\n", + " \"jobidraw\": \"8407414\",\n", + " \"jobname\": \"large_job.sh\",\n", + " \"layout\": \"\",\n", + " \"maxdiskread\": \"\",\n", + " \"maxdiskreadnode\": \"\",\n", + " 
\"maxdiskreadtask\": \"\",\n", + " \"maxdiskwrite\": \"\",\n", + " \"maxdiskwritenode\": \"\",\n", + " \"maxdiskwritetask\": \"\",\n", + " \"maxpages\": \"\",\n", + " \"maxpagesnode\": \"\",\n", + " \"maxpagestask\": \"\",\n", + " \"maxrss\": \"\",\n", + " \"maxrssnode\": \"\",\n", + " \"maxrsstask\": \"\",\n", + " \"maxvmsize\": \"\",\n", + " \"maxvmsizenode\": \"\",\n", + " \"maxvmsizetask\": \"\",\n", + " \"mcslabel\": \"\",\n", + " \"mincpu\": \"\",\n", + " \"mincpunode\": \"\",\n", + " \"mincputask\": \"\",\n", + " \"ncpus\": \"25\",\n", + " \"nnodes\": \"25\",\n", + " \"nodelist\": \"None assigned\",\n", + " \"ntasks\": \"\",\n", + " \"priority\": \"67679\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"qos\": \"regular_1\",\n", + " \"qosraw\": \"16\",\n", + " \"reason\": \"None\",\n", + " \"reqcpufreq\": \"Unknown\",\n", + " \"reqcpufreqmin\": \"Unknown\",\n", + " \"reqcpufreqmax\": \"Unknown\",\n", + " \"reqcpufreqgov\": \"Unknown\",\n", + " \"reqcpus\": \"25\",\n", + " \"reqmem\": \"12200050M\",\n", + " \"reqnodes\": \"25\",\n", + " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", + " \"reservation\": \"\",\n", + " \"reservationid\": \"\",\n", + " \"reserved\": null,\n", + " \"resvcpu\": null,\n", + " \"resvcpuraw\": null,\n", + " \"start\": \"Unknown\",\n", + " \"state\": \"PENDING\",\n", + " \"submit\": \"2023-05-02T18:10:51\",\n", + " \"suspended\": \"00:00:00\",\n", + " \"systemcpu\": \"00:00:00\",\n", + " \"systemcomment\": \"\",\n", + " \"timelimit\": \"12:00:00\",\n", + " \"timelimitraw\": \"720\",\n", + " \"totalcpu\": \"00:00:00\",\n", + " \"tresusageinave\": \"\",\n", + " \"tresusageinmax\": \"\",\n", + " \"tresusageinmaxnode\": \"\",\n", + " \"tresusageinmaxtask\": \"\",\n", + " \"tresusageinmin\": \"\",\n", + " \"tresusageinminnode\": \"\",\n", + " \"tresusageinmintask\": \"\",\n", + " \"tresusageintot\": \"\",\n", + " \"tresusageoutave\": \"\",\n", + " \"tresusageoutmax\": \"\",\n", + " \"tresusageoutmaxnode\": 
\"\",\n", + " \"tresusageoutmaxtask\": \"\",\n", + " \"tresusageoutmin\": \"\",\n", + " \"tresusageoutminnode\": \"\",\n", + " \"tresusageoutmintask\": \"\",\n", + " \"tresusageouttot\": \"\",\n", + " \"uid\": \"12345\",\n", + " \"user\": \"elvis\",\n", + " \"usercpu\": \"00:00:00\",\n", + " \"wckey\": \"\",\n", + " \"wckeyid\": \"0\",\n", + " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", + "}\n" + ] + } + ], + "source": [ + "async with AsyncClient() as client:\n", + " perlmutter = await client.compute(Machine.perlmutter)\n", + " # Get the running job based on it's job ID\n", + " job = await perlmutter.job(jobid=jobs[0].jobid)\n", + " # Print out the current status of the job\n", + "\n", + "print_json(job.dict())" + ] + }, + { + "cell_type": "markdown", + "id": "68d65541", + "metadata": {}, + "source": [ + "Multiple jobs can also be retrieved simultaniosly later on by creating a list of jobids. This can reduce the amount of calls needed to the Superfacility REST Api and get your results back" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5f854ed6", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"account\": \"ntrain\",\n", + " \"admincomment\": \"\",\n", + " \"alloccpus\": \"25\",\n", + " \"allocnodes\": \"0\",\n", + " \"alloctres\": \"\",\n", + " \"associd\": \"206287\",\n", + " \"avecpu\": \"\",\n", + " \"avecpufreq\": \"\",\n", + " \"avediskread\": \"\",\n", + " \"avediskwrite\": \"\",\n", + " \"avepages\": \"\",\n", + " \"averss\": \"\",\n", + " \"avevmsize\": \"\",\n", + " \"blockid\": \"\",\n", + " \"cluster\": \"perlmutter\",\n", + " \"comment\": \"\",\n", + " \"constraints\": \"cpu\",\n", + " \"consumedenergy\": \"0\",\n", + " \"consumedenergyraw\": \"0\",\n", + " \"cputime\": \"00:00:00\",\n", + " \"cputimeraw\": \"0\",\n", + " \"dbindex\": \"64266939\",\n", + " \"derivedexitcode\": \"0:0\",\n", + " \"elapsed\": \"00:00:00\",\n", + " 
\"elapsedraw\": \"0\",\n", + " \"eligible\": \"2023-05-02T18:10:51\",\n", + " \"end\": \"Unknown\",\n", + " \"exitcode\": \"0:0\",\n", + " \"flags\": \"StartRecieved\",\n", + " \"gid\": \"12345\",\n", + " \"group\": \"elvis\",\n", + " \"jobid\": \"8407414\",\n", + " \"jobidraw\": \"8407414\",\n", + " \"jobname\": \"large_job.sh\",\n", + " \"layout\": \"\",\n", + " \"maxdiskread\": \"\",\n", + " \"maxdiskreadnode\": \"\",\n", + " \"maxdiskreadtask\": \"\",\n", + " \"maxdiskwrite\": \"\",\n", + " \"maxdiskwritenode\": \"\",\n", + " \"maxdiskwritetask\": \"\",\n", + " \"maxpages\": \"\",\n", + " \"maxpagesnode\": \"\",\n", + " \"maxpagestask\": \"\",\n", + " \"maxrss\": \"\",\n", + " \"maxrssnode\": \"\",\n", + " \"maxrsstask\": \"\",\n", + " \"maxvmsize\": \"\",\n", + " \"maxvmsizenode\": \"\",\n", + " \"maxvmsizetask\": \"\",\n", + " \"mcslabel\": \"\",\n", + " \"mincpu\": \"\",\n", + " \"mincpunode\": \"\",\n", + " \"mincputask\": \"\",\n", + " \"ncpus\": \"25\",\n", + " \"nnodes\": \"25\",\n", + " \"nodelist\": \"None assigned\",\n", + " \"ntasks\": \"\",\n", + " \"priority\": \"67679\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"qos\": \"regular_1\",\n", + " \"qosraw\": \"16\",\n", + " \"reason\": \"None\",\n", + " \"reqcpufreq\": \"Unknown\",\n", + " \"reqcpufreqmin\": \"Unknown\",\n", + " \"reqcpufreqmax\": \"Unknown\",\n", + " \"reqcpufreqgov\": \"Unknown\",\n", + " \"reqcpus\": \"25\",\n", + " \"reqmem\": \"12200050M\",\n", + " \"reqnodes\": \"25\",\n", + " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", + " \"reservation\": \"\",\n", + " \"reservationid\": \"\",\n", + " \"reserved\": null,\n", + " \"resvcpu\": null,\n", + " \"resvcpuraw\": null,\n", + " \"start\": \"Unknown\",\n", + " \"state\": \"PENDING\",\n", + " \"submit\": \"2023-05-02T18:10:51\",\n", + " \"suspended\": \"00:00:00\",\n", + " \"systemcpu\": \"00:00:00\",\n", + " \"systemcomment\": \"\",\n", + " \"timelimit\": \"12:00:00\",\n", + " \"timelimitraw\": 
\"720\",\n", + " \"totalcpu\": \"00:00:00\",\n", + " \"tresusageinave\": \"\",\n", + " \"tresusageinmax\": \"\",\n", + " \"tresusageinmaxnode\": \"\",\n", + " \"tresusageinmaxtask\": \"\",\n", + " \"tresusageinmin\": \"\",\n", + " \"tresusageinminnode\": \"\",\n", + " \"tresusageinmintask\": \"\",\n", + " \"tresusageintot\": \"\",\n", + " \"tresusageoutave\": \"\",\n", + " \"tresusageoutmax\": \"\",\n", + " \"tresusageoutmaxnode\": \"\",\n", + " \"tresusageoutmaxtask\": \"\",\n", + " \"tresusageoutmin\": \"\",\n", + " \"tresusageoutminnode\": \"\",\n", + " \"tresusageoutmintask\": \"\",\n", + " \"tresusageouttot\": \"\",\n", + " \"uid\": \"12345\",\n", + " \"user\": \"elvis\",\n", + " \"usercpu\": \"00:00:00\",\n", + " \"wckey\": \"\",\n", + " \"wckeyid\": \"0\",\n", + " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", + " },\n", + " {\n", + " \"account\": \"ntrain\",\n", + " \"admincomment\": \"\",\n", + " \"alloccpus\": \"25\",\n", + " \"allocnodes\": \"0\",\n", + " \"alloctres\": \"\",\n", + " \"associd\": \"206287\",\n", + " \"avecpu\": \"\",\n", + " \"avecpufreq\": \"\",\n", + " \"avediskread\": \"\",\n", + " \"avediskwrite\": \"\",\n", + " \"avepages\": \"\",\n", + " \"averss\": \"\",\n", + " \"avevmsize\": \"\",\n", + " \"blockid\": \"\",\n", + " \"cluster\": \"perlmutter\",\n", + " \"comment\": \"\",\n", + " \"constraints\": \"cpu\",\n", + " \"consumedenergy\": \"0\",\n", + " \"consumedenergyraw\": \"0\",\n", + " \"cputime\": \"00:00:00\",\n", + " \"cputimeraw\": \"0\",\n", + " \"dbindex\": \"64267039\",\n", + " \"derivedexitcode\": \"0:0\",\n", + " \"elapsed\": \"00:00:00\",\n", + " \"elapsedraw\": \"0\",\n", + " \"eligible\": \"2023-05-02T18:11:30\",\n", + " \"end\": \"Unknown\",\n", + " \"exitcode\": \"0:0\",\n", + " \"flags\": \"StartRecieved\",\n", + " \"gid\": \"12345\",\n", + " \"group\": \"elvis\",\n", + " \"jobid\": \"8407432\",\n", + " \"jobidraw\": \"8407432\",\n", + " \"jobname\": \"large_job.sh\",\n", + " \"layout\": \"\",\n", + " 
\"maxdiskread\": \"\",\n", + " \"maxdiskreadnode\": \"\",\n", + " \"maxdiskreadtask\": \"\",\n", + " \"maxdiskwrite\": \"\",\n", + " \"maxdiskwritenode\": \"\",\n", + " \"maxdiskwritetask\": \"\",\n", + " \"maxpages\": \"\",\n", + " \"maxpagesnode\": \"\",\n", + " \"maxpagestask\": \"\",\n", + " \"maxrss\": \"\",\n", + " \"maxrssnode\": \"\",\n", + " \"maxrsstask\": \"\",\n", + " \"maxvmsize\": \"\",\n", + " \"maxvmsizenode\": \"\",\n", + " \"maxvmsizetask\": \"\",\n", + " \"mcslabel\": \"\",\n", + " \"mincpu\": \"\",\n", + " \"mincpunode\": \"\",\n", + " \"mincputask\": \"\",\n", + " \"ncpus\": \"25\",\n", + " \"nnodes\": \"25\",\n", + " \"nodelist\": \"None assigned\",\n", + " \"ntasks\": \"\",\n", + " \"priority\": \"67679\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"qos\": \"regular_1\",\n", + " \"qosraw\": \"16\",\n", + " \"reason\": \"None\",\n", + " \"reqcpufreq\": \"Unknown\",\n", + " \"reqcpufreqmin\": \"Unknown\",\n", + " \"reqcpufreqmax\": \"Unknown\",\n", + " \"reqcpufreqgov\": \"Unknown\",\n", + " \"reqcpus\": \"25\",\n", + " \"reqmem\": \"12200050M\",\n", + " \"reqnodes\": \"25\",\n", + " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", + " \"reservation\": \"\",\n", + " \"reservationid\": \"\",\n", + " \"reserved\": null,\n", + " \"resvcpu\": null,\n", + " \"resvcpuraw\": null,\n", + " \"start\": \"Unknown\",\n", + " \"state\": \"PENDING\",\n", + " \"submit\": \"2023-05-02T18:11:30\",\n", + " \"suspended\": \"00:00:00\",\n", + " \"systemcpu\": \"00:00:00\",\n", + " \"systemcomment\": \"\",\n", + " \"timelimit\": \"12:00:00\",\n", + " \"timelimitraw\": \"720\",\n", + " \"totalcpu\": \"00:00:00\",\n", + " \"tresusageinave\": \"\",\n", + " \"tresusageinmax\": \"\",\n", + " \"tresusageinmaxnode\": \"\",\n", + " \"tresusageinmaxtask\": \"\",\n", + " \"tresusageinmin\": \"\",\n", + " \"tresusageinminnode\": \"\",\n", + " \"tresusageinmintask\": \"\",\n", + " \"tresusageintot\": \"\",\n", + " \"tresusageoutave\": \"\",\n", + " 
\"tresusageoutmax\": \"\",\n", + " \"tresusageoutmaxnode\": \"\",\n", + " \"tresusageoutmaxtask\": \"\",\n", + " \"tresusageoutmin\": \"\",\n", + " \"tresusageoutminnode\": \"\",\n", + " \"tresusageoutmintask\": \"\",\n", + " \"tresusageouttot\": \"\",\n", + " \"uid\": \"12345\",\n", + " \"user\": \"elvis\",\n", + " \"usercpu\": \"00:00:00\",\n", + " \"wckey\": \"\",\n", + " \"wckeyid\": \"0\",\n", + " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "async with AsyncClient() as client:\n", + " perlmutter = await client.compute(Machine.perlmutter)\n", + " # Get the running job based on it's job ID\n", + " # This time we'll get information from sacct\n", + " jobs = await perlmutter.jobs(jobids=[jobs[0].jobid, jobs[1].jobid], command='sacct')\n", + " # Print out the current status of the job\n", + "\n", + "print_json([j.dict() for j in jobs])" + ] + }, + { + "cell_type": "markdown", + "id": "e6903f6f", + "metadata": {}, + "source": [ + "Jobs can also be interacted with later based on their jobid. This includes canceling the jobs and seeing their final status." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4a9c3e14", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " {\n", + " \"account\": \"ntrain\",\n", + " \"admincomment\": \"{\\\"resizing\\\":0,\\\"features\\\":\\\"cpu\\\",\\\"arrayTaskId\\\":4294967294,\\\"qos\\\":\\\"regular_1\\\",\\\"arrayJobId\\\":0,\\\"jobAccount\\\":\\\"ntrain\\\",\\\"submitTime\\\":1683076251,\\\"partition\\\":\\\"regular_milan_ss11\\\",\\\"uid\\\":12345,\\\"cluster\\\":\\\"perlmutter\\\",\\\"argv\\\":[\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\\/large_job.sh\\\"],\\\"gresRequest\\\":\\\"cpu=25,mem=12200050M,node=25,billing=25\\\",\\\"licenses\\\":\\\"u1:1\\\",\\\"name\\\":\\\"large_job.sh\\\",\\\"stdinPath\\\":\\\"\\\\/dev\\\\/null\\\",\\\"timeLimit\\\":720,\\\"packJobId\\\":0,\\\"jobId\\\":8407414,\\\"allocNodes\\\":0,\\\"allocCpus\\\":0,\\\"workingDirectory\\\":\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\",\\\"restartCnt\\\":0,\\\"jobExitCode\\\":0,\\\"reboot\\\":0,\\\"startTime\\\":1683077035,\\\"priority\\\":67684,\\\"endTime\\\":1683077035,\\\"jobDerivedExitCode\\\":0,\\\"packJobOffset\\\":0}\",\n", + " \"alloccpus\": \"25\",\n", + " \"allocnodes\": \"0\",\n", + " \"alloctres\": \"\",\n", + " \"associd\": \"206287\",\n", + " \"avecpu\": \"\",\n", + " \"avecpufreq\": \"\",\n", + " \"avediskread\": \"\",\n", + " \"avediskwrite\": \"\",\n", + " \"avepages\": \"\",\n", + " \"averss\": \"\",\n", + " \"avevmsize\": \"\",\n", + " \"blockid\": \"\",\n", + " \"cluster\": \"perlmutter\",\n", + " \"comment\": \"\",\n", + " \"constraints\": \"cpu\",\n", + " \"consumedenergy\": \"0\",\n", + " \"consumedenergyraw\": \"0\",\n", + " \"cputime\": \"00:00:00\",\n", + " \"cputimeraw\": \"0\",\n", + " \"dbindex\": \"64266939\",\n", + " \"derivedexitcode\": \"0:0\",\n", + " \"elapsed\": \"00:00:00\",\n", + " \"elapsedraw\": \"0\",\n", + " \"eligible\": \"2023-05-02T18:10:51\",\n", + " 
\"end\": \"2023-05-02T18:23:55\",\n", + " \"exitcode\": \"0:0\",\n", + " \"flags\": \"StartRecieved\",\n", + " \"gid\": \"12345\",\n", + " \"group\": \"elvis\",\n", + " \"jobid\": \"8407414\",\n", + " \"jobidraw\": \"8407414\",\n", + " \"jobname\": \"large_job.sh\",\n", + " \"layout\": \"\",\n", + " \"maxdiskread\": \"\",\n", + " \"maxdiskreadnode\": \"\",\n", + " \"maxdiskreadtask\": \"\",\n", + " \"maxdiskwrite\": \"\",\n", + " \"maxdiskwritenode\": \"\",\n", + " \"maxdiskwritetask\": \"\",\n", + " \"maxpages\": \"\",\n", + " \"maxpagesnode\": \"\",\n", + " \"maxpagestask\": \"\",\n", + " \"maxrss\": \"\",\n", + " \"maxrssnode\": \"\",\n", + " \"maxrsstask\": \"\",\n", + " \"maxvmsize\": \"\",\n", + " \"maxvmsizenode\": \"\",\n", + " \"maxvmsizetask\": \"\",\n", + " \"mcslabel\": \"\",\n", + " \"mincpu\": \"\",\n", + " \"mincpunode\": \"\",\n", + " \"mincputask\": \"\",\n", + " \"ncpus\": \"25\",\n", + " \"nnodes\": \"25\",\n", + " \"nodelist\": \"None assigned\",\n", + " \"ntasks\": \"\",\n", + " \"priority\": \"67679\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"qos\": \"regular_1\",\n", + " \"qosraw\": \"16\",\n", + " \"reason\": \"None\",\n", + " \"reqcpufreq\": \"Unknown\",\n", + " \"reqcpufreqmin\": \"Unknown\",\n", + " \"reqcpufreqmax\": \"Unknown\",\n", + " \"reqcpufreqgov\": \"Unknown\",\n", + " \"reqcpus\": \"25\",\n", + " \"reqmem\": \"12200050M\",\n", + " \"reqnodes\": \"25\",\n", + " \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", + " \"reservation\": \"\",\n", + " \"reservationid\": \"\",\n", + " \"reserved\": null,\n", + " \"resvcpu\": null,\n", + " \"resvcpuraw\": null,\n", + " \"start\": \"None\",\n", + " \"state\": \"CANCELLED\",\n", + " \"submit\": \"2023-05-02T18:10:51\",\n", + " \"suspended\": \"00:00:00\",\n", + " \"systemcpu\": \"00:00:00\",\n", + " \"systemcomment\": \"\",\n", + " \"timelimit\": \"12:00:00\",\n", + " \"timelimitraw\": \"720\",\n", + " \"totalcpu\": \"00:00:00\",\n", + " \"tresusageinave\": 
\"\",\n", + " \"tresusageinmax\": \"\",\n", + " \"tresusageinmaxnode\": \"\",\n", + " \"tresusageinmaxtask\": \"\",\n", + " \"tresusageinmin\": \"\",\n", + " \"tresusageinminnode\": \"\",\n", + " \"tresusageinmintask\": \"\",\n", + " \"tresusageintot\": \"\",\n", + " \"tresusageoutave\": \"\",\n", + " \"tresusageoutmax\": \"\",\n", + " \"tresusageoutmaxnode\": \"\",\n", + " \"tresusageoutmaxtask\": \"\",\n", + " \"tresusageoutmin\": \"\",\n", + " \"tresusageoutminnode\": \"\",\n", + " \"tresusageoutmintask\": \"\",\n", + " \"tresusageouttot\": \"\",\n", + " \"uid\": \"12345\",\n", + " \"user\": \"elvis\",\n", + " \"usercpu\": \"00:00:00\",\n", + " \"wckey\": \"\",\n", + " \"wckeyid\": \"0\",\n", + " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", + " },\n", + " {\n", + " \"account\": \"ntrain\",\n", + " \"admincomment\": \"{\\\"resizing\\\":0,\\\"features\\\":\\\"cpu\\\",\\\"arrayTaskId\\\":4294967294,\\\"qos\\\":\\\"regular_1\\\",\\\"arrayJobId\\\":0,\\\"jobAccount\\\":\\\"ntrain\\\",\\\"submitTime\\\":1683076290,\\\"partition\\\":\\\"regular_milan_ss11\\\",\\\"uid\\\":12345,\\\"cluster\\\":\\\"perlmutter\\\",\\\"argv\\\":[\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\\/large_job.sh\\\"],\\\"gresRequest\\\":\\\"cpu=25,mem=12200050M,node=25,billing=25\\\",\\\"licenses\\\":\\\"u1:1\\\",\\\"name\\\":\\\"large_job.sh\\\",\\\"stdinPath\\\":\\\"\\\\/dev\\\\/null\\\",\\\"timeLimit\\\":720,\\\"packJobId\\\":0,\\\"jobId\\\":8407432,\\\"allocNodes\\\":0,\\\"allocCpus\\\":0,\\\"workingDirectory\\\":\\\"\\\\/global\\\\/u1\\\\/t\\\\/elvis\\\\/job_subs\\\",\\\"restartCnt\\\":0,\\\"jobExitCode\\\":0,\\\"reboot\\\":0,\\\"startTime\\\":1683077066,\\\"priority\\\":67683,\\\"endTime\\\":1683077066,\\\"jobDerivedExitCode\\\":0,\\\"packJobOffset\\\":0}\",\n", + " \"alloccpus\": \"25\",\n", + " \"allocnodes\": \"0\",\n", + " \"alloctres\": \"\",\n", + " \"associd\": \"206287\",\n", + " \"avecpu\": \"\",\n", + " \"avecpufreq\": \"\",\n", + " \"avediskread\": \"\",\n", + " 
\"avediskwrite\": \"\",\n", + " \"avepages\": \"\",\n", + " \"averss\": \"\",\n", + " \"avevmsize\": \"\",\n", + " \"blockid\": \"\",\n", + " \"cluster\": \"perlmutter\",\n", + " \"comment\": \"\",\n", + " \"constraints\": \"cpu\",\n", + " \"consumedenergy\": \"0\",\n", + " \"consumedenergyraw\": \"0\",\n", + " \"cputime\": \"00:00:00\",\n", + " \"cputimeraw\": \"0\",\n", + " \"dbindex\": \"64267039\",\n", + " \"derivedexitcode\": \"0:0\",\n", + " \"elapsed\": \"00:00:00\",\n", + " \"elapsedraw\": \"0\",\n", + " \"eligible\": \"2023-05-02T18:11:30\",\n", + " \"end\": \"2023-05-02T18:24:26\",\n", + " \"exitcode\": \"0:0\",\n", + " \"flags\": \"StartRecieved\",\n", + " \"gid\": \"12345\",\n", + " \"group\": \"elvis\",\n", + " \"jobid\": \"8407432\",\n", + " \"jobidraw\": \"8407432\",\n", + " \"jobname\": \"large_job.sh\",\n", + " \"layout\": \"\",\n", + " \"maxdiskread\": \"\",\n", + " \"maxdiskreadnode\": \"\",\n", + " \"maxdiskreadtask\": \"\",\n", + " \"maxdiskwrite\": \"\",\n", + " \"maxdiskwritenode\": \"\",\n", + " \"maxdiskwritetask\": \"\",\n", + " \"maxpages\": \"\",\n", + " \"maxpagesnode\": \"\",\n", + " \"maxpagestask\": \"\",\n", + " \"maxrss\": \"\",\n", + " \"maxrssnode\": \"\",\n", + " \"maxrsstask\": \"\",\n", + " \"maxvmsize\": \"\",\n", + " \"maxvmsizenode\": \"\",\n", + " \"maxvmsizetask\": \"\",\n", + " \"mcslabel\": \"\",\n", + " \"mincpu\": \"\",\n", + " \"mincpunode\": \"\",\n", + " \"mincputask\": \"\",\n", + " \"ncpus\": \"25\",\n", + " \"nnodes\": \"25\",\n", + " \"nodelist\": \"None assigned\",\n", + " \"ntasks\": \"\",\n", + " \"priority\": \"67679\",\n", + " \"partition\": \"regular_milan_ss11\",\n", + " \"qos\": \"regular_1\",\n", + " \"qosraw\": \"16\",\n", + " \"reason\": \"None\",\n", + " \"reqcpufreq\": \"Unknown\",\n", + " \"reqcpufreqmin\": \"Unknown\",\n", + " \"reqcpufreqmax\": \"Unknown\",\n", + " \"reqcpufreqgov\": \"Unknown\",\n", + " \"reqcpus\": \"25\",\n", + " \"reqmem\": \"12200050M\",\n", + " \"reqnodes\": \"25\",\n", + 
" \"reqtres\": \"billing=25,cpu=25,mem=12200050M,node=25\",\n", + " \"reservation\": \"\",\n", + " \"reservationid\": \"\",\n", + " \"reserved\": null,\n", + " \"resvcpu\": null,\n", + " \"resvcpuraw\": null,\n", + " \"start\": \"None\",\n", + " \"state\": \"CANCELLED\",\n", + " \"submit\": \"2023-05-02T18:11:30\",\n", + " \"suspended\": \"00:00:00\",\n", + " \"systemcpu\": \"00:00:00\",\n", + " \"systemcomment\": \"\",\n", + " \"timelimit\": \"12:00:00\",\n", + " \"timelimitraw\": \"720\",\n", + " \"totalcpu\": \"00:00:00\",\n", + " \"tresusageinave\": \"\",\n", + " \"tresusageinmax\": \"\",\n", + " \"tresusageinmaxnode\": \"\",\n", + " \"tresusageinmaxtask\": \"\",\n", + " \"tresusageinmin\": \"\",\n", + " \"tresusageinminnode\": \"\",\n", + " \"tresusageinmintask\": \"\",\n", + " \"tresusageintot\": \"\",\n", + " \"tresusageoutave\": \"\",\n", + " \"tresusageoutmax\": \"\",\n", + " \"tresusageoutmaxnode\": \"\",\n", + " \"tresusageoutmaxtask\": \"\",\n", + " \"tresusageoutmin\": \"\",\n", + " \"tresusageoutminnode\": \"\",\n", + " \"tresusageoutmintask\": \"\",\n", + " \"tresusageouttot\": \"\",\n", + " \"uid\": \"12345\",\n", + " \"user\": \"elvis\",\n", + " \"usercpu\": \"00:00:00\",\n", + " \"wckey\": \"\",\n", + " \"wckeyid\": \"0\",\n", + " \"workdir\": \"/global/homes/e/elvis/job_subs\"\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "async with AsyncClient() as client:\n", + " perlmutter = await client.compute(Machine.perlmutter)\n", + " # Get the running job based on it's job ID\n", + " # This time we'll get information from sacct\n", + " jobs = await perlmutter.jobs(jobids=[jobs[0].jobid, jobs[1].jobid], command='sacct')\n", + " for job in jobs:\n", + " await job.cancel(wait=True)\n", + " await job.update()\n", + " \n", + "\n", + "print_json([j.dict() for j in jobs])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/examples/run_job_and_check_status.ipynb b/examples/run_job_and_check_status.ipynb index ae16492..899b17d 100644 --- a/examples/run_job_and_check_status.ipynb +++ b/examples/run_job_and_check_status.ipynb @@ -17,8 +17,6 @@ "source": [ "from sfapi_client import Client\n", "from sfapi_client.compute import Machine\n", - "from sfapi_client.paths import RemotePath\n", - "from pathlib import Path\n", "\n", "user_name = \"elvis\"\n", "\n", diff --git a/src/sfapi_client/_async/client.py b/src/sfapi_client/_async/client.py index 471aaf6..8bf1b53 100644 --- a/src/sfapi_client/_async/client.py +++ b/src/sfapi_client/_async/client.py @@ -10,7 +10,7 @@ from authlib.jose import JsonWebKey from .compute import Machine, AsyncCompute -from ..exceptions import SfApiError +from ..exceptions import ClientKeyError from .._models import ( Changelog as ChangelogItem, Config as ConfItem, @@ -231,7 +231,7 @@ def __init__( :param client_id: The client ID :param secret: The client secret - :param key: The path to the client secret file + :param key: Full path to the client secret file, or path relative to `~` from the expanduser :param api_base_url: The API base URL :param token_url: The token URL :param access_token: An existing access token @@ -311,10 +311,13 @@ async def close(self): async def __aexit__(self, type, value, traceback): await self.close() - def _read_client_secret_from_file(self, name): - if name is not None and Path(name).exists(): + def _read_client_secret_from_file(self, name: Optional[Union[str, Path]]): + if name is None: + return + _path = Path(name).expanduser().resolve() + if _path.exists(): # If the user gives a full path, then use it - key_path = Path(name) + key_path = _path else: # If not let's 
search in ~/.superfacility for the name or any key nickname = "" if name is None else name @@ -326,12 +329,14 @@ def _read_client_secret_from_file(self, name): # We have no credentials if key_path is None or key_path.is_dir(): - return + raise ClientKeyError( + f"no key found at key_path: {_path} or in ~/.superfacility/{name}*" + ) # Check that key is read only in case it's not # 0o100600 means chmod 600 if key_path.stat().st_mode != 0o100600: - raise SfApiError( + raise ClientKeyError( f"Incorrect permissions on the key. To fix run: chmod 600 {key_path}" ) @@ -351,7 +356,7 @@ def _read_client_secret_from_file(self, name): # Validate we got a correct looking client_id if len(self._client_id) != 13: - raise SfApiError(f"client_id not found in file {key_path}") + raise ClientKeyError(f"client_id not found in file {key_path}") @tenacity.retry( retry=tenacity.retry_if_exception_type(httpx.TimeoutException) diff --git a/src/sfapi_client/_sync/client.py b/src/sfapi_client/_sync/client.py index fb577cb..8aa8aaa 100644 --- a/src/sfapi_client/_sync/client.py +++ b/src/sfapi_client/_sync/client.py @@ -10,7 +10,7 @@ from authlib.jose import JsonWebKey from .compute import Machine, Compute -from ..exceptions import SfApiError +from ..exceptions import ClientKeyError from .._models import ( Changelog as ChangelogItem, Config as ConfItem, @@ -231,7 +231,7 @@ def __init__( :param client_id: The client ID :param secret: The client secret - :param key: The path to the client secret file + :param key: Full path to the client secret file, or path relative to `~` from the expanduser :param api_base_url: The API base URL :param token_url: The token URL :param access_token: An existing access token @@ -260,7 +260,7 @@ def __enter__(self): def _http_client(self): headers = {"accept": "application/json"} - # If we have a client_id then we need to use OAuth2 client + # If we have a client_id then we need to use the OAuth2 client if self._client_id is not None: if self.__http_client is 
None: # Create a new session if we haven't already @@ -311,10 +311,13 @@ def close(self): def __exit__(self, type, value, traceback): self.close() - def _read_client_secret_from_file(self, name): - if name is not None and Path(name).exists(): + def _read_client_secret_from_file(self, name: Optional[Union[str, Path]]): + if name is None: + return + _path = Path(name).expanduser().resolve() + if _path.exists(): # If the user gives a full path, then use it - key_path = Path(name) + key_path = _path else: # If not let's search in ~/.superfacility for the name or any key nickname = "" if name is None else name @@ -326,12 +329,14 @@ def _read_client_secret_from_file(self, name): # We have no credentials if key_path is None or key_path.is_dir(): - return + raise ClientKeyError( + f"no key found at key_path: {_path} or in ~/.superfacility/{name}*" + ) # Check that key is read only in case it's not # 0o100600 means chmod 600 if key_path.stat().st_mode != 0o100600: - raise SfApiError( + raise ClientKeyError( f"Incorrect permissions on the key. 
To fix run: chmod 600 {key_path}" ) @@ -351,7 +356,7 @@ def _read_client_secret_from_file(self, name): # Validate we got a correct looking client_id if len(self._client_id) != 13: - raise SfApiError(f"client_id not found in file {key_path}") + raise ClientKeyError(f"client_id not found in file {key_path}") @tenacity.retry( retry=tenacity.retry_if_exception_type(httpx.TimeoutException) @@ -501,6 +506,7 @@ def resources(self) -> Resources: # Ensure that the job models are built, we need to import here to # avoid circular imports -from .jobs import JobSacct, JobSqueue +from .jobs import JobSacct, JobSqueue # noqa: E402 + JobSqueue.model_rebuild() -JobSacct.model_rebuild() \ No newline at end of file +JobSacct.model_rebuild() diff --git a/src/sfapi_client/_sync/paths.py b/src/sfapi_client/_sync/paths.py index a46468d..ba508b6 100644 --- a/src/sfapi_client/_sync/paths.py +++ b/src/sfapi_client/_sync/paths.py @@ -165,7 +165,10 @@ def download(self, binary=False) -> IO[AnyStr]: @staticmethod def _ls( - compute: "Compute", path, directory=False, filter_dots=True # noqa: F821 + compute: "Compute", # noqa: F821 + path, + directory=False, + filter_dots=True, # noqa: F821 ) -> List["RemotePath"]: # noqa: F821 r = compute.client.get(f"utilities/ls/{compute.name}/{path}") diff --git a/src/sfapi_client/exceptions.py b/src/sfapi_client/exceptions.py index f209580..bac172a 100644 --- a/src/sfapi_client/exceptions.py +++ b/src/sfapi_client/exceptions.py @@ -5,3 +5,12 @@ class SfApiError(Exception): def __init__(self, message): self.message = message + + +class ClientKeyError(Exception): + """ + Exception indicating an error occurred reading the client keys + """ + + def __init__(self, message): + self.message = message diff --git a/tests/conftest.py b/tests/conftest.py index ae78c77..871356c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,8 @@ import random import string from typing import Optional, Union, Dict +from pathlib import Path +from 
cryptography.hazmat.primitives.asymmetric import rsa import pytest from authlib.jose import JsonWebKey @@ -174,3 +176,44 @@ def async_authenticated_client(api_base_url, token_url, client_id, client_secret @pytest.fixture def access_token(): return settings.ACCESS_TOKEN + + +@pytest.fixture +def fake_key_file(tmp_path_factory): + try: + tmp_path_factory._basetemp = Path().home() + key_path = tmp_path_factory.mktemp(".sfapi_test1", numbered=False) / "key.pem" + + # Make a fake key for testing + key_path.write_text( + f"""abcdefghijlmo + -----BEGIN RSA PRIVATE KEY----- + {rsa.generate_private_key(public_exponent=65537, key_size=2048)} + -----END RSA PRIVATE KEY----- + """ + ) + key_path.chmod(0o100600) + yield key_path + finally: + # make sure to clean up after the test since we put a file in ~/.sfapi_test1 + temp_path = Path().home() / ".sfapi_test1" + if temp_path.exists(): + (temp_path / "key.pem").unlink(missing_ok=True) + temp_path.rmdir() + + +@pytest.fixture +def empty_key_file(tmp_path_factory): + try: + tmp_path_factory._basetemp = Path().home() + key_path = tmp_path_factory.mktemp(".sfapi_test2", numbered=False) / "nokey.pem" + # Makes an empty key + key_path.write_text("") + key_path.chmod(0o100600) + yield key_path + finally: + # make sure to clean up after the test since we put a file in ~/.sfapi_test2 + temp_path = Path().home() / ".sfapi_test2" + if temp_path.exists(): + (temp_path / "nokey.pem").unlink(missing_ok=True) + temp_path.rmdir() diff --git a/tests/test_key.py b/tests/test_key.py new file mode 100644 index 0000000..5d78f65 --- /dev/null +++ b/tests/test_key.py @@ -0,0 +1,33 @@ +import pytest + +from sfapi_client import Client +from sfapi_client.exceptions import ClientKeyError + + +@pytest.mark.public +def test_wrong_key_data(fake_key_file, test_machine): + with Client(key=fake_key_file) as client: + with pytest.raises(Exception): + # Raises OAuthError when trying to read incorrect PEM + client.compute(test_machine) + + +@pytest.mark.public +def
test_empty_key_file(empty_key_file): + with pytest.raises(ClientKeyError): + # Raise ClientKeyError for empty key file + Client(key=empty_key_file) + + +@pytest.mark.public +def test_no_key_file_path(): + with pytest.raises(ClientKeyError): + # Raise error when there is no key present + Client(key="~/name") + + +@pytest.mark.public +def test_no_key_file_name(): + with pytest.raises(ClientKeyError): + # Raise error when searching for keys + Client(key="name")