Adding programs to determine the affinity of threads and processes on a given system. They include CPU and GPU mappings.
LLNL · Nov 9, 2020 · f063220 · f063220
1 parent 12c8194
commit f063220
Show file tree

Hide file tree

Showing 9 changed files with 723 additions and 0 deletions.
diff --git a/hello/README.md → affinity/README.md b/hello/README.md → affinity/README.md
diff --git a/affinity/affinity.h b/affinity/affinity.h
@@ -0,0 +1,35 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#ifndef AFFINITY_H_INCLUDED
+#define AFFINITY_H_INCLUDED
+
+/* String buffer sizes for users of this API */
+#define SHORT_STR_SIZE 32
+#define LONG_STR_SIZE 4096
+
+/*
+ * Every function below that takes a 'char *buf' writes a
+ * NUL-terminated string with sprintf() and returns the number of
+ * characters written (excluding the terminating NUL). No buffer
+ * size is passed, so the caller must supply a buffer large enough
+ * for the whole output.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  /* Number of GPU devices reported by the GPU runtime */
+  int get_gpu_count(void);
+
+  /* PCI bus ID of GPU 'dev', or -1 if it cannot be queried */
+  int get_gpu_pci_id(int dev);
+
+  /* Write the PCI bus ID of every GPU ("0x.. 0x.. \n") into 'buf' */
+  int get_gpu_affinity(char *buf);
+
+  /* Write a one-line summary of GPU 'dev' into 'buf'; -1 on error */
+  int get_gpu_info(int dev, char *buf);
+
+  /* Write the default device and a summary of every GPU into 'buf';
+     -1 on error */
+  int get_gpu_info_all(char *buf);
+
+  /* Number of CPUs in the caller's affinity mask; negative on error */
+  int get_num_cpus(void);
+
+  /* Write the caller's CPU affinity as a range list (e.g. "0-3,8\n")
+     into 'buf' */
+  int get_cpu_affinity(char *buf);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* AFFINITY_H_INCLUDED */
diff --git a/affinity/cpu.c b/affinity/cpu.c
@@ -0,0 +1,143 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* __USE_GNU is needed for CPU_ISSET definition */ 
+#ifndef __USE_GNU
+#define __USE_GNU 1                
+#endif
+#include <sched.h>            // sched_getaffinity
+
+
+/*
+ * Convert an array of non-negative ints into a compact range string,
+ * e.g. {0,1,2,5,7,8} -> "0-2,5,7-8".
+ *
+ * Only adjacent elements that differ by exactly one are merged into
+ * a run, so the input is expected in ascending order.
+ *
+ * intarr: values to convert
+ * size:   number of elements in intarr
+ * range:  output buffer; no size is passed, so it must be large
+ *         enough for the whole string plus the terminating NUL
+ *
+ * Returns the number of characters written (excluding the NUL).
+ * When size <= 0 the output is the empty string.
+ */
+int int2range(int *intarr, int size, char *range)
+{
+  int i, curr;
+  int nc = 0;
+  int start = -1;
+  int prev = -2;
+
+  /* Leave a valid string behind even if the loop never runs */
+  range[0] = '\0';
+
+  for (i=0; i<size; i++) {
+    curr = intarr[i];
+    if (curr != prev+1) {
+      /* Close the previous run if it spanned more than one value */
+      if (start != prev && prev >= 0)
+        nc += sprintf(range+nc, "-%d", prev);
+
+      /* Separate from the previous entry and open a new run */
+      if (prev >= 0)
+        nc += sprintf(range+nc, ",");
+      nc += sprintf(range+nc, "%d", curr);
+      start = curr;
+    } else if (i == size-1) {
+      /* Input ends in the middle of a run: close it here */
+      nc += sprintf(range+nc, "-%d", curr);
+    }
+
+    prev = curr;
+  }
+
+  return nc;
+}
+
+
+/*
+ * Get the number of processing units (cores or hwthreads)
+ * configured on the system.
+ *
+ * The result is used as an upper bound on CPU ids when scanning an
+ * affinity mask. The configured count is used rather than the online
+ * count: when some CPUs are offline the online ids are sparse, and
+ * bounding the scan by the online count would skip the
+ * higher-numbered CPUs.
+ *
+ * Returns the count, or a negative value (after perror) on failure.
+ */
+static
+int get_total_num_pus()
+{
+  /* sysconf returns long; keep the full value until it is checked */
+  long pus = sysconf(_SC_NPROCESSORS_CONF);
+
+  if ( pus < 0 )
+    perror("sysconf");
+
+  return (int) pus;
+}
+
+
+
+
+/*
+ * Get the affinity of the calling thread.
+ *
+ * cpus:  output array; receives the ids of the CPUs in the affinity
+ *        mask, in ascending order. It must have room for
+ *        get_total_num_pus() entries.
+ * count: output; number of entries stored in cpus. Set to 0 on
+ *        every path, including failure, so callers never read an
+ *        indeterminate value.
+ *
+ * Returns 0 on success, or the negative sched_getaffinity() result
+ * (after perror) on failure.
+ */
+static
+int get_affinity(int *cpus, int *count)
+{
+  int i;
+  cpu_set_t resmask;
+
+  *count = 0;
+
+  CPU_ZERO(&resmask);
+
+  int rc = sched_getaffinity(0, sizeof(resmask), &resmask);
+  if ( rc < 0 ) {
+    perror("sched_getaffinity");
+    return rc;
+  }
+
+  /* A negative pus (sysconf failure) simply yields an empty list */
+  int pus = get_total_num_pus();
+  for (i=0; i<pus; i++)
+    if ( CPU_ISSET(i, &resmask) ) {
+      cpus[*count] = i;
+      (*count)++;
+    }
+
+  return 0;
+}
+
+
+/*
+ * Count the CPUs in the calling thread's affinity mask, i.e. the
+ * CPUs on which this worker is allowed to run.
+ *
+ * Returns the count, or the negative sched_getaffinity() result
+ * (after perror) on failure.
+ */
+int get_num_cpus()
+{
+  cpu_set_t allowed;
+  int status;
+
+  CPU_ZERO(&allowed);
+  status = sched_getaffinity(0, sizeof(allowed), &allowed);
+
+  if ( status >= 0 )
+    return CPU_COUNT(&allowed);
+
+  perror("sched_getaffinity");
+  return status;
+}
+
+
+/*
+ * Print my affinity into a buffer.
+ *
+ * outbuf: output buffer; receives the CPU list in range notation
+ *         (e.g. "0-3,8") followed by a newline. No size is passed,
+ *         so it must be large enough for the whole string.
+ *
+ * Returns the number of characters written (excluding the NUL).
+ * If the CPU count, the allocation or the affinity query fails,
+ * the list is empty and only the newline is written, so callers
+ * that accumulate the return value keep a valid offset.
+ */
+int get_cpu_affinity(char *outbuf)
+{
+  int count = 0;
+  int nc = 0;
+  int *cpus = NULL;
+  int pus = get_total_num_pus();
+
+  /* Never hand a negative (error) count to malloc */
+  if ( pus > 0 ) {
+    cpus = malloc(sizeof(int) * pus);
+    if ( cpus == NULL )
+      perror("malloc");
+  }
+
+  /* On any failure fall back to an empty CPU list */
+  if ( cpus == NULL || get_affinity(cpus, &count) < 0 )
+    count = 0;
+
+#if 1
+  nc += int2range(cpus, count, outbuf+nc);
+  //printf("nc=%d count=%d\n", nc, count);
+#else
+  int i;
+  for (i=0; i<count; i++) {
+    nc += sprintf(outbuf+nc, "%d ", cpus[i]);
+  }
+#endif
+  nc += sprintf(outbuf+nc, "\n");
+
+  free(cpus);
+
+  return nc;
+}
+
+
diff --git a/affinity/gpu.cu b/affinity/gpu.cu
@@ -0,0 +1,111 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <cuda_runtime.h>      /* Documentation in hip_runtime_api.h */ 
+#include "affinity.h"          /* Do not perform name mangling */ 
+
+
+/*
+ * Get the number of GPU devices reported by the runtime.
+ *
+ * Returns the device count, or 0 if the count cannot be obtained.
+ * Failures other than "no device present" are reported on stderr.
+ */
+int get_gpu_count()
+{
+  int count = 0;
+  cudaError_t err = cudaGetDeviceCount(&count);
+
+  if ( err != cudaSuccess ) {
+    /* A node without GPUs is a normal case; stay quiet for it */
+    if ( err != cudaErrorNoDevice )
+      fprintf(stderr, "Could not get GPU count: %s\n",
+              cudaGetErrorString(err));
+    return 0;
+  }
+
+  return count;
+}
+
+
+/*
+ * Look up the PCI bus ID of GPU 'dev'.
+ *
+ * Returns the bus ID; if the attribute query fails, a message is
+ * printed on stderr and -1 (the initial value) is returned.
+ */
+int get_gpu_pci_id(int dev)
+{
+  int bus_id = -1;
+
+  if ( cudaDeviceGetAttribute(&bus_id, cudaDevAttrPciBusId, dev) )
+    fprintf(stderr, "Could not get PCI ID for GPU %d\n", dev);
+
+  return bus_id;
+}
+
+
+/*
+ * Print the PCI bus IDs of all GPUs into a buffer.
+ *
+ * buf: output buffer; receives one "0x<id> " entry per GPU followed
+ *      by a newline. No size is passed, so it must be large enough
+ *      for the whole string.
+ *
+ * Returns the number of characters written (excluding the NUL).
+ * If the device count cannot be obtained only the newline is
+ * written; a GPU whose bus ID cannot be queried is reported on
+ * stderr and left out of the list.
+ */
+int get_gpu_affinity(char *buf)
+{
+  int i, value;
+  int count = 0;
+  int nc = 0;
+
+  /* Do not loop over an indeterminate count if the query fails */
+  if ( cudaGetDeviceCount(&count) != cudaSuccess )
+    count = 0;
+
+  for (i=0; i<count; i++) {
+    value = -1;
+    if ( cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, i)
+         != cudaSuccess ) {
+      fprintf(stderr, "Could not get PCI ID for GPU %d\n", i);
+      continue;
+    }
+    nc += sprintf(buf+nc, "0x%x ", value);
+  }
+  nc += sprintf(buf+nc, "\n");
+
+  return nc;
+}
+
+
+/*
+ * Print a summary of one GPU into a buffer.
+ *
+ * devid: device index passed to cudaGetDeviceProperties()
+ * buf:   output buffer; receives a single line with the PCI bus ID,
+ *        name, global memory (totalGlobalMem >> 30, labelled GB),
+ *        multiprocessor count, clock rate and compute capability.
+ *        No size is passed, so it must be large enough.
+ *
+ * Returns the number of characters written (excluding the NUL), or
+ * -1 (after a message on stderr) if the properties cannot be read;
+ * in that case nothing is written to buf.
+ */
+int get_gpu_info(int devid, char *buf)
+{
+  cudaDeviceProp prop;
+  cudaError_t err;
+  int nc = 0;
+
+  err = cudaGetDeviceProperties(&prop, devid);
+  if ( err != cudaSuccess ) {
+    fprintf(stderr, "Could not get info for GPU %d\n", devid);
+    return -1;
+  }
+
+  /* Scale clockRate by 1e6 for the GHz label (assumes the runtime
+     reports kHz -- confirm against the cudaDeviceProp docs) */
+  float ghz = prop.clockRate / 1000.0 / 1000.0;
+
+  /* totalGlobalMem is a size_t, hence %zu rather than %lu */
+#if 0
+  nc += sprintf(buf+nc, "\tName: %s\n", prop.name);
+  nc += sprintf(buf+nc, "\tPCI bus ID: 0x%x\n", prop.pciBusID);
+  //nc += sprintf(buf+nc, "\tPCI device ID 0x%x\n", prop.pciDeviceID);
+  //nc += sprintf(buf+nc, "\tPCI domain ID 0x%x\n", prop.pciDomainID);
+  nc += sprintf(buf+nc, "\tMemory: %zu GB\n", prop.totalGlobalMem >> 30);
+  nc += sprintf(buf+nc, "\tMultiprocessor count: %d\n", prop.multiProcessorCount);
+  nc += sprintf(buf+nc, "\tClock rate: %.3f Ghz\n", ghz);
+  nc += sprintf(buf+nc, "\tCompute capability: %d.%d\n",
+                prop.major, prop.minor);
+  nc += sprintf(buf+nc, "\tECC enabled: %d\n", prop.ECCEnabled);
+#else
+  nc += sprintf(buf+nc, "\t0x%.2x: %s, %zu GB Mem, "
+                "%d Multiprocessors, %.3f GHZ, %d.%d CC\n",
+                prop.pciBusID, prop.name, prop.totalGlobalMem >> 30,
+                prop.multiProcessorCount, ghz, prop.major, prop.minor);
+#endif
+
+  return nc;
+}
+
+
+/*
+ * Print the default device and a summary of every GPU into a buffer.
+ *
+ * buf: output buffer; receives a "Default device" line with the PCI
+ *      bus ID of the current device, followed by one get_gpu_info()
+ *      line per GPU. No size is passed, so it must be large enough.
+ *
+ * Returns the number of characters written (excluding the NUL), or
+ * -1 (after a message on stderr) if the device count, the current
+ * device or its PCI bus ID cannot be obtained. A GPU whose summary
+ * cannot be produced is skipped.
+ */
+int get_gpu_info_all(char *buf)
+{
+  cudaError_t err;
+  int i, myid, n;
+  int count = 0;
+  int value = -1;
+  int nc = 0;
+
+  err = cudaGetDeviceCount(&count);
+  if ( err != cudaSuccess ) {
+    fprintf(stderr, "Could not get GPU count\n");
+    return -1;
+  }
+
+  err = cudaGetDevice(&myid);
+  if ( err != cudaSuccess ) {
+    fprintf(stderr, "Could not get default device\n");
+    return -1;
+  }
+
+  err = cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, myid);
+  if ( err != cudaSuccess ) {
+    fprintf(stderr, "Could not get PCI ID for GPU %d\n", myid);
+    return -1;
+  }
+
+  nc += sprintf(buf+nc, "\tDefault device: 0x%x\n", value);
+
+  for (i=0; i<count; i++) {
+    //nc += sprintf(buf+nc, "\t--\n");
+    /* get_gpu_info returns -1 on error; never let that move the
+       write offset backwards */
+    n = get_gpu_info(i, buf+nc);
+    if ( n > 0 )
+      nc += n;
+  }
+
+  return nc;
+}
+
+
+
+
diff --git a/affinity/mpi+omp.c b/affinity/mpi+omp.c
@@ -0,0 +1,107 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <mpi.h>
+#include <omp.h>
+#include "affinity.h"
+
+
+/*
+ * Print the command-line help text to stdout.
+ *
+ * name: program name shown in the "Usage:" line
+ */
+static
+void usage(char *name)
+{
+  printf("Usage: %s [options]\n"
+         "\t    -mpi: Show MPI info only (no OpenMP)\n"
+         "\t-verbose: Show detailed GPU info when -mpi enabled\n"
+         "\t   -help: Show this page\n",
+         name);
+}
+
+
+/*
+ * Report the CPU (and, with HAVE_GPUS, GPU) affinity of every MPI
+ * task, or of every OpenMP thread of every MPI task.
+ *
+ * Options (any number of leading dashes, matched on first letter):
+ *   -mpi      one report per MPI task, no OpenMP region
+ *   -verbose  with -mpi and HAVE_GPUS, add detailed GPU info
+ *   -help     print usage from rank 0 and exit
+ * Arguments that do not start with '-' are ignored.
+ */
+int main(int argc, char *argv[])
+{
+  char buf[LONG_STR_SIZE];
+  char hostname[MPI_MAX_PROCESSOR_NAME];
+  int rank, np, size, i, ncpus;
+  int ngpus = 0;
+  int verbose = 0;
+  int help = 0;
+  int mpi = 0;
+  int nc = 0;
+
+  /* Command-line options. Match on the first letter after the
+     leading dashes; an ordering comparison such as strcmp() >= 0
+     would accept unrelated arguments. */
+  for (i=1; i<argc; i++) {
+    const char *opt = argv[i];
+
+    if ( *opt != '-' )
+      continue;
+    while ( *opt == '-' )
+      opt++;
+
+    if ( *opt == 'v' )
+      verbose = 1;
+    else if ( *opt == 'm' )
+      mpi = 1;
+    else if ( *opt == 'h' )
+      help = 1;
+  }
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &np);
+  MPI_Get_processor_name(hostname, &size);
+
+  if (help) {
+    if (rank == 0)
+      usage(argv[0]);
+
+    MPI_Finalize();
+    return 0;
+  }
+
+  if ( mpi ) {
+
+    /* MPI */
+    ncpus = get_num_cpus();
+    nc += sprintf(buf+nc, "%s Task %2d/%2d with %d cpus: ",
+                  hostname, rank, np, ncpus);
+    nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+    ngpus = get_gpu_count();
+    nc += sprintf(buf+nc, "%s Task %2d/%2d with %d gpus: ",
+                  hostname, rank, np, ngpus);
+    nc += get_gpu_affinity(buf+nc);
+    if (verbose) {
+      /* get_gpu_info_all returns -1 on error; keep the offset valid */
+      int ninfo = get_gpu_info_all(buf+nc);
+      if ( ninfo > 0 )
+        nc += ninfo;
+    }
+#endif
+
+    /* Print per-task information */
+    printf("%s", buf);
+
+  } else {
+
+    /* MPI+OpenMP */
+#ifdef HAVE_GPUS
+    ngpus = get_gpu_count();
+#endif
+
+    /* Each thread formats into its own copy of buf/nc */
+#pragma omp parallel firstprivate(buf, nc) private(ncpus) shared(rank, np, ngpus, verbose)
+    {
+      int tid = omp_get_thread_num();
+      int nthreads = omp_get_num_threads();
+      ncpus = get_num_cpus();
+
+      nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d cpus: ",
+                    hostname, rank, np, tid, nthreads, ncpus);
+      nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+      nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d gpus: ",
+                    hostname, rank, np, tid, nthreads, ngpus);
+      nc += get_gpu_affinity(buf+nc);
+#endif
+
+      /* Print per-worker information */
+      printf("%s", buf);
+    }
+
+  }
+
+  MPI_Finalize();
+  return 0;
+}