/*
* SPDX-FileCopyrightText: 2021 CSC - IT Center for Science Ltd. <www.csc.fi>
*
* SPDX-License-Identifier: MIT
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sched.h>
#include <math.h>
#include <mpi.h>
#include <omp.h>
/* Prints out which core each MPI rank and OpenMP thread is bound to.
   Also performs a dummy calculation; an increase in execution time
   indicates that cores are oversubscribed. Comparing the timing of a
   single MPI task with a single thread against a full node also gives
   hints about the effects of CPU frequency scaling.
   Note: the compiler might optimize the calculation away, so compile
   without optimization. */
/* mpicc -fopenmp -lmpi -lm -o out affinity_test.c */
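/* A hypothetical launch on a Slurm system (the flags below are only an
 * illustration; adjust to your scheduler and node layout):
 *
 *   export OMP_NUM_THREADS=4
 *   srun --ntasks=2 --cpus-per-task=4 ./out
 *
 * Each rank/thread pair then prints one line of the form
 *   Rank 000 thread 00 on node0001. core = 0-3 (1.234567 seconds).
 * where the hostname, core list, and timing depend on the machine. */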
/* Borrowed from util-linux-2.13-pre7/schedutils/taskset.c */
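/* Renders a CPU mask as a compact range string: a mask containing CPUs
 * 0, 1, 2, and 5, for example, becomes "0-2,5". The caller must supply a
 * buffer large enough for the worst case; 7 * CPU_SETSIZE bytes (as used
 * in main below) is sufficient. */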
static char *cpuset_to_cstr(cpu_set_t *mask, char *str)
{
    char *ptr = str;
    int i, j, entry_made = 0;

    for (i = 0; i < CPU_SETSIZE; i++) {
        if (CPU_ISSET(i, mask)) {
            int run = 0;
            entry_made = 1;
            for (j = i + 1; j < CPU_SETSIZE; j++) {
                if (CPU_ISSET(j, mask))
                    run++;
                else
                    break;
            }
            if (!run)
                sprintf(ptr, "%d,", i);
            else if (run == 1) {
                sprintf(ptr, "%d,%d,", i, i + 1);
                i++;
            } else {
                sprintf(ptr, "%d-%d,", i, i + run);
                i += run;
            }
            while (*ptr != 0)
                ptr++;
        }
    }
    ptr -= entry_made;  /* drop the trailing comma, if any */
    *ptr = 0;
    return str;
}
int main(int argc, char *argv[])
{
    int rank, thread_id, provided;
    cpu_set_t coremask;
    int niter = 40000000;                    // number of iterations per for loop
    double x, y, z;
    char clbuf[7 * CPU_SETSIZE], hnbuf[64];  // core and hostname strings

    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    memset(clbuf, 0, sizeof(clbuf));
    memset(hnbuf, 0, sizeof(hnbuf));
    (void)gethostname(hnbuf, sizeof(hnbuf));

#pragma omp parallel private(x, y, z, thread_id, coremask, clbuf)
    {
        thread_id = omp_get_thread_num();
        double t = MPI_Wtime();
        for (int i = 0; i < niter; i++) {    // dummy workload loop
            x = cos(i * 0.1) * exp(i * 0.04);
            y = sin(i * 0.1) * exp(i * 0.04);
            z = (x * x) + (y * y);
        }
        t = MPI_Wtime() - t;
        sched_getaffinity(0, sizeof(coremask), &coremask);
        cpuset_to_cstr(&coremask, clbuf);
#pragma omp barrier                          // let all threads finish before printing
        printf("Rank %03d thread %02d on %s. core = %s (%f seconds).\n",
               rank, thread_id, hnbuf, clbuf, t);
    }
    MPI_Finalize();
    return 0;
}