-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProblem 1c
109 lines (87 loc) · 3.18 KB
/
Problem 1c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
//----HEADER FILE--------------------------//
#include<limits.h>
#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include <sys/time.h>
#include<cuda.h>
#include<cuda_runtime.h>
//---------------------------------------//
#define THREAD_PER_BLOCK 512
//------------- KERNEL FUNCTION ADD-------------------//
/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
 *
 * Launch layout: 1D grid of 1D blocks. The kernel uses a grid-stride loop,
 * so any grid size >= 1 produces a complete result; launching
 * ceil(n / blockDim.x) blocks gives one element per thread.
 * Shared memory: none.
 *
 * a, b : device pointers to the input vectors (at least n ints each)
 * c    : device pointer to the output vector (at least n ints)
 * n    : number of elements; nothing is written when n <= 0
 */
__global__ void add(int*a,int*b,int*c,long n) {
    // Index arithmetic is done in 64 bits: blockIdx.x * blockDim.x is an
    // unsigned 32-bit product that can wrap, and an int index would go
    // negative once the global thread ID passes INT_MAX.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n;
         i += stride) {
        c[i] = a[i] + b[i];
    }
}
//----------------------------------------------------//
//--------------FUNCTION TO ASSIGN RANDOM INTEGERS----//
/*
 * Fill the first x elements of a with successive values from rand().
 *
 * a : host buffer with room for at least x ints
 * x : number of elements to write; nothing is written when x <= 0
 *
 * The sequence depends on the current rand() state, so callers that need
 * reproducible data should call srand() beforehand.
 */
void random_ints(int* a, long x)
{
    // The counter has the same type as the bound so it cannot overflow
    // (undefined behaviour for a signed int) when x exceeds INT_MAX.
    for (long i = 0; i < x; ++i) {
        a[i] = rand();
    }
}
//-----------------------------------------------------//
/*
 * Report a failed CUDA runtime call on stderr.
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int check_cuda(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return 1;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return 0;
}

/*
 * Vector-addition benchmark.
 *
 * Usage: ./exec.o N
 *
 * Adds two random int vectors of length N on the CPU (timed with
 * gettimeofday) and on the GPU (timed with CUDA events; the measured
 * interval covers both host-to-device copies, the kernel, and the
 * device-to-host copy), prints both timings, and checks that the GPU
 * result matches the CPU reference.
 *
 * Returns 0 on success or when no argument is given (usage message only),
 * and EXIT_FAILURE on invalid N, allocation failure, any CUDA error, or a
 * CPU/GPU result mismatch.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        printf("Please enter the size of vector in the format ./exec.o N(1024/10240/102400)\n");
        return 0;
    }

    // strtol (unlike atoi) lets us reject non-numeric and out-of-range input.
    // N is kept within int range so 32-bit index arithmetic cannot overflow,
    // and N * sizeof(int) is checked against the size_t range.
    char *parse_end = NULL;
    long N = strtol(argv[1], &parse_end, 10);
    if (parse_end == argv[1] || *parse_end != '\0' ||
        N <= 0 || N > INT_MAX ||
        (size_t)N > ((size_t)-1) / sizeof(int)) {
        fprintf(stderr,
                "Invalid vector size '%s': expected a positive integer no larger than %d\n",
                argv[1], INT_MAX);
        return EXIT_FAILURE;
    }

    // Byte count in size_t: an int would truncate for large N.
    const size_t size = (size_t)N * sizeof(int);
    // Ceil-division so a partial last block still covers the tail elements.
    const unsigned int blocks =
        (unsigned int)(((long long)N + THREAD_PER_BLOCK - 1) / THREAD_PER_BLOCK);

    int *a = NULL, *b = NULL, *c = NULL, *output = NULL; // host copies
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;           // device copies
    struct timeval startc, end;                          // gettimeofday structure
    long seconds, useconds;
    double mtime;
    cudaEvent_t start = NULL, stop = NULL;
    float gpu_ms = 0.0f;
    int exit_code = EXIT_FAILURE;

    // Single-pass block: any failure breaks out to the shared cleanup below.
    do {
        // Create CUDA events to initialise the GPU timers
        if (!check_cuda(cudaEventCreate(&start), "cudaEventCreate(start)")) break;
        if (!check_cuda(cudaEventCreate(&stop), "cudaEventCreate(stop)")) break;

        // Alloc space for device copies of a, b, c
        if (!check_cuda(cudaMalloc((void**)&d_a, size), "cudaMalloc(d_a)")) break;
        if (!check_cuda(cudaMalloc((void**)&d_b, size), "cudaMalloc(d_b)")) break;
        if (!check_cuda(cudaMalloc((void**)&d_c, size), "cudaMalloc(d_c)")) break;

        // Alloc space for host copies of a, b, c and setup input values
        a = (int*)malloc(size);
        b = (int*)malloc(size);
        c = (int*)malloc(size);
        output = (int*)malloc(size);
        if (a == NULL || b == NULL || c == NULL || output == NULL) {
            fprintf(stderr, "Host allocation of %zu bytes failed\n", size);
            break;
        }
        random_ints(a, N);
        random_ints(b, N);

        // Executes CPU
        gettimeofday(&startc, NULL);
        for (long i = 0; i < N; i++) {
            // rand() values can sum past INT_MAX; add as unsigned so the
            // host reference wraps instead of hitting signed-overflow UB.
            output[i] = (int)((unsigned int)a[i] + (unsigned int)b[i]);
        }
        gettimeofday(&end, NULL);
        seconds = end.tv_sec - startc.tv_sec;
        useconds = end.tv_usec - startc.tv_usec;
        // Floating-point division keeps the sub-millisecond part.
        mtime = 1000.0 * seconds + useconds / 1000.0;
        printf("\n-------------------Vector addition in CPU-----------------------------\n");
        printf("Value of N= %ld Time Taken= %g millisec \n", N, mtime);
        printf("------------------------------------------------------------------------\n");

        // Start timing: covers H2D copies, kernel execution and D2H copy
        if (!check_cuda(cudaEventRecord(start, 0), "cudaEventRecord(start)")) break;
        // Copy inputs to device
        if (!check_cuda(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)")) break;
        if (!check_cuda(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)")) break;
        // Launch add() kernel on GPU with ceil(N / THREAD_PER_BLOCK) blocks
        add<<<blocks, THREAD_PER_BLOCK>>>(d_a, d_b, d_c, N);
        // Launch-configuration errors are only visible through cudaGetLastError
        if (!check_cuda(cudaGetLastError(), "add kernel launch")) break;
        // Copy result back to host (blocking, so it also surfaces kernel faults)
        if (!check_cuda(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> c)")) break;
        // Stop timing
        if (!check_cuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")) break;
        if (!check_cuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")) break;
        if (!check_cuda(cudaEventElapsedTime(&gpu_ms, start, stop), "cudaEventElapsedTime")) break;
        printf("----------------------------Vector Addition in GPU----------------------\n");
        printf("Value of N= %ld Time Taken= %f millisecond \n", N, gpu_ms);
        printf("------------------------------------------------------------------------\n");

        // Compare the GPU result with the CPU reference
        long mismatch = -1;
        for (long i = 0; i < N; i++) {
            if (c[i] != output[i]) {
                mismatch = i;
                break;
            }
        }
        if (mismatch >= 0) {
            fprintf(stderr, "Verification failed at index %ld: CPU=%d GPU=%d\n",
                    mismatch, output[mismatch], c[mismatch]);
            break;
        }

        exit_code = EXIT_SUCCESS;
    } while (0);

    // Cleanup (safe on partially initialised state)
    if (start != NULL) cudaEventDestroy(start);
    if (stop != NULL) cudaEventDestroy(stop);
    free(a); free(b); free(c); free(output);
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    return exit_code;
}