-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
20 changed files
with
757 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
/*
 * Program entry point: writes a single greeting line to standard output.
 * The command-line arguments are accepted but not used.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    fputs("Hello World\n", stdout);
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
// Kernel with an empty body: it takes no arguments and performs no work.
// Launching it only exercises the host-to-device launch path.
__global__ void mykernel()
{
    // Intentionally empty.
}
|
||
/*
 * Launches mykernel with one block of one thread, waits for it to finish,
 * and then prints a greeting.
 *
 * Returns 0 on success, EXIT_FAILURE if the launch or the kernel execution
 * reported a CUDA error. The command-line arguments are not used.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    mykernel<<<1, 1>>>();

    // A kernel launch returns nothing: configuration errors are reported by
    // cudaGetLastError(), execution errors by the next synchronizing call.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        // Kernel launches are asynchronous; wait so the kernel has actually
        // completed before the process exits.
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return EXIT_FAILURE;
    }

    printf("Hello world\n");
    return 0;
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
// Element-wise integer addition, one element per block.
//
// Each block uses its x block index as the array index and stores
// da[index] + db[index] into dc[index]. The thread index is not consulted
// and there is no bounds check, so the caller must launch exactly as many
// blocks as there are elements in the three arrays.
__global__ void add(int *da, int *db, int *dc)
{
    const unsigned int element = blockIdx.x;
    const int sum = da[element] + db[element];
    dc[element] = sum;
}
|
||
#ifndef CUDA_CHECK
// Evaluate a CUDA runtime call; on failure print where and why, then exit.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckStatus_ = (call);                             \
        if (cudaCheckStatus_ != cudaSuccess) {                             \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudaCheckStatus_));       \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

/*
 * Adds two constant-filled integer vectors on the GPU and prints the first
 * and last element of the result.
 *
 * Command line: <a> <b> <N>
 *   a, b  values used to fill the two input vectors
 *   N     number of elements in each vector (must be positive)
 *
 * Returns 0 on success and -1 on bad arguments or host allocation failure;
 * any CUDA failure terminates the program through CUDA_CHECK.
 */
int main(int argc, char **argv)
{
    // All three arguments are required; without them argv[1..3] are invalid.
    if (argc < 4)
    {
        printf("Usage:\n %s a b N\n", argv[0]);
        return (-1);
    }
    int a_in = atoi(argv[1]); // first addend
    int b_in = atoi(argv[2]); // second addend
    int N = atoi(argv[3]);    // length of arrays
    if (N <= 0)
    {
        // N == 0 would index element N - 1 == -1 when printing below.
        fprintf(stderr, "N must be a positive integer\n");
        return (-1);
    }

    // The kernel processes exactly one element per block and has no bounds
    // check, so the grid must contain exactly N blocks: fewer leaves elements
    // uncomputed, more reads and writes past the end of the device arrays.
    int numBlocks = N;

    int *a, *b, *c;
    int *d_a, *d_b, *d_c;
    // size_t keeps the byte count from overflowing int for large N.
    size_t size = (size_t)N * sizeof(int);
    a = (int *)malloc(size);
    b = (int *)malloc(size);
    c = (int *)malloc(size);
    if (a == NULL || b == NULL || c == NULL)
    {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", size);
        free(a);
        free(b);
        free(c);
        return (-1);
    }

    // Initialize the input vectors
    for (int i = 0; i < N; ++i)
    {
        a[i] = a_in;
        b[i] = b_in;
        c[i] = 0;
    }

    CUDA_CHECK(cudaMalloc((void **)&d_a, size));
    CUDA_CHECK(cudaMalloc((void **)&d_b, size));
    CUDA_CHECK(cudaMalloc((void **)&d_c, size));
    CUDA_CHECK(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice));

    add<<<numBlocks, 1>>>(d_a, d_b, d_c);
    CUDA_CHECK(cudaGetLastError());      // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize()); // errors raised while the kernel ran
    CUDA_CHECK(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    printf("%d + %d = %d\n", a[0], b[0], c[0]);
    printf("...\n");
    printf("%d + %d = %d\n", a[N - 1], b[N - 1], c[N - 1]);
    free(a);
    free(b);
    free(c);
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
// Element-wise integer addition: dc[i] = da[i] + db[i] for 0 <= i < N.
//
// Works for any 1-D launch configuration: each thread starts at its global
// x index and advances by the total number of threads in the grid, so the
// grid does not have to match N. N <= 0 performs no work.
//
//   N   number of elements in each array
//   da  first input array (device memory, at least N ints)
//   db  second input array (device memory, at least N ints)
//   dc  output array (device memory, at least N ints)
__global__ void add(int N, int *da, int *db, int *dc)
{
    // This is a CUDA idiom called the grid-stride loop.
    // The index and stride are held in 64 bits: with 32-bit int, i + stride
    // can exceed INT_MAX when N is close to INT_MAX, which is signed overflow.
    const long long stride = (long long)blockDim.x * gridDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    for (long long i = first; i < N; i += stride)
        dc[i] = da[i] + db[i];
}
|
||
#ifndef CUDA_CHECK
// Evaluate a CUDA runtime call; on failure print where and why, then exit.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckStatus_ = (call);                             \
        if (cudaCheckStatus_ != cudaSuccess) {                             \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudaCheckStatus_));       \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

/*
 * Adds two constant-filled integer vectors on the GPU with a caller-chosen
 * launch configuration, prints both ends of the result and scans the whole
 * result for the first wrong sum.
 *
 * Command line: <a> <b> <N> <threads> <blocks>
 *   a, b     values used to fill the two input vectors
 *   N        number of elements in each vector (must be positive)
 *   threads  threads per block (must be positive)
 *   blocks   blocks in the grid (must be positive)
 *
 * Returns 0 on success and -1 on bad arguments or host allocation failure;
 * any CUDA failure terminates the program through CUDA_CHECK.
 */
int main(int argc, char **argv)
{
    // Read values from cmd line.
    if (argc < 6)
    {
        printf("Usage:\n %s a b N threads blocks\n", argv[0]);
        return (-1);
    }
    int a_in = atoi(argv[1]);
    int b_in = atoi(argv[2]);
    int N = atoi(argv[3]);
    int numThreads = atoi(argv[4]);
    int numBlocks = atoi(argv[5]);
    // Or to get the block count that covers N elements:
    // int numBlocks = (N + numThreads - 1) / numThreads;

    // Reject values that cannot work: N <= 0 would index element N - 1 < 0
    // below, and a non-positive launch dimension is an invalid configuration.
    if (N <= 0 || numThreads <= 0 || numBlocks <= 0)
    {
        fprintf(stderr, "N, threads and blocks must all be positive integers\n");
        return (-1);
    }

    // Calculate size of arrays in bytes.
    // size_t keeps the byte count from overflowing int for large N.
    size_t size = (size_t)N * sizeof(int);
    // Allocate host storage.
    int *a, *b, *c;
    a = (int *)malloc(size);
    b = (int *)malloc(size);
    c = (int *)malloc(size);
    if (a == NULL || b == NULL || c == NULL)
    {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", size);
        free(a);
        free(b);
        free(c);
        return (-1);
    }
    // Initialize the input vectors.
    for (int i = 0; i < N; ++i)
    {
        a[i] = a_in;
        b[i] = b_in;
        c[i] = 0;
    }

    // Allocate device storage.
    int *da, *db, *dc;
    CUDA_CHECK(cudaMalloc((void **)&da, size));
    CUDA_CHECK(cudaMalloc((void **)&db, size));
    CUDA_CHECK(cudaMalloc((void **)&dc, size));

    // Copy data to GPU.
    CUDA_CHECK(cudaMemcpy(da, a, size, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(db, b, size, cudaMemcpyHostToDevice));

    // Execute the kernel on the GPU.
    add<<<numBlocks, numThreads>>>(N, da, db, dc);
    CUDA_CHECK(cudaGetLastError());      // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize()); // errors raised while the kernel ran

    // Copy results back from GPU.
    CUDA_CHECK(cudaMemcpy(c, dc, size, cudaMemcpyDeviceToHost));

    // Print results from each end of the array.
    printf("%d plus %d equals %d\n", a[0], b[0], c[0]);
    printf(" ...\n");
    printf("%d plus %d equals %d\n", a[N - 1], b[N - 1], c[N - 1]);

    // Check for stray errors somewhere in the middle.
    // We won't check them all, quit after first error.
    int expected = a_in + b_in;
    for (int i = 0; i < N; ++i)
    {
        if (c[i] != expected)
        {
            printf("Wrong sum %d at element %d!\n", c[i], i);
            break;
        }
    }

    // Free all allocated memory.
    CUDA_CHECK(cudaFree(da));
    CUDA_CHECK(cudaFree(db));
    CUDA_CHECK(cudaFree(dc));
    free(a);
    free(b);
    free(c);
    return 0;
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#include <cuda.h> /* CUDA runtime API */ | ||
#include <cstdio> | ||
#include <cublas_v2.h> | ||
#ifndef CUDA_CHECK
/* Evaluate a CUDA runtime call; on failure print where and why, then exit. */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckStatus_ = (call);                             \
        if (cudaCheckStatus_ != cudaSuccess) {                             \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudaCheckStatus_));       \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

/*
 * Computes y = alpha * x + y on the GPU with cublasSaxpy for two vectors of
 * 1024*1024 random floats and copies the result back into y_host.
 *
 * Exit status: 0 on success, 20 when a cuBLAS call fails, EXIT_FAILURE when
 * a host allocation or a CUDA runtime call fails. The command-line arguments
 * are not used.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    float *x_host, *y_host; /* arrays for computation on host */
    float *x_dev, *y_dev;   /* arrays for computation on device */
    int n = 1024*1024;
    float alpha = 0.5f;
    size_t memsize;
    int i;
    /* could add device detection here */
    memsize = n * sizeof(float);

    /* allocate arrays on host */
    x_host = (float *)malloc(memsize);
    y_host = (float *)malloc(memsize);
    if (x_host == NULL || y_host == NULL)
    {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", memsize);
        free(x_host);
        free(y_host);
        exit(EXIT_FAILURE);
    }

    /* allocate arrays on device */
    CUDA_CHECK(cudaMalloc((void **) &x_dev, memsize));
    CUDA_CHECK(cudaMalloc((void **) &y_dev, memsize));

    /* initialize arrays on host */
    for ( i = 0; i < n; i++)
    {
        x_host[i] = rand() / (float)RAND_MAX;
        y_host[i] = rand() / (float)RAND_MAX;
    }

    /* copy arrays to device memory (synchronous) */
    CUDA_CHECK(cudaMemcpy(x_dev, x_host, memsize, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(y_dev, y_host, memsize, cudaMemcpyHostToDevice));

    cublasHandle_t handle;
    cublasStatus_t status;
    status = cublasCreate(&handle);
    /* check the handle before using it; the status would otherwise be
       overwritten by the cublasSaxpy call below */
    if (status != CUBLAS_STATUS_SUCCESS)
    {
        fprintf(stderr, "Error in creating CUBLAS handle\n");
        exit (20);
    }

    int stride = 1;
    status = cublasSaxpy(handle,n,&alpha,x_dev,stride,y_dev,stride);

    /* check if cublasSaxpy launched successfully */
    if (status != CUBLAS_STATUS_SUCCESS)
    {
        printf ("Error in launching CUBLAS routine \n");
        exit (20);
    }

    status = cublasDestroy(handle);
    if (status != CUBLAS_STATUS_SUCCESS)
    {
        /* the computation has already been issued, so only report this */
        fprintf(stderr, "Warning: cublasDestroy failed\n");
    }

    /* retrieve results from device (synchronous) */
    CUDA_CHECK(cudaMemcpy(y_host, y_dev, memsize, cudaMemcpyDeviceToHost));
    /* ensure synchronization (cudaMemcpy is synchronous in most cases, but not all) */
    CUDA_CHECK(cudaDeviceSynchronize());
    /* use data in y_host */
    /* free memory */
    CUDA_CHECK(cudaFree(x_dev));
    CUDA_CHECK(cudaFree(y_dev));
    free(x_host);
    free(y_host);
    return 0;
}
Oops, something went wrong.