-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
464 changed files
with
144,581 additions
and
0 deletions.
There are no files selected for viewing
125 changes: 125 additions & 0 deletions
125
CUDA_BasicMatrixMultiplication/BasicMatrixMultiplication/template.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
|
||
#include <wb.h> | ||
|
||
// Evaluates a CUDA runtime call and, if it did not return cudaSuccess, logs
// the failing statement together with the CUDA error string and makes the
// *enclosing function* return -1. It can therefore only be used inside
// functions whose return type accepts -1 (such as main).
//
// The argument is parenthesized so any expression can be passed safely, and
// the temporary uses a reserved-looking name so it cannot shadow a variable
// named `err` that the caller references inside `stmt`.
#define wbCheck(stmt)                                                     \
  do {                                                                    \
    cudaError_t wbCheckErr_ = (stmt);                                     \
    if (wbCheckErr_ != cudaSuccess) {                                     \
      wbLog(ERROR, "Failed to run stmt ", #stmt);                         \
      wbLog(ERROR, "Got CUDA error ...  ",                                \
            cudaGetErrorString(wbCheckErr_));                             \
      return -1;                                                          \
    }                                                                     \
  } while (0)
|
||
// Compute C = A * B
//
// Naive matrix multiplication: every thread produces at most one element of
// C. The element's row comes from the y dimension of the launch and its
// column from the x dimension, so the grid must cover at least
// numCColumns threads in x and numCRows threads in y. Threads that fall
// outside C exit without touching memory.
//
// All matrices are indexed as dense row-major arrays. numARows and numBRows
// are accepted for interface symmetry but are not read by the kernel; the
// inner dimension is taken from numAColumns.
__global__ void matrixMultiply(float *A, float *B, float *C, int numARows,
                               int numAColumns, int numBRows,
                               int numBColumns, int numCRows,
                               int numCColumns) {
  const int outRow = blockIdx.y * blockDim.y + threadIdx.y;
  const int outCol = blockIdx.x * blockDim.x + threadIdx.x;

  // Guard: the grid may overhang the output matrix.
  if (outRow >= numCRows || outCol >= numCColumns) {
    return;
  }

  // Start of this thread's row in A and top of its column in B.
  const float *aRow = A + outRow * numAColumns;
  const float *bCol = B + outCol;

  float acc = 0;
  for (int k = 0; k < numAColumns; ++k) {
    acc += aRow[k] * bCol[k * numBColumns];
  }

  C[outRow * numCColumns + outCol] = acc;
}
|
||
// Host driver: imports matrices A and B from the two input files named on
// the command line, computes C = A * B on the GPU with matrixMultiply, and
// hands the result to wbSolution for checking.
//
// Returns 0 on success and -1 if the inputs are incompatible, host
// allocation fails, or any CUDA call reports an error (via wbCheck).
int main(int argc, char **argv) {
  wbArg_t args;
  float *hostA; // The A matrix
  float *hostB; // The B matrix
  float *hostC; // The output C matrix
  float *deviceA;
  float *deviceB;
  float *deviceC;
  int numARows;    // number of rows in the matrix A
  int numAColumns; // number of columns in the matrix A
  int numBRows;    // number of rows in the matrix B
  int numBColumns; // number of columns in the matrix B
  int numCRows;    // number of rows in the matrix C
  int numCColumns; // number of columns in the matrix C

  // Threads per block edge; each block computes a TILE_WIDTH x TILE_WIDTH
  // patch of C.
  const int TILE_WIDTH = 16;

  hostC = NULL;
  // Byte sizes are kept in size_t so the element-count product cannot
  // overflow int for large matrices.
  size_t sizeA, sizeB, sizeC;

  args = wbArg_read(argc, argv);

  wbTime_start(Generic, "Importing data and creating memory on host");
  hostA = (float *)wbImport(wbArg_getInputFile(args, 0), &numARows,
                            &numAColumns);
  hostB = (float *)wbImport(wbArg_getInputFile(args, 1), &numBRows,
                            &numBColumns);

  //@@ Set numCRows and numCColumns
  numCRows    = numARows;
  numCColumns = numBColumns;

  sizeA = (size_t)numARows * numAColumns * sizeof(float);
  sizeB = (size_t)numBRows * numBColumns * sizeof(float);
  sizeC = (size_t)numCRows * numCColumns * sizeof(float);

  //@@ Allocate the hostC matrix
  hostC = (float *)malloc(sizeC);

  wbTime_stop(Generic, "Importing data and creating memory on host");

  wbLog(TRACE, "The dimensions of A are ", numARows, " x ", numAColumns);
  wbLog(TRACE, "The dimensions of B are ", numBRows, " x ", numBColumns);

  // The kernel walks numAColumns rows of B; a mismatched inner dimension
  // would read past the end of B or produce a meaningless product.
  if (numAColumns != numBRows) {
    wbLog(ERROR, "Inner dimensions do not match: A has ", numAColumns,
          " columns but B has ", numBRows, " rows");
    free(hostA);
    free(hostB);
    free(hostC);
    return -1;
  }

  if (hostC == NULL) {
    wbLog(ERROR, "Failed to allocate host memory for C");
    free(hostA);
    free(hostB);
    return -1;
  }

  wbTime_start(GPU, "Allocating GPU memory.");
  //@@ Allocate GPU memory here
  wbCheck(cudaMalloc((void **)&deviceA, sizeA));
  wbCheck(cudaMalloc((void **)&deviceB, sizeB));
  wbCheck(cudaMalloc((void **)&deviceC, sizeC));
  wbTime_stop(GPU, "Allocating GPU memory.");

  wbTime_start(GPU, "Copying input memory to the GPU.");
  //@@ Copy memory to the GPU here
  wbCheck(cudaMemcpy(deviceA, hostA, sizeA, cudaMemcpyHostToDevice));
  wbCheck(cudaMemcpy(deviceB, hostB, sizeB, cudaMemcpyHostToDevice));
  wbTime_stop(GPU, "Copying input memory to the GPU.");

  //@@ Initialize the grid and block dimensions here
  // Round the grid up (ceiling division) so that matrices whose dimensions
  // are not multiples of TILE_WIDTH are still fully covered; the kernel's
  // bounds check discards the overhanging threads. Truncating division
  // would skip the last partial tile and yield a zero-sized grid for
  // dimensions smaller than TILE_WIDTH.
  dim3 dimBlock(TILE_WIDTH, TILE_WIDTH, 1);
  dim3 dimGrid((numCColumns + TILE_WIDTH - 1) / TILE_WIDTH,
               (numCRows + TILE_WIDTH - 1) / TILE_WIDTH, 1);

  wbTime_start(Compute, "Performing CUDA computation");
  //@@ Launch the GPU Kernel
  matrixMultiply<<<dimGrid, dimBlock>>>(deviceA, deviceB, deviceC,
                                        numARows, numAColumns, numBRows,
                                        numBColumns, numCRows,
                                        numCColumns);
  // A kernel launch returns no status itself: configuration errors are
  // reported by cudaGetLastError, execution errors by the synchronize.
  wbCheck(cudaGetLastError());
  wbCheck(cudaDeviceSynchronize());
  wbTime_stop(Compute, "Performing CUDA computation");

  wbTime_start(Copy, "Copying output memory to the CPU");
  //@@ Copy the GPU memory back to the CPU
  wbCheck(cudaMemcpy(hostC, deviceC, sizeC, cudaMemcpyDeviceToHost));
  wbTime_stop(Copy, "Copying output memory to the CPU");

  wbTime_start(GPU, "Freeing GPU Memory");
  //@@ Free the GPU memory
  wbCheck(cudaFree(deviceA));
  wbCheck(cudaFree(deviceB));
  wbCheck(cudaFree(deviceC));
  wbTime_stop(GPU, "Freeing GPU Memory");

  wbSolution(args, hostC, numCRows, numCColumns);

  // NOTE(review): hostA/hostB come from wbImport and are released with
  // free(), as in the original code — presumably wbImport allocates with
  // malloc; confirm against libwb.
  free(hostA);
  free(hostB);
  free(hostC);

  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
--- | ||
Language: Cpp | ||
BasedOnStyle: LLVM | ||
ColumnLimit: 75 | ||
AccessModifierOffset: -2 | ||
BreakBeforeBraces: Attach | ||
AlignTrailingComments: true | ||
AlignEscapedNewlinesLeft: false | ||
AlignConsecutiveAssignments: true | ||
AlignOperands: true | ||
AllowShortFunctionsOnASingleLine: false | ||
AllowShortIfStatementsOnASingleLine: false | ||
AllowShortLoopsOnASingleLine: false | ||
AlwaysBreakTemplateDeclarations: true | ||
IndentCaseLabels: true | ||
SpacesBeforeTrailingComments: 1 | ||
Standard: Cpp11 | ||
TabWidth: 4 | ||
UseTab: Never | ||
SortIncludes: false | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Use the precise image with sudo enabled; newer compilers come from the apt toolchain sources listed below | ||
sudo: required | ||
dist: precise | ||
language: cpp | ||
|
||
matrix: | ||
include: | ||
- compiler: gcc | ||
addons: | ||
apt: | ||
sources: | ||
- ubuntu-toolchain-r-test | ||
packages: | ||
- g++-4.9 | ||
env: COMPILER=g++-4.9 | ||
- compiler: gcc | ||
addons: | ||
apt: | ||
sources: | ||
- ubuntu-toolchain-r-test | ||
packages: | ||
- g++-5 | ||
env: COMPILER=g++-5 | ||
- compiler: clang | ||
addons: | ||
apt: | ||
sources: | ||
- ubuntu-toolchain-r-test | ||
- llvm-toolchain-precise-3.6 | ||
packages: | ||
- clang-3.6 | ||
env: COMPILER=clang++-3.6 | ||
- compiler: clang | ||
addons: | ||
apt: | ||
sources: | ||
- ubuntu-toolchain-r-test | ||
- llvm-toolchain-precise-3.7 | ||
packages: | ||
- clang-3.7 | ||
env: COMPILER=clang++-3.7 | ||
# - compiler: gcc | ||
# addons: | ||
# apt: | ||
# sources: | ||
# - ubuntu-toolchain-r-test | ||
# packages: | ||
# - g++-6 | ||
# env: COMPILER=g++-6 | ||
# - compiler: clang | ||
# addons: | ||
# apt: | ||
# sources: | ||
# - ubuntu-toolchain-r-test | ||
# - llvm-toolchain-precise-3.8 | ||
# packages: | ||
# - clang-3.8 | ||
# env: COMPILER=clang++-3.8 | ||
|
||
before_install: | ||
- sudo apt-get update -qq | ||
script: | ||
- $COMPILER --version | ||
- make CXX=$COMPILER test | ||
- ./test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
cmake_minimum_required(VERSION 2.8 FATAL_ERROR) | ||
|
||
set(CMAKE_BUILD_TYPE Release) | ||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
set(CMAKE_COLOR_MAKEFILE ON) | ||
set(VERBOSE_BUILD ON) | ||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) | ||
set(CMAKE_MACOSX_RPATH TRUE) | ||
set_property(GLOBAL PROPERTY USE_FOLDERS ON) | ||
|
||
|
||
project(wb) | ||
|
||
set(current_dir "${CMAKE_CURRENT_SOURCE_DIR}") | ||
|
||
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/cotire.cmake") | ||
file(DOWNLOAD "https://raw.githubusercontent.com/sakra/cotire/master/CMake/cotire.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cotire.cmake") | ||
endif() | ||
|
||
|
||
|
||
include(${current_dir}/sources.cmake) | ||
|
||
set(WBLIB_STATIC ${WBLIB}) | ||
set(WBLIB_SHARED lib${WBLIB}) | ||
|
||
add_library(${WBLIB_STATIC} STATIC ${LIBWB_SOURCE_FILES} ) | ||
add_library(${WBLIB_SHARED} SHARED ${LIBWB_SOURCE_FILES} ) | ||
set_property(TARGET ${WBLIB_STATIC} PROPERTY CXX_STANDARD 11) | ||
set_property(TARGET ${WBLIB_SHARED} PROPERTY CXX_STANDARD 11) | ||
if (UNIX) | ||
set_target_properties(${WBLIB_SHARED} PROPERTIES OUTPUT_NAME ${WBLIB_STATIC}) | ||
endif (UNIX) | ||
set_property(TARGET ${WBLIB} PROPERTY CXX_STANDARD 11) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
Copyright (c) 2016, Abdul Dakkak All rights reserved. | ||
|
||
Developed by: Abdul Dakkak | ||
IMPACT Group | ||
University of Illinois, Urbana-Champaign | ||
impact.crhc.illinois.edu | ||
|
||
Permission is hereby granted, free of charge, to any | ||
person obtaining a copy of this software and associated | ||
documentation files (the "Software"), to deal with the | ||
Software without restriction, including without limitation | ||
the rights to use, copy, modify, merge, publish, distribute, | ||
sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, | ||
subject to the following conditions: | ||
|
||
Redistributions of source code must retain the above | ||
copyright notice, this list of conditions and the following | ||
disclaimers. | ||
Redistributions in binary form must reproduce the above | ||
copyright notice, this list of conditions and the following | ||
disclaimers in the documentation and/or other materials | ||
provided with the distribution. | ||
Neither the names of Abdul Dakkak, Impact Group, nor the | ||
names of its contributors may be used to endorse or promote | ||
products derived from this Software without specific prior | ||
written permission. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF | ||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED | ||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | ||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF | ||
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
WITH THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
########################################## | ||
# Options | ||
########################################## | ||
WB_LIB_PATH=$(CURDIR)/lib | ||
WB_SRC_PATH=$(CURDIR) | ||
|
||
########################################## | ||
########################################## | ||
|
||
DEFINES= | ||
CXX_FLAGS=-fPIC -Wno-unused-function -x c++ -O3 -g -std=c++11 -Wall -Wno-unused-function -pedantic -I . -I $(WB_SRC_PATH) $(DEFINES) | ||
LIBS=-lm -lstdc++ | ||
|
||
########################################## | ||
########################################## | ||
|
||
UNAME_S := $(shell uname -s) | ||
ifeq ($(UNAME_S),Linux) | ||
LIBS += -lrt | ||
endif | ||
|
||
########################################## | ||
########################################## | ||
|
||
SOURCES := $(shell find $(WB_SRC_PATH) ! -name "*_test.cpp" -name "*.cpp") | ||
TESTS := $(shell find $(WB_SRC_PATH) -name "*_test.cpp") | ||
|
||
OBJECTS = $(SOURCES:.cpp=.o) | ||
TESTOBJECTS = $(TESTS:.cpp=.o) | ||
|
||
############################################## | ||
# OUTPUT | ||
############################################## | ||
|
||
.PHONY: all | ||
.SUFFIXES: .o .cpp | ||
all: libwb.so | ||
|
||
.cpp.o: | ||
$(CXX) $(DEFINES) $(CXX_FLAGS) -c -o $@ $< | ||
|
||
libwb.so: $(OBJECTS) | ||
mkdir -p $(WB_LIB_PATH) | ||
$(CXX) -fPIC -shared -o $(WB_LIB_PATH)/$@ $(OBJECTS) $(LIBS) | ||
|
||
libwb.a: $(OBJECTS) | ||
mkdir -p $(WB_LIB_PATH) | ||
ar rcs -o $(WB_LIB_PATH)/$@ $(OBJECTS) | ||
|
||
test: $(TESTOBJECTS) $(OBJECTS) | ||
$(CXX) -fPIC -o $@ $(TESTOBJECTS) $(OBJECTS) $(LIBS) | ||
|
||
|
||
clean: | ||
rm -fr $(ARCH) | ||
-rm -f $(EXES) *.o *~ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
|
||
# libWB | ||
|
||
[Travis CI build status](https://travis-ci.org/abduld/libwb) | ||
[AppVeyor build status](https://ci.appveyor.com/project/abduld/libwb/branch/master) | ||
<!-- [Coverity Scan status](https://scan.coverity.com/projects/abduld-libwb) | ||
--> |
Oops, something went wrong.