Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
init
  • Loading branch information
Anthony Doan committed May 20, 2017
1 parent 6133fb7 commit 7e2fe33
Show file tree
Hide file tree
Showing 464 changed files with 144,581 additions and 0 deletions.
125 changes: 125 additions & 0 deletions CUDA_BasicMatrixMultiplication/BasicMatrixMultiplication/template.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@

#include <wb.h>

// Evaluates a CUDA runtime call exactly once. If the result is anything
// other than cudaSuccess, logs the text of the failing statement and the
// CUDA error string, then executes `return -1` in the enclosing function,
// so it may only be used inside functions whose return type accepts -1.
//
// The argument is parenthesized and the temporary uses a name that is
// unlikely to appear at the call site, so an expression that itself
// mentions a variable called `err` is not captured by the macro's local.
#define wbCheck(stmt)                                                     \
  do {                                                                    \
    cudaError_t wbCheckErr_ = (stmt);                                     \
    if (wbCheckErr_ != cudaSuccess) {                                     \
      wbLog(ERROR, "Failed to run stmt ", #stmt);                         \
      wbLog(ERROR, "Got CUDA error ... ",                                 \
            cudaGetErrorString(wbCheckErr_));                             \
      return -1;                                                          \
    }                                                                     \
  } while (0)

// Compute C = A * B
//
// Expects a 2D launch: the y axis of the grid/block indexes rows of C and
// the x axis indexes columns of C. Each thread produces at most one element
// of C; threads whose (row, column) falls outside C return immediately.
// A, B and C are indexed as row-major arrays. numARows and numBRows are
// accepted for interface symmetry but are not read by the kernel.
__global__ void matrixMultiply(float *A, float *B, float *C, int numARows, int numAColumns, int numBRows, int numBColumns, int numCRows, int numCColumns) {
  const int outRow = blockIdx.y * blockDim.y + threadIdx.y;
  const int outCol = blockIdx.x * blockDim.x + threadIdx.x;

  // Guard clause: the grid may extend past the edges of C.
  if (outRow >= numCRows || outCol >= numCColumns) {
    return;
  }

  // Start of this thread's row in A; the loop walks along it while
  // stepping down column outCol of B.
  const float *aRow = A + outRow * numAColumns;

  float dot = 0;
  for (int k = 0; k < numAColumns; ++k) {
    dot += aRow[k] * B[k * numBColumns + outCol];
  }

  C[outRow * numCColumns + outCol] = dot;
}

// Host driver for the matrix-multiplication exercise.
//
// Reads matrices A and B from the two input files named in the command-line
// arguments, computes C = A * B on the GPU with matrixMultiply, copies C
// back to the host and passes it to wbSolution.
//
// Returns 0 on success and -1 if the inputs are inconsistent, host memory
// cannot be allocated, or any CUDA call fails (via wbCheck).
int main(int argc, char **argv) {
  wbArg_t args;
  float *hostA; // The A matrix
  float *hostB; // The B matrix
  float *hostC; // The output C matrix
  float *deviceA;
  float *deviceB;
  float *deviceC;
  int numARows    = 0; // number of rows in the matrix A
  int numAColumns = 0; // number of columns in the matrix A
  int numBRows    = 0; // number of rows in the matrix B
  int numBColumns = 0; // number of columns in the matrix B
  int numCRows;        // number of rows in the matrix C
  int numCColumns;     // number of columns in the matrix C

  hostC = NULL;

  // Byte sizes are kept in size_t and the first factor is widened before
  // multiplying, so large matrices do not overflow int arithmetic.
  size_t sizeA, sizeB, sizeC;

  // Threads per block along each axis of the 2D launch.
  const int tileWidth = 16;

  args = wbArg_read(argc, argv);

  wbTime_start(Generic, "Importing data and creating memory on host");
  hostA = (float *)wbImport(wbArg_getInputFile(args, 0), &numARows,
                            &numAColumns);
  hostB = (float *)wbImport(wbArg_getInputFile(args, 1), &numBRows,
                            &numBColumns);

  //@@ Set numCRows and numCColumns
  numCRows    = numARows;
  numCColumns = numBColumns;

  sizeA = (size_t)numARows * numAColumns * sizeof(float);
  sizeB = (size_t)numBRows * numBColumns * sizeof(float);
  sizeC = (size_t)numCRows * numCColumns * sizeof(float);

  //@@ Allocate the hostC matrix

  hostC = (float *)malloc(sizeC);

  wbTime_stop(Generic, "Importing data and creating memory on host");

  wbLog(TRACE, "The dimensions of A are ", numARows, " x ", numAColumns);
  wbLog(TRACE, "The dimensions of B are ", numBRows, " x ", numBColumns);

  // The kernel reads numAColumns rows of B, so the inner dimensions must
  // agree or it would read outside the B buffer.
  if (numAColumns != numBRows) {
    wbLog(ERROR, "Inner matrix dimensions do not match: ", numAColumns,
          " vs ", numBRows);
    free(hostA);
    free(hostB);
    free(hostC);
    return -1;
  }

  // malloc(0) may legitimately return NULL, so only a non-empty request
  // that comes back NULL is treated as a failure.
  if (hostC == NULL && sizeC > 0) {
    wbLog(ERROR, "Failed to allocate host memory for C, bytes: ", sizeC);
    free(hostA);
    free(hostB);
    return -1;
  }

  wbTime_start(GPU, "Allocating GPU memory.");
  //@@ Allocate GPU memory here

  wbCheck(cudaMalloc((void **)&deviceA, sizeA));
  wbCheck(cudaMalloc((void **)&deviceB, sizeB));
  wbCheck(cudaMalloc((void **)&deviceC, sizeC));

  wbTime_stop(GPU, "Allocating GPU memory.");

  wbTime_start(GPU, "Copying input memory to the GPU.");
  //@@ Copy memory to the GPU here

  wbCheck(cudaMemcpy(deviceA, hostA, sizeA, cudaMemcpyHostToDevice));
  wbCheck(cudaMemcpy(deviceB, hostB, sizeB, cudaMemcpyHostToDevice));

  wbTime_stop(GPU, "Copying input memory to the GPU.");

  //@@ Initialize the grid and block dimensions here

  // Round the block count up on each axis so that matrices whose
  // dimensions are not multiples of tileWidth are fully covered; the
  // kernel's bounds check discards the surplus threads.
  dim3 dimBlock(tileWidth, tileWidth, 1);
  dim3 dimGrid((numCColumns + tileWidth - 1) / tileWidth,
               (numCRows + tileWidth - 1) / tileWidth, 1);

  wbTime_start(Compute, "Performing CUDA computation");
  //@@ Launch the GPU Kernel

  // A grid with a zero dimension is an invalid launch configuration, so
  // an empty C simply skips the launch.
  if (numCRows > 0 && numCColumns > 0) {
    matrixMultiply<<<dimGrid, dimBlock>>>(deviceA, deviceB, deviceC,
                                          numARows, numAColumns, numBRows,
                                          numBColumns, numCRows,
                                          numCColumns);
    // Launch-configuration errors are only reported through
    // cudaGetLastError; the synchronize below reports execution errors.
    wbCheck(cudaGetLastError());
  }
  wbCheck(cudaDeviceSynchronize());

  wbTime_stop(Compute, "Performing CUDA computation");

  wbTime_start(Copy, "Copying output memory to the CPU");
  //@@ Copy the GPU memory back to the CPU

  wbCheck(cudaMemcpy(hostC, deviceC, sizeC, cudaMemcpyDeviceToHost));

  wbTime_stop(Copy, "Copying output memory to the CPU");

  wbTime_start(GPU, "Freeing GPU Memory");
  //@@ Free the GPU memory

  wbCheck(cudaFree(deviceA));
  wbCheck(cudaFree(deviceB));
  wbCheck(cudaFree(deviceC));

  wbTime_stop(GPU, "Freeing GPU Memory");

  wbSolution(args, hostC, numCRows, numCColumns);

  free(hostA);
  free(hostB);
  free(hostC);

  return 0;
}
21 changes: 21 additions & 0 deletions CUDA_BasicMatrixMultiplication/libwb/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
Language: Cpp
BasedOnStyle: LLVM
ColumnLimit: 75
AccessModifierOffset: -2
BreakBeforeBraces: Attach
AlignTrailingComments: true
AlignEscapedNewlinesLeft: false
AlignConsecutiveAssignments: true
AlignOperands: true
AllowShortFunctionsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakTemplateDeclarations: true
IndentCaseLabels: true
SpacesBeforeTrailingComments: 1
Standard: Cpp11
TabWidth: 4
UseTab: Never
SortIncludes: false
...
65 changes: 65 additions & 0 deletions CUDA_BasicMatrixMultiplication/libwb/.travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Use the precise image; newer compilers and packages come from the extra apt sources listed for each job below
sudo: required
dist: precise
language: cpp

matrix:
include:
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.9
env: COMPILER=g++-4.9
- compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-5
env: COMPILER=g++-5
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-precise-3.6
packages:
- clang-3.6
env: COMPILER=clang++-3.6
- compiler: clang
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-precise-3.7
packages:
- clang-3.7
env: COMPILER=clang++-3.7
# - compiler: gcc
# addons:
# apt:
# sources:
# - ubuntu-toolchain-r-test
# packages:
# - g++-6
# env: COMPILER=g++-6
# - compiler: clang
# addons:
# apt:
# sources:
# - ubuntu-toolchain-r-test
# - llvm-toolchain-precise-3.8
# packages:
# - clang-3.8
# env: COMPILER=clang++-3.8

before_install:
- sudo apt-get update -qq
script:
- $COMPILER --version
- make CXX=$COMPILER test
- ./test
34 changes: 34 additions & 0 deletions CUDA_BasicMatrixMultiplication/libwb/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Build script for the "wb" project: builds one static and one shared
# library from the file list in LIBWB_SOURCE_FILES.
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)

# Global settings. The build type is set to Release unconditionally, a
# compile-commands database is exported, and the RPATH options are enabled.
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_COLOR_MAKEFILE ON)
set(VERBOSE_BUILD ON)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_MACOSX_RPATH TRUE)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)


project(wb)

# Directory containing this CMakeLists.txt; used to locate sources.cmake.
set(current_dir "${CMAKE_CURRENT_SOURCE_DIR}")

# Download cotire.cmake into the build directory if it is not already there
# (needs network access the first time the project is configured).
# NOTE(review): nothing in this file includes the downloaded script;
# confirm whether sources.cmake uses it.
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/cotire.cmake")
file(DOWNLOAD "https://raw.githubusercontent.com/sakra/cotire/master/CMake/cotire.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cotire.cmake")
endif()



# WBLIB and LIBWB_SOURCE_FILES are not set in this file; presumably
# sources.cmake defines them -- TODO confirm.
include(${current_dir}/sources.cmake)

# Target names: the static library uses ${WBLIB} as-is, the shared library
# uses the same name with a "lib" prefix.
set(WBLIB_STATIC ${WBLIB})
set(WBLIB_SHARED lib${WBLIB})

# Both library variants are built from the same sources with C++11.
add_library(${WBLIB_STATIC} STATIC ${LIBWB_SOURCE_FILES} )
add_library(${WBLIB_SHARED} SHARED ${LIBWB_SOURCE_FILES} )
set_property(TARGET ${WBLIB_STATIC} PROPERTY CXX_STANDARD 11)
set_property(TARGET ${WBLIB_SHARED} PROPERTY CXX_STANDARD 11)
# On UNIX the shared library's output file name is set to the static
# target's name.
if (UNIX)
set_target_properties(${WBLIB_SHARED} PROPERTIES OUTPUT_NAME ${WBLIB_STATIC})
endif (UNIX)
# NOTE(review): WBLIB_STATIC is set to ${WBLIB} above, so this line targets
# the same library as the first CXX_STANDARD property line.
set_property(TARGET ${WBLIB} PROPERTY CXX_STANDARD 11)
36 changes: 36 additions & 0 deletions CUDA_BasicMatrixMultiplication/libwb/LICENSE.TXT
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
Copyright (c) 2016, Abdul Dakkak All rights reserved.

Developed by: Abdul Dakkak
IMPACT Group
University of Illinois, Urbana-Champaign
impact.crhc.illinois.edu

Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal with the
Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so,
subject to the following conditions:

Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimers.
Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimers in the documentation and/or other materials
provided with the distribution.
Neither the names of Abdul Dakkak, Impact Group, nor the
names of its contributors may be used to endorse or promote
products derived from this Software without specific prior
written permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
WITH THE SOFTWARE.
56 changes: 56 additions & 0 deletions CUDA_BasicMatrixMultiplication/libwb/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
##########################################
# Options
##########################################
WB_LIB_PATH=$(CURDIR)/lib
WB_SRC_PATH=$(CURDIR)

##########################################
##########################################

DEFINES=
CXX_FLAGS=-fPIC -Wno-unused-function -x c++ -O3 -g -std=c++11 -Wall -Wno-unused-function -pedantic -I . -I $(WB_SRC_PATH) $(DEFINES)
LIBS=-lm -lstdc++

##########################################
##########################################

UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
LIBS += -lrt
endif

##########################################
##########################################

SOURCES := $(shell find $(WB_SRC_PATH) ! -name "*_test.cpp" -name "*.cpp")
TESTS := $(shell find $(WB_SRC_PATH) -name "*_test.cpp")

OBJECTS = $(SOURCES:.cpp=.o)
TESTOBJECTS = $(TESTS:.cpp=.o)

##############################################
# OUTPUT
##############################################

.PHONY: all
.SUFFIXES: .o .cpp
all: libwb.so

.cpp.o:
$(CXX) $(DEFINES) $(CXX_FLAGS) -c -o $@ $<

libwb.so: $(OBJECTS)
mkdir -p $(WB_LIB_PATH)
$(CXX) -fPIC -shared -o $(WB_LIB_PATH)/$@ $(OBJECTS) $(LIBS)

libwb.a: $(OBJECTS)
mkdir -p $(WB_LIB_PATH)
ar rcs -o $(WB_LIB_PATH)/$@ $(OBJECTS)

test: $(TESTOBJECTS) $(OBJECTS)
$(CXX) -fPIC -o $@ $(TESTOBJECTS) $(OBJECTS) $(LIBS)


# Remove everything this Makefile builds.
# ARCH and EXES are not assigned anywhere in this Makefile, so the first two
# commands only have an effect when those variables are supplied from the
# command line or the environment; they are kept for compatibility.
# The remaining commands delete the object files found through
# $(SOURCES)/$(TESTS) (which may live in subdirectories), the `test`
# binary, and the libraries written to $(WB_LIB_PATH).
.PHONY: clean
clean:
	rm -fr $(ARCH)
	-rm -f $(EXES) *.o *~
	-rm -f $(OBJECTS) $(TESTOBJECTS) test
	-rm -f $(WB_LIB_PATH)/libwb.so $(WB_LIB_PATH)/libwb.a
7 changes: 7 additions & 0 deletions CUDA_BasicMatrixMultiplication/libwb/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

# libWB

[![Travis Build Status](https://travis-ci.org/abduld/libwb.svg?branch=master)](https://travis-ci.org/abduld/libwb)
[![AppVeyor status](https://ci.appveyor.com/api/projects/status/0nx5ie7gn5c0e6ai/branch/master?svg=true)](https://ci.appveyor.com/project/abduld/libwb/branch/master)
<!-- [![Coverity Scan Build Status](https://img.shields.io/coverity/scan/8295.svg)](https://scan.coverity.com/projects/abduld-libwb)
-->
Loading

0 comments on commit 7e2fe33

Please sign in to comment.