Skip to content

Commit

Permalink
Refactor the code and update the imports
Browse files: browse the repository at this point in the history
  • Loading branch information
EliasReutelsterz committed Jun 5, 2024
1 parent 800142e commit 17def2e
Show file tree
Hide file tree
Showing 227 changed files with 2,775 additions and 1,805 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ yateto.egg-info

*.swo
*.swp

examples/created_code/
8 changes: 4 additions & 4 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ pep8:
- helper
allow_failure: true
script:
- pep8 kernelforge
- pep8 tensorforge


pylint:
Expand All @@ -174,7 +174,7 @@ pylint:
- helper
allow_failure: true
script:
- pylint kernelforge
- pylint tensorforge


install:
Expand All @@ -186,9 +186,9 @@ install:
matrix:
- BACKEND: [cuda, hipsycl, hip]
before_script:
- pip3 install --user git+https://github.com/seissol/kernelforge.git@$CI_COMMIT_SHA
- pip3 install --user git+https://github.com/seissol/tensorforge.git@$CI_COMMIT_SHA
script:
- export isntall_path=$(python3 -c 'import kernelforge, os; print(os.path.dirname(kernelforge.__file__))')
- export isntall_path=$(python3 -c 'import tensorforge, os; print(os.path.dirname(tensorforge.__file__))')
- tree $isntall_path
- export root_dir=$PWD
- cd ./tests/cmake_integration
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/double-gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ set(GPU_TARGET_INCLUDE_DIRS ../../submodules/Device
if(${DEVICE_BACKEND} STREQUAL "cuda")
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cu
include/kernelforge_aux.cu)
include/tensorforge_aux.cu)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cu
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
elseif(${DEVICE_BACKEND} STREQUAL "hip")
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cpp
include/kernelforge_aux.cpp)
include/tensorforge_aux.cpp)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cpp
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
elseif((${DEVICE_BACKEND} STREQUAL "oneapi") OR (${DEVICE_BACKEND} STREQUAL "hipsycl"))
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cpp
include/kernelforge_aux_sycl.cpp)
include/tensorforge_aux_sycl.cpp)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cpp
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
else()
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/double-gemm/generate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from kernelforge import GenerationError, GemmGenerator
from kernelforge.common.vm.vm import vm_factory
from kernelforge.common.matrix.tensor import Tensor
from tensorforge import GenerationError, GemmGenerator
from tensorforge.common.vm.vm import vm_factory
from tensorforge.common.matrix.tensor import Tensor
import os
import yaml
import argparse
Expand Down
61 changes: 31 additions & 30 deletions benchmarks/double-gemm/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@
#include "kernels.h"
#include "stop_watch.h"
#include "gemm.h"
#include "kernelforge_aux.h"
#include "tensorforge_aux.h"
#include "yaml-cpp/yaml.h"
#include <device.h>
#include <iostream>
#include <tuple>
#include <vector>
#include <string>

using namespace kernelforge;
using namespace tensorforge;
using namespace reference;
using namespace device;

int estimateNumElements(int SizeA, int SizeB, int SizeC, int SizeD, int SizeTmp, double AllowedSpaceInGB);

int main(int Argc, char* Arcv[]) {
int main(int Argc, char *Arcv[])
{

YAML::Node Params = YAML::LoadFile("./params.yaml");
YAML::Node MatrixASpec = Params["MatA"];
Expand All @@ -41,7 +42,7 @@ int main(int Argc, char* Arcv[]) {
int N = BboxB[3] - BboxB[1];
int K = BboxA[3] - BboxA[1];

int SizeTemp = M * N; // !< required only the exact size
int SizeTemp = M * N; // !< required only the exact size

real Alpha = Params["alpha"].as<real>();
real Beta = Params["beta"].as<real>();
Expand Down Expand Up @@ -83,7 +84,6 @@ int main(int Argc, char* Arcv[]) {
real *DeviceD{};
std::tie(DeviceC, std::ignore, DeviceD) = FirstDriver.getDeviceRawData();


// Check correctness
std::cout << "INFO: computing on CPU started" << std::endl;
unsigned NextTmp = SizeTemp;
Expand All @@ -103,7 +103,8 @@ int main(int Argc, char* Arcv[]) {
int Ldd = MatrixDSpec["num_rows"].as<int>();
int LdTemp = M;

auto computeOffset = [](const int LidDim, const std::vector<int> &Bbox) {
auto computeOffset = [](const int LidDim, const std::vector<int> &Bbox)
{
return LidDim * Bbox[1] + Bbox[0];
};

Expand All @@ -113,23 +114,21 @@ int main(int Argc, char* Arcv[]) {
int OffsetD = computeOffset(Ldd, BboxD);
int OffsetTemp = 0;


kernelforge::reference::gemm(TransA, TransB,
M, N, K,
1.0, &HostA[OffsetA], Lda,
&HostB[OffsetB], Ldb,
0.0, HostTmp, LdTemp,
NextA, NextB, NextTmp,
NumElements);


kernelforge::reference::gemm(TransC, reference::LayoutType::NoTrans,
L, N, M,
Alpha, &HostC[OffsetC], Ldc,
HostTmp, M,
Beta, &HostD[OffsetD], Ldd,
NextC, NextTmp, NextD,
NumElements);
tensorforge::reference::gemm(TransA, TransB,
M, N, K,
1.0, &HostA[OffsetA], Lda,
&HostB[OffsetB], Ldb,
0.0, HostTmp, LdTemp,
NextA, NextB, NextTmp,
NumElements);

tensorforge::reference::gemm(TransC, reference::LayoutType::NoTrans,
L, N, M,
Alpha, &HostC[OffsetC], Ldc,
HostTmp, M,
Beta, &HostD[OffsetD], Ldd,
NextC, NextTmp, NextD,
NumElements);

std::cout << "INFO: computing on GPU started" << std::endl;
callFirstGemm(DeviceA, 0, DeviceB, 0, DeviceTmp, 0, NumElements, nullptr, FirstDriver.getTestStream());
Expand All @@ -139,20 +138,23 @@ int main(int Argc, char* Arcv[]) {

std::cout << "INFO: comparsion started" << std::endl;

SecondDriver.packResults(L, Ldd, N, OffsetD, SizeD, NumElements);
SecondDriver.packResults(L, Ldd, N, OffsetD, SizeD, NumElements);
bool IsPassed = SecondDriver.isTestPassed<SimpleComparator>();
if (IsPassed) {
if (IsPassed)
{
std::cout << "INFO: Results are correct" << std::endl;
}
else {
else
{
std::cout << "WARNING: Test failed" << std::endl;
}

// Measure performance
utils::StopWatch<std::chrono::duration<double, std::chrono::nanoseconds::period>> Timer;
int NumRepeats = Config["num_repeats"].as<int>();
Timer.start();
for (int Repeat = 0; Repeat < NumRepeats; ++Repeat) {
for (int Repeat = 0; Repeat < NumRepeats; ++Repeat)
{
callFirstGemm(DeviceA, 0, DeviceB, 0, DeviceTmp, 0, NumElements, nullptr, FirstDriver.getTestStream());
callSecondGemm(DeviceC, 0, DeviceTmp, 0, DeviceD, 0, NumElements, nullptr, SecondDriver.getTestStream());
}
Expand All @@ -171,11 +173,10 @@ int main(int Argc, char* Arcv[]) {
SecondDriver.TearDown();
device.api->finalize();
return 0;

}


int estimateNumElements(int SizeA, int SizeB, int SizeC, int SizeD, int SizeTmp, double AllowedSpaceInGB) {
int estimateNumElements(int SizeA, int SizeB, int SizeC, int SizeD, int SizeTmp, double AllowedSpaceInGB)
{
// Note: We are going to use only one matrix C. However, memory is going
// to get allocated for all elements
long long ElementSizeInBytes = (SizeD + SizeC + SizeTmp + SizeA + SizeB) * sizeof(real);
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/fused/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if (${BACKEND} STREQUAL "cuda")
./tmp/kernel.cu
./common/aux.cpp
./common/gemm.cpp
../kernelforge/include/kernelforge_aux.cu)
../tensorforge/include/tensorforge_aux.cu)
target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE CUDA_UNDERHOOD)
elseif(${BACKEND} STREQUAL "hip")
if(NOT DEFINED HIP_PATH)
Expand All @@ -45,7 +45,7 @@ elseif(${BACKEND} STREQUAL "hip")
./tmp/kernel.cpp
./common/aux.cpp
./common/gemm.cpp
../kernelforge/include/kernelforge_aux.hip.cpp)
../tensorforge/include/tensorforge_aux.hip.cpp)

set_source_files_properties(${DEVICE_SRC} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set(_HIPCC --offload-arch=${ARCH}; -std=c++14; -O3)
Expand All @@ -60,7 +60,7 @@ endif()


target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ./common
../kernelforge/include)
../tensorforge/include)
target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE "CONCRETE_CPU_BACKEND=${REFERENCE_IMPL}"
"REAL_SIZE=${REAL_SIZE}")

Expand Down
10 changes: 5 additions & 5 deletions benchmarks/fused/glang.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kernelforge.frontend import Parser, PostProcessor
from kernelforge.common import FloatingPointType
from kernelforge.generators.generator import Generator
from kernelforge.common.context import Context
from tensorforge.frontend import Parser, PostProcessor
from tensorforge.common import FloatingPointType
from tensorforge.generators.generator import Generator
from tensorforge.common.context import Context
from internals import BenchGenerator, EnryPointGenerator, Aux
from os import path, makedirs
import sys
Expand Down Expand Up @@ -98,7 +98,7 @@ def main():

# write kernel, launcher and header to files
with open(path.join(tmp_dir, f'kernel.{file_suffix}'), 'w') as file:
file.write('#include \"kernelforge_aux.h\"\n')
file.write('#include \"tensorforge_aux.h\"\n')
for kernel, launcher in zip(kernels, launchers):
file.write(kernel)
file.write(launcher)
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/fused/internals/bench_generator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .gpu_api import GpuAPI
from .aux import Aux
from kernelforge.common.basic_types import Addressing
from tensorforge.common.basic_types import Addressing


class BenchGenerator:
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/fused/internals/frontend/aux.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from kernelforge.common import Addressing
from kernelforge.common.matrix.tensor import Tensor
from tensorforge.common import Addressing
from tensorforge.common.matrix.tensor import Tensor


class VarFactory:
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/fused/internals/frontend/symbol_table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import enum
from typing import Dict, Union
from kernelforge.common import Addressing
from kernelforge.common.matrix.tensor import Tensor
from tensorforge.common import Addressing
from tensorforge.common.matrix.tensor import Tensor


class ObjType(enum.Enum):
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/fused/internals/frontend/traversals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from graphviz import Digraph
from kernelforge.common import generate_tmp_matrix
from kernelforge.generators.descriptions import GemmDescr
from tensorforge.common import generate_tmp_matrix
from tensorforge.generators.descriptions import GemmDescr
from .nodes import VarNode, ScalarNode, MatrixNode, DeadNode
from .nodes import BinarryOps, AssignNode, AddNode, MultNode
from .nodes import StatementsNode, GemmListNode
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/fused/internals/templates/entry_point.tmpl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "aux.h"
#include "stop_watch.h"
#include "kernelforge_aux.h"
#include "tensorforge_aux.h"
#include "gemm.h"
#include "kernel.h"
#include <iostream>
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/gemm-chain/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,21 @@ if(${DEVICE_BACKEND} STREQUAL "cuda")
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
${CMAKE_SOURCE_DIR}/gen_code/main.cu
gen_code/kernels.cu
include/kernelforge_aux.cu)
include/tensorforge_aux.cu)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/main.cu
${CMAKE_SOURCE_DIR}/gen_code/kernels.h
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
elseif(${DEVICE_BACKEND} STREQUAL "hip")
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/main.cpp
gen_code/kernels.cpp
include/kernelforge_aux.cpp)
include/tensorforge_aux.cpp)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cpp ${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
elseif((${DEVICE_BACKEND} STREQUAL "oneapi") OR (${DEVICE_BACKEND} STREQUAL "hipsycl"))
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cpp
gen_code/main.cpp
include/kernelforge_aux_sycl.cpp)
include/tensorforge_aux_sycl.cpp)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cpp ${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
else()
message(FATAL_ERROR "unknown backend, given: ${DEVICE_BACKEND}")
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/gemm-chain/bench.tmpl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "kernels.h"
#include "kernelforge_aux.h"
#include "tensorforge_aux.h"
#include <device.h>
#include <chrono>
#include <iostream>
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/gemm-chain/generate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from kernelforge import GenerationError, GemmGenerator
from kernelforge.common.matrix.tensor import Tensor
from kernelforge.common.vm.vm import vm_factory
from tensorforge import GenerationError, GemmGenerator
from tensorforge.common.matrix.tensor import Tensor
from tensorforge.common.vm.vm import vm_factory
from jinja2 import Environment, FileSystemLoader
import os
import yaml
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/simple-gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@ set(GPU_TARGET_INCLUDE_DIRS ../../submodules/Device
if(${DEVICE_BACKEND} STREQUAL "cuda")
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cu
include/kernelforge_aux.cu)
include/tensorforge_aux.cu)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cu
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
elseif(${DEVICE_BACKEND} STREQUAL "hip")
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cpp
include/kernelforge_aux.cpp)
include/tensorforge_aux.cpp)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cpp
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
elseif((${DEVICE_BACKEND} STREQUAL "oneapi") OR (${DEVICE_BACKEND} STREQUAL "hipsycl"))
set(GPU_TARGET_SOURCE_FILES common/test_drivers/simple_driver.cpp
gen_code/kernels.cpp
include/kernelforge_aux_sycl.cpp)
include/tensorforge_aux_sycl.cpp)
set(GEN_COPY_PRODUCTS ${CMAKE_SOURCE_DIR}/gen_code/kernels.cpp
${CMAKE_SOURCE_DIR}/gen_code/kernels.h)
else()
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/simple-gemm/generate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kernelforge import GenerationError
from kernelforge.common.matrix.tensor import Tensor
from kernelforge import GemmGenerator, GemmKernelType
from kernelforge.common.vm.vm import vm_factory
from tensorforge import GenerationError
from tensorforge.common.matrix.tensor import Tensor
from tensorforge import GemmGenerator, GemmKernelType
from tensorforge.common.vm.vm import vm_factory
import os
import yaml
import argparse
Expand Down
Loading

0 comments on commit 17def2e

Please sign in to comment.