From 3f68a6cd1f730640180eed1d7cdf630d32fde06f Mon Sep 17 00:00:00 2001 From: Elias Reutelsterz Date: Mon, 6 May 2024 16:32:21 +0000 Subject: [PATCH] Update kernelforge examples --- AUTHORS | 2 +- example/a_atrans_bc.py | 36 ++++++-------- example/ab_transb.py | 37 +++++++-------- example/abb_trans.py | 36 +++++++------- example/csa.py | 1 + example/five_multiplies.py | 34 +++++--------- example/four_matrices_test.py | 41 ---------------- example/gemm.py | 19 +++----- example/mat-vec.py | 50 -------------------- example/mat_vec.py | 37 +++++++-------- example/redundant_load_opt.py | 25 +++------- example/three_matrices.py | 57 ++++++++++------------- example/trace.py | 1 - examples/example.py | 3 +- examples/matmul.py | 8 ++-- kernelforge/backend/instructions/store.py | 4 +- pyproject.toml | 7 +-- {kernelforge => tensorforge}/VERSION | 0 yateto/codegen/gpukernel.py | 2 +- 19 files changed, 130 insertions(+), 270 deletions(-) delete mode 100644 example/four_matrices_test.py delete mode 100644 example/mat-vec.py rename {kernelforge => tensorforge}/VERSION (100%) diff --git a/AUTHORS b/AUTHORS index 1c81763..b2b9538 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ Ravil Dorozhinskii Yakup Budanaz David Schneller - +Elias Reutelsterz diff --git a/example/a_atrans_bc.py b/example/a_atrans_bc.py index d32178e..49276c9 100644 --- a/example/a_atrans_bc.py +++ b/example/a_atrans_bc.py @@ -1,8 +1,9 @@ -from kernelforge.common import DenseMatrix from kernelforge.common.context import Context from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import GemmDescr, FloatingPointType, Addressing from kernelforge.generators.generator import Generator +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor # Q += A x ((A^T x B) x C) @@ -10,25 +11,13 @@ variants = {'v0': Addressing.STRIDED, 'v1': Addressing.NONE} -mat_q = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 20, 9]) +mat_q = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0,0], [20,9])) -mat_a = DenseMatrix(num_rows=56, - num_cols=56, - addressing=variants['v0'], - bbox=[0, 0, 20, 9]) +mat_a = Tensor([56, 56], addressing=variants['v0'], bbox=BoundingBox([0,0], [20,9])) -mat_b = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 20, 9]) +mat_b = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0,0], [20,9])) -mat_c = DenseMatrix(num_rows=56, - num_cols=9, - bbox=[0, 0, 9, 9], - addressing=Addressing.STRIDED) +mat_c = Tensor([56, 9], Addressing.STRIDED, BoundingBox([0,0], [9,9])) tmp1 = generate_tmp_matrix(mat_a, mat_b, True, False) @@ -53,8 +42,11 @@ generator = Generator(gemm_list, context) generator.generate() -print(generator.get_launcher()) -print() -print(generator.get_header()) -print() -print(generator.get_kernel()) \ No newline at end of file +with_output = True +if with_output: + print(generator.get_header()) + print(generator.default_generate_call_site()) + print() + print(generator.get_launcher()) + print() + print(generator.get_kernel()) \ No newline at end of file diff --git a/example/ab_transb.py b/example/ab_transb.py index b39ea5c..55b20e0 100644 --- a/example/ab_transb.py +++ b/example/ab_transb.py @@ -1,35 +1,29 @@ -from kernelforge.common import DenseMatrix from kernelforge.common.context import Context from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import GemmDescr, FloatingPointType, Addressing from kernelforge.generators.generator import Generator +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor # Q += (A x B^T) x B -mat_q = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.PTR_BASED, - bbox=[0, 0, 20, 9]) +mat_q = Tensor([56, 56], Addressing.PTR_BASED, BoundingBox([0,0], [20,9])) -mat_a = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.NONE, - bbox=[0, 0, 20, 9]) +mat_a = Tensor([56, 56], Addressing.NONE, BoundingBox([0,0], [20,9])) -mat_b = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 20, 9]) +mat_b = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0,0], [20,9])) -tmp1 = generate_tmp_matrix(mat_a, mat_b, trans_op1=False, trans_op2=True) +tmp1 = generate_tmp_matrix(mat_a, mat_b, trans_a=False, trans_b=True) gemm_list = [GemmDescr(trans_a=False, trans_b=True, a=mat_a, b=mat_b, c=tmp1), GemmDescr(trans_a=False, trans_b=False, - a=tmp1, b=mat_b, c=mat_q)] + a=tmp1, b=mat_b, c=mat_q, + alpha=1.0, + beta=1.0)] context = Context(arch='sm_60', backend='cuda', @@ -38,8 +32,11 @@ generator = Generator(gemm_list, context) generator.generate() -print(generator.get_launcher()) -print() -print(generator.get_header()) -print() -print(generator.get_kernel()) \ No newline at end of file +with_output = True +if with_output: + print(generator.get_header()) + print(generator.default_generate_call_site()) + print() + print(generator.get_launcher()) + print() + print(generator.get_kernel()) \ No newline at end of file diff --git a/example/abb_trans.py b/example/abb_trans.py index 0f913fd..9a6aed1 100644 --- a/example/abb_trans.py +++ b/example/abb_trans.py @@ -3,23 +3,18 @@ from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import GemmDescr, FloatingPointType, Addressing from kernelforge.generators.generator import Generator +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor # Q += (A x B) x B^T -mat_q = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.PTR_BASED, - bbox=[0, 0, 20, 9]) -mat_a = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.NONE, - bbox=[0, 0, 20, 9]) +mat_q = Tensor([56, 56], Addressing.PTR_BASED, BoundingBox([0,0], [20,9])) + +mat_a = Tensor([56, 56], Addressing.NONE, BoundingBox([0,0], [20,9])) + +mat_b = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0,0], [9,20])) -mat_b = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 9, 20]) tmp1 = generate_tmp_matrix(mat_a, mat_b) @@ -28,7 +23,9 @@ a=mat_a, b=mat_b, c=tmp1), GemmDescr(trans_a=False, trans_b=True, - a=tmp1, b=mat_b, c=mat_q)] + a=tmp1, b=mat_b, c=mat_q, + alpha=1.0, + beta=1.0)] context = Context(arch='sm_60', backend='cuda', @@ -37,8 +34,11 @@ generator = Generator(gemm_list, context) generator.generate() -print(generator.get_launcher()) -print() -print(generator.get_header()) -print() -print(generator.get_kernel()) \ No newline at end of file +with_output = True +if with_output: + print(generator.get_header()) + print(generator.default_generate_call_site()) + print() + print(generator.get_launcher()) + print() + print(generator.get_kernel()) \ No newline at end of file diff --git a/example/csa.py b/example/csa.py index fa2dfac..4e6907f 100644 --- a/example/csa.py +++ b/example/csa.py @@ -1,6 +1,7 @@ from kernelforge import DenseMatrix, GenerationError, CsaGenerator from kernelforge.common.vm.vm import vm_factory import argparse +from kernelforge.backend.instructions.csa import CSA parser = argparse.ArgumentParser(description='Specify Backend and Arch of the GPU') diff --git a/example/five_multiplies.py b/example/five_multiplies.py index a95cf54..1ce1de5 100644 --- a/example/five_multiplies.py +++ b/example/five_multiplies.py @@ -1,41 +1,29 @@ -from kernelforge.common.matrix.dense import DenseMatrix from kernelforge.common.context import Context from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import GemmDescr from kernelforge.common.basic_types import FloatingPointType, Addressing from kernelforge.generators.generator import Generator +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor # Q = (((A x B) x (C x B)) x D) -mat_q = DenseMatrix(num_rows=56, - num_cols=9, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 9],) -mat_a = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 56]) +mat_q = Tensor([9, 9], Addressing.STRIDED, BoundingBox([0,0], [9,9])) -mat_b = DenseMatrix(num_rows=56, - num_cols=9, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 9]) +mat_a = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0,0], [56,56])) -mat_c = DenseMatrix(num_rows=56, - num_cols=56, - bbox=[0, 0, 56, 56], - addressing=Addressing.STRIDED) +mat_b = Tensor([56, 9], Addressing.STRIDED, BoundingBox([0,0], [56,9])) + +mat_c = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0,0], [56,56])) + +mat_d = Tensor([9, 9], Addressing.STRIDED, BoundingBox([0,0], [9,9])) -mat_d = DenseMatrix(num_rows=9, - num_cols=9, - bbox=[0, 0, 9, 9], - addressing=Addressing.STRIDED) tmp0 = generate_tmp_matrix(mat_a, mat_b) tmp1 = generate_tmp_matrix(mat_c, mat_b) -tmp2 = generate_tmp_matrix(tmp0, tmp1) +tmp2 = generate_tmp_matrix(tmp0, tmp1, trans_a=True) gemm_list = [GemmDescr(trans_a=False, trans_b=False, @@ -43,7 +31,7 @@ GemmDescr(trans_a=False, trans_b=False, a=mat_c, b=mat_b, c=tmp1), - GemmDescr(trans_a=False, trans_b=False, + GemmDescr(trans_a=True, trans_b=False, a=tmp0, b=tmp1, c=tmp2), GemmDescr(trans_a=False, trans_b=False, a=tmp2, b=mat_d, c=mat_q, diff --git a/example/four_matrices_test.py b/example/four_matrices_test.py deleted file mode 100644 index 04102cd..0000000 --- a/example/four_matrices_test.py +++ /dev/null @@ -1,41 +0,0 @@ -from kernelforge.common.matrix.tensor import Tensor -from kernelforge.common.matrix.boundingbox import BoundingBox -from kernelforge.common.context import Context -from kernelforge.common.aux import generate_tmp_matrix -from kernelforge.generators.descriptions import ElementwiseDescr -from kernelforge.common.basic_types import FloatingPointType, Addressing -from kernelforge.generators.generator import Generator -from kernelforge.common.operation import Operation -from kernelforge.generators import optree - -mat_q = Tensor([56, 56], Addressing.PTR_BASED, BoundingBox([0,0], [56,9])) - -mat_a = Tensor([56, 56], Addressing.PTR_BASED, BoundingBox([0,0], [56,20])) - -mat_b = Tensor([56, 56], Addressing.PTR_BASED, BoundingBox([0,0], [20,56])) - -mat_c = Tensor([56, 9], Addressing.PTR_BASED, BoundingBox([0,0], [56,9])) - -mat_d = Tensor([9, 9], Addressing.PTR_BASED, BoundingBox([0,0], [9,9])) - - -tmp1 = generate_tmp_matrix(mat_b, mat_c) -tmp2 = generate_tmp_matrix(tmp1, mat_d) - -gemm_list = [ElementwiseDescr([optree.Assignment(mat_a, optree.TensorVar(mat_a, []))])] - -context = Context(arch='sm_60', - backend='cuda', - fp_type=FloatingPointType.FLOAT) - -generator = Generator(gemm_list, context) -generator.generate() - -with_output = True -if with_output: - print(generator.get_header()) - print(generator.default_generate_call_site()) - print() - print(generator.get_launcher()) - print() - print(generator.get_kernel()) diff --git a/example/gemm.py b/example/gemm.py index 77f39e5..9702579 100644 --- a/example/gemm.py +++ b/example/gemm.py @@ -1,11 +1,12 @@ from kernelforge.common.matrix.dense import DenseMatrix from kernelforge.common.context import Context -from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import GemmDescr from kernelforge.common.basic_types import FloatingPointType, Addressing from kernelforge.generators.generator import Generator from kernelforge.common.exceptions import GenerationError import argparse +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor parser = argparse.ArgumentParser(description="Specify Backend and Arch of the GPU") @@ -23,20 +24,12 @@ args = parser.parse_args() -mat_a = DenseMatrix(num_rows=56, - num_cols=18, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 18]) +mat_a = Tensor([56, 18], Addressing.STRIDED, BoundingBox([0,0], [56,18])) -mat_b = DenseMatrix(num_rows=18, - num_cols=18, - addressing=Addressing.STRIDED, - bbox=[0, 0, 18, 18]) +mat_b = Tensor([18, 18], Addressing.STRIDED, BoundingBox([0,0], [18,18])) + +mat_c = Tensor([56, 18], Addressing.STRIDED, BoundingBox([0,0], [56,18])) -mat_c = DenseMatrix(num_rows=56, - num_cols=18, - bbox=[0, 0, 56, 18], - addressing=Addressing.STRIDED) try: vm = Context(arch=args.arch, backend=args.backend, fp_type=FloatingPointType.FLOAT) diff --git a/example/mat-vec.py b/example/mat-vec.py deleted file mode 100644 index a603fd4..0000000 --- a/example/mat-vec.py +++ /dev/null @@ -1,50 +0,0 @@ -from kernelforge import DenseMatrix, GenerationError, GemmGenerator -from kernelforge.common.vm.vm import vm_factory -import argparse - - -parser = argparse.ArgumentParser(description="Specify Backend and Arch of the GPU") -parser.add_argument("-a", - "--arch", - action="store", - help="Arch of the GPU, e.g sm_60 for Nvidia or gfx906 for AMD", - default="sm_60") -parser.add_argument("-b", - "--backend", - action="store", - help="Name of the Backend, currently cuda, hip, hipsycl and oneapi are supported", - default="cuda") - - -args = parser.parse_args() - -mat_a = DenseMatrix(num_rows=56, - num_cols=9, - addressing="strided", - bbox=[0, 0, 56, 9]) - -mat_b = DenseMatrix(num_rows=9, - num_cols=1, - addressing="strided", - bbox=[0, 0, 9, 1]) - -mat_c = DenseMatrix(num_rows=56, - num_cols=1, - bbox=[0, 0, 56, 1], - addressing="strided") - -try: - vm = vm_factory(backend=args.backend, - arch=args.arch, - fp_type='float') - - gen = GemmGenerator(vm) - gen.set(False, False, mat_a, mat_b, mat_c, alpha=1.1, beta=1.1) - gen.generate() - print(gen.get_kernel()) - print(gen.get_launcher()) - print(gen.get_launcher_header()) - -except GenerationError as err: - print("ERROR: {}".format(err)) - raise err diff --git a/example/mat_vec.py b/example/mat_vec.py index 6d625da..d3422bb 100644 --- a/example/mat_vec.py +++ b/example/mat_vec.py @@ -1,31 +1,23 @@ -from kernelforge.common import DenseMatrix +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor from kernelforge.common.context import Context -from kernelforge.common.aux import generate_tmp_matrix -from kernelforge.generators.descriptions import GemmDescr, FloatingPointType, Addressing +from kernelforge.generators.descriptions import GemmDescr +from kernelforge.common.basic_types import FloatingPointType, Addressing from kernelforge.generators.generator import Generator # C += A x b -vec_c = DenseMatrix(num_rows=56, - num_cols=1, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 1]) +vec_c = Tensor([56, 1], Addressing.STRIDED, BoundingBox([0, 0],[56, 1])) -mat_a = DenseMatrix(num_rows=56, - num_cols=9, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 9]) +mat_a = Tensor([56, 9], Addressing.STRIDED, BoundingBox([0, 0],[56, 9])) -vec_b = DenseMatrix(num_rows=56, - num_cols=1, - addressing=Addressing.STRIDED, - bbox=[0, 0, 9, 1]) +vec_b = Tensor([9, 1], Addressing.STRIDED, BoundingBox([0, 0],[9, 1])) gemm_list = [GemmDescr(trans_a=False, trans_b=False, - a=mat_a, b=vec_b, c=vec_c)] + a=mat_a, b=vec_b, c=vec_c, alpha = 1.0, beta = 1.0)] context = Context(arch='sm_60', backend='cuda', @@ -34,8 +26,11 @@ generator = Generator(gemm_list, context) generator.generate() -print(generator.get_launcher()) -print() -print(generator.get_header()) -print() -print(generator.get_kernel()) \ No newline at end of file +with_output = True +if with_output: + print(generator.get_header()) + print(generator.default_generate_call_site()) + print() + print(generator.get_launcher()) + print() + print(generator.get_kernel()) \ No newline at end of file diff --git a/example/redundant_load_opt.py b/example/redundant_load_opt.py index 05cf8d8..48390a6 100644 --- a/example/redundant_load_opt.py +++ b/example/redundant_load_opt.py @@ -1,31 +1,20 @@ -from kernelforge.common import DenseMatrix from kernelforge.common.context import Context from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import GemmDescr, FloatingPointType, Addressing from kernelforge.generators.generator import Generator +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor # Q += A * ((A^Trans * B) * C) -mat_q = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 20, 8]) -mat_a = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.NONE, - bbox=[0, 0, 20, 8]) +mat_q = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0, 0],[20, 8])) +mat_a = Tensor([56, 56], Addressing.NONE, BoundingBox([0, 0],[20, 8])) -mat_b = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 20, 8]) +mat_b = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0, 0],[20, 8])) -mat_c = DenseMatrix(num_rows=56, - num_cols=9, - bbox=[0, 0, 8, 8], - addressing=Addressing.NONE) +mat_c = Tensor([56, 9], Addressing.NONE, BoundingBox([0, 0],[8, 8])) tmp1 = generate_tmp_matrix(mat_a, mat_b, True, False) tmp2 = generate_tmp_matrix(tmp1, mat_c) @@ -39,7 +28,7 @@ a=tmp1, b=mat_c, c=tmp2), GemmDescr(trans_a=False, trans_b=False, - a=mat_b, b=tmp2, c=mat_q, + a=mat_a, b=tmp2, c=mat_q, alpha=1.0, beta=1.0)] diff --git a/example/three_matrices.py b/example/three_matrices.py index b633330..c92397f 100644 --- a/example/three_matrices.py +++ b/example/three_matrices.py @@ -1,40 +1,26 @@ -from kernelforge.common import DenseMatrix +from kernelforge.common.matrix.boundingbox import BoundingBox +from kernelforge.common.matrix.tensor import Tensor from kernelforge.common.context import Context from kernelforge.common.aux import generate_tmp_matrix -from kernelforge.generators.descriptions import GemmDescr, FloatingPointType, Addressing +from kernelforge.generators.descriptions import GemmDescr +from kernelforge.common.basic_types import FloatingPointType, Addressing from kernelforge.generators.generator import Generator - # D += A x (B x C) -mat_d = DenseMatrix(num_rows=56, - num_cols=9, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 9]) - -mat_a = DenseMatrix(num_rows=56, - num_cols=56, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 56]) - -mat_b = DenseMatrix(num_rows=56, - num_cols=9, - addressing=Addressing.STRIDED, - bbox=[0, 0, 56, 9]) - -mat_c = DenseMatrix(num_rows=9, - num_cols=9, - bbox=[0, 0, 9, 9], - addressing=Addressing.STRIDED) +mat_d = Tensor([56, 9], Addressing.STRIDED, BoundingBox([0, 0],[56, 9])) + +mat_a = Tensor([56, 56], Addressing.STRIDED, BoundingBox([0, 0],[56, 56])) + +mat_b = Tensor([56, 9], Addressing.STRIDED, BoundingBox([0, 0],[56, 9])) + +mat_c = Tensor([9, 9], Addressing.STRIDED, BoundingBox([0, 0],[9, 9])) tmp1 = generate_tmp_matrix(mat_b, mat_c) gemm_list = [GemmDescr(trans_a=False, trans_b=False, - a=mat_b, - b=mat_c, - c=tmp1), - GemmDescr(trans_a=False, - trans_b=False, + a=mat_b, b=mat_c, c=tmp1), + GemmDescr(trans_a=False, trans_b=False, a=mat_a, b=tmp1, c=mat_d, alpha=1.0, beta=1.0)] @@ -43,11 +29,18 @@ backend='cuda', fp_type=FloatingPointType.FLOAT) +# context = Context(arch='sm_60', +# backend='omptarget', +# fp_type=FloatingPointType.FLOAT) + generator = Generator(gemm_list, context) generator.generate() -print(generator.get_launcher()) -print() -print(generator.get_header()) -print() -print(generator.get_kernel()) +with_output = True +if with_output: + print(generator.get_header()) + print(generator.default_generate_call_site()) + print() + print(generator.get_launcher()) + print() + print(generator.get_kernel()) diff --git a/example/trace.py b/example/trace.py index 6a8c6a4..9179d34 100644 --- a/example/trace.py +++ b/example/trace.py @@ -2,7 +2,6 @@ from kernelforge.common.matrix.boundingbox import BoundingBox from kernelforge.common.matrix.tensor import Tensor from kernelforge.common.context import Context -from kernelforge.common.aux import generate_tmp_matrix from kernelforge.generators.descriptions import MultilinearDescr from kernelforge.common.basic_types import FloatingPointType, Addressing from kernelforge.generators.generator import Generator diff --git a/examples/example.py b/examples/example.py index ad38bc0..b0ba116 100755 --- a/examples/example.py +++ b/examples/example.py @@ -14,6 +14,7 @@ cmdLineParser.add_argument('--arch', type=str, default='dhsw', help='Architecture (e.g. dsnb for double precision on Sandy Bridge).') cmdLineParser.add_argument('--variant', type=str, default='', help='Example specific variant (e.g. onlyblas).') cmdLineParser.add_argument('example_script', type=str, help='A yateto example script from the examples folder (without file extension).') +cmdLineParser.add_argument('--backend', type = str) cmdLineArgs = cmdLineParser.parse_args() exampleSpec = importlib.util.find_spec(cmdLineArgs.example_script) @@ -33,7 +34,7 @@ pass # explicitly force CPU target -arch = useArchitectureIdentifiedBy(cmdLineArgs.arch) +arch = useArchitectureIdentifiedBy('shsw', cmdLineArgs.arch, cmdLineArgs.backend) g = Generator(arch) example.add(g) diff --git a/examples/matmul.py b/examples/matmul.py index d37ee55..63b532b 100755 --- a/examples/matmul.py +++ b/examples/matmul.py @@ -10,7 +10,7 @@ def add(g): B = Tensor('B', (K, N)) C = Tensor('C', (M, N)) - g.add('matmulAB', C['ij'] <= A['ik'] * B['kj']) - g.add('matmulATB', C['ij'] <= A['ki'] * B['kj']) - g.add('matmulABT', C['ij'] <= A['ik'] * B['jk']) - g.add('matmulATBT', C['ij'] <= A['ki'] * B['jk']) + g.add('matmulAB', C['ij'] <= A['ik'] * B['kj'], target = 'gpu') + g.add('matmulATB', C['ij'] <= A['ki'] * B['kj'], target = 'gpu') + g.add('matmulABT', C['ij'] <= A['ik'] * B['jk'], target = 'gpu') + g.add('matmulATBT', C['ij'] <= A['ki'] * B['jk'], target = 'gpu') diff --git a/kernelforge/backend/instructions/store.py b/kernelforge/backend/instructions/store.py index b615cb8..63410b9 100644 --- a/kernelforge/backend/instructions/store.py +++ b/kernelforge/backend/instructions/store.py @@ -5,8 +5,10 @@ from kernelforge.backend.symbol import Symbol, SymbolType, DataView from kernelforge.common.exceptions import InternalError from kernelforge.backend.writer import Writer -from .abstract_instruction import AbstractInstruction, AbstractShrMemWrite +from .abstract_instruction import AbstractInstruction +from kernelforge.backend.instructions.memory.__init__ import AbstractShrMemWrite from kernelforge.common.basic_types import FloatingPointType +import math class StoreRegToShr(AbstractShrMemWrite): diff --git a/pyproject.toml b/pyproject.toml index 9db5074..854c217 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,14 +7,15 @@ authors = [ { name = "Ravil Dorozhinskii", email = "ravil.aviva.com@gmail.com" }, { name = "Yakup Budanaz", email = "budanaz.yakup@gmail.com" }, { name = "David Schneller", email = "david.schneller@tum.de" }, - { name = "Carsten Uphoff", email = "uphoff@in.tum.com" } + { name = "Carsten Uphoff", email = "uphoff@in.tum.com" }, + { name = "Elias Reutelsterz", email = "elias.reutelsterz@tum.de" } ] maintainers = [ { name = "David Schneller", email = "david.schneller@tum.de" } ] classifiers= [ "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", ] dynamic = ["version", "readme", "dependencies"] @@ -25,7 +26,7 @@ build-backend = "setuptools.build_meta" [tool.setuptools.dynamic] readme = {file = ["README.md"], content-type = "text/markdown"} -version = {file = ["kernelforge/VERSION"]} +version = {file = ["tensorforge/VERSION"]} dependencies = {file = ["requirements.txt"]} [tool.setuptools.package-data] diff --git a/kernelforge/VERSION b/tensorforge/VERSION similarity index 100% rename from kernelforge/VERSION rename to tensorforge/VERSION diff --git a/yateto/codegen/gpukernel.py b/yateto/codegen/gpukernel.py index 0651088..2375581 100644 --- a/yateto/codegen/gpukernel.py +++ b/yateto/codegen/gpukernel.py @@ -1,7 +1,7 @@ from .factory import KernelFactory from kernelforge.generators.descriptions import MultilinearDescr, ElementwiseDescr from kernelforge.generators.optree import Assignment, OpNode, TensorVar -from common import * +from .common import * from .common import TensorDescription, IndexedTensorDescription, BatchedOperationsAux from ..ast.indices import BoundingBox, Range