Skip to content

Commit

Permalink
AIE-ML/F/13: Add a new tutorial "AIE-ML Performance Analysis"
Browse files Browse the repository at this point in the history
  • Loading branch information
Bruce Ying authored and GitHub Enterprise committed Dec 1, 2023
1 parent 6485479 commit efabd14
Show file tree
Hide file tree
Showing 81 changed files with 2,937 additions and 0 deletions.

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

#Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
#SPDX-License-Identifier: MIT

# The AIE compile commands below use the bash-only `|&` operator, so recipes
# must run under bash. `pipefail` makes a failing v++ fail the rule instead of
# being masked by the exit status of `tee`.
SHELL := /bin/bash
.SHELLFLAGS := -o pipefail -c

# Build target handed to v++ via -t; it is also part of the XSA file name and
# of the package_<target> rule name.
TARGET = hw
PLATFORM = ${PLATFORM_REPO_PATHS}/xilinx_vek280_es1_base_202320_1/xilinx_vek280_es1_base_202320_1.xpfm
XSA = vck190_aie_base_graph_${TARGET}.xsa
HOST_EXE = host.exe

# AI Engine graph source and the compiled graph library.
GRAPH = aie/graph.cpp
LIBADF = Work/libadf.a
AIE_CMPL_CMD = v++ -c --mode aie --platform=${PLATFORM} --include="./aie" --work_dir=./Work ${GRAPH} --aie.pl-freq=312.5 --aie.xlopt=0 |& tee log.txt
AIE_SIM_CMD = aiesimulator --pkg-dir=./Work --dump-vcd=foo --profile --simulation-cycle-timeout=99999

# Same compilation, but targeting the x86 functional simulator (separate work dir).
X86_CMPL_CMD = v++ -c --mode aie --platform=${PLATFORM} --include="./aie" --work_dir=./Work_x86 ${GRAPH} --aie.pl-freq=312.5 --aie.xlopt=0 --target=x86sim |& tee log.txt
X86_SIM_CMD = x86simulator --pkg-dir=./Work_x86

##########################################################################################################################################################
### DO NOT MODIFY BELOW THIS LINE UNLESS NECESSARY
################################################################################################################################################


# One .xo per PL kernel source found under pl_kernels/.
XOS = $(subst .cpp,.xo,$(wildcard pl_kernels/*.cpp))
VCC = v++
VPP_SPEC =system.cfg
VPP_FLAGS=--save-temps --verbose --config ${VPP_SPEC}

# None of these names is a file produced by its rule. Declaring them phony also
# keeps `make aie` working even though an `aie/` directory exists.
.PHONY: all run aie aiesim x86sim xclbin kernels host package package_${TARGET} run_hw_emu clean

all: ${XSA} ${HOST_EXE} package
run: all

# Compile the AI Engine graph; v++ leaves libadf.a in the current directory,
# so it is moved to the location the other rules expect.
aie: ${LIBADF}
${LIBADF}: ${GRAPH}
	${AIE_CMPL_CMD}
	mv libadf.a ${LIBADF}

aiesim: ${LIBADF}
	${AIE_SIM_CMD}

Work_x86:
	${X86_CMPL_CMD}
x86sim: Work_x86
	${X86_SIM_CMD}

# Link the PL kernels and the AIE graph against the platform.
xclbin: ${XSA}
${XSA}: ${LIBADF} ${VPP_SPEC} ${XOS} ${GRAPH_XOS}
	${VCC} -g -l --platform ${PLATFORM} ${XOS} ${LIBADF} \
		-t ${TARGET} ${VPP_FLAGS} -o $@

kernels: ${XOS}
${XOS}:
	$(MAKE) -C pl_kernels/

host: ${HOST_EXE}
${HOST_EXE}: sw/*.cpp
	$(MAKE) -C sw/

# Build the SD-card package. ROOTFS and IMAGE are not defined in this file and
# must be supplied by the environment or on the make command line.
package: package_${TARGET}
package_${TARGET}: ${XSA} ${LIBADF} ${HOST_EXE}
	${VCC} -p -t ${TARGET} -f ${PLATFORM} \
		--package.rootfs ${ROOTFS} \
		--package.kernel_image ${IMAGE} \
		--package.boot_mode=sd \
		--package.image_format=ext4 \
		--package.defer_aie_run \
		--package.sd_dir data \
		--package.sd_file ${HOST_EXE} ${XSA} ${LIBADF}

run_hw_emu: launch_hw_emu.sh
launch_hw_emu.sh: package_hw_emu
	./launch_hw_emu.sh

clean:
	rm -rf _x v++_* ${XOS} ${OS} ${LIBADF} *.o.* *.o *.xpe *.xo.* \
		vck190_aie_base*.xclbin* *.xsa *.log *.jou xnwOut Work Map_Report.csv \
		ilpProblem* sol.db drivers .Xil
	$(MAKE) -C pl_kernels clean
	$(MAKE) -C sw clean
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
SPDX-License-Identifier: MIT
*/
#include <aie_api/aie.hpp>
#include <aie_api/aie_adf.hpp>
using namespace adf;
static bfloat16 mean_val=0;
static float accum_vec[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
static int iteration=0;

template<int COL, int ROW, int REPEAT>
__attribute__((noinline)) void deviation(input_buffer<bfloat16> & __restrict data, input_async_buffer<bfloat16> & __restrict mean, output_async_buffer<bfloat16> & __restrict out){
if(iteration==0){
mean.acquire();
bfloat16* pmean=mean.data();
mean_val=*pmean;
mean.release();
}
iteration++;

auto inIter=aie::begin_vector<32>(data);
aie::vector<bfloat16,32> vm=aie::broadcast<bfloat16,32>(mean_val);
aie::accum<accfloat,32> acc;
acc.from_vector(aie::load_v<32>(accum_vec));
for(int i=0;i<ROW;i++){
for(int j=0;j<COL/32;j++){
aie::vector<bfloat16,32> tmp=aie::sub(*inIter++,vm);
auto tmp2=aie::mul(tmp,tmp);
acc=aie::add(acc,tmp2);
}
}

if(iteration==REPEAT){
float accum_val=aie::reduce_add(acc.to_vector<float>()) / ROW / COL / REPEAT;
out.acquire();
bfloat16* pout=out.data();
*pout++=mean_val;
*pout=(bfloat16)sqrtf(accum_val);
out.release();
iteration=0;
aie::vector<float,32> zero=aie::broadcast<float,32>(0);
aie::store_v(accum_vec,zero);
}else{
aie::store_v(accum_vec,acc.to_vector<float>());
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
SPDX-License-Identifier: MIT
*/
#include "graph.h"

SimpleGraph gr;

// Entry point: bring the graph up, run it for a single graph iteration,
// wait for that run to finish, then tear the graph down.
int main(int argc, char ** argv) {
    constexpr int kGraphIterations = 1;

    gr.init();
    gr.run(kGraphIterations);
    gr.wait();
    gr.end();

    return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
SPDX-License-Identifier: MIT
*/
#ifndef GRAPH_MODULE_H
#define GRAPH_MODULE_H

#include "kernels.h"
#include <adf.h>
using namespace adf;

const int COL=256;
const int ROW=384;
const int K_COL=256;
const int K_ROW=64;
// Graph wiring for the normalization pipeline:
//   PLIO in -> shared buffer mtxA -> { mean, deviation, norm } -> PLIO out
// mean feeds deviation, deviation feeds norm, and all three kernels read the
// same shared-buffer output port.
class SimpleGraph : public adf::graph {

public:
    output_plio out;
    input_plio in;

private:
    kernel k_mean;
    kernel k_deviation;
    kernel k_norm;
    shared_buffer<bfloat16> mtxA;

public:
    SimpleGraph() {
        // How many K_COL x K_ROW blocks make up the full COL x ROW matrix; used
        // both as the kernels' REPEAT template argument and as their repetition count.
        constexpr int kBlocksPerMatrix = ROW*COL/K_ROW/K_COL;
        // Elements handed to a kernel per invocation.
        constexpr int kBlockElems = K_ROW*K_COL;

        // External ports.
        in=input_plio::create("Datain0", plio_128_bits, "data/input0.csv");
        out=output_plio::create("Dataout0", plio_128_bits, "data/output0.txt");

        // Kernels: each one gets its source file, runtime ratio and repetition count.
        k_mean = adf::kernel::create(mean<K_COL,K_ROW, kBlocksPerMatrix>);
        source(k_mean) = "mean.cc";
        runtime<ratio>(k_mean) = 0.9;
        repetition_count(k_mean)=kBlocksPerMatrix;

        k_deviation = adf::kernel::create(deviation<K_COL,K_ROW, kBlocksPerMatrix>);
        source(k_deviation) = "deviation.cc";
        runtime<ratio>(k_deviation) = 0.9;
        repetition_count(k_deviation)=kBlocksPerMatrix;

        k_norm = adf::kernel::create(norm<K_COL,K_ROW, kBlocksPerMatrix>);
        source(k_norm) = "norm.cc";
        runtime<ratio>(k_norm) = 0.9;
        repetition_count(k_norm)=kBlocksPerMatrix;

        // Shared buffer holding the whole matrix, with one input and one output port;
        // both access patterns cover the full buffer in a single tile.
        mtxA = shared_buffer<bfloat16>::create({COL,ROW}, 1, 1);
        write_access(mtxA.in[0]) = tiling({.buffer_dimension={COL,ROW}, .tiling_dimension={COL,ROW}, .offset={0,0} });
        read_access(mtxA.out[0]) = tiling({.buffer_dimension={COL,ROW}, .tiling_dimension={COL,ROW}, .offset={0,0} });

        // Data path.
        connect(in.out[0], mtxA.in[0]);
        connect(mtxA.out[0], k_mean.in[0]);
        connect(mtxA.out[0], k_deviation.in[0]);
        connect(mtxA.out[0], k_norm.in[0]);
        connect(k_mean.out[0], k_deviation.in[1]);
        connect(k_deviation.out[0], k_norm.in[1]);
        connect(k_norm.out[0], out.in[0]);

        // Port sizes, in elements.
        dimensions(k_mean.in[0])={kBlockElems};
        dimensions(k_mean.out[0])={1};
        dimensions(k_deviation.in[0])={kBlockElems};
        dimensions(k_deviation.in[1])={1};
        dimensions(k_deviation.out[0])={2};
        dimensions(k_norm.in[0])={kBlockElems};
        dimensions(k_norm.in[1])={2};
        dimensions(k_norm.out[0])={kBlockElems};

    }

};

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
SPDX-License-Identifier: MIT
*/
#ifndef COPY_MODULE_H
#define COPY_MODULE_H

#include <adf.h>
using namespace adf;
// Kernel prototypes. In all three, COL and ROW give the shape of the block
// consumed per call and REPEAT is the number of calls that form one frame.

// Accumulates the input over REPEAT calls and writes a single mean value to `out`.
template<int COL, int ROW, int REPEAT>
__attribute__((noinline)) void mean(input_buffer<bfloat16> & __restrict data, output_async_buffer<bfloat16> & __restrict out);

// Reads the mean from `mean`, accumulates squared deviations over REPEAT calls and
// writes two values to `out`: the mean followed by the square root of the averaged sum.
template<int COL, int ROW, int REPEAT>
__attribute__((noinline)) void deviation(input_buffer<bfloat16> & __restrict data, input_async_buffer<bfloat16> & __restrict mean, output_async_buffer<bfloat16> & __restrict out);

// Reads {mean, deviation} from `mean_deviation` and writes (data - mean) / deviation to `out`.
template<int COL, int ROW, int REPEAT>
__attribute__((noinline)) void norm(input_buffer<bfloat16> & __restrict data,input_async_buffer<bfloat16> & __restrict mean_deviation, output_buffer<bfloat16> & __restrict out);
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
SPDX-License-Identifier: MIT
*/
#include <aie_api/aie.hpp>
#include <aie_api/aie_adf.hpp>
#include <aie_api/utils.hpp>
using namespace adf;
// Per-lane partial sums kept between kernel calls (vector-aligned for load_v/store_v).
alignas(aie::vector_decl_align) static float accum_vec[32]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
// Calls seen so far in the current frame.
static int iteration=0;

/**
 * Sums ROW*COL input samples per call into a 32-lane accumulator that persists
 * across calls. On the REPEAT-th call the lanes are reduced, divided by
 * ROW, COL and REPEAT, and the result is written to `out` as one bfloat16;
 * the persistent state is then cleared for the next frame.
 */
template<int COL, int ROW, int REPEAT>
__attribute__((noinline)) void mean(input_buffer<bfloat16> & __restrict data, output_async_buffer<bfloat16> & __restrict out){
    iteration++;

    // Pick up where the previous call left off.
    aie::accum<accfloat,32> running;
    running.from_vector(aie::load_v<32>(accum_vec),0);

    auto src=aie::begin_vector<32>(data);
    for(int row=0;row<ROW;row++){
        for(int chunk=0;chunk<COL/32;chunk++){
            running=aie::add(running,*src++);
        }
    }

    // Frame still in progress: stash the partial sums and leave.
    if(iteration!=REPEAT){
        aie::store_v(accum_vec,running.to_vector<float>());
        return;
    }

    // Final call of the frame: publish the mean and reset the state.
    const float total=aie::reduce_add(running.to_vector<float>());
    out.acquire();
    bfloat16* result=out.data();
    *result=(bfloat16)(total / ROW / COL / REPEAT);
    out.release();
    aie::store_v(accum_vec,aie::broadcast<float,32>(0));
    iteration=0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
SPDX-License-Identifier: MIT
*/
#include <aie_api/aie.hpp>
#include <aie_api/aie_adf.hpp>
using namespace adf;
// Calls seen so far in the current frame.
static int iteration=0;
// Statistics latched from the async input on the first call of a frame.
static bfloat16 mean_val=0;
static bfloat16 dev_val=0;

/**
 * Writes (data - mean) / deviation for every input sample.
 *
 * On the first call of each frame the two statistics are read from `mean_dev`
 * (mean first, deviation second); the deviation is raised to 0.00001 if it is
 * smaller than that before being used as the divisor. The frame ends after
 * REPEAT calls.
 */
template<int COL, int ROW, int REPEAT>
__attribute__((noinline)) void norm(input_buffer<bfloat16> & __restrict data,input_async_buffer<bfloat16> & __restrict mean_dev, output_buffer<bfloat16> & __restrict out){
    if(iteration==0){
        mean_dev.acquire();
        const bfloat16 *stats=mean_dev.data();
        mean_val=stats[0];
        dev_val=stats[1];
        // Lower bound on the divisor.
        const bfloat16 min_dev=(bfloat16)0.00001f;
        if(dev_val<min_dev){
            dev_val=min_dev;
        }
        mean_dev.release();
    }
    iteration++;

    aie::vector<bfloat16,32> mean_lanes=aie::broadcast<bfloat16,32>(mean_val);
    auto src=aie::begin_vector<32>(data);
    auto dst=aie::begin_vector<32>(out);
    for(int row=0;row<ROW;row++){
        for(int chunk=0;chunk<COL/32;chunk++){
            auto centered=aie::sub(*src++,mean_lanes);
            *dst++=aie::div(centered,dev_val);
        }
    }

    // Start a new frame after REPEAT calls.
    if(iteration==REPEAT){
        iteration=0;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
CMD,D,D,D,D,D,D,D,D,TKEEP,TLAST
STALL:1000,0,0,0,0,0,0,0,0,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
DATA:12288,1,2,3,4,5,6,7,8,-1,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"name": "AIE-ML_Performance_Analysis-Normalization_v1",
"description": "Normalization_v1",
"flow": "versal",

"platform_allowlist": [ "vek280"],
"platform_blocklist": [],

"testinfo": {
"test_name": "AIE-ML_F_13-normalization_v1",
"disable": false,
"jobs": [
{
"index": 0,
"dependency": [],
"env": "",
"cmd": "",
"max_memory_MB": 65536,
"max_time_min": {
"vitis_aie_sim": 50,
"vitis_aie_x86sim": 50,
"vitis_hw_build": 470,
"vitis_hw_emu": 200,
"vitis_hw_run": 30
}
}
],
"targets": [
"vitis_hw_emu",
"vitis_hw_build",
"vitis_hw_run"
],
"category": "canary",
"make_options": [
"EMU_CMD=\\\"./launch_hw_emu.sh -run-app embedded_exec.sh\\\"",
"EMBEDDED_PACKAGE_OUT=./",
"EMBEDDED_EXEC_SCRIPT=./embedded_exec.sh"
],
"custom_build_target":
{
"all": "run"
},
"custom_board_target": "run_test",
"tasks": {
"board": {
"pre_exec": "./env_setup_versal.sh"
},
"build": {
"pre_exec": "./env_setup_versal.sh"
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT

# Set XILINX_XRT to /usr for the host application's environment.
export XILINX_XRT=/usr

# Launch the host executable from the current directory.
# NOTE(review): presumably a.xclbin is the device image to load and 9999 is an
# application-specific parameter — confirm against the host.exe sources.
./host.exe a.xclbin 9999
Loading

0 comments on commit efabd14

Please sign in to comment.