-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add latency estimations to simulator #21
base: master
Are you sure you want to change the base?
Changes from 7 commits
fa4016d
8f403b8
3141ac5
fe3d72d
22cc30c
b46f6d2
770f2da
de70c2c
4b563b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,19 @@ | ||
import copy | ||
import numpy as np | ||
from ctypes import c_int32 | ||
import csv | ||
import os.path | ||
|
||
from characterization import display_characterization, get_latency_cc | ||
from kernels import * | ||
from memory import * | ||
|
||
# CGRA from left to right, top to bottom | ||
N_ROWS = 4 | ||
N_COLS = 4 | ||
INSTR_SIZE = N_ROWS+1 | ||
MAX_COL = N_COLS - 1 | ||
MAX_ROW = N_ROWS - 1 | ||
|
||
BUS_TYPES = ["ONE-TO-M", "N-TO-M", "INTERLEAVED"] | ||
PRINT_OUTS = 1 | ||
|
||
MAX_32b = 0xFFFFFFFF | ||
|
@@ -47,8 +49,8 @@ def print_out( prs, outs, insts, ops, reg ): | |
elif pr == "R1" : pnt = reg[1] | ||
elif pr == "R2" : pnt = reg[2] | ||
elif pr == "R3" : pnt = reg[3] | ||
|
||
out_string += "[" | ||
if pnt != []: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And if pnt == []? Should the for loop still be executed? What was the idea behind it? |
||
out_string += "[" | ||
for i in range(len(pnt)): | ||
out_string += "{{{}:4}}".format(i) | ||
if i == (len(pnt) - 1): | ||
|
@@ -60,7 +62,7 @@ def print_out( prs, outs, insts, ops, reg ): | |
|
||
|
||
class CGRA: | ||
def __init__( self, kernel, memory, read_addrs, write_addrs): | ||
def __init__( self, kernel, memory, read_addrs, write_addrs, memory_manager): | ||
self.cells = [] | ||
for r in range(N_ROWS): | ||
list = [] | ||
|
@@ -71,6 +73,11 @@ def __init__( self, kernel, memory, read_addrs, write_addrs): | |
self.memory = memory | ||
self.instr2exec = 0 | ||
self.cycles = 0 | ||
self.N_COLS = N_COLS | ||
self.N_ROWS = N_ROWS | ||
self.total_latency_cc = 0 | ||
self.instr_latency_cc = [] | ||
self.max_latency_instr = None | ||
JuanSapriza marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if read_addrs is not None and len(read_addrs) == N_COLS: | ||
self.load_addr = read_addrs | ||
else: | ||
|
@@ -79,6 +86,8 @@ def __init__( self, kernel, memory, read_addrs, write_addrs): | |
self.store_addr = write_addrs | ||
else: | ||
self.store_addr = [0]*N_COLS | ||
self.init_store = copy.copy(self.store_addr) | ||
self.memory_manager = memory_manager | ||
self.exit = False | ||
|
||
def run( self, pr, limit ): | ||
|
@@ -109,10 +118,12 @@ def step( self, prs="ROUT" ): | |
ops = [ self.cells[r][i].op for i in range(N_COLS) ] | ||
reg = [[ self.cells[r][i].regs[regs[x]] for i in range(N_COLS) ] for x in range(len(regs)) ] | ||
print_out( prs, outs, insts, ops, reg ) | ||
|
||
self.flag_poll_cnt = 0 | ||
get_latency_cc(self) | ||
self.instr2exec += 1 | ||
self.cycles += 1 | ||
return self.exit | ||
return self.exit | ||
|
||
|
||
def get_neighbour_address( self, r, c, dir ): | ||
n_r = r | ||
|
@@ -181,6 +192,8 @@ def __init__( self, parent, row, col ): | |
self.regs = {'R0':0, 'R1':0, 'R2':0, 'R3':0 } | ||
self.op = "" | ||
self.instr = "" | ||
self.latency_cc = 0 | ||
self.addr = 0 | ||
|
||
def get_out( self ): | ||
return self.old_out | ||
|
@@ -222,7 +235,6 @@ def run_instr( self, instr): | |
self.op = instr[0] | ||
except: | ||
self.op = instr | ||
|
||
if self.op in self.ops_arith: | ||
des = instr[1] | ||
val1 = self.fetch_val( instr[2] ) | ||
|
@@ -251,25 +263,27 @@ def run_instr( self, instr): | |
|
||
elif self.op in self.ops_lwd: | ||
des = instr[1] | ||
self.addr = self.parent.load_addr[self.col] | ||
ret = self.parent.load_direct( self.col, 4 ) | ||
if des in self.regs: self.regs[des] = ret | ||
self.out = ret | ||
|
||
elif self.op in self.ops_swd: | ||
val = self.fetch_val( instr[1] ) | ||
self.addr = self.parent.store_addr[self.col] | ||
self.parent.store_direct( self.col, val, 4 ) | ||
|
||
elif self.op in self.ops_lwi: | ||
des = instr[1] | ||
addr = self.fetch_val( instr[2] ) | ||
ret = self.parent.load_indirect(addr) | ||
self.addr = self.fetch_val( instr[2] ) | ||
ret = self.parent.load_indirect(self.addr) | ||
if des in self.regs: self.regs[des] = ret | ||
self.out = ret | ||
|
||
elif self.op in self.ops_swi: | ||
addr = self.fetch_val( instr[2] ) | ||
self.addr = self.fetch_val( instr[2] ) | ||
val = self.fetch_val( instr[1] ) | ||
self.parent.store_indirect( addr, val ) | ||
self.parent.store_indirect( self.addr, val ) | ||
pass | ||
|
||
elif self.op in self.ops_nop: | ||
|
@@ -375,7 +389,9 @@ def blt( self, val1, val2, branch ): | |
ops_jump = { 'JUMP' : '' } | ||
ops_exit = { 'EXIT' : '' } | ||
|
||
def run( kernel, version="", pr="ROUT", limit=100, load_addrs=None, store_addrs=None): | ||
|
||
|
||
def run( kernel, version="", pr="ROUT", limit=100, load_addrs=None, store_addrs=None, memory_manager=MEMORY()): | ||
ker = [] | ||
mem = [] | ||
|
||
|
@@ -400,12 +416,12 @@ def run( kernel, version="", pr="ROUT", limit=100, load_addrs=None, store_addrs= | |
return None | ||
|
||
# Run the kernel | ||
cgra = CGRA(ker, mem, load_addrs, store_addrs) | ||
cgra = CGRA(ker, mem, load_addrs, store_addrs, memory_manager) | ||
mem = cgra.run(pr, limit) | ||
|
||
# Store the output sorted | ||
sorted_mem = sorted(mem, key=lambda x: x[0]) | ||
with open( kernel + "/"+FILENAME_MEM_O+version+EXT, 'w+') as f: | ||
for row in sorted_mem: csv.writer(f).writerow(row) | ||
|
||
display_characterization(cgra, pr) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What if a user does not want to print the characterization? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The pr parameter takes in the different types of information the user wants to display. If users don't want to print anything, they can do so by leaving the pr string array empty in the notebook. |
||
print("\n\nEND") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
import copy | ||
import math | ||
import os.path | ||
import csv | ||
|
||
OPERATIONS_MEMORY_ACCESS = ["LWD", "LWI", "SWD","SWI"] | ||
BUS_TYPES = ["ONE-TO-M", "N-TO-M", "INTERLEAVED"] | ||
INTERVAL_CST = 14 | ||
|
||
def load_operation_characterization(characterization_type, mapping_file='operation_characterization.csv'):
    """Load one section of the operation characterization CSV into a dict.

    The CSV is divided into sections by comment rows whose first cell starts
    with ``#``; the section name is that cell with ``#`` and spaces stripped.
    Only rows belonging to the section named
    ``operation_<characterization_type>_mapping`` are read.

    Args:
        characterization_type: Middle part of the section name to load
            (e.g. ``"latency_cc"``).
        mapping_file: CSV file name, resolved relative to the directory of
            this module (an absolute path is used as-is).

    Returns:
        A dict keyed by the first cell of each data row. Rows with one value
        map to ``int(value)``; rows with two values build a nested dict
        ``{int(first): float(second)}`` per key. Rows with any other number
        of values are ignored.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a value cell cannot be converted to ``int``/``float``.
    """
    wanted_section = f'operation_{characterization_type}_mapping'
    operation_mapping = {}
    csv_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), mapping_file)
    # The csv module asks for newline='' so that it can do its own line-ending
    # handling (needed for quoted fields that contain line breaks).
    with open(csv_file_path, 'r', newline='') as csvfile:
        current_section = None
        for row in csv.reader(csvfile):
            # Rows made only of empty cells (e.g. ",," padding written by
            # spreadsheet tools) carry no data and must not reach int('').
            if not any(cell.strip() for cell in row):
                continue
            if row[0].startswith('#'):
                current_section = row[0].strip('# ')
                continue
            if current_section != wanted_section:
                continue
            operation, *rest = row
            # Drop trailing empty cells so a padded "OP,1," row is still read
            # as a single-value row.
            while rest and not rest[-1].strip():
                rest.pop()
            if len(rest) == 1:
                operation_mapping[operation] = int(rest[0])
            elif len(rest) == 2:
                key = int(rest[0])
                value = float(rest[1])
                if operation not in operation_mapping:
                    operation_mapping[operation] = {}
                operation_mapping[operation][key] = value
    return operation_mapping
|
||
operation_latency_mapping = load_operation_characterization("latency_cc") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When is this code executed? When the module is imported? Shouldn't this be in an init_characterization or load_characterization function or something similar? Then we should call these functions from the notebook. I haven't finished reading the code, but I find it weird that all this is working without requiring any changes to the notebooks launching the examples, right? |
||
bus_type_active_row_coef = load_operation_characterization("active_row_coef") | ||
bus_type_cpu_loop_instrs = load_operation_characterization("cpu_loop_instrs") | ||
|
||
# This function takes the maximum latency between the memory operations and the non-memory operations in the instruction | ||
def get_latency_cc(cgra):
    """Record the latency of the instruction the CGRA has just executed.

    The instruction latency is the larger of the longest per-cell operation
    latency (``get_latency_alu_cc``) and the memory latency
    (``get_latency_mem_cc``). One extra cycle is added when ``cgra.exit`` is
    set. The result is appended to ``cgra.instr_latency_cc`` and accumulated
    into ``cgra.total_latency_cc``.

    Args:
        cgra: The CGRA whose cells, ``exit`` flag and ``instr2exec`` counter
            are read and whose latency bookkeeping attributes are updated.
    """
    cgra.max_latency_instr = None
    longest_alu_op_latency_cc = get_latency_alu_cc(cgra)
    total_mem_latency_cc = get_latency_mem_cc(cgra)

    # get_latency_alu_cc leaves max_latency_instr pointing at a live cell.
    # Annotate a copy so the cell's own latency_cc / instr fields are not
    # overwritten with the per-instruction summary.
    record = copy.copy(cgra.max_latency_instr)
    record.latency_cc = max(longest_alu_op_latency_cc, total_mem_latency_cc)
    # Reviewer asked "and if not?": this condition only tags the name with
    # "MEM" when the memory operations take longer than the non-memory ones;
    # otherwise the name stays that of the longest non-memory operation.
    if total_mem_latency_cc > longest_alu_op_latency_cc:
        record.instr = f'MEM ({record.instr})'
    if cgra.exit:
        record.latency_cc += 1
    record.instr2exec = cgra.instr2exec

    cgra.max_latency_instr = record
    cgra.instr_latency_cc.append(record)
    cgra.total_latency_cc += record.latency_cc
|
||
def get_latency_alu_cc(cgra):
    """Set every cell's ``latency_cc`` and return the largest one.

    Each cell's latency is looked up in ``operation_latency_mapping`` by the
    cell's current ``op``. ``cgra.max_latency_instr`` is updated to the first
    cell (row-major order) holding the strictly largest latency; an existing
    non-``None`` value is only replaced by a strictly slower cell.

    Returns:
        The ``latency_cc`` of ``cgra.max_latency_instr`` after the scan.
    """
    for row_idx in range(cgra.N_ROWS):
        for col_idx in range(cgra.N_COLS):
            cell = cgra.cells[row_idx][col_idx]
            cell.latency_cc = int(operation_latency_mapping[cell.op])
            slowest = cgra.max_latency_instr
            if slowest is not None and cell.latency_cc <= slowest.latency_cc:
                continue
            cgra.max_latency_instr = cell
    return cgra.max_latency_instr.latency_cc
|
||
def get_latency_mem_cc(cgra):
    """Return the latency of the memory accesses of the current instruction.

    Runs the memory-latency pipeline in order: record the bank of every
    access, group the accesses (stored in ``cgra.concurrent_accesses``),
    derive the per-column dependencies and convert them into a latency.
    """
    record_bank_access(cgra)
    cgra.concurrent_accesses = group_dma_accesses(cgra)
    return compute_latency_cc(cgra, track_dependencies(cgra))
|
||
# Record the bank index used for each memory access | ||
def record_bank_access(cgra):
    """Store ``bank_index`` on every cell currently doing a memory access.

    Cells whose ``op`` is not in ``OPERATIONS_MEMORY_ACCESS`` are left
    untouched.
    """
    for row_idx in range(cgra.N_ROWS):
        for col_idx in range(cgra.N_COLS):
            cell = cgra.cells[row_idx][col_idx]
            if cell.op not in OPERATIONS_MEMORY_ACCESS:
                continue
            cell.bank_index = compute_bank_index(cgra, row_idx, col_idx)
|
||
def compute_bank_index(cgra, r, c):
    """Return the integer index of the memory bank accessed by cell (r, c).

    For the ``"INTERLEAVED"`` bus type the index is the offset of the cell's
    address from a base address, divided by ``memory_manager.spacing`` and
    wrapped modulo ``memory_manager.n_banks``. For every other bus type the
    index is the cell's address divided by ``memory_manager.bank_size``,
    rounded down so that all addresses inside one bank share the same index.

    Args:
        cgra: CGRA providing ``cells``, ``memory``, ``init_store`` and
            ``memory_manager``.
        r: Row of the cell.
        c: Column of the cell.

    Returns:
        The bank index as an ``int``.
    """
    cell = cgra.cells[r][c]
    memory_manager = cgra.memory_manager

    if memory_manager.bus_type == "INTERLEAVED":
        # Base address: first initial store address for SWD, otherwise the
        # lowest address present in memory.
        # NOTE(review): reviewers asked what this base address stands for, why
        # it differs for SWD, and why the starting address is related to the
        # bank number -- still to be clarified by the author.
        base_addr = cgra.init_store[0] if cell.op == "SWD" else sorted(cgra.memory)[0][0]
        # NOTE(review): reviewers asked for units on these quantities (bits,
        # bytes or 4-byte words), e.g. as a "_b" / "_B" / "_w" name suffix.
        return int(((cell.addr - base_addr) / memory_manager.spacing) % memory_manager.n_banks)

    # Floor division: a fractional index would give two addresses of the same
    # bank different keys and hide their conflict.
    # NOTE(review): reviewers suggested ONE-TO-M could be a separate case that
    # always returns the same index, to model a permanent conflict.
    return int(cell.addr // memory_manager.bank_size)
|
||
def group_dma_accesses(cgra):
    """Group the memory accesses of the current instruction per row and bank.

    For each row ``r`` and column ``c`` the cell ``(r, c)`` is used if it is
    an uncovered memory access; otherwise the column is scanned from the top
    and the first uncovered memory access found is pulled into row ``r``
    ("push up"). Every access used is appended to ``cgra.covered_accesses``.

    Returns:
        A list with one dict per CGRA row, mapping a bank index to the list
        of ``(source_row, column)`` accesses grouped in that row. If
        ``accesses_are_ordered`` rejects the grouping, the result of
        ``rearrange_accesses`` is returned instead.
    """
    cgra.covered_accesses = []
    # One slot per CGRA row (previously hard-coded to 4).
    concurrent_accesses = [{} for _ in range(cgra.N_ROWS)]
    for r in range(cgra.N_ROWS):
        for c in range(cgra.N_COLS):
            # Try the cell itself first, then the whole column top to bottom.
            for k in (r, *range(cgra.N_ROWS)):
                candidate = cgra.cells[k][c]
                if candidate.op in OPERATIONS_MEMORY_ACCESS and (k, c) not in cgra.covered_accesses:
                    cgra.covered_accesses, concurrent_accesses = mark_access(
                        cgra.covered_accesses, concurrent_accesses, r, c, k, candidate.bank_index
                    )
                    break
    if not accesses_are_ordered(cgra, concurrent_accesses):
        concurrent_accesses = rearrange_accesses(cgra, concurrent_accesses)
    return concurrent_accesses
|
||
def mark_access(covered_accesses, concurrent_accesses, r, c, k, bank_index):
    """Register the access of cell ``(k, c)`` in row slot ``r``.

    The ``(k, c)`` pair is appended to ``covered_accesses`` and to the list
    stored under ``bank_index`` in ``concurrent_accesses[r]`` (created on
    first use). Both containers are modified in place and returned.
    """
    access = (k, c)
    covered_accesses.append(access)
    row_slot = concurrent_accesses[r]
    if bank_index not in row_slot:
        row_slot[bank_index] = []
    row_slot[bank_index].append(access)
    return covered_accesses, concurrent_accesses
|
||
def accesses_are_ordered(cgra, concurrent_accesses):
    """Tell whether the grouped accesses can be used without rearranging.

    Always ``False`` for bus types other than ``"INTERLEAVED"``. For the
    interleaved bus, every row slot is walked in bank/insertion order and the
    source row of each access must never be lower than the highest source row
    seen so far in that slot.

    Args:
        cgra: CGRA providing ``memory_manager.bus_type`` and ``N_ROWS``.
        concurrent_accesses: One dict per row mapping a bank index to a list
            of ``(source_row, column)`` accesses.

    Returns:
        ``True`` if the accesses are ordered, ``False`` otherwise.
    """
    if cgra.memory_manager.bus_type != "INTERLEAVED":
        return False
    for i in range(cgra.N_ROWS):
        # Tracked per row slot, so it works for any number of rows
        # (previously a list hard-coded to 4 entries).
        highest_row = 0
        for accesses in concurrent_accesses[i].values():
            for current_access in accesses:
                if highest_row > current_access[0]:
                    return False
                highest_row = current_access[0]
    return True
|
||
# This function arranges the concurrent lists to ensure they match the DMA's behavior | ||
def rearrange_accesses(cgra, concurrent_accesses):
    """Reorder the grouped accesses to follow the modelled DMA behaviour.

    For the ``"INTERLEAVED"`` bus type, rows are processed from the bottom
    up: the accesses in each bank list of row ``i - 1`` are sorted by the
    reversed column order of row ``i``; columns absent from row ``i`` keep
    their relative order at the end. The input list is updated in place.

    For every other bus type only the total number of accesses matters, so
    the result is a single bucket holding ``len(cgra.covered_accesses)``
    placeholder ``(0, 0)`` accesses in the first row and empty dicts for the
    remaining rows.

    Returns:
        The rearranged list with one dict per row.
    """
    if cgra.memory_manager.bus_type != "INTERLEAVED":
        # Latencies for non-interleaved bus types require the total number of accesses.
        # Sized by N_ROWS (previously hard-coded to 4 rows).
        n_accesses = len(cgra.covered_accesses)
        return [{1: [(0, 0)] * n_accesses}] + [{} for _ in range(cgra.N_ROWS - 1)]

    for i in range(cgra.N_ROWS - 1, 0, -1):
        columns_below = [pair[1] for pairs in concurrent_accesses[i].values() for pair in pairs][::-1]
        # First position of each column in the reversed order; same result as
        # list.index without rescanning the list for every access.
        rank = {}
        for position, column in enumerate(columns_below):
            rank.setdefault(column, position)
        concurrent_accesses[i - 1] = {
            bank: sorted(pairs, key=lambda access: rank.get(access[1], float('inf')))
            for bank, pairs in concurrent_accesses[i - 1].items()
        }
    return concurrent_accesses
|
||
def track_dependencies(cgra):
    """Compute a per-column latency figure from ``cgra.concurrent_accesses``.

    Every column starts at 1. For each grouped access, the value of its
    column is increased by its 1-based position inside its bank list minus
    the position of the same column's access in the next row (0 for the last
    row, and also 0 when ``find_position`` finds no such access).

    Returns:
        A list with one entry per CGRA column.
    """
    # Indexed by column, so it is sized by N_COLS (previously hard-coded to 4).
    latency = [1] * cgra.N_COLS
    last_row = cgra.N_ROWS - 1
    for i in range(cgra.N_ROWS):
        for values in cgra.concurrent_accesses[i].values():
            for current_access in values:
                column = current_access[1]
                # Compare each access with its next dependency (=subsequent access at same column)
                # Record the difference between the access within the conflict, and the subsequent access
                current_pos = find_position(cgra.concurrent_accesses[i], column) + 1
                next_pos = find_position(cgra.concurrent_accesses[i + 1], column) if i < last_row else 0
                latency[column] += current_pos - next_pos
    return latency
|
||
def find_position(conflict_pos, column):
    """Return the position of ``column``'s access inside its bank list.

    ``conflict_pos`` maps a bank index to a list of ``(row, column)``
    accesses. The bank lists are searched in dict order and the index of the
    first access with a matching column is returned; 0 is returned when no
    access matches.
    """
    for accesses in conflict_pos.values():
        for position, access in enumerate(accesses):
            if access[1] == column:
                return position
    return 0
|
||
def compute_latency_cc(cgra, dependencies):
    """Turn the per-column dependencies into the memory latency of the instruction.

    Starts from ``max(dependencies)`` and adds the bus-type specific delays
    read from ``bus_type_active_row_coef`` and ``bus_type_cpu_loop_instrs``.

    NOTE(review): a reviewer suggested a name that refers to memory, since
    this is the latency of the memory accesses rather than latency in
    general; the name is kept here so existing callers keep working.

    Args:
        cgra: CGRA providing the cells, ``memory_manager.bus_type`` and the
            running ``flag_poll_cnt`` counter (updated in place).
        dependencies: Per-column values as returned by ``track_dependencies``.

    Returns:
        The memory latency of the current instruction.
    """
    # Account for additional bus type specific delays
    bus_type = cgra.memory_manager.bus_type
    ACTIVE_ROW_COEF = bus_type_active_row_coef[bus_type]
    CPU_LOOP_INSTRS = bus_type_cpu_loop_instrs[bus_type]

    # Number of memory accesses issued in each column.
    mem_count = [0] * cgra.N_COLS
    latency_cc = max(dependencies)
    for r in range(cgra.N_ROWS):
        for c in range(cgra.N_COLS):
            if cgra.cells[r][c].op in OPERATIONS_MEMORY_ACCESS:
                mem_count[c] += 1

    if ACTIVE_ROW_COEF:
        # mem_count has one entry per column, so walk it directly instead of
        # indexing it with range(N_ROWS), which only matches on square arrays.
        # NOTE(review): the coefficient name mentions rows while the counter
        # is per column -- confirm which one is intended.
        for count in mem_count:
            if count != 0:
                latency_cc += ACTIVE_ROW_COEF

    if CPU_LOOP_INSTRS:
        cgra.flag_poll_cnt += latency_cc
        # NOTE(review): a CPU_LOOP_INSTRS of 1 would make this a modulo by
        # zero -- presumably never configured; confirm against the CSV.
        if cgra.flag_poll_cnt % (CPU_LOOP_INSTRS - 1) == 0:
            latency_cc += 1
    return latency_cc
|
||
def display_characterization(cgra, pr):
    """Print the latency information selected by ``pr``.

    ``"OP_MAX_LAT"`` prints one line per entry of ``cgra.instr_latency_cc``;
    ``"TOTAL_LAT"`` prints the configuration time, the interval before the
    first iteration (also stored in ``cgra.interval_latency``) and the total
    latency. ``"ALL_LAT_INFO"`` enables both. Nothing is printed when none of
    these items is in ``pr``.
    """
    show_all = "ALL_LAT_INFO" in pr

    if "OP_MAX_LAT" in pr or show_all:
        print("\nLongest instructions per cycle:\n")
        print("{:<8} {:<25} {:<10}".format("Cycle", "Instruction", "Latency (CC)"))
        for cycle, item in enumerate(cgra.instr_latency_cc, start=1):
            print("{:<2} {:<6} {:<25} {:<10}".format(cycle, f'({item.instr2exec})', item.instr, item.latency_cc))

    if "TOTAL_LAT" in pr or show_all:
        n_instrs = len(cgra.instrs)
        print(f'\nConfiguration time: {n_instrs} CC')
        cgra.interval_latency = math.ceil(INTERVAL_CST + (n_instrs * 3))
        print(f'Time between end of configuration and start of first iteration: {cgra.interval_latency} CC')
        print(f'Total time for all instructions: {cgra.total_latency_cc}')
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why isn't latency or energy one of these options?