Skip to content

Commit

Permalink
Merge pull request #422 from paulusmack/real-icache
Browse files Browse the repository at this point in the history
Icache improvements - use synchronous RAMs and remove 4kB per set limit
  • Loading branch information
paulusmack authored Jan 18, 2024
2 parents 63d4553 + 73a2fcb commit fdcb6ec
Show file tree
Hide file tree
Showing 13 changed files with 893 additions and 389 deletions.
6 changes: 1 addition & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,6 @@ RAM_INIT_FILE ?=hello_world/hello_world.hex

FPGA_TARGET ?= ORANGE-CRAB-0.21

# FIXME: icache RAMs aren't being inferrenced as block RAMs on ECP5
# with yosys, so make it smaller for now as a workaround.
ICACHE_NUM_LINES=4

clkgen=fpga/clk_gen_ecp5.vhd
toplevel=fpga/top-generic.vhdl
dmi_dtm=dmi_dtm_dummy.vhdl
Expand Down Expand Up @@ -227,7 +223,7 @@ LITEDRAM_GHDL_ARG=-gUSE_LITEDRAM=true
endif

GHDL_IMAGE_GENERICS=-gMEMORY_SIZE=$(MEMORY_SIZE) -gRAM_INIT_FILE=$(RAM_INIT_FILE) \
-gRESET_LOW=$(RESET_LOW) -gCLK_INPUT=$(CLK_INPUT) -gCLK_FREQUENCY=$(CLK_FREQUENCY) -gICACHE_NUM_LINES=$(ICACHE_NUM_LINES) \
-gRESET_LOW=$(RESET_LOW) -gCLK_INPUT=$(CLK_INPUT) -gCLK_FREQUENCY=$(CLK_FREQUENCY) \
$(LITEDRAM_GHDL_ARG)


Expand Down
18 changes: 14 additions & 4 deletions common.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ package common is
subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t;

-- Minimum page size
constant MIN_LG_PGSZ : positive := 12;
constant MIN_PAGESZ : positive := 2 ** MIN_LG_PGSZ;

-- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
Expand Down Expand Up @@ -231,13 +235,17 @@ package common is

type Fetch1ToIcacheType is record
req: std_ulogic;
fetch_fail : std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
big_endian : std_ulogic;
stop_mark: std_ulogic;
predicted : std_ulogic;
pred_ntaken : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
next_nia: std_ulogic_vector(63 downto 0);
rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
next_rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
end record;

type IcacheToDecode1Type is record
Expand Down Expand Up @@ -606,7 +614,7 @@ package common is
data : std_ulogic_vector(63 downto 0);
end record;

type MmuToIcacheType is record
type MmuToITLBType is record
tlbld : std_ulogic;
tlbie : std_ulogic;
doall : std_ulogic;
Expand Down Expand Up @@ -658,7 +666,6 @@ package common is
redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0);
last_nia: std_ulogic_vector(63 downto 0);
br_offset: std_ulogic_vector(63 downto 0);
br_last: std_ulogic;
br_taken: std_ulogic;
abs_br: std_ulogic;
Expand All @@ -672,7 +679,7 @@ package common is
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
last_nia => (others => '0'), br_offset => (others => '0'),
last_nia => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0'));

Expand Down Expand Up @@ -758,11 +765,14 @@ package common is
br_nia : std_ulogic_vector(63 downto 0);
br_last : std_ulogic;
br_taken : std_ulogic;
interrupt : std_ulogic;
intr_vec : std_ulogic_vector(11 downto 0);
end record;
constant WritebackToFetch1Init : WritebackToFetch1Type :=
(redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
mode_32bit => '0', redirect_nia => (others => '0'),
br_last => '0', br_taken => '0', br_nia => (others => '0'));
br_last => '0', br_taken => '0', br_nia => (others => '0'),
interrupt => '0', intr_vec => x"000");

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;
Expand Down
10 changes: 5 additions & 5 deletions core.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ architecture behave of core is
signal fetch1_to_icache : Fetch1ToIcacheType;
signal writeback_to_fetch1: WritebackToFetch1Type;
signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType;
signal mmu_to_itlb : MmuToITLBType;

-- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type;
Expand Down Expand Up @@ -223,6 +223,7 @@ begin
generic map (
RESET_ADDRESS => (others => '0'),
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
TLB_SIZE => ICACHE_TLB_SIZE,
HAS_BTC => HAS_BTC
)
port map (
Expand All @@ -231,8 +232,9 @@ begin
alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in,
flush_in => fetch1_flush,
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
inval_btc => ex1_icache_inval or mmu_to_itlb.tlbie,
stop_in => dbg_core_stop,
m_in => mmu_to_itlb,
d_in => decode1_to_fetch1,
w_in => writeback_to_fetch1,
i_out => fetch1_to_icache,
Expand All @@ -249,15 +251,13 @@ begin
LINE_SIZE => 64,
NUM_LINES => ICACHE_NUM_LINES,
NUM_WAYS => ICACHE_NUM_WAYS,
TLB_SIZE => ICACHE_TLB_SIZE,
LOG_LENGTH => LOG_LENGTH
)
port map(
clk => clk,
rst => rst_icache,
i_in => fetch1_to_icache,
i_out => icache_to_decode1,
m_in => mmu_to_icache,
flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in,
Expand Down Expand Up @@ -454,7 +454,7 @@ begin
l_out => mmu_to_loadstore1,
d_out => mmu_to_dcache,
d_in => dcache_to_mmu,
i_out => mmu_to_icache
i_out => mmu_to_itlb
);

dcache_0: entity work.dcache
Expand Down
37 changes: 17 additions & 20 deletions decode1.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ architecture behaviour of decode1 is
signal f, fin : Decode1ToFetch1Type;

type br_predictor_t is record
br_nia : std_ulogic_vector(61 downto 0);
br_offset : signed(23 downto 0);
br_target : signed(61 downto 0);
predict : std_ulogic;
end record;

Expand Down Expand Up @@ -94,8 +93,10 @@ architecture behaviour of decode1 is
INSN_andi_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_andis_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_attn => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_b => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bc => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_brel => (ALU, NONE, OP_B, CIA, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_babs => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcrel => (ALU, NONE, OP_BC, CIA, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcabs => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcctr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bclr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bctar => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
Expand Down Expand Up @@ -310,6 +311,7 @@ architecture behaviour of decode1 is
INSN_rlwimi => (ALU, NONE, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_rlwinm => (ALU, NONE, OP_RLC, NONE, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_rlwnm => (ALU, NONE, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_rnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sc => (ALU, NONE, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_setb => (ALU, NONE, OP_SETB, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_slbia => (LDST, NONE, OP_TLBIE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
Expand Down Expand Up @@ -476,8 +478,6 @@ begin
end if;
end if;
if rst = '1' then
br.br_nia <= (others => '0');
br.br_offset <= (others => '0');
br.predict <= '0';
else
br <= br_in;
Expand All @@ -499,8 +499,8 @@ begin
decode1_1: process(all)
variable v : Decode1ToDecode2Type;
variable vr : Decode1ToRegisterFileType;
variable br_target : std_ulogic_vector(61 downto 0);
variable br_offset : signed(23 downto 0);
variable br_nia : std_ulogic_vector(61 downto 0);
variable br_offset : std_ulogic_vector(23 downto 0);
variable bv : br_predictor_t;
variable icode : insn_code;
variable sprn : spr_num_t;
Expand Down Expand Up @@ -594,31 +594,28 @@ begin
-- Branch predictor
-- Note bclr, bcctr and bctar not predicted as we have no
-- count cache or link stack.
br_offset := (others => '0');
br_offset := f_in.insn(25 downto 2);
case icode is
when INSN_b =>
when INSN_brel | INSN_babs =>
-- Unconditional branches are always taken
v.br_pred := '1';
br_offset := signed(f_in.insn(25 downto 2));
when INSN_bc =>
-- Predict backward branches as taken, forward as untaken
when INSN_bcrel =>
-- Predict backward relative branches as taken, others as untaken
v.br_pred := f_in.insn(15);
br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
br_offset(23 downto 14) := (others => '1');
when others =>
end case;
bv.br_nia := f_in.nia(63 downto 2);
br_nia := f_in.nia(63 downto 2);
if f_in.insn(1) = '1' then
bv.br_nia := (others => '0');
br_nia := (others => '0');
end if;
bv.br_offset := br_offset;
bv.br_target := signed(br_nia) + signed(br_offset);
if f_in.next_predicted = '1' then
v.br_pred := '1';
elsif f_in.next_pred_ntaken = '1' then
v.br_pred := '0';
end if;
bv.predict := v.br_pred and f_in.valid and not flush_in and not busy_out and not f_in.next_predicted;
-- after a clock edge...
br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);

-- Work out GPR/FPR read addresses
-- Note that for prefixed instructions we are working this out based
Expand Down Expand Up @@ -665,7 +662,7 @@ begin
d_out.decode <= decode;
r_out <= vr;
f_out.redirect <= br.predict;
f_out.redirect_nia <= br_target & "00";
f_out.redirect_nia <= std_ulogic_vector(br.br_target) & "00";
flush_out <= bv.predict or br.predict;
end process;

Expand Down
5 changes: 2 additions & 3 deletions decode2.vhdl
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,8 @@ architecture behaviour of decode2 is
OP_SHR => "010",
OP_EXTSWSLI => "010",
OP_MUL_L64 => "011", -- muldiv_result
OP_B => "110", -- next_nia
OP_BC => "110",
OP_BCREG => "110",
OP_BCREG => "101", -- ramspr_result
OP_RFID => "101",
OP_ADDG6S => "111", -- misc_result
OP_ISEL => "111",
OP_DARN => "111",
Expand Down
Loading

0 comments on commit fdcb6ec

Please sign in to comment.