Skip to content

Commit

Permalink
recompiler: enable instruction tracing w/recompilers (#1301)
Browse files Browse the repository at this point in the history
To prevent undue performance losses, the extra codegen only occurs when
tracing is enabled. To support toggling tracing on/off during execution,
the recompiler cache is flushed on setting changes.

Flushing the recompiler cache has side effects of its own, but those side
effects are a pre-existing issue, and flushing seemed the least bad way to
isolate the impact of this feature.
  • Loading branch information
invertego authored Nov 29, 2023
1 parent 079295e commit cd11421
Show file tree
Hide file tree
Showing 20 changed files with 94 additions and 25 deletions.
6 changes: 4 additions & 2 deletions ares/ares/node/debugger/tracer/tracer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ struct Tracer : Debugger {
auto file() const -> bool { return _file; }
auto autoLineBreak() const -> bool { return _autoLineBreak; }

auto setToggle(function<void ()> toggle) -> void { _toggle = toggle; }
auto setComponent(string component) -> void { _component = component; }
auto setPrefix(bool prefix) -> void { _prefix = prefix; }
auto setTerminal(bool terminal) -> void { _terminal = terminal; }
auto setFile(bool file) -> void { _file = file; }
auto setTerminal(bool terminal) -> void { _terminal = terminal; if(_toggle) _toggle(); }
auto setFile(bool file) -> void { _file = file; if(_toggle) _toggle(); }
auto setAutoLineBreak(bool autoLineBreak) -> void { _autoLineBreak = autoLineBreak; }

auto serialize(string& output, string depth) -> void override {
Expand All @@ -35,6 +36,7 @@ struct Tracer : Debugger {
}

protected:
function<void ()> _toggle;
string _component;
bool _prefix = false;
bool _terminal = false;
Expand Down
4 changes: 1 addition & 3 deletions ares/component/processor/sh2/disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ auto SH2::hint(P&&... p) const -> string {
return {std::forward<P>(p)...};
}

auto SH2::disassembleInstruction() -> string {
auto SH2::disassembleInstruction(u16 opcode) -> string {
vector<string> s;

auto registerName = [&](u32 r) -> string {
Expand Down Expand Up @@ -48,8 +48,6 @@ auto SH2::disassembleInstruction() -> string {
return {"0x", hex(PC + (i12)d * 2, 8L)};
};

u16 opcode = readWord(PC - 4);

#define n (opcode >> 8 & 0x00f)
#define m (opcode >> 4 & 0x00f)
#define i (opcode >> 0 & 0x0ff)
Expand Down
1 change: 1 addition & 0 deletions ares/component/processor/sh2/instruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ auto SH2::instruction() -> void {
}
ID = 0;
u16 opcode = readWord(PC - 4);
instructionPrologue(opcode);
execute(opcode);
instructionEpilogue();
}
Expand Down
4 changes: 4 additions & 0 deletions ares/component/processor/sh2/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ auto SH2::Recompiler::emit(u32 address) -> Block* {
inDelaySlot = 1; //force runtime check on first instruction
while(true) {
u16 instruction = instructions[index++];
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&SH2::instructionPrologue);
}
auto branch = emitInstruction(instruction);
inDelaySlot = branch == Branch::Slot;
add64(CCR, CCR, imm(1));
Expand Down
4 changes: 3 additions & 1 deletion ares/component/processor/sh2/sh2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
namespace ares {

struct SH2 {
virtual auto instructionPrologue(u16 instruction) -> void = 0;
virtual auto step(u32 clocks) -> void = 0;
virtual auto busReadByte(u32 address) -> u32 = 0;
virtual auto busReadWord(u32 address) -> u32 = 0;
Expand Down Expand Up @@ -189,7 +190,7 @@ struct SH2 {

//disassembler.cpp
template<typename... P> auto hint(P&&...) const -> string;
auto disassembleInstruction() -> string;
auto disassembleInstruction(u16 opcode) -> string;
auto disassembleContext() -> string;

static constexpr u32 undefined = 0;
Expand Down Expand Up @@ -299,6 +300,7 @@ struct SH2 {

static auto mask(u8 address, u8 size) -> u64;

bool callInstructionPrologue = false;
bool inDelaySlot;
u32 generation;
bump_allocator allocator;
Expand Down
10 changes: 8 additions & 2 deletions ares/md/m32x/debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@ auto M32X::SH7604::Debugger::load(Node::Object parent) -> void {
tracer.instruction = parent->append<Node::Debugger::Tracer::Instruction>("Instruction", parent->name());
tracer.instruction->setAddressBits(32, 1);
tracer.instruction->setDepth(16);
if constexpr(SH2::Accuracy::Recompiler) {
tracer.instruction->setToggle([&] {
self->recompiler.reset();
self->recompiler.callInstructionPrologue = tracer.instruction->enabled();
});
}

tracer.interrupt = parent->append<Node::Debugger::Tracer::Notification>("Interrupt", parent->name());
}

auto M32X::SH7604::Debugger::instruction() -> void {
auto M32X::SH7604::Debugger::instruction(u16 opcode) -> void {
if(tracer.instruction->enabled() && tracer.instruction->address(self->regs.PC - 4)) {
tracer.instruction->notify(self->disassembleInstruction(), self->disassembleContext());
tracer.instruction->notify(self->disassembleInstruction(opcode), self->disassembleContext());
}
}

Expand Down
3 changes: 2 additions & 1 deletion ares/md/m32x/m32x.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ struct M32X {

//debugger.cpp
auto load(Node::Object) -> void;
auto instruction() -> void;
auto instruction(u16 opcode) -> void;
auto interrupt(string_view) -> void;

struct Tracer {
Expand All @@ -37,6 +37,7 @@ struct M32X {
auto unload() -> void;

auto main() -> void;
auto instructionPrologue(u16 instruction) -> void override;
auto step(u32 clocks) -> void override;
auto power(bool reset) -> void;
auto restart() -> void;
Expand Down
5 changes: 4 additions & 1 deletion ares/md/m32x/sh7604.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,17 @@ auto M32X::SH7604::main() -> void {
#undef raise
}

debugger.instruction();
SH2::instruction();
SH2::intc.run();
SH2::dmac.run();
if(m32x.shm.active()) m32x.shm.dmac.dreq[1] = 0;
if(m32x.shs.active()) m32x.shs.dmac.dreq[1] = 0;
}

//per-instruction hook: implements the pure virtual SH2::instructionPrologue.
//forwards the 16-bit opcode that is about to execute to debugger.instruction(),
//which is where the instruction tracer is notified.
//reached from SH2::instruction() and, when recompiler.callInstructionPrologue
//is set, from calls emitted into recompiled blocks.
auto M32X::SH7604::instructionPrologue(u16 instruction) -> void {
debugger.instruction(instruction);
}

auto M32X::SH7604::step(u32 clocks) -> void {
if(clocks > 0) {
auto cycles = clocks;
Expand Down
10 changes: 7 additions & 3 deletions ares/n64/cpu/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,20 @@ auto CPU::instruction() -> void {
}

if constexpr(Accuracy::CPU::Interpreter) {
pipeline.address = ipu.pc;
auto data = fetch(ipu.pc);
if (!data) return;
pipeline.instruction = *data;
debugger.instruction();
instructionPrologue(*data);
decoderEXECUTE();
instructionEpilogue();
}
}

//per-instruction hook run before an instruction executes.
//instruction: the 32-bit opcode that was fetched for the current pc.
//stores ipu.pc and the opcode into the pipeline state, then calls the debugger.
//invoked directly by the interpreter path of CPU::instruction(); the recompiler
//only emits a call to it when recompiler.callInstructionPrologue is set.
auto CPU::instructionPrologue(u32 instruction) -> void {
pipeline.address = ipu.pc;
pipeline.instruction = instruction;
//NOTE(review): debugger.instruction() takes no arguments, so it presumably reads
//pipeline.address/pipeline.instruction - keep the assignments above this call
debugger.instruction();
}

auto CPU::instructionEpilogue() -> s32 {
if constexpr(Accuracy::CPU::Recompiler) {
//simulates timings without performing actual icache loads
Expand Down
2 changes: 2 additions & 0 deletions ares/n64/cpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct CPU : Thread {
auto synchronize() -> void;

auto instruction() -> void;
auto instructionPrologue(u32 instruction) -> void;
auto instructionEpilogue() -> s32;

auto power(bool reset) -> void;
Expand Down Expand Up @@ -911,6 +912,7 @@ struct CPU : Thread {
auto emitFPU(u32 instruction) -> bool;
auto emitCOP2(u32 instruction) -> bool;

bool callInstructionPrologue = false;
bump_allocator allocator;
Pool* pools[1 << 21]; //2_MiB * sizeof(void*) == 16_MiB
} recompiler{*this};
Expand Down
6 changes: 6 additions & 0 deletions ares/n64/cpu/debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ auto CPU::Debugger::load(Node::Object parent) -> void {
tracer.instruction = parent->append<Node::Debugger::Tracer::Instruction>("Instruction", "CPU");
tracer.instruction->setAddressBits(64, 2);
tracer.instruction->setDepth(64);
if constexpr(Accuracy::CPU::Recompiler) {
tracer.instruction->setToggle([&] {
cpu.recompiler.reset();
cpu.recompiler.callInstructionPrologue = tracer.instruction->enabled();
});
}

tracer.exception = parent->append<Node::Debugger::Tracer::Notification>("Exception", "CPU");
tracer.interrupt = parent->append<Node::Debugger::Tracer::Notification>("Interrupt", "CPU");
Expand Down
4 changes: 4 additions & 0 deletions ares/n64/cpu/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ auto CPU::Recompiler::emit(u32 vaddr, u32 address, bool singleInstruction) -> Bl
bool hasBranched = 0;
while(true) {
u32 instruction = bus.read<Word>(address, thread);
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&CPU::instructionPrologue);
}
bool branched = emitEXECUTE(instruction);
if(unlikely(instruction == 0x1000'ffff //beq 0,0,<pc>
|| instruction == (2 << 26 | vaddr >> 2 & 0x3ff'ffff))) { //j <pc>
Expand Down
6 changes: 6 additions & 0 deletions ares/n64/rsp/debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ auto RSP::Debugger::load(Node::Object parent) -> void {
tracer.instruction = parent->append<Node::Debugger::Tracer::Instruction>("Instruction", "RSP");
tracer.instruction->setAddressBits(12, 2);
tracer.instruction->setDepth(64);
if constexpr(Accuracy::RSP::Recompiler) {
tracer.instruction->setToggle([&] {
rsp.recompiler.reset();
rsp.recompiler.callInstructionPrologue = tracer.instruction->enabled();
});
}

tracer.io = parent->append<Node::Debugger::Tracer::Notification>("I/O", "RSP");
}
Expand Down
12 changes: 10 additions & 2 deletions ares/n64/rsp/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,27 @@ auto RSP::Recompiler::emit(u12 address) -> Block* {
u12 start = address;
bool hasBranched = 0;
while(true) {
pipeline.begin();
u32 instruction = self.imem.read<Word>(address);
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&RSP::instructionPrologue);
}
pipeline.begin();
OpInfo op0 = self.decoderEXECUTE(instruction);
pipeline.issue(op0);
bool branched = emitEXECUTE(instruction);

if(!pipeline.singleIssue && !branched && u12(address + 4) != start) {
u32 instruction = self.imem.read<Word>(address + 4);
u32 instruction = self.imem.read<Word>(address + 4);
OpInfo op1 = self.decoderEXECUTE(instruction);

if(RSP::canDualIssue(op0, op1)) {
mov32(reg(1), imm(0));
call(&RSP::instructionEpilogue);
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&RSP::instructionPrologue);
}
address += 4;
pipeline.issue(op1);
branched = emitEXECUTE(instruction);
Expand Down
17 changes: 10 additions & 7 deletions ares/n64/rsp/rsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@ auto RSP::instruction() -> void {
}

if constexpr(Accuracy::RSP::Interpreter) {
u32 instruction = imem.read<Word>(ipu.pc);
instructionPrologue(instruction);
pipeline.begin();
pipeline.address = ipu.pc;
pipeline.instruction = imem.read<Word>(pipeline.address);
OpInfo op0 = decoderEXECUTE(pipeline.instruction);
OpInfo op0 = decoderEXECUTE(instruction);
pipeline.issue(op0);
debugger.instruction();
interpreterEXECUTE();

if(!pipeline.singleIssue && !op0.branch()) {
Expand All @@ -57,10 +56,8 @@ auto RSP::instruction() -> void {

if(canDualIssue(op0, op1)) {
instructionEpilogue(0);
pipeline.address = ipu.pc;
pipeline.instruction = instruction;
instructionPrologue(instruction);
pipeline.issue(op1);
debugger.instruction();
interpreterEXECUTE();
}
}
Expand All @@ -74,6 +71,12 @@ auto RSP::instruction() -> void {
step(pipeline.clocks);
}

//per-instruction hook run before an instruction executes.
//instruction: the 32-bit opcode read from imem for the current pc.
//stores ipu.pc and the opcode into the pipeline state, then calls the debugger.
//the interpreter calls this once per issued instruction (including the second
//instruction of a dual-issue pair); the recompiler only emits calls to it when
//recompiler.callInstructionPrologue is set.
auto RSP::instructionPrologue(u32 instruction) -> void {
pipeline.address = ipu.pc;
pipeline.instruction = instruction;
//NOTE(review): debugger.instruction() takes no arguments, so it presumably reads
//pipeline.address/pipeline.instruction - keep the assignments above this call
debugger.instruction();
}

auto RSP::instructionEpilogue(u32 clocks) -> s32 {
if constexpr(Accuracy::RSP::Recompiler) {
step(clocks);
Expand Down
2 changes: 2 additions & 0 deletions ares/n64/rsp/rsp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct RSP : Thread, Memory::RCP<RSP> {
auto main() -> void;

auto instruction() -> void;
auto instructionPrologue(u32 instruction) -> void;
auto instructionEpilogue(u32 clocks) -> s32;

auto power(bool reset) -> void;
Expand Down Expand Up @@ -512,6 +513,7 @@ struct RSP : Thread, Memory::RCP<RSP> {
return s <= e ? smask & emask : smask | emask;
}

bool callInstructionPrologue = false;
Pipeline pipeline;
bump_allocator allocator;
array<Block*[1024]> context;
Expand Down
11 changes: 8 additions & 3 deletions ares/ps1/cpu/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,10 @@ auto CPU::instruction() -> void {
}
}

pipeline.address = ipu.pc;
pipeline.instruction = fetch(ipu.pc);
u32 instruction = fetch(ipu.pc);
if(exception()) return (void)instructionEpilogue();

debugger.instruction();
instructionPrologue(instruction);
decoderEXECUTE();
instructionEpilogue();
}
Expand All @@ -94,6 +93,12 @@ auto CPU::instruction() -> void {
}
}

//per-instruction hook run before an instruction executes.
//instruction: the 32-bit opcode that was fetched for the current pc.
//stores ipu.pc and the opcode into the pipeline state, then calls the debugger.
//invoked directly by CPU::instruction() after a successful fetch; the recompiler
//only emits a call to it when recompiler.callInstructionPrologue is set.
auto CPU::instructionPrologue(u32 instruction) -> void {
pipeline.address = ipu.pc;
pipeline.instruction = instruction;
//NOTE(review): debugger.instruction() takes no arguments, so it presumably reads
//pipeline.address/pipeline.instruction - keep the assignments above this call
debugger.instruction();
}

auto CPU::instructionEpilogue() -> s32 {
if constexpr(Accuracy::CPU::Recompiler) {
icache.step(ipu.pc); //simulates timings without performing actual icache loads
Expand Down
2 changes: 2 additions & 0 deletions ares/ps1/cpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ struct CPU : Thread {
auto synchronize() -> void;

auto instruction() -> void;
auto instructionPrologue(u32 instruction) -> void;
auto instructionEpilogue() -> s32;
auto instructionHook() -> void;

Expand Down Expand Up @@ -541,6 +542,7 @@ struct CPU : Thread {
auto emitSCC(u32 instruction) -> bool;
auto emitGTE(u32 instruction) -> bool;

bool callInstructionPrologue = false;
bump_allocator allocator;
Pool* pools[1 << 21]; //2_MiB * sizeof(void*) = 16_MiB
} recompiler{*this};
Expand Down
6 changes: 6 additions & 0 deletions ares/ps1/cpu/debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ auto CPU::Debugger::load(Node::Object parent) -> void {
tracer.instruction = parent->append<Node::Debugger::Tracer::Instruction>("Instruction", "CPU");
tracer.instruction->setAddressBits(32, 2);
tracer.instruction->setDepth(32);
if constexpr(Accuracy::CPU::Recompiler) {
tracer.instruction->setToggle([&] {
cpu.recompiler.reset();
cpu.recompiler.callInstructionPrologue = tracer.instruction->enabled();
});
}

tracer.exception = parent->append<Node::Debugger::Tracer::Notification>("Exception", "CPU");
tracer.interrupt = parent->append<Node::Debugger::Tracer::Notification>("Interrupt", "CPU");
Expand Down
4 changes: 4 additions & 0 deletions ares/ps1/cpu/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ auto CPU::Recompiler::emit(u32 address) -> Block* {
while(true) {
//shortcut: presume CPU is executing out of either CPU RAM or the BIOS area
u32 instruction = address <= 0x007f'ffff ? cpu.ram.readWord(address) : bios.readWord(address);
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&CPU::instructionPrologue);
}
bool branched = emitEXECUTE(instruction);
if(unlikely(instruction == 0x1000'ffff)) {
//accelerate idle loops
Expand Down

0 comments on commit cd11421

Please sign in to comment.