Skip to content

Commit

Permalink
cpu/drcbex64.cpp: Inline reads smaller than native width too.
Browse files Browse the repository at this point in the history
In my testing, this actually hurts performance for pmac6100.
  • Loading branch information
cuavas committed Feb 3, 2025
1 parent 0ed7850 commit 2c08910
Showing 1 changed file with 90 additions and 10 deletions.
100 changes: 90 additions & 10 deletions src/devices/cpu/drcbex64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2531,15 +2531,44 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst)
auto const &accessors = m_memory_accessors[spacesizep.space()];
bool const have_specific = (uintptr_t(nullptr) != accessors.specific.read.function) || accessors.specific.read.is_virtual;
mov_reg_param(a, Gpd(REG_PARAM2), addrp);
if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
if (have_specific && ((1 << spacesizep.size()) <= accessors.specific.native_bytes))
{
// Prepare the mask passed in REG_PARAM3 for the inlined read.
// When the requested size is narrower than the accessor's native width, also
// compute the bit shift that positions the access inside the native word.
// need to do this early - shift count must be CL, and RCX is a function parameter
if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
{
// work on a copy of the address, which was loaded into REG_PARAM2 above
a.mov(ecx, Gpd(REG_PARAM2));
// mask with one bit set per bit of the requested access size (8 << size bits)
if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD))
a.mov(Gpd(REG_PARAM3), imm(make_bitmask<uint32_t>(8 << spacesizep.size())));
else
a.mov(Gpq(REG_PARAM3), imm(make_bitmask<uint64_t>(8 << spacesizep.size())));
// scale the address copy by (addr_shift() - 3): left shift when negative,
// right shift when positive, untouched when zero
// NOTE(review): presumably this turns address units into a bit offset - confirm against addr_shift() semantics
int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
if (shift < 0)
a.shl(ecx, imm(-shift));
else if (shift > 0)
a.shr(ecx, imm(shift));
if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
{
// not little-endian: the subtract-then-negate pair yields
// (native width in bits - access width in bits) - offset
a.sub(ecx, imm((accessors.specific.native_bytes << 3) - (8 << spacesizep.size())));
a.neg(ecx);
}
// limit the count to multiples of eight below the native width in bits
a.and_(cl, imm((accessors.specific.native_bytes - 1) << 3));
// stash the shift count at [rsp+32]; it is reloaded into CL after the accessor call
// to shift the result back down
a.mov(ptr(rsp, 32), cl);
// move the mask up to the position of the requested bits, using a 32-bit
// or 64-bit shift depending on the native width
if (accessors.specific.native_bytes <= 4)
a.shl(Gpd(REG_PARAM3), cl);
else
a.shl(Gpq(REG_PARAM3), cl);
}
else
{
// access is exactly native width: the mask covers every bit of the native word
if (accessors.specific.native_bytes <= 4)
a.mov(Gpd(REG_PARAM3), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
else
a.mov(Gpq(REG_PARAM3), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
}

a.and_(Gpd(REG_PARAM2), make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits));
mov_r64_imm(a, rax, uintptr_t(accessors.specific.read.dispatch));
a.mov(Gpd(REG_PARAM1), Gpd(REG_PARAM2));
if (accessors.specific.native_bytes > 4)
a.mov(Gpq(REG_PARAM3), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
else
a.mov(Gpd(REG_PARAM3), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
if (accessors.specific.low_bits)
a.shr(Gpd(REG_PARAM1), accessors.specific.low_bits);
a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM1), 3));
Expand All @@ -2555,6 +2584,15 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst)
smart_call_r64(a, (x86code *)accessors.specific.read.function, rax);
}

// Sub-native-width read: shift the requested bits down to the bottom of EAX/RAX
// (presumably the accessor's return value - the size-specific moves that follow read from AL/AX/EAX).
if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
{
// reload the shift count that was saved at [rsp+32] before the call
a.mov(cl, ptr(rsp, 32));
// logical right shift; 32-bit form when the native width is at most four bytes
if (accessors.specific.native_bytes <= 4)
a.shr(eax, cl);
else
a.shr(rax, cl);
}

if (spacesizep.size() == SIZE_BYTE)
{
a.movzx(dstreg, al);
Expand Down Expand Up @@ -2634,12 +2672,41 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
auto const &accessors = m_memory_accessors[spacesizep.space()];
bool const have_specific = (uintptr_t(nullptr) != accessors.specific.read.function) || accessors.specific.read.is_virtual;
mov_reg_param(a, Gpd(REG_PARAM2), addrp);
if (spacesizep.size() != SIZE_QWORD)
mov_reg_param(a, Gpd(REG_PARAM3), maskp);
else
mov_reg_param(a, Gpq(REG_PARAM3), maskp);
if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
if (have_specific && ((1 << spacesizep.size()) <= accessors.specific.native_bytes))
{
// Prepare the mask passed in REG_PARAM3 for the inlined masked read.
// When the requested size is narrower than the accessor's native width, the
// caller-supplied mask is shifted to the position of the access inside the native word.
// need to do this early - shift count must be CL, and RCX is a function parameter
if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
{
// load the caller-supplied mask parameter, 32-bit unless the access is a qword
if (spacesizep.size() != SIZE_QWORD)
mov_reg_param(a, Gpd(REG_PARAM3), maskp);
else
mov_reg_param(a, Gpq(REG_PARAM3), maskp);
// work on a copy of the address, which was loaded into REG_PARAM2 above
a.mov(ecx, Gpd(REG_PARAM2));
// scale the address copy by (addr_shift() - 3): left shift when negative,
// right shift when positive, untouched when zero
// NOTE(review): presumably this turns address units into a bit offset - confirm against addr_shift() semantics
int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
if (shift < 0)
a.shl(ecx, imm(-shift));
else if (shift > 0)
a.shr(ecx, imm(shift));
if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
{
// not little-endian: the subtract-then-negate pair yields
// (native width in bits - access width in bits) - offset
a.sub(ecx, imm((accessors.specific.native_bytes << 3) - (8 << spacesizep.size())));
a.neg(ecx);
}
// limit the count to multiples of eight below the native width in bits
a.and_(cl, imm((accessors.specific.native_bytes - 1) << 3));
// stash the shift count at [rsp+32]; it is reloaded into CL after the accessor call
// to shift the result back down
a.mov(ptr(rsp, 32), cl);
// move the mask up to the position of the requested bits, using a 32-bit
// or 64-bit shift depending on the native width
if (accessors.specific.native_bytes <= 4)
a.shl(Gpd(REG_PARAM3), cl);
else
a.shl(Gpq(REG_PARAM3), cl);
}
else
{
// access is exactly native width: REG_PARAM3 is loaded with a mask covering
// every bit of the native word
if (accessors.specific.native_bytes <= 4)
a.mov(Gpd(REG_PARAM3), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
else
a.mov(Gpq(REG_PARAM3), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
}

a.and_(Gpd(REG_PARAM2), make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits));
mov_r64_imm(a, rax, uintptr_t(accessors.specific.read.dispatch));
a.mov(Gpd(REG_PARAM1), Gpd(REG_PARAM2));
Expand All @@ -2658,6 +2725,15 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
smart_call_r64(a, (x86code *)accessors.specific.read.function, rax);
}

// Sub-native-width masked read: shift the requested bits down to the bottom of EAX/RAX
// (presumably the accessor's return value - the size-specific moves that follow read from AL/AX/EAX).
if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
{
// reload the shift count that was saved at [rsp+32] before the call
a.mov(cl, ptr(rsp, 32));
// logical right shift; 32-bit form when the native width is at most four bytes
if (accessors.specific.native_bytes <= 4)
a.shr(eax, cl);
else
a.shr(rax, cl);
}

if (spacesizep.size() == SIZE_BYTE)
{
a.movzx(dstreg, al);
Expand All @@ -2679,25 +2755,29 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
}
else if (spacesizep.size() == SIZE_BYTE)
{
mov_reg_param(a, Gpd(REG_PARAM3), maskp);
mov_r64_imm(a, Gpq(REG_PARAM1), accessors.resolved.read_byte_masked.obj);
smart_call_r64(a, accessors.resolved.read_byte_masked.func, rax);
a.movzx(dstreg, al);
}
else if (spacesizep.size() == SIZE_WORD)
{
mov_reg_param(a, Gpd(REG_PARAM3), maskp);
mov_r64_imm(a, Gpq(REG_PARAM1), accessors.resolved.read_word_masked.obj);
smart_call_r64(a, accessors.resolved.read_word_masked.func, rax);
a.movzx(dstreg, ax);
}
else if (spacesizep.size() == SIZE_DWORD)
{
mov_reg_param(a, Gpd(REG_PARAM3), maskp);
mov_r64_imm(a, Gpq(REG_PARAM1), accessors.resolved.read_dword_masked.obj);
smart_call_r64(a, accessors.resolved.read_dword_masked.func, rax);
if (dstreg != eax || inst.size() == 8)
a.mov(dstreg, eax);
}
else if (spacesizep.size() == SIZE_QWORD)
{
mov_reg_param(a, Gpq(REG_PARAM3), maskp);
mov_r64_imm(a, Gpq(REG_PARAM1), accessors.resolved.read_qword_masked.obj);
smart_call_r64(a, accessors.resolved.read_qword_masked.func, rax);
if (dstreg != eax)
Expand Down

0 comments on commit 2c08910

Please sign in to comment.