Skip to content

Commit

Permalink
Enhance XMM register validation in SSE instructions
Browse files: browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 30, 2024
1 parent 43f09e1 commit d113488
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 49 deletions.
38 changes: 19 additions & 19 deletions gen/gen_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) {
const MmxTbl6 *p = &mmxTbl6[i];
printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, T_0F, %s); }\n", p->name, p->code, p->pref);
printf("void %s(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
printf("void %s(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|%s, 0x%02X); }\n", p->name, p->pref, p->code2);
}
}
{
Expand Down Expand Up @@ -484,7 +484,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s = type2String(p->type);
printf("void %s(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
printf("void %s(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
{
Expand Down Expand Up @@ -1095,7 +1095,7 @@ void put()
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
// cast the xmm register to a 16-bit register so that the 0x66 prefix is emitted
printf("void %s(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x%02X); }\n", p->name, p->code);
printf("void %s(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0x%02X); }\n", p->name, p->code);
}
}
{
Expand Down Expand Up @@ -1165,22 +1165,22 @@ void put()
puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); }");
puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); }");

puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }");
puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }");
puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }");
puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(reg, mmx, T_0F, 0xD7); }");
puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { opSSE(reg1, reg2, T_0F, 0xF7); }");
puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opSSE(reg, xmm, T_0F, 0x50); }");
puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }");
puts("void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }");
puts("void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }");
puts("void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }");
puts("void movntps(const Address& addr, const Xmm& xmm) { opSSE(Xmm(xmm.getIdx()), addr, T_0F, 0x2B); }");
puts("void movntdqa(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_66 | T_0F38, 0x2A); }");
puts("void lddqu(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_F2 | T_0F, 0xF0); }");
puts("void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }");
puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }");

puts("void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opRO(mmx, op, T_0F, 0x7E); }");
puts("void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opRO(mmx, op, T_0F, 0x6E); }");
puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }");
puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }");
puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }");
puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, addr, T_0F, 0xE7); }");

puts("void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x7E); }");
puts("void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x6E); }");
puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { opSSE(xmm, mmx, T_F3 | T_0F, 0xD6); }");
puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { opSSE(mmx, xmm, T_F2 | T_0F, 0xD6); }");
puts("void movq(const Mmx& mmx, const Operand& op) { if (!op.isMEM() && mmx.getKind() != op.getKind()) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0xF3); opSSE(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F); }");
puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, addr, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }");
puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); }");
puts("void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; if (opROO(Reg(), op, static_cast<const Reg&>(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); }");
Expand Down Expand Up @@ -1949,8 +1949,8 @@ void put64()

putMemOp("cmpxchg16b", "T_0F", 1, 0xC7, 64);
putMemOp("fxrstor64", "T_0F", 1, 0xAE, 64);
puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }");
puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }");
puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); }");
puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); }");
puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }");
puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }");
puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }");
Expand Down
36 changes: 36 additions & 0 deletions test/misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,42 @@ CYBOZU_TEST_AUTO(badSSE)
CYBOZU_TEST_EXCEPTION(movapd(xm16, xm1), Xbyak::Error);
CYBOZU_TEST_EXCEPTION(movhpd(xm16, ptr[eax]), Xbyak::Error);
CYBOZU_TEST_EXCEPTION(pextrb(eax, xm16, 1), Xbyak::Error);

CYBOZU_TEST_EXCEPTION(lddqu(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(maskmovdqu(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(maskmovq(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(movapd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movaps(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movd(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(movd(eax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movd(xm16, eax), Error);
CYBOZU_TEST_EXCEPTION(movdq2q(mm1, xm16), Error);
CYBOZU_TEST_EXCEPTION(movdqa(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movdqu(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movhlps(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(movlhps(xm16, xm1), Error);
CYBOZU_TEST_EXCEPTION(movmskpd(rax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movmskps(rax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movntdq(ptr[rax], xmm16), Error);
CYBOZU_TEST_EXCEPTION(movntdqa(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(movntpd(ptr[rax], xmm16), Error);
CYBOZU_TEST_EXCEPTION(movntps(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movntq(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movq(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movq(xm16, ptr[rax]), Error);
CYBOZU_TEST_EXCEPTION(movq(rax, xm16), Error);
CYBOZU_TEST_EXCEPTION(movq(xm16, rax), Error);
CYBOZU_TEST_EXCEPTION(movq2dq(xm16, mm1), Error);
CYBOZU_TEST_EXCEPTION(movsd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movss(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movupd(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(movups(ptr[rax], xm16), Error);
CYBOZU_TEST_EXCEPTION(extractps(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pextrb(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pextrd(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pextrw(ptr[rax], xm16, 3), Error);
CYBOZU_TEST_EXCEPTION(pmovmskb(eax, xm16), Error);
}
} code;
}
Expand Down
4 changes: 2 additions & 2 deletions xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -1734,10 +1734,10 @@ class CodeGenerator : public CodeArray {
{
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
}
static inline bool isValidSSE(const Operand& op1)
static inline bool isValidSSE(const Operand& op)
{
// SSE instructions do not support XMM16 - XMM31
return !(op1.isXMM() && op1.getIdx() >= 16);
return !(op.isXMM() && op.getIdx() >= 16);
}
static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
{
Expand Down
Loading

0 comments on commit d113488

Please sign in to comment.