Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Jan 3, 2024
2 parents e1b6896 + 0b3f360 commit 2ce465b
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 73 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)

project(xbyak LANGUAGES CXX VERSION 7.04)
project(xbyak LANGUAGES CXX VERSION 7.05)

file(GLOB headers xbyak/*.h)

Expand Down
1 change: 1 addition & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# History

* 2024/Jan/03 ver 7.05 support RAO-INT for APX
* 2023/Dec/28 ver 7.04 rex2 supports two-byte opcode
* 2023/Dec/26 ver 7.03 set the default value of dfv to 0
* 2023/Dec/20 ver 7.02 SHA* support APX
Expand Down
19 changes: 9 additions & 10 deletions gen/gen_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ void put()
printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg);
printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg);
printf("void set%s(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | %d)) return; opRext(op, 8, 0, T_0F, 0x90 | %d); }%s\n", p->name, p->ext, p->ext, msg);
printf("void set%s(const Operand& op) { opSetCC(op, %d); }%s\n", p->name, p->ext, msg);

// ccmpscc
// true if SCC = 0b1010, false if SCC = 0b1011 (see APX Architecture Specification p.266)
Expand Down Expand Up @@ -860,14 +860,13 @@ void put()
const char *prefix;
} tbl[] = {
{ "aadd", "" },
{ "aand", " | T_66" },
{ "aor", " | T_F2" },
{ "axor", " | T_F3" },
{ "aand", "|T_66" },
{ "aor", "|T_F2" },
{ "axor", "|T_F3" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
printf("void %s(const Address& addr, const Reg32e &reg) { ", p->name);
printf("opMR(addr, reg, T_0F38%s, 0x0FC); }\n", p->prefix);
printf("void %s(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38%s, 0x0FC, T_APX%s); }\n", p->name, p->prefix, p->prefix);
}
}

Expand Down Expand Up @@ -1149,10 +1148,10 @@ void put()

puts("void xadd(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
puts("void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); }");
puts("void movbe(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg, T_APX, 0x60)) return; opMR(addr, reg, T_0F38, 0xF0); }");
puts("void movbe(const Address& addr, const Reg& reg) { if (opROO(Reg(), addr, reg, T_APX, 0x61)) return; opMR(addr, reg, T_0F38, 0xF1); }");
puts("void movdiri(const Address& addr, const Reg32e& reg) { if (opROO(Reg(), addr, reg, T_APX, 0xF9)) return; opMR(addr, reg, T_0F38, 0xF9); }");
puts("void movdir64b(const Reg& reg, const Address& addr) { if (opROO(Reg(), addr, reg.cvt32(), T_APX|T_66, 0xF8)) return; opMR(addr, reg.cvt32(), T_66 | T_0F38, 0xF8); }");
puts("void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }");
puts("void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }");
puts("void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }");
puts("void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }");
puts("void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); }");

puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }");
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.04',
version: '7.05',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Xbyak 7.04 [![Badge Build]][Build Status]
# Xbyak 7.05 [![Badge Build]][Build Status]

*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*

Expand Down Expand Up @@ -33,6 +33,7 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.

### News

- support RAO-INT for APX
- support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
- support APX except for a few instructions
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
Expand Down
3 changes: 2 additions & 1 deletion readme.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.04
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.05

-----------------------------------------------------------------------------
◎概要
Expand Down Expand Up @@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴

2024/01/03 ver 7.05 APX対応RAO-INT
2023/12/28 ver 7.04 2バイトオペコードのrex2対応
2023/12/26 ver 7.03 dfvのデフォルト値を0に設定
2023/12/20 ver 7.02 SHA*のAPX対応
Expand Down
26 changes: 26 additions & 0 deletions test/apx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1936,3 +1936,29 @@ CYBOZU_TEST_AUTO(0x0f_rex2)
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

CYBOZU_TEST_AUTO(rao_int)
{
	// Generate aadd/aand/aor/axor, each against the same memory operand
	// [r16+r31*1], first with the 32-bit register r17d and then with the
	// 64-bit register r17.
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			aadd(ptr [r16+r31*1], r17d);
			aadd(ptr [r16+r31*1], r17);

			aand(ptr [r16+r31*1], r17d);
			aand(ptr [r16+r31*1], r17);

			aor(ptr [r16+r31*1], r17d);
			aor(ptr [r16+r31*1], r17);

			axor(ptr [r16+r31*1], r17d);
			axor(ptr [r16+r31*1], r17);
		}
	} c;
	// Expected machine code in emission order. The stream is laid out one
	// row per instruction; the 7-byte split follows the repeating
	// 0x62 0xac .. 0x08 0xfc 0x0c 0x38 pattern (8 instructions, 56 bytes).
	const uint8_t tbl[] = {
		0x62, 0xac, 0x78, 0x08, 0xfc, 0x0c, 0x38, // aadd, r17d
		0x62, 0xac, 0xf8, 0x08, 0xfc, 0x0c, 0x38, // aadd, r17
		0x62, 0xac, 0x79, 0x08, 0xfc, 0x0c, 0x38, // aand, r17d
		0x62, 0xac, 0xf9, 0x08, 0xfc, 0x0c, 0x38, // aand, r17
		0x62, 0xac, 0x7b, 0x08, 0xfc, 0x0c, 0x38, // aor, r17d
		0x62, 0xac, 0xfb, 0x08, 0xfc, 0x0c, 0x38, // aor, r17
		0x62, 0xac, 0x7a, 0x08, 0xfc, 0x0c, 0x38, // axor, r17d
		0x62, 0xac, 0xfa, 0x08, 0xfc, 0x0c, 0x38, // axor, r17
	};
	// Element count of the byte table (elements are one byte each).
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	// Both the generated length and every generated byte must match.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

62 changes: 42 additions & 20 deletions xbyak/xbyak.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ namespace Xbyak {

enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7040 /* 0xABCD = A.BC(.D) */
VERSION = 0x7050 /* 0xABCD = A.BC(.D) */
};

#ifndef MIE_INTEGER_TYPE_DEFINED
Expand Down Expand Up @@ -727,6 +727,7 @@ class Operand {
bool operator==(const Operand& rhs) const;
bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
const Address& getAddress() const;
Address getAddress(int immSize) const;
const Reg& getReg() const;
};

Expand Down Expand Up @@ -1298,15 +1299,15 @@ class Address : public Operand {
M_ripAddr
};
XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast), optimize_(true)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true)
{
e_.verify();
}
#ifdef XBYAK64
explicit XBYAK_CONSTEXPR Address(size_t disp)
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false), optimize_(true) { }
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), immSize(0), disp8N(0), permitVsib(false), broadcast_(false), optimize_(true) { }
XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast), optimize_(true) { }
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) { }
#endif
RegExp getRegExp() const
{
Expand All @@ -1323,14 +1324,19 @@ class Address : public Operand {
const Label* getLabel() const { return label_; }
bool operator==(const Address& rhs) const
{
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && immSize == rhs.immSize && disp8N == rhs.disp8N && permitVsib == rhs.permitVsib && broadcast_ == rhs.broadcast_ && optimize_ == rhs.optimize_;
}
bool operator!=(const Address& rhs) const { return !operator==(rhs); }
bool isVsib() const { return e_.isVsib(); }
private:
RegExp e_;
const Label* label_;
Mode mode_;
public:
// Size of the immediate value of the mnemonic (0, 1, 2, 4).
// opAddr subtracts it from the displacement when encoding rip-relative addresses.
int immSize; // the size of immediate value of mnemonics (0, 1, 2, 4)
// Passed by opAddr to setSIB for M_ModRM addresses.
int disp8N; // 0(normal), 1(force disp32), disp8N = {2, 4, 8}
// When false, opAddr throws ERR_BAD_VSIB_ADDRESSING for an address whose isVsib() is true.
bool permitVsib;
private:
bool broadcast_;
bool optimize_;
};
Expand All @@ -1340,6 +1346,12 @@ inline const Address& Operand::getAddress() const
assert(isMEM());
return static_cast<const Address&>(*this);
}
// Return a copy of this operand viewed as an Address, with the copy's
// immSize field set to the given value. The operand itself is not modified.
// The no-argument getAddress() used for the copy asserts isMEM().
inline Address Operand::getAddress(int immSize) const
{
	Address withImm(getAddress());
	withImm.immSize = immSize;
	return withImm;
}

inline bool Operand::operator==(const Operand& rhs) const
{
Expand Down Expand Up @@ -2044,12 +2056,14 @@ class CodeGenerator : public CodeArray {
writeCode(type, reg1, code, rex2);
setModRM(3, reg1.getIdx(), reg2.getIdx());
}
void opMR(const Address& addr, const Reg& r, uint64_t type, int code, int immSize = 0)
void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE)
{
if (code2 == NONE) code2 = code;
if (type2 && opROO(Reg(), addr, r, type2, code2)) return;
if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
bool rex2 = rex(addr, r, type);
writeCode(type, r, code, rex2);
opAddr(addr, r.getIdx(), immSize);
opAddr(addr, r.getIdx());
}
void opLoadSeg(const Address& addr, const Reg& reg, uint64_t type, int code)
{
Expand Down Expand Up @@ -2130,21 +2144,20 @@ class CodeGenerator : public CodeArray {
}
// reg is reg field of ModRM
// immSize is the size for immediate value
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
void opAddr(const Address &addr, int reg)
{
if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
if (!addr.permitVsib && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
if (addr.getMode() == Address::M_ModRM) {
setSIB(addr.getRegExp(), reg, disp8N);
setSIB(addr.getRegExp(), reg, addr.disp8N);
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
setModRM(0, reg, 5);
if (addr.getLabel()) { // [rip + Label]
putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize);
putL_inner(*addr.getLabel(), true, addr.getDisp() - addr.immSize);
} else {
size_t disp = addr.getDisp();
if (addr.getMode() == Address::M_ripAddr) {
if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
disp -= (size_t)getCurr() + 4 + immSize;
disp -= (size_t)getCurr() + 4 + addr.immSize;
}
dd(inner::VerifyInInt32(disp));
}
Expand Down Expand Up @@ -2201,11 +2214,12 @@ class CodeGenerator : public CodeArray {
if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false)
if (p2->isMEM()) {
const Reg& r = *static_cast<const Reg*>(p1);
const Address& addr = p2->getAddress();
Address addr = p2->getAddress();
const RegExp e = addr.getRegExp();
evexLeg(r, e.getBase(), e.getIndex(), d, type, sc);
writeCode(type, d, code);
opAddr(addr, r.getIdx(), immSize);
addr.immSize = immSize;
opAddr(addr, r.getIdx());
} else {
evexLeg(static_cast<const Reg&>(op2), static_cast<const Reg&>(op1), Reg(), d, type, sc);
writeCode(type, d, code);
Expand All @@ -2220,13 +2234,18 @@ class CodeGenerator : public CodeArray {
const Reg r(ext, Operand::REG, opBit);
if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
if (op.isMEM()) {
opMR(op.getAddress(), r, type, code, immSize);
opMR(op.getAddress(immSize), r, type, code);
} else if (op.isREG(bit)) {
opRR(r, op.getReg().changeBit(opBit), type, code);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
}
}
// Shared body of the generated set<cc> functions.
// op  : destination operand
// ext : value OR-ed into the opcode byte (the generator passes p->ext)
// opROO is tried first with T_APX|T_ZU|T_F2 and opcode 0x40|ext; when it
// returns true nothing more is emitted. Otherwise opRext emits the
// T_0F form with opcode 0x90|ext for an 8-bit operand.
void opSetCC(const Operand& op, int ext)
{
	const bool handled = opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | ext);
	if (!handled) {
		opRext(op, 8, 0, T_0F, 0x90 | ext);
	}
}
void opShift(const Operand& op, int imm, int ext, const Reg *d = 0)
{
if (d == 0) verifyMemHasSize(op);
Expand All @@ -2246,7 +2265,7 @@ class CodeGenerator : public CodeArray {
void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0)
{
if (op.isMEM()) {
opMR(op.getAddress(), r, type, code, immSize);
opMR(op.getAddress(immSize), r, type, code);
} else if (condR) {
opRR(r, op.getReg(), type, code);
} else {
Expand Down Expand Up @@ -2431,7 +2450,7 @@ class CodeGenerator : public CodeArray {
void opVex(const Reg& r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE)
{
if (op2.isMEM()) {
const Address& addr = op2.getAddress();
Address addr = op2.getAddress();
const RegExp& regExp = addr.getRegExp();
const Reg& base = regExp.getBase();
const Reg& index = regExp.getIndex();
Expand All @@ -2450,7 +2469,10 @@ class CodeGenerator : public CodeArray {
} else {
vex(r, base, p1, type, code, index.isExtIdx());
}
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
if (type & T_VSIB) addr.permitVsib = true;
if (disp8N) addr.disp8N = disp8N;
if (imm8 != NONE) addr.immSize = 1;
opAddr(addr, r.getIdx());
} else {
const Reg& base = op2.getReg();
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
Expand Down Expand Up @@ -2945,7 +2967,7 @@ class CodeGenerator : public CodeArray {
if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
immSize = 4;
}
opMR(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0, 0xC6, immSize);
opMR(op.getAddress(immSize), Reg(0, Operand::REG, op.getBit()), 0, 0xC6);
db(static_cast<uint32_t>(imm), immSize);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
Expand Down
Loading

0 comments on commit 2ce465b

Please sign in to comment.