Skip to content

Commit

Permalink
wazevo(arm64): relocations for large conditional branches (#1873)
Browse files Browse the repository at this point in the history
Signed-off-by: Takeshi Yoneda <[email protected]>
  • Loading branch information
mathetake authored Dec 14, 2023
1 parent 656d872 commit a6fea91
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 25 deletions.
6 changes: 5 additions & 1 deletion internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ func (i *instruction) brLabel() label {
}

// brOffsetResolve (shown below together with its pre-rename signature, brOffsetResolved) is called
// when the target label is resolved. offset is the distance in bytes from this branch to its target;
// it is stored in u2.
func (i *instruction) brOffsetResolved(offset int64) {
func (i *instruction) brOffsetResolve(offset int64) {
i.u2 = uint64(offset)
i.u3 = 1 // indicate that the offset is resolved, for debugging.
}
Expand All @@ -666,6 +666,10 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
}
}

// setCondBrTargets overwrites the target label of this conditional branch. The label is kept in u2,
// which is the same field condBrLabel reads back.
func (i *instruction) setCondBrTargets(target label) {
i.u2 = uint64(target)
}

// condBrLabel returns the target label of this conditional branch, decoded from u2.
func (i *instruction) condBrLabel() label {
return label(i.u2)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,7 @@ func TestInstruction_encode(t *testing.T) {
}},
{want: "20000014", setup: func(i *instruction) {
i.asBr(dummyLabel)
i.brOffsetResolved(0x80)
i.brOffsetResolve(0x80)
}},
{want: "01040034", setup: func(i *instruction) {
i.asCondBr(registerAsRegZeroCond(x1VReg), dummyLabel, false)
Expand Down
125 changes: 102 additions & 23 deletions internal/engine/wazevo/backend/isa/arm64/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ type (
addends64 queue[regalloc.VReg]
unresolvedAddressModes []*instruction

// condBrRelocs holds the conditional branches which need offset relocation.
condBrRelocs []condBrReloc

// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
// During the execution of the function, the stack looks like:
//
Expand Down Expand Up @@ -98,10 +101,20 @@ type (

// labelPosition represents the regions of the generated code which the label represents.
labelPosition struct {
// l is the label this position belongs to; it is assigned when the position is allocated.
l label
// begin and end are the first and the last instruction (inclusive) of the region.
begin, end *instruction
// NOTE(review): binarySize is presumably the encoded size of the region in bytes; its writer is
// not visible in this view, confirm before relying on it.
binarySize int64
// binaryOffset is the offset in the binary at which the region starts. It is computed by
// ResolveRelativeAddresses.
binaryOffset int64
}

// condBrReloc records a conditional branch whose offset is not resolved yet, together with the
// information needed to route it through a trampoline when its target is out of range.
condBrReloc struct {
// cbr is the conditional branch instruction itself.
cbr *instruction
// currentLabelPos is the labelPosition within which condBr is defined.
currentLabelPos *labelPosition
// nextLabel is the label (not the labelPosition) of the block that follows currentLabelPos in
// orderedBlockLabels. It is left as the zero label when currentLabelPos is the last block.
nextLabel label
// offset is the binary offset of cbr.
offset int64
}
)

const (
Expand Down Expand Up @@ -205,7 +218,7 @@ func (m *machine) StartBlock(blk ssa.BasicBlock) {

labelPos, ok := m.labelPositions[l]
if !ok {
labelPos = m.allocateLabelPosition()
labelPos = m.allocateLabelPosition(l)
m.labelPositions[l] = labelPos
}
m.orderedBlockLabels = append(m.orderedBlockLabels, labelPos)
Expand All @@ -231,18 +244,24 @@ func (m *machine) insert(i *instruction) {
}

// insertBrTargetLabel creates a new branch target via allocateBrTarget, passes its nop to m.insert,
// and returns the label that identifies the target.
// NOTE(review): the body below shows both sides of the diff. The separate allocateLabel /
// allocateInstr / asNop0WithLabel lines and the argument-less allocateLabelPosition call are the
// code that the allocateBrTarget call replaces.
func (m *machine) insertBrTargetLabel() label {
l := m.allocateLabel()
nop := m.allocateInstr()
nop.asNop0WithLabel(l)
nop, l := m.allocateBrTarget()
m.insert(nop)
pos := m.allocateLabelPosition()
return l
}

// allocateBrTarget allocates a fresh label l and a nop0 instruction carrying it, and registers in
// m.labelPositions a labelPosition for l whose begin and end are both that nop. The nop is returned
// without being linked or inserted anywhere, so the caller decides where it is placed.
// NOTE(review): `return l` and the bare `return` are the two sides of the diff; with the two named
// results of this signature only the bare `return` is valid.
func (m *machine) allocateBrTarget() (nop *instruction, l label) {
l = m.allocateLabel()
nop = m.allocateInstr()
nop.asNop0WithLabel(l)
pos := m.allocateLabelPosition(l)
pos.begin, pos.end = nop, nop
m.labelPositions[l] = pos
return l
return
}

// allocateLabelPosition takes a labelPosition from labelPositionPool and records la as the label
// it belongs to.
// NOTE(review): the parameterless signature on the first line is the pre-change side of the diff.
func (m *machine) allocateLabelPosition() *labelPosition {
func (m *machine) allocateLabelPosition(la label) *labelPosition {
l := m.labelPositionPool.Allocate()
l.l = la
return l
}

Expand Down Expand Up @@ -344,17 +363,34 @@ func (m *machine) ResolveRelativeAddresses() {
}
}

// Reuse the slice to gather the unresolved conditional branches.
cbrs := m.condBrRelocs[:0]

// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
var offset int64
for _, pos := range m.orderedBlockLabels {
for i, pos := range m.orderedBlockLabels {
pos.binaryOffset = offset
var size int64
for cur := pos.begin; ; cur = cur.next {
if cur.kind == nop0 {
switch cur.kind {
case nop0:
l := cur.nop0Label()
if pos, ok := m.labelPositions[l]; ok {
pos.binaryOffset = offset + size
}
case condBr:
if !cur.condBrOffsetResolved() {
var nextLabel label
if i < len(m.orderedBlockLabels)-1 {
// Note: this is only used when the block ends with fallthrough,
// therefore can be safely assumed that the next block exists when it's needed.
nextLabel = m.orderedBlockLabels[i+1].l
}
cbrs = append(cbrs, condBrReloc{
cbr: cur, currentLabelPos: pos, offset: offset + size,
nextLabel: nextLabel,
})
}
}
size += cur.size()
if cur == pos.end {
Expand All @@ -365,36 +401,50 @@ func (m *machine) ResolveRelativeAddresses() {
offset += size
}

// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
var needRerun bool
for i := range cbrs {
reloc := &cbrs[i]
cbr := reloc.cbr
offset := reloc.offset

target := cbr.condBrLabel()
offsetOfTarget := m.labelPositions[target].binaryOffset
diff := offsetOfTarget - offset
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
// In this case the target is outside the signed 19-bit range of the conditional branch. We place the trampoline
// instructions at the end of the current block, and make the conditional branch jump to them instead.
m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
// Then, we need to call this function again to fix up the label offsets,
// as they have changed after the trampoline is inserted.
needRerun = true
}
}
if needRerun {
m.ResolveRelativeAddresses()
return
}

var currentOffset int64
for cur := m.rootInstr; cur != nil; cur = cur.next {
switch cur.kind {
case br:
target := cur.brLabel()
offsetOfTarget := m.labelPositions[target].binaryOffset
diff := offsetOfTarget - currentOffset
if diff%4 != 0 {
panic("BUG: offsets between b and the target must be a multiple of 4")
}
divided := diff >> 2
if divided < minSignedInt26 || divided > maxSignedInt26 {
// This means the currently compiled single function is extremely large.
panic("BUG: implement branch relocation for large unconditional branch larger than 26-bit range")
panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
}
cur.brOffsetResolved(diff)
cur.brOffsetResolve(diff)
case condBr:
if !cur.condBrOffsetResolved() {
target := cur.condBrLabel()
offsetOfTarget := m.labelPositions[target].binaryOffset
diff := offsetOfTarget - currentOffset
if diff%4 != 0 {
panic("BUG: offsets between b and the target must be a multiple of 4")
}
divided := diff >> 2
if divided < minSignedInt19 || divided > maxSignedInt19 {
// This case we can insert "trampoline block" in the middle and jump to it.
// After that, we need to re-calculate the offset of labels after the trampoline block by
// recursively calling this function.
panic("TODO: implement branch relocation for large conditional branch larger than 19-bit range")
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
}
cur.condBrOffsetResolve(diff)
}
Expand All @@ -421,6 +471,35 @@ const (
minSignedInt19 int64 = -(1 << 19)
)

// insertConditionalJumpTrampoline redirects cbr, a conditional branch whose target is out of range,
// through a trampoline appended to the end of currentBlk. After the call the block looks like:
//
//	<original instructions of currentBlk>
//	b nextLabel        ; only when the block did not already end with an unconditional branch
//	trampoline:
//	b <original target of cbr>
//
// cbr is retargeted to the trampoline label and currentBlk.end is moved to the trailing unconditional
// branch. nextLabel is the label of the block following currentBlk; it is only used for the skip branch.
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
	tail := currentBlk.end
	farTarget := cbr.condBrLabel()
	afterBlock := tail.next

	// When the block already ends with an unconditional branch, nothing falls through into the
	// trampoline, so it can be placed right after that branch. Otherwise, a "skip" branch to the
	// next block is needed so that the fallthrough path jumps over the trampoline instructions.
	if tail.kind != br {
		skip := m.allocateInstr()
		skip.asBr(nextLabel)
		tail = linkInstr(tail, skip)
	}

	// The trampoline begins with a labelled nop, which becomes the new target of cbr.
	landing, landingLabel := m.allocateBrTarget()
	cbr.setCondBrTargets(landingLabel)
	tail = linkInstr(tail, landing)

	// The trampoline itself is an unconditional branch to the original target, which should be
	// encodable as the 26-bit offset is enough for any practical application.
	hop := m.allocateInstr()
	hop.asBr(farTarget)
	tail = linkInstr(tail, hop)

	// The block now ends at the trampoline's branch; re-attach whatever followed the block before.
	currentBlk.end = tail
	linkInstr(tail, afterBlock)
}

func (m *machine) getOrAllocateSSABlockLabel(blk ssa.BasicBlock) label {
if blk.ReturnBlock() {
return returnLabel
Expand Down
97 changes: 97 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,100 @@ func TestMachine_getVRegSpillSlotOffsetFromSP(t *testing.T) {
_, ok = m.spillSlots[id]
require.True(t, ok)
}

// TestMachine_insertConditionalJumpTrampoline builds a two-block instruction list by hand, runs
// insertConditionalJumpTrampoline on the conditional branch of the first block, and compares the
// formatted machine code before and after. Two shapes are covered: the first block ending with an
// unconditional branch, and the first block ending with some other instruction (udf).
func TestMachine_insertConditionalJumpTrampoline(t *testing.T) {
	const (
		originLabel     = 100
		originLabelNext = 200
		targetLabel     = 12345
	)

	cases := []struct {
		brAtEnd             bool
		expBefore, expAfter string
	}{
		{
			brAtEnd: true,
			expBefore: `
L100:
b.eq L12345
b L888888888
L200:
exit_sequence x0
`,
			expAfter: `
L100:
b.eq L10000000
b L888888888
L10000000:
b L12345
L200:
exit_sequence x0
`,
		},
		{
			brAtEnd: false,
			expBefore: `
L100:
b.eq L12345
udf
L200:
exit_sequence x0
`,
			expAfter: `
L100:
b.eq L10000000
udf
b L200
L10000000:
b L12345
L200:
exit_sequence x0
`,
		},
	}

	for _, tc := range cases {
		name := "brNotAtEnd"
		if tc.brAtEnd {
			name = "brAtEnd"
		}

		t.Run(name, func(t *testing.T) {
			m := NewBackend().(*machine)

			// First block: the conditional branch under test, followed by the block's last instruction.
			cbr := m.allocateInstr()
			cbr.asCondBr(eq.asCond(), targetLabel, false)

			last := m.allocateInstr()
			if tc.brAtEnd {
				last.asBr(888888888)
			} else {
				last.asUDF()
			}

			// Second block: a single instruction that originally follows the first block.
			following := m.allocateInstr()
			following.asExitSequence(x0VReg)

			originPos := m.allocateLabelPosition(originLabel)
			originPos.begin = cbr
			originPos.end = linkInstr(cbr, last)
			nextPos := m.allocateLabelPosition(originLabelNext)
			nextPos.begin = following
			linkInstr(originPos.end, following)

			m.labelPositions[originLabel] = originPos
			m.labelPositions[originLabelNext] = nextPos

			m.rootInstr = cbr
			require.Equal(t, tc.expBefore, m.Format())

			// The expected output names the trampoline label L10000000, so the label counter is
			// seeded just below it.
			m.nextLabel = 9999999
			m.insertConditionalJumpTrampoline(cbr, originPos, originLabelNext)

			require.Equal(t, tc.expAfter, m.Format())

			// The first block must now end with the unconditional branch to the original target.
			require.Equal(t, "b L12345", originPos.end.String())
		})
	}
}

0 comments on commit a6fea91

Please sign in to comment.