Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wazevo(arm64): relocations for large conditional branches #1873

Merged
merged 3 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ func (i *instruction) brLabel() label {
}

// brOffsetResolve is called when the target label is resolved.
func (i *instruction) brOffsetResolved(offset int64) {
func (i *instruction) brOffsetResolve(offset int64) {
i.u2 = uint64(offset)
i.u3 = 1 // indicate that the offset is resolved, for debugging.
}
Expand All @@ -666,6 +666,10 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
}
}

// setCondBrTargets overwrites the target label of this instruction by storing it in u2,
// which is the same field that condBrLabel reads back.
func (i *instruction) setCondBrTargets(target label) {
i.u2 = uint64(target)
}

// condBrLabel returns the content of u2 interpreted as a label, i.e. the branch target of this instruction.
// NOTE(review): brOffsetResolve reuses u2 for the resolved offset of an unconditional branch; presumably
// the same applies to conditional branches, in which case this is only meaningful before resolution — confirm.
func (i *instruction) condBrLabel() label {
return label(i.u2)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,7 @@ func TestInstruction_encode(t *testing.T) {
}},
{want: "20000014", setup: func(i *instruction) {
i.asBr(dummyLabel)
i.brOffsetResolved(0x80)
i.brOffsetResolve(0x80)
}},
{want: "01040034", setup: func(i *instruction) {
i.asCondBr(registerAsRegZeroCond(x1VReg), dummyLabel, false)
Expand Down
125 changes: 102 additions & 23 deletions internal/engine/wazevo/backend/isa/arm64/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ type (
addends64 queue[regalloc.VReg]
unresolvedAddressModes []*instruction

// condBrRelocs holds the conditional branches which need offset relocation.
condBrRelocs []condBrReloc

// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
// During the execution of the function, the stack looks like:
//
Expand Down Expand Up @@ -98,10 +101,20 @@ type (

// labelPosition represents the region of the generated code that a label covers.
labelPosition struct {
// l is the label which this position was allocated for.
l label
// begin and end are the first and the last (inclusive) instructions of the region.
begin, end *instruction
// binarySize is presumably the encoded size of the region; its assignment is not visible here — TODO confirm.
binarySize int64
// binaryOffset is the offset of the beginning of the region, computed in ResolveRelativeAddresses
// by accumulating the sizes of the preceding instructions.
binaryOffset int64
}

// condBrReloc records a conditional branch whose offset is not yet resolved, together with
// the context needed to redirect it through a trampoline if its target turns out to be out of range.
condBrReloc struct {
// cbr is the conditional branch instruction in question.
cbr *instruction
// currentLabelPos is the labelPosition within which cbr is defined.
currentLabelPos *labelPosition
// nextLabel is the label of the block laid out right after currentLabelPos,
// or the zero value when currentLabelPos is the last block.
nextLabel label
// offset is the binary offset of cbr at the time the block layout was computed.
offset int64
}
)

const (
Expand Down Expand Up @@ -205,7 +218,7 @@ func (m *machine) StartBlock(blk ssa.BasicBlock) {

labelPos, ok := m.labelPositions[l]
if !ok {
labelPos = m.allocateLabelPosition()
labelPos = m.allocateLabelPosition(l)
m.labelPositions[l] = labelPos
}
m.orderedBlockLabels = append(m.orderedBlockLabels, labelPos)
Expand All @@ -231,18 +244,24 @@ func (m *machine) insert(i *instruction) {
}

func (m *machine) insertBrTargetLabel() label {
l := m.allocateLabel()
nop := m.allocateInstr()
nop.asNop0WithLabel(l)
nop, l := m.allocateBrTarget()
m.insert(nop)
pos := m.allocateLabelPosition()
return l
}

func (m *machine) allocateBrTarget() (nop *instruction, l label) {
l = m.allocateLabel()
nop = m.allocateInstr()
nop.asNop0WithLabel(l)
pos := m.allocateLabelPosition(l)
pos.begin, pos.end = nop, nop
m.labelPositions[l] = pos
return l
return
}

func (m *machine) allocateLabelPosition() *labelPosition {
func (m *machine) allocateLabelPosition(la label) *labelPosition {
l := m.labelPositionPool.Allocate()
l.l = la
return l
}

Expand Down Expand Up @@ -344,17 +363,34 @@ func (m *machine) ResolveRelativeAddresses() {
}
}

// Reuse the slice to gather the unresolved conditional branches.
cbrs := m.condBrRelocs[:0]

// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
var offset int64
for _, pos := range m.orderedBlockLabels {
for i, pos := range m.orderedBlockLabels {
pos.binaryOffset = offset
var size int64
for cur := pos.begin; ; cur = cur.next {
if cur.kind == nop0 {
switch cur.kind {
case nop0:
l := cur.nop0Label()
if pos, ok := m.labelPositions[l]; ok {
pos.binaryOffset = offset + size
}
case condBr:
if !cur.condBrOffsetResolved() {
var nextLabel label
if i < len(m.orderedBlockLabels)-1 {
// Note: this is only used when the block ends with fallthrough,
// therefore can be safely assumed that the next block exists when it's needed.
nextLabel = m.orderedBlockLabels[i+1].l
}
cbrs = append(cbrs, condBrReloc{
cbr: cur, currentLabelPos: pos, offset: offset + size,
nextLabel: nextLabel,
})
}
}
size += cur.size()
if cur == pos.end {
Expand All @@ -365,36 +401,50 @@ func (m *machine) ResolveRelativeAddresses() {
offset += size
}

// Before resolving any offsets, we need to check if all the conditional branches can be resolved.
var needRerun bool
for i := range cbrs {
reloc := &cbrs[i]
cbr := reloc.cbr
offset := reloc.offset

target := cbr.condBrLabel()
offsetOfTarget := m.labelPositions[target].binaryOffset
diff := offsetOfTarget - offset
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
// In this case, the target is too far away to be encoded in the conditional branch's 19-bit offset.
// We place the trampoline instructions at the end of the current block, and make the conditional branch jump to it.
m.insertConditionalJumpTrampoline(cbr, reloc.currentLabelPos, reloc.nextLabel)
// Then, we need to re-run this function to fix up the label offsets,
// as they have changed after the trampoline is inserted.
needRerun = true
}
}
if needRerun {
m.ResolveRelativeAddresses()
return
}

var currentOffset int64
for cur := m.rootInstr; cur != nil; cur = cur.next {
switch cur.kind {
case br:
target := cur.brLabel()
offsetOfTarget := m.labelPositions[target].binaryOffset
diff := offsetOfTarget - currentOffset
if diff%4 != 0 {
panic("BUG: offsets between b and the target must be a multiple of 4")
}
divided := diff >> 2
if divided < minSignedInt26 || divided > maxSignedInt26 {
// This means the currently compiled single function is extremely large.
panic("BUG: implement branch relocation for large unconditional branch larger than 26-bit range")
panic("too large function that requires branch relocation of large unconditional branch larger than 26-bit range")
}
cur.brOffsetResolved(diff)
cur.brOffsetResolve(diff)
case condBr:
if !cur.condBrOffsetResolved() {
target := cur.condBrLabel()
offsetOfTarget := m.labelPositions[target].binaryOffset
diff := offsetOfTarget - currentOffset
if diff%4 != 0 {
panic("BUG: offsets between b and the target must be a multiple of 4")
}
divided := diff >> 2
if divided < minSignedInt19 || divided > maxSignedInt19 {
// This case we can insert "trampoline block" in the middle and jump to it.
// After that, we need to re-calculate the offset of labels after the trampoline block by
// recursively calling this function.
panic("TODO: implement branch relocation for large conditional branch larger than 19-bit range")
if divided := diff >> 2; divided < minSignedInt19 || divided > maxSignedInt19 {
panic("BUG: branch relocation for large conditional branch larger than 19-bit range must be handled properly")
}
cur.condBrOffsetResolve(diff)
}
Expand All @@ -421,6 +471,35 @@ const (
minSignedInt19 int64 = -(1 << 19)
)

// insertConditionalJumpTrampoline redirects the conditional branch cbr to a trampoline appended
// at the end of currentBlk. The trampoline consists of a freshly allocated branch-target label
// followed by an unconditional branch to the original target of cbr.
//
//   - cbr is the conditional branch to be retargeted.
//   - currentBlk is the labelPosition containing cbr; its end is moved to the trampoline's branch.
//   - nextLabel is the target of the "skip" branch that is inserted when currentBlk does not
//     already end with an unconditional branch.
func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *labelPosition, nextLabel label) {
	tail := currentBlk.end
	originalTarget := cbr.condBrLabel()
	afterBlock := tail.next

	if tail.kind != br {
		// If the current block ends with an unconditional branch, we can just insert the trampoline after it.
		// Otherwise, execution could fall into the trampoline, so we insert a "skip" branch
		// to nextLabel in front of it.
		skip := m.allocateInstr()
		skip.asBr(nextLabel)
		tail = linkInstr(tail, skip)
	}

	// Point the conditional branch at a new label placed at the head of the trampoline.
	trampolineHead, trampolineLabel := m.allocateBrTarget()
	cbr.setCondBrTargets(trampolineLabel)
	tail = linkInstr(tail, trampolineHead)

	// Then insert the unconditional branch to the original target, which should be encodable
	// as the 26-bit offset should be enough for any practical application.
	jump := m.allocateInstr()
	jump.asBr(originalTarget)
	tail = linkInstr(tail, jump)

	// Update the end of the current block, and reconnect whatever followed it originally.
	currentBlk.end = tail
	linkInstr(tail, afterBlock)
}

func (m *machine) getOrAllocateSSABlockLabel(blk ssa.BasicBlock) label {
if blk.ReturnBlock() {
return returnLabel
Expand Down
97 changes: 97 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,100 @@ func TestMachine_getVRegSpillSlotOffsetFromSP(t *testing.T) {
_, ok = m.spillSlots[id]
require.True(t, ok)
}

// TestMachine_insertConditionalJumpTrampoline checks the instruction sequence produced by
// insertConditionalJumpTrampoline, both when the origin block ends with an unconditional branch
// and when it ends with another instruction (udf).
func TestMachine_insertConditionalJumpTrampoline(t *testing.T) {
for _, tc := range []struct {
// brAtEnd selects whether the origin block ends with an unconditional branch (true) or with udf (false).
brAtEnd bool
// expBefore and expAfter are the expected outputs of m.Format() before and after the trampoline insertion.
expBefore, expAfter string
}{
{
brAtEnd: true,
expBefore: `
L100:
b.eq L12345
b L888888888
L200:
exit_sequence x0
`,
expAfter: `
L100:
b.eq L10000000
b L888888888
L10000000:
b L12345
L200:
exit_sequence x0
`,
},
{
brAtEnd: false,
expBefore: `
L100:
b.eq L12345
udf
L200:
exit_sequence x0
`,
expAfter: `
L100:
b.eq L10000000
udf
b L200
L10000000:
b L12345
L200:
exit_sequence x0
`,
},
} {
var name string
if tc.brAtEnd {
name = "brAtEnd"
} else {
name = "brNotAtEnd"
}

t.Run(name, func(t *testing.T) {
m := NewBackend().(*machine)
const (
originLabel = 100
originLabelNext = 200
targetLabel = 12345
)

// The conditional branch under test, targeting targetLabel.
cbr := m.allocateInstr()
cbr.asCondBr(eq.asCond(), targetLabel, false)

// The last instruction of the origin block: either an unconditional branch or udf.
end := m.allocateInstr()
if tc.brAtEnd {
end.asBr(888888888)
} else {
end.asUDF()
}

// The first instruction of the block following the origin block.
originalEndNext := m.allocateInstr()
originalEndNext.asExitSequence(x0VReg)

// Lay out [cbr, end] under originLabel, followed by originalEndNext under originLabelNext.
originLabelPos := m.allocateLabelPosition(originLabel)
originLabelPos.begin = cbr
originLabelPos.end = linkInstr(cbr, end)
originNextLabelPos := m.allocateLabelPosition(originLabelNext)
originNextLabelPos.begin = originalEndNext
linkInstr(originLabelPos.end, originalEndNext)

m.labelPositions[originLabel] = originLabelPos
m.labelPositions[originLabelNext] = originNextLabelPos

m.rootInstr = cbr
require.Equal(t, tc.expBefore, m.Format())

// Presumably makes the trampoline label be allocated as L10000000, which expAfter expects — confirm against allocateLabel.
m.nextLabel = 9999999
m.insertConditionalJumpTrampoline(cbr, originLabelPos, originLabelNext)

require.Equal(t, tc.expAfter, m.Format())

// The end of the original label position should now be the unconditional jump to the original target.
require.Equal(t, "b L12345", originLabelPos.end.String())
})
}
}
Loading