Skip to content

Commit

Permalink
Cranelift/s390x: do not use one-way conditional branches.
Browse files Browse the repository at this point in the history
This is a followup to #10086, this time removing the one-armed branch
variant for s390x. This branch was only used as the default-target
branch in the `br_table` lowering. This PR incorporates the branch into
the `JTSequence` pseudo-instruction. Some care is needed to keep the
`ProducesBool` abstraction; it is unwrapped into its `ProducesFlags` and
the `JTSequence` becomes a `ConsumesFlags`, so the compare for the
jump-table bound (for default target) is not part of the pseudoinst.
(This is OK because regalloc-inserted moves never alter flags, by
explicit contract; the same reason allows cmp/branch terminators.)

Along the way I noticed that the comments on `JTSequence` claimed that
`targets` included the default, but this is (no longer?) the case, as
the targets are unwrapped by `jump_table_targets` which peels off the
first (default) separately. Aside from comments, this only affected
pretty-printing; codegen was correct.

With this, we have no more one-armed branches; hence, this fixes #9980.
  • Loading branch information
cfallin committed Jan 23, 2025
1 parent 392c7a9 commit 6225ef6
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 83 deletions.
47 changes: 18 additions & 29 deletions cranelift/codegen/src/isa/s390x/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -942,21 +942,6 @@
(cond Cond)
(trap_code TrapCode))

;; A one-way conditional branch, invisible to the CFG processing; used *only* as part of
;; straight-line sequences in code to be emitted.
;;
;; In more detail:
;; - This branch is lowered to a branch at the machine-code level, but does not end a basic
;; block, and does not create edges in the CFG seen by regalloc.
;; - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
;; lowered from a single CLIF instruction. For example, certain arithmetic operations may
;; use these branches to handle certain conditions, such as overflows, traps, etc.
;;
;; See, e.g., the lowering of `trapif` (conditional trap) for an example.
(OneWayCondBr
(target MachLabel)
(cond Cond))

;; An indirect branch through a register, augmented with set of all
;; possible successors.
(IndirectBr
Expand All @@ -974,7 +959,14 @@
;; Jump-table sequence, as one compound instruction (see note in lower.rs
;; for rationale).
(JTSequence
;; The index into the list of targets.
(ridx Reg)
;; The default target.
(default MachLabel)
;; The condition for taking the default target, based on flags
;; when this instruction executes.
(default_cond Cond)
;; All other targets.
(targets BoxVecMachLabel))

;; Stack probe loop sequence, as one compound instruction.
Expand Down Expand Up @@ -2702,16 +2694,10 @@
(rule (cond_br taken not_taken cond)
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.CondBr taken not_taken cond)))

;; Helper for emitting `MInst.OneWayCondBr` instructions.
(decl oneway_cond_br (MachLabel Cond) ConsumesFlags)
(rule (oneway_cond_br dest cond)
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.OneWayCondBr dest cond)))

;; Helper for emitting `MInst.JTSequence` instructions.
(decl jt_sequence (Reg BoxVecMachLabel) SideEffectNoResult)
(rule (jt_sequence ridx targets)
(SideEffectNoResult.Inst (MInst.JTSequence ridx targets)))

(decl jt_sequence (Reg MachLabel Cond BoxVecMachLabel) ConsumesFlags)
(rule (jt_sequence ridx default default_cond targets)
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.JTSequence ridx default default_cond targets)))

;; Helpers for instruction sequences ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -3324,6 +3310,14 @@
(decl bool (ProducesFlags Cond) ProducesBool)
(rule (bool producer cond) (ProducesBool.ProducesBool producer cond))

;; Get the producer from a ProducesBool.
(decl bool_producer (ProducesBool) ProducesFlags)
(rule (bool_producer (ProducesBool.ProducesBool producer _)) producer)

;; Get the condition from a ProducesBool.
(decl bool_cond (ProducesBool) Cond)
(rule (bool_cond (ProducesBool.ProducesBool _ cond)) cond)

;; Invert boolean condition.
(decl invert_bool (ProducesBool) ProducesBool)
(rule (invert_bool (ProducesBool.ProducesBool producer cond))
Expand Down Expand Up @@ -3358,11 +3352,6 @@
(rule (cond_br_bool (ProducesBool.ProducesBool producer cond) taken not_taken)
(with_flags_side_effect producer (cond_br taken not_taken cond)))

;; Emit a one-way conditional branch based on a boolean condition.
(decl oneway_cond_br_bool (ProducesBool MachLabel) SideEffectNoResult)
(rule (oneway_cond_br_bool (ProducesBool.ProducesBool producer cond) dest)
(with_flags_side_effect producer (oneway_cond_br dest cond)))

;; Emit a conditional trap based on a boolean condition.
(decl trap_if_bool (ProducesBool TrapCode) SideEffectNoResult)
(rule (trap_if_bool (ProducesBool.ProducesBool producer cond) trap_code)
Expand Down
37 changes: 21 additions & 16 deletions cranelift/codegen/src/isa/s390x/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2002,21 +2002,21 @@ impl Inst {
match &inst {
// Replace a CondBreak with a branch to done_label.
&Inst::CondBreak { cond } => {
let inst = Inst::OneWayCondBr {
target: done_label,
cond: *cond,
};
inst.emit_with_alloc_consumer(sink, emit_info, state);
let opcode = 0xc04; // BCRL
sink.use_label_at_offset(
sink.cur_offset(),
done_label,
LabelUse::BranchRIL,
);
put(sink, &enc_ril_c(opcode, cond.bits(), 0));
}
_ => inst.emit_with_alloc_consumer(sink, emit_info, state),
};
}

let inst = Inst::OneWayCondBr {
target: loop_label,
cond,
};
inst.emit(sink, emit_info, state);
let opcode = 0xc04; // BCRL
sink.use_label_at_offset(sink.cur_offset(), loop_label, LabelUse::BranchRIL);
put(sink, &enc_ril_c(opcode, cond.bits(), 0));

// Emit label at the end of the loop.
sink.bind_label(done_label, &mut state.ctrl_plane);
Expand Down Expand Up @@ -3314,11 +3314,6 @@ impl Inst {
sink.add_uncond_branch(uncond_off, uncond_off + 6, not_taken);
put(sink, &enc_ril_c(opcode, 15, 0));
}
&Inst::OneWayCondBr { target, cond } => {
let opcode = 0xc04; // BCRL
sink.use_label_at_offset(sink.cur_offset(), target, LabelUse::BranchRIL);
put(sink, &enc_ril_c(opcode, cond.bits(), 0));
}
&Inst::Nop0 => {}
&Inst::Nop2 => {
put(sink, &enc_e(0x0707));
Expand All @@ -3341,13 +3336,23 @@ impl Inst {
put_with_trap(sink, &enc[0..4], trap_code);
put(sink, &enc[4..6]);
}
&Inst::JTSequence { ridx, ref targets } => {
&Inst::JTSequence {
ridx,
default,
default_cond,
ref targets,
} => {
let table_label = sink.get_label();

// This sequence is *one* instruction in the vcode, and is expanded only here at
// emission time, because we cannot allow the regalloc to insert spills/reloads in
// the middle; we depend on hardcoded PC-rel addressing below.

// Branch to the default target if the given default condition is true.
let opcode = 0xc04; // BCRL
sink.use_label_at_offset(sink.cur_offset(), default, LabelUse::BranchRIL);
put(sink, &enc_ril_c(opcode, default_cond.bits(), 0));

// Set temp register to address of jump table.
let rtmp = writable_spilltmp_reg();
let inst = Inst::LoadAddr {
Expand Down
33 changes: 16 additions & 17 deletions cranelift/codegen/src/isa/s390x/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ impl Inst {
| Inst::Jump { .. }
| Inst::CondBr { .. }
| Inst::TrapIf { .. }
| Inst::OneWayCondBr { .. }
| Inst::IndirectBr { .. }
| Inst::Debugtrap
| Inst::Trap { .. }
Expand Down Expand Up @@ -952,7 +951,7 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor<impl Ope
Inst::IndirectBr { rn, .. } => {
collector.reg_use(rn);
}
Inst::CondBr { .. } | Inst::OneWayCondBr { .. } => {}
Inst::CondBr { .. } => {}
Inst::Nop0 | Inst::Nop2 => {}
Inst::Debugtrap => {}
Inst::Trap { .. } => {}
Expand Down Expand Up @@ -1087,10 +1086,6 @@ impl MachInst for Inst {
&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
&Inst::Jump { .. } => MachTerminator::Uncond,
&Inst::CondBr { .. } => MachTerminator::Cond,
&Inst::OneWayCondBr { .. } => {
// Explicitly invisible to CFG processing.
MachTerminator::None
}
&Inst::IndirectBr { .. } => MachTerminator::Indirect,
&Inst::JTSequence { .. } => MachTerminator::Indirect,
_ => MachTerminator::None,
Expand Down Expand Up @@ -3212,11 +3207,6 @@ impl Inst {
let cond = cond.pretty_print_default();
format!("jg{cond} {taken} ; jg {not_taken}")
}
&Inst::OneWayCondBr { target, cond } => {
let target = target.to_string();
let cond = cond.pretty_print_default();
format!("jg{cond} {target}")
}
&Inst::Debugtrap => ".word 0x0001 # debugtrap".to_string(),
&Inst::Trap { trap_code } => {
format!(".word 0x0000 # trap={trap_code}")
Expand All @@ -3225,25 +3215,34 @@ impl Inst {
let cond = cond.pretty_print_default();
format!("jg{cond} .+2 # trap={trap_code}")
}
&Inst::JTSequence { ridx, ref targets } => {
&Inst::JTSequence {
ridx,
default,
default_cond,
ref targets,
} => {
let ridx = pretty_print_reg(ridx);
let rtmp = pretty_print_reg(writable_spilltmp_reg().to_reg());
// The first entry is the default target, which is not emitted
// into the jump table, so we skip it here. It is only in the
// list so MachTerminator will see the potential target.
let jt_entries: String = targets
.iter()
.skip(1)
.map(|label| format!(" {}", label.to_string()))
.collect();
format!(
concat!(
"jg{} {} ; ",
"larl {}, 14 ; ",
"agf {}, 0({}, {}) ; ",
"br {} ; ",
"jt_entries{}"
),
rtmp, rtmp, rtmp, ridx, rtmp, jt_entries,
default_cond.pretty_print_default(),
default.to_string(),
rtmp,
rtmp,
rtmp,
ridx,
rtmp,
jt_entries,
)
}
&Inst::LoadSymbolReloc {
Expand Down
15 changes: 9 additions & 6 deletions cranelift/codegen/src/isa/s390x/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3729,17 +3729,17 @@
;; list of branch targets per index value.
(rule (lower_branch (br_table val_idx _) (jump_table_targets default targets))
(let ((idx Reg (put_in_reg_zext64 val_idx))
;; Bounds-check the index and branch to default.
;; This is an internal branch that is not a terminator insn.
;; Instead, the default target is listed a potential target
;; in the final JTSequence, which is the block terminator.
;; Bounds-check the index and create a ProducesBool that
;; denotes the "default target" condition.
(cond ProducesBool
(bool (icmpu_uimm32 $I64 idx (jump_table_size targets))
(intcc_as_cond (IntCC.UnsignedGreaterThanOrEqual))))
(_ Unit (emit_side_effect (oneway_cond_br_bool cond default))))
(default_cond_producer ProducesFlags (bool_producer cond))
(default_cond Cond (bool_cond cond)))
;; Scale the index by the element size, and then emit the
;; compound instruction that does:
;;
;; [cond branch to default]
;; larl %r1, <jt-base>
;; agf %r1, 0(%r1, %rScaledIndex)
;; br %r1
Expand All @@ -3751,7 +3751,10 @@
;; PC-rel offset to the jumptable would be incorrect.
;; (The alternative is to introduce a relocation pass
;; for inlined jumptables, which is much worse, IMHO.)
(emit_side_effect (jt_sequence (lshl_imm $I64 idx 2) targets))))
(let ((shifted_idx Reg (lshl_imm $I64 idx 2)))
(emit_side_effect (with_flags_side_effect
default_cond_producer
(jt_sequence shifted_idx default default_cond targets))))))


;;;; Rules for `brif` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down
29 changes: 14 additions & 15 deletions cranelift/filetests/filetests/isa/s390x/jumptable.clif
Original file line number Diff line number Diff line change
Expand Up @@ -29,34 +29,33 @@ block5(v5: i32):
; VCode:
; block0:
; llgfr %r3, %r2
; sllg %r4, %r3, 2
; clgfi %r3, 3
; jghe label4
; sllg %r3, %r3, 2
; larl %r1, 14 ; agf %r1, 0(%r1, %r3) ; br %r1 ; jt_entries label2 label1
; jghe label4 ; larl %r1, 14 ; agf %r1, 0(%r1, %r4) ; br %r1 ; jt_entries label3 label2 label1
; block1:
; lhi %r5, 3
; lhi %r4, 3
; jg label5
; block2:
; lhi %r5, 2
; lhi %r4, 2
; jg label5
; block3:
; lhi %r5, 1
; lhi %r4, 1
; jg label5
; block4:
; lhi %r5, 4
; lhi %r4, 4
; jg label5
; block5:
; ar %r2, %r5
; ar %r2, %r4
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; llgfr %r3, %r2
; sllg %r4, %r3, 2
; clgfi %r3, 3
; jghe 0x4e
; sllg %r3, %r3, 2
; larl %r1, 0x24
; agf %r1, 0(%r3, %r1)
; agf %r1, 0(%r4, %r1)
; br %r1
; .byte 0x00, 0x00
; .byte 0x00, 0x20
Expand All @@ -65,17 +64,17 @@ block5(v5: i32):
; .byte 0x00, 0x00
; .byte 0x00, 0x0c
; block1: ; offset 0x30
; lhi %r5, 3
; lhi %r4, 3
; jg 0x52
; block2: ; offset 0x3a
; lhi %r5, 2
; lhi %r4, 2
; jg 0x52
; block3: ; offset 0x44
; lhi %r5, 1
; lhi %r4, 1
; jg 0x52
; block4: ; offset 0x4e
; lhi %r5, 4
; lhi %r4, 4
; block5: ; offset 0x52
; ar %r2, %r5
; ar %r2, %r4
; br %r14

0 comments on commit 6225ef6

Please sign in to comment.