Skip to content

Commit

Permalink
pulley: Implement SIMD splat instruction
Browse files Browse the repository at this point in the history
Gets a few spec tests and CLIF tests passing

cc bytecodealliance#9783
  • Loading branch information
alexcrichton committed Dec 16, 2024
1 parent b10dc29 commit 035c862
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 6 deletions.
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -771,3 +771,12 @@

(rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a)))
(rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a))

;;;; Rules for `split` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One rule per vector result type. The result type alone selects the
;; constructor: integer-lane vectors lower to `pulley_vsplatx{8,16,32,64}` and
;; float-lane vectors lower to `pulley_vsplatf{32,64}`, each applied directly
;; to the scalar operand `a`.
(rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a))
(rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a))
(rule (lower (has_type $I32X4 (splat a))) (pulley_vsplatx32 a))
(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a))
(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a))
(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a))
4 changes: 4 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-splat.clif
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ target x86_64 sse41 has_avx has_avx2
set enable_multi_ret_implicit_sret
target riscv64 has_v
target riscv64 has_v has_c has_zcb
target pulley32
target pulley32be
target pulley64
target pulley64be

function %splat_i8x16(i8) -> i8x16 {
block0(v0: i8):
Expand Down
2 changes: 0 additions & 2 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,6 @@ impl WastTest {
// features in Pulley are implemented.
if config.compiler == Compiler::CraneliftPulley {
let unsupported = [
"misc_testsuite/int-to-float-splat.wast",
"misc_testsuite/issue6562.wast",
"misc_testsuite/memory64/simd.wast",
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/canonicalize-nan.wast",
Expand Down
44 changes: 40 additions & 4 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2551,7 +2551,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i8x16();
let b = self.state[operands.src2].get_i8x16();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i8x16(a);
ControlFlow::Continue(())
Expand All @@ -2561,7 +2561,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i16x8();
let b = self.state[operands.src2].get_i16x8();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i16x8(a);
ControlFlow::Continue(())
Expand All @@ -2571,7 +2571,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i32x4();
let b = self.state[operands.src2].get_i32x4();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i32x4(a);
ControlFlow::Continue(())
Expand All @@ -2581,7 +2581,7 @@ impl OpVisitor for Interpreter<'_> {
let mut a = self.state[operands.src1].get_i64x2();
let b = self.state[operands.src2].get_i64x2();
for (a, b) in a.iter_mut().zip(b) {
*a += b;
*a = a.wrapping_add(b);
}
self.state[operands.dst].set_i64x2(a);
ControlFlow::Continue(())
Expand Down Expand Up @@ -2695,6 +2695,42 @@ impl OpVisitor for Interpreter<'_> {
self.state[dst].set_u128(val);
ControlFlow::Continue(())
}

/// Writes the low 8 bits of `src` into all 16 lanes of `dst`.
fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    // The register is read as 32 bits; the cast keeps only the low byte.
    let lane = self.state[src].get_u32() as u8;
    let lanes = [lane; 16];
    self.state[dst].set_u8x16(lanes);
    ControlFlow::Continue(())
}

/// Writes the low 16 bits of `src` into all 8 lanes of `dst`.
fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    // The register is read as 32 bits; the cast keeps only the low half.
    let lane = self.state[src].get_u32() as u16;
    let lanes = [lane; 8];
    self.state[dst].set_u16x8(lanes);
    ControlFlow::Continue(())
}

/// Writes the 32-bit value read from `src` into all 4 lanes of `dst`.
fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_u32();
    let lanes = [lane; 4];
    self.state[dst].set_u32x4(lanes);
    ControlFlow::Continue(())
}

/// Writes the 64-bit value read from `src` into both lanes of `dst`.
fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_u64();
    let lanes = [lane; 2];
    self.state[dst].set_u64x2(lanes);
    ControlFlow::Continue(())
}

/// Writes the `f32` value read from `src` into all 4 lanes of `dst`.
fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_f32();
    let lanes = [lane; 4];
    self.state[dst].set_f32x4(lanes);
    ControlFlow::Continue(())
}

/// Writes the `f64` value read from `src` into both lanes of `dst`.
fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow<Done> {
    let lane = self.state[src].get_f64();
    let lanes = [lane; 2];
    self.state[dst].set_f64x2(lanes);
    ControlFlow::Continue(())
}
}

impl ExtendedOpVisitor for Interpreter<'_> {
Expand Down
13 changes: 13 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,19 @@ macro_rules! for_each_op {
vshri32x4_u = VShrI32x4U { operands: BinaryOperands<VReg, VReg, XReg> };
/// `dst = src1 >> src2` (unsigned)
vshri64x2_u = VShrI64x2U { operands: BinaryOperands<VReg, VReg, XReg> };

/// `dst = splat(low8(src))`
vsplatx8 = VSplatX8 { dst: VReg, src: XReg };
/// `dst = splat(low16(src))`
vsplatx16 = VSplatX16 { dst: VReg, src: XReg };
/// `dst = splat(low32(src))`
vsplatx32 = VSplatX32 { dst: VReg, src: XReg };
/// `dst = splat(src)`
vsplatx64 = VSplatX64 { dst: VReg, src: XReg };
/// `dst = splat(low32(src))`
vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
/// `dst = splat(src)`
vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
}
};
}
Expand Down

0 comments on commit 035c862

Please sign in to comment.