From 035c862ba86d5ca51970769a786265f6b69159dd Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 11 Dec 2024 21:46:18 -0800 Subject: [PATCH 1/2] pulley: Implement SIMD `splat` instruction Gets a few spec tests and CLIF tests passing cc #9783 --- .../codegen/src/isa/pulley_shared/lower.isle | 9 ++++ .../filetests/runtests/simd-splat.clif | 4 ++ crates/wast-util/src/lib.rs | 2 - pulley/src/interp.rs | 44 +++++++++++++++++-- pulley/src/lib.rs | 13 ++++++ 5 files changed, 66 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 612d181a148f..625381a2728b 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -771,3 +771,12 @@ (rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a))) (rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a)) + +;;;; Rules for `split` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a)) +(rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a)) +(rule (lower (has_type $I32X4 (splat a))) (pulley_vsplatx32 a)) +(rule (lower (has_type $I64X2 (splat a))) (pulley_vsplatx64 a)) +(rule (lower (has_type $F32X4 (splat a))) (pulley_vsplatf32 a)) +(rule (lower (has_type $F64X2 (splat a))) (pulley_vsplatf64 a)) diff --git a/cranelift/filetests/filetests/runtests/simd-splat.clif b/cranelift/filetests/filetests/runtests/simd-splat.clif index 71840ac14b64..f1ad9224aec4 100644 --- a/cranelift/filetests/filetests/runtests/simd-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-splat.clif @@ -10,6 +10,10 @@ target x86_64 sse41 has_avx has_avx2 set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %splat_i8x16(i8) -> i8x16 { block0(v0: i8): diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index a5b4d1fcbd8a..d94057c2e0cc 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -401,8 +401,6 @@ impl WastTest { // features in Pulley are implemented. if config.compiler == Compiler::CraneliftPulley { let unsupported = [ - "misc_testsuite/int-to-float-splat.wast", - "misc_testsuite/issue6562.wast", "misc_testsuite/memory64/simd.wast", "misc_testsuite/simd/almost-extmul.wast", "misc_testsuite/simd/canonicalize-nan.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index f918c5782e28..6072894496e7 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -2551,7 +2551,7 @@ impl OpVisitor for Interpreter<'_> { let mut a = self.state[operands.src1].get_i8x16(); let b = self.state[operands.src2].get_i8x16(); for (a, b) in a.iter_mut().zip(b) { - *a += b; + *a = a.wrapping_add(b); } self.state[operands.dst].set_i8x16(a); ControlFlow::Continue(()) @@ -2561,7 +2561,7 @@ impl OpVisitor for Interpreter<'_> { let mut a = self.state[operands.src1].get_i16x8(); let b = self.state[operands.src2].get_i16x8(); for (a, b) in a.iter_mut().zip(b) { - *a += b; + *a = a.wrapping_add(b); } self.state[operands.dst].set_i16x8(a); ControlFlow::Continue(()) @@ -2571,7 +2571,7 @@ impl OpVisitor for Interpreter<'_> { let mut a = self.state[operands.src1].get_i32x4(); let b = self.state[operands.src2].get_i32x4(); for (a, b) in a.iter_mut().zip(b) { - *a += b; + *a = a.wrapping_add(b); } self.state[operands.dst].set_i32x4(a); ControlFlow::Continue(()) @@ -2581,7 +2581,7 @@ impl OpVisitor for Interpreter<'_> { let mut a = self.state[operands.src1].get_i64x2(); let b = self.state[operands.src2].get_i64x2(); for (a, b) in a.iter_mut().zip(b) { - *a += b; + *a = a.wrapping_add(b); } self.state[operands.dst].set_i64x2(a); ControlFlow::Continue(()) @@ -2695,6 +2695,42 @@ impl OpVisitor for Interpreter<'_> { self.state[dst].set_u128(val); ControlFlow::Continue(()) } + + fn vsplatx8(&mut self, dst: VReg, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u8; + self.state[dst].set_u8x16([val; 16]); + ControlFlow::Continue(()) + } + + fn vsplatx16(&mut self, dst: VReg, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32() as u16; + self.state[dst].set_u16x8([val; 8]); + ControlFlow::Continue(()) + } + + fn vsplatx32(&mut self, dst: VReg, src: XReg) -> ControlFlow { + let val = self.state[src].get_u32(); + self.state[dst].set_u32x4([val; 4]); + ControlFlow::Continue(()) + } + + fn vsplatx64(&mut self, dst: VReg, src: XReg) -> ControlFlow { + let val = self.state[src].get_u64(); + self.state[dst].set_u64x2([val; 2]); + ControlFlow::Continue(()) + } + + fn vsplatf32(&mut self, dst: VReg, src: FReg) -> ControlFlow { + let val = self.state[src].get_f32(); + self.state[dst].set_f32x4([val; 4]); + ControlFlow::Continue(()) + } + + fn vsplatf64(&mut self, dst: VReg, src: FReg) -> ControlFlow { + let val = self.state[src].get_f64(); + self.state[dst].set_f64x2([val; 2]); + ControlFlow::Continue(()) + } } impl ExtendedOpVisitor for Interpreter<'_> { diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index fe6fdc45e724..3c436d3e0852 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -609,6 +609,19 @@ macro_rules! for_each_op { vshri32x4_u = VShrI32x4U { operands: BinaryOperands }; /// `dst = src1 >> src2` (unsigned) vshri64x2_u = VShrI64x2U { operands: BinaryOperands }; + + /// `dst = splat(low8(src))` + vsplatx8 = VSplatX8 { dst: VReg, src: XReg }; + /// `dst = splat(low16(src))` + vsplatx16 = VSplatX16 { dst: VReg, src: XReg }; + /// `dst = splat(low32(src))` + vsplatx32 = VSplatX32 { dst: VReg, src: XReg }; + /// `dst = splat(src)` + vsplatx64 = VSplatX64 { dst: VReg, src: XReg }; + /// `dst = splat(low32(src))` + vsplatf32 = VSplatF32 { dst: VReg, src: FReg }; + /// `dst = splat(src)` + vsplatf64 = VSplatF64 { dst: VReg, src: FReg }; } }; } From c6fe4a6d6b5510599a4f1cad562f24efec24d1ed Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 16 Dec 2024 16:26:36 -0800 Subject: [PATCH 2/2] Fix typo --- cranelift/codegen/src/isa/pulley_shared/lower.isle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 625381a2728b..12cc6fe5067d 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -772,7 +772,7 @@ (rule 0 (lower (has_type (fits_in_32 _) (iabs a))) (pulley_xabs32 (sext32 a))) (rule 1 (lower (has_type $I64 (iabs a))) (pulley_xabs64 a)) -;;;; Rules for `split` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8X16 (splat a))) (pulley_vsplatx8 a)) (rule (lower (has_type $I16X8 (splat a))) (pulley_vsplatx16 a))