From 283f186f95696f2edd2940700249734b7a05e8ab Mon Sep 17 00:00:00 2001 From: Robbin Ehn Date: Fri, 15 Dec 2023 09:52:00 +0100 Subject: [PATCH] Index load, other comment --- .../cpu/riscv/.macroAssembler_riscv.cpp.swp | Bin 0 -> 16384 bytes src/hotspot/cpu/riscv/assembler_riscv.hpp | 1 + .../cpu/riscv/macroAssembler_riscv.hpp | 6 ++-- src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 34 +++++++----------- 4 files changed, 17 insertions(+), 24 deletions(-) create mode 100644 src/hotspot/cpu/riscv/.macroAssembler_riscv.cpp.swp diff --git a/src/hotspot/cpu/riscv/.macroAssembler_riscv.cpp.swp b/src/hotspot/cpu/riscv/.macroAssembler_riscv.cpp.swp new file mode 100644 index 0000000000000000000000000000000000000000..04c8ee59ce62594afb07423f61b84b548b406770 GIT binary patch literal 16384 zcmeHOYm6jS6>eB?7kMZvYDf?+vj%%MJ<~nI%bjt--r1R*W_uoV_b^!_wAEF&r)#FV zZc%k>dS^il$+{S$MvaMyi2>0-d<7rz4}VBVOo*aEkoQC6Aqb*?2(q~7cW&M4nXZ}H zU94jO}4$Q@D(=dIJ4+7q- zIoz+RoBgKe9lnJw+f|^e!1@ZDnmT`QKrQtxJ?FCbZJt@bXqV|K&{d$TKv#jT0$l~V z3Un3dD$rHne@+4EWmBJlSI2A5;(-4+{W<>oPyMRt`y#==t$9n||4o8Fr1^P$|3Ymz zw*L>!2b#Yz!M~;XxZTaV!*TtCny>5n*C+TlHGi$$%gsQE7@_*XPPrul~x z{L7mEoaWDYSEv2IXns!fwMYko=dr)})4|B~io{clL{f6{zB|2AyutpB3stJ?n6 z3H}AmJDR^I!T(Y7q2@n!Qm6grH6QEu;{^Yl=HrO@P=f!1=C9QDOYiQq|E%Vln!hc< zKco5mnm_C0&iYSleq8f6CHSW_KdbrY6a4QrAM11JDV_G8)V!(de?P(hPV)`TpLS|z z{U-(Vu|Cr|E`M>+LPWz8)p7ttw-Jakd(fomkPHlTnbP=!Lhc&-f z^LHfphcthk<}XGrlwNWD2Q?qhzn>-e2Q<&4YN<0$k1pc+_iO%hnqNxrztz00`QtWs z*1u2l@%org@OnE+#q0Z_1h36@|8y1TD$rG+t3X$Qt^!>Jx(fWCD_~fu6dm;+qk}5W zwDI}!5YERx1YF>3;BlO{9|67!Tn3y4oC*92=lK1=Fz_-A-4EOgd=?l64k8YD1NZ?@ z11iAfzyweP27ohwmk|&B61WxE2h0LnfepX|h_!A5z5q0V2_Ov|LX3AG@Dt!0zyeSJ z81NS2%Ljp<0yhH(fF@7^b_16I!@y?X6yOQOvkwBd1J?rA0G9$E0Zs-s0w)3|0CypF z{t@ss;LE_50E)LS2ettp0?q_J2z&r|1hM>`z%9Vdz)ir7Koz(MNCVH{0RL;?^T4&h zK41wL0!{}u0Cyl~@m+uev%m~c0nP*tV*F?fAB_VJ3@2l@D?RB~C9Cne@HyMgM774 zL)k+Ge*{yl%Uk?I=v{ zOgpp&d!ub5x*lfLD%(+<8rfYPnVOgwovfs{D2DY6_4KCV+U3gBbUKiJFAJpb)YWim z3wJHM-Y(N=MHQ|&!d$>CY&N+oRV`F<0)DtU*{#eK*dN_Fwl$mDV%OmSn=Vc5oEbsh zH*V8*O((QC>k*z5SrJMvl>H6Q>p_hU$`ux0lR&RL+i7#E8Avc)D2|o}S6B(9?by=h zLDmy3?oZV#Es-?nhuFxPJR8WcRzhbWc(87nhT(Io6xCR-(7bDp8?NQRjO{uww{F4q zzP?@wJUqBxH%wj*Jx};@9fLx6K^AXGGwS|=gSCCQiX9UT_1cf4Z0$|0Zt=e<>TfDCGyMB-Avzt$r+-$AdEIywVPiLBZ^b z>%$eRElG6P;^TNAi8d#SF{BQWmb^;7oy=nYbhTy zz3K!pP`kd6d<$G2@0I zsn=tL%H(Ld%*LilEYGI%rAlFBrkF3W>6y~>RC%$*b}hs@ zj@@YBH`l8BjHaXQI0y|`81Q=NWEe`a*+OM}YNn!P^ho2Nf%>&2<;Hr0sFcNs;-E|3 zVpnPgM?Yv73*kDLv_`PXi8@W2Ku?=twO~xc#W8%@JMsmOq^ufFJolGyLp!i@icL;c zSiV>pEisHzX>?-hicvN)HNB@)*fm}u79(3Gt}) zF?-oyZV+c8yyN&e9vUrfvkEsGu5iR0p2G;Ku%L?4>SuXKtLg>lYQTNWra_W*YTcL6s8UjeQH z#(;BxQ-PC!}HpfU|*Tkq7t|upd|iJ^@?;TnxMheMqBMNT=?r zt3X$Qt^!>J{tFeLv`)}U%ch5K8)Wvb`_LojR@X7<eK+m_uGU-LMcL=nmKB z)g_$Og){8Dz!5D~CrYqtf^w|rPsPWHQeY@3ozg%i7`9}YwPQw(GU@+Qg8^)r1pYj(?RKNuFL({nYykk(wN+HTdu|DTL{ z&my&=K9E>Su8{^e7ow9p80yE0KHy}DtYwD_QUuF=h+L@9K-X7{FH+t(vywlEHcg7) z5DTbT0sW&XvBEp;9PVSJS6ZhOe8*~vKIv7!D74iDlIwtcJM7$NT8yZ@gcyi2Rjd)Y zMVHG{H}VMHt>M!eYDKF&G33SeLVzk5{5qy{TaP)ZeS)(z>D4yTHh?sBbRTmrz?UZ6 z>LqJsDm65O!oFPIPa?P8iDe$k7563~7{$9QBx*a=PYX=VGUP&$$Dz=HHVY&ooG1;U zBE^Q`;XmvmSayshOb`_qBVI9)kswKx0g5|B%cV6PZ>cJZ)ni4ivpGz-$TX}Vm0^ay zkA|J4VT4LefptvNw5?Zcx`F|&BQGV?j$zVzpxDYmG#o|A$RtX%iY~S5(mJ8Yl0s>U z^${FW99=^)1q-Hg8^$fl2VrDTs?MoPRbPL56QtF&S6fdsyQATiIkozudTDAFo=;Ht zy?r|?PE?B&W6qA!dxrI>o*>vpAWsPs+M4i5o z*aNXGTD|cWtJ5^m-Vv3@byPmON&7mb9P~DWGE{8LaFBk%j1l2ngM}VOR6i~HiQa*% z2L>D^@W8+Xh)UFvvk867J(MUQL&7PQACRQ75lPw?H3LyG#ov(!zuL{#e^%o-phhA- kS46Yqi01*u-ig%@8CE^yX-?r0k8j-KXFQ~(oB9d=FKg(&0ssI2 literal 0 HcmV?d00001 diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index c1b13d1b16a40..9a685585ef488 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -1810,6 +1810,7 @@ enum Nf { } // Vector unordered indexed load instructions + INSN( vluxei8_v, 0b0000111, 0b000, 0b01, 0b0); INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0); // Vector unordered indexed store instructions diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index c00b63169be16..e5f74f676bf57 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1345,11 +1345,13 @@ class MacroAssembler: public Assembler { vmfle_vv(vd, vs1, vs2, vm); } - inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { + inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { + guarantee(imm >= 1 && imm <= 16, "imm is invalid"); vmsleu_vi(Vd, Vs2, imm-1, vm); } - inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { + inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { + guarantee(imm >= 1 && imm <= 16, "imm is invalid"); vmsgtu_vi(Vd, Vs2, imm-1, vm); } diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index 97796e4b009aa..d1b2ea373ba82 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -3933,28 +3933,18 @@ class StubGenerator: public StubCodeGenerator { // ta: tail agnostic (don't care about those lanes) // ma: mask agnostic (don't care about those lanes) // x0 is not written, we known the number of vector elements. + __ vsetivli(x0, 4, vset_sew, Assembler::m1, Assembler::ma, Assembler::ta); + // Splat indexes in v26 if SEW = e64, but don't hurt anything. + int64_t indexes = vset_sew == Assembler::e32 ? 0x00041014ul : 0x00082028ul; + __ li(t0, indexes); + __ vmv_s_x(v26, t0); - // Load H[0..8] to produce - // v16 = {a,b,e,f} - // v17 = {c,d,g,h} - __ vleXX_v(vset_sew, v16, state); // v16 = {d,c,b,a} - __ addi(state, state, const_add); - __ vleXX_v(vset_sew, v17, state); // v17 = {h,g,f,e} - - __ vid_v(v30); // v30 = {3,2,1,0} - __ vxor_vi(v30, v30, 0x3); // v30 = {0,1,2,3} - __ vrgather_vv(v26, v16, v30); // v26 = {a,b,c,d} - __ vrgather_vv(v27, v17, v30); // v27 = {e,f,g,h} - __ vmsgeu_vi(v0, v30, 2); // v0 = {f,f,t,t} - // Copy elements [3..2] of v26 ({d,c}) into elements [3..2] of v17. - __ vslideup_vi(v17, v26, 2); // v17 = {c,d,_,_} - // Merge elements [1..0] of v27 ({g,h}) into elements [1..0] of v17 - __ vmerge_vvm(v17, v17, v27); // v17 = {c,d,g,h} - // Copy elements [1..0] of v27 ({f,e}) into elements [1..0] of v16. - __ vslidedown_vi(v16, v27, 2); // v16 = {_,_,e,f} - // Merge elements [3..2] of v26 ({a,b}) into elements [3..2] of v16 - __ vmerge_vvm(v16, v26, v16); // v16 = {a,b,e,f} + // Use index-load to get {f,e,b,a},{h,g,d,c} + __ vluxei8_v(v16, state, v26); + // Step-over a,b, so we are pointing to c. + __ addi(t0, state, const_add/2); + __ vluxei8_v(v17, t0, v26); __ bind(multi_block_loop); @@ -4053,9 +4043,9 @@ class StubGenerator: public StubCodeGenerator { __ vmerge_vvm(v16, v16, v26); // v16 = {d,c,b,a} // Save the hash - __ vseXX_v(vset_sew, v17, state); - __ addi(state, state, -const_add); __ vseXX_v(vset_sew, v16, state); + __ addi(state, state, const_add); + __ vseXX_v(vset_sew, v17, state); __ pop_reg(saved_regs, sp); __ leave();