diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index d16d35a4ffb7..78706f20d2ca 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5800,7 +5800,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, namespace { // Describes a general N-operand vector shuffle. struct GeneralShuffle { - GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} + GeneralShuffle(EVT vt) + : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {} void addUndef(); bool add(SDValue, unsigned); SDValue getNode(SelectionDAG &, const SDLoc &); @@ -5821,8 +5822,10 @@ struct GeneralShuffle { // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. unsigned UnpackFromEltSize; + // True if the final unpack uses the low half. + bool UnpackLow; }; -} +} // namespace // Add an extra undefined element to the shuffle. void GeneralShuffle::addUndef() { @@ -6027,11 +6030,21 @@ void GeneralShuffle::tryPrepareForUnpack() { if (MatchUnpack) { if (Ops.size() == 2) { // Don't use unpack if a single source operand needs rearrangement. - for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) - if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { + bool CanUseUnpackLow = true, CanUseUnpackHigh = true; + for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) { + if (SrcBytes[i] == -1) + continue; + if (SrcBytes[i] % 16 != int(i)) + CanUseUnpackHigh = false; + if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2)) + CanUseUnpackLow = false; + if (!CanUseUnpackLow && !CanUseUnpackHigh) { UnpackFromEltSize = UINT_MAX; return; } + } + if (!CanUseUnpackHigh) + UnpackLow = true; } break; } @@ -6046,13 +6059,19 @@ void GeneralShuffle::tryPrepareForUnpack() { // Apply the unpack in reverse to the Bytes array. 
unsigned B = 0;
+  if (UnpackLow) {
+    while (B < SystemZ::VectorBytes / 2)
+      Bytes[B++] = -1;
+  }
   for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
     Elt += UnpackFromEltSize;
     for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
       Bytes[B] = Bytes[Elt];
   }
-  while (B < SystemZ::VectorBytes)
-    Bytes[B++] = -1;
+  if (!UnpackLow) {
+    while (B < SystemZ::VectorBytes)
+      Bytes[B++] = -1;
+  }
 
   // Remove the zero vector from Ops
   Ops.erase(&Ops[ZeroVecOpNo]);
@@ -6079,7 +6098,9 @@ SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
   unsigned OutBits = InBits * 2;
   EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
                                SystemZ::VectorBits / OutBits);
-  return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
+  return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
+                               : SystemZISD::UNPACKL_HIGH,
+                     DL, OutVT, PackedOp);
 }
 
 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
@@ -6486,12 +6507,59 @@ lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
   EVT InVT = PackedOp.getValueType();
   unsigned ToBits = OutVT.getScalarSizeInBits();
   unsigned FromBits = InVT.getScalarSizeInBits();
+  unsigned StartOffset = 0;
+
+  // If the input is a VECTOR_SHUFFLE, there are a number of important
+  // cases where we can directly implement the sign-extension of the
+  // original input lanes of the shuffle.
+  if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
+    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
+    ArrayRef<int> ShuffleMask = SVN->getMask();
+    int OutNumElts = OutVT.getVectorNumElements();
+
+    // Recognize the special case where the sign-extension can be done
+    // by the VSEG instruction. Handled via the default expander.
+    if (ToBits == 64 && OutNumElts == 2) {
+      int NumElem = ToBits / FromBits;
+      if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
+        return SDValue();
+    }
+
+    // Recognize the special case where we can fold the shuffle by
+    // replacing some of the UNPACK_HIGH with UNPACK_LOW.
+    int StartOffsetCandidate = -1;
+    for (int Elt = 0; Elt < OutNumElts; Elt++) {
+      if (ShuffleMask[Elt] == -1)
+        continue;
+      if (ShuffleMask[Elt] % OutNumElts == Elt) {
+        if (StartOffsetCandidate == -1)
+          StartOffsetCandidate = ShuffleMask[Elt] - Elt;
+        if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
+          continue;
+      }
+      StartOffsetCandidate = -1;
+      break;
+    }
+    if (StartOffsetCandidate != -1) {
+      // Mask values of NumInElts or more name lanes of the shuffle's second
+      // operand, so unpack the operand the lanes really come from and make
+      // the offset relative to that operand.
+      int NumInElts = ShuffleMask.size();
+      StartOffset = StartOffsetCandidate % NumInElts;
+      PackedOp = PackedOp.getOperand(StartOffsetCandidate / NumInElts);
+    }
+  }
+
   do {
     FromBits *= 2;
-    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
-                                 SystemZ::VectorBits / FromBits);
-    PackedOp =
-        DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
+    unsigned OutNumElts = SystemZ::VectorBits / FromBits;
+    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
+    unsigned Opcode = SystemZISD::UNPACK_HIGH;
+    if (StartOffset >= OutNumElts) {
+      Opcode = SystemZISD::UNPACK_LOW;
+      StartOffset -= OutNumElts;
+    }
+    PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
   } while (FromBits != ToBits);
   return PackedOp;
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 3e78b3d175f4..db957bb7c029 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1970,6 +1970,22 @@ let Predicates = [FeatureVector] in {
             (VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
 }
 
+// Zero-extensions from VR element to i128 on arch15.
+let Predicates = [FeatureVectorEnhancements3] in {
+  def : Pat<(i128 (zext (i64 (z_vector_extract (v2i64 VR128:$src), 0)))),
+            (VUPLHG VR128:$src)>;
+  def : Pat<(i128 (zext (i64 (z_vector_extract (v2i64 VR128:$src), 1)))),
+            (VUPLLG VR128:$src)>;
+  def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 0)))),
+            (VUPLHG (VUPLHF VR128:$src))>;
+  def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 1)))),
+            (VUPLLG (VUPLHF VR128:$src))>; // words 0-1, then low doubleword
+  def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 2)))),
+            (VUPLHG (VUPLLF VR128:$src))>; // words 2-3, then high doubleword
+  def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 3)))),
+            (VUPLLG (VUPLLF VR128:$src))>;
+}
+
 // In-register i128 sign-extensions on arch15.
 let Predicates = [FeatureVectorEnhancements3] in {
   def : Pat<(i128 (sext_inreg VR128:$x, i8)), (VUPLG (VSEGB VR128:$x))>;
@@ -2034,6 +2050,22 @@ let Predicates = [FeatureVector] in {
             (VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>;
 }
 
+// Sign-extensions from VR element to i128 on arch15.
+let Predicates = [FeatureVectorEnhancements3] in {
+  def : Pat<(i128 (sext (i64 (z_vector_extract (v2i64 VR128:$src), 0)))),
+            (VUPHG VR128:$src)>;
+  def : Pat<(i128 (sext (i64 (z_vector_extract (v2i64 VR128:$src), 1)))),
+            (VUPLG VR128:$src)>;
+  def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 0)))),
+            (VUPHG (VUPHF VR128:$src))>;
+  def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 1)))),
+            (VUPLG (VUPHF VR128:$src))>; // words 0-1, then low doubleword
+  def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 2)))),
+            (VUPHG (VUPLF VR128:$src))>; // words 2-3, then high doubleword
+  def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 3)))),
+            (VUPLG (VUPLF VR128:$src))>;
+}
+
 // i128 comparison pseudo-instructions.
let Predicates = [FeatureVector], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll index 9ed7ca217973..c13293bd64a1 100644 --- a/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll +++ b/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll @@ -58,8 +58,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16 ; CHECK: # %bb.0: ; CHECK-DAG: vceqb [[REG0:%v[0-9]+]], %v24, %v26 ; CHECK-DAG: vuphb [[REG2:%v[0-9]+]], [[REG0]] -; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]] -; CHECK-DAG: vuphb [[REG1]], [[REG1]] +; CHECK-DAG: vuplb [[REG1:%v[0-9]+]], [[REG0]] ; CHECK-DAG: vceqh [[REG3:%v[0-9]+]], %v28, %v25 ; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27 ; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15) @@ -186,10 +185,9 @@ define <8 x i32> @fun10(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3, <8 x ; CHECK-DAG: vceqh [[REG1:%v[0-9]+]], %v28, %v30 ; CHECK-NEXT: vx [[REG2:%v[0-9]+]], [[REG0]], [[REG1]] ; CHECK-DAG: vuphh [[REG3:%v[0-9]+]], [[REG2]] -; CHECK-DAG: vmrlg [[REG4:%v[0-9]+]], [[REG2]], [[REG2]] -; CHECK-DAG: vuphh [[REG5:%v[0-9]+]], [[REG4]] +; CHECK-DAG: vuplhw [[REG4:%v[0-9]+]], [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG3]] -; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG5]] +; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG4]] ; CHECK-NEXT: br %r14 %cmp0 = icmp eq <8 x i16> %val1, %val2 %cmp1 = icmp eq <8 x i16> %val3, %val4 @@ -347,10 +345,9 @@ define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x ; CHECK-NEXT: vuphh %v1, %v1 ; CHECK-NEXT: vn %v0, %v0, %v1 ; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0 -; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0 -; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], %v0 ; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG0]] -; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG2]] +; CHECK-NEXT: vsel %v26, %v27, 
%v31, [[REG1]] ; CHECK-NEXT: br %r14 %cmp0 = icmp eq <4 x i32> %val1, %val2 %cmp1 = icmp eq <4 x i16> %val3, %val4 @@ -455,14 +452,13 @@ define <4 x i64> @fun24(<4 x i64> %val1, <4 x i64> %val2, <4 x i32> %val3, <4 x ; CHECK-LABEL: fun24: ; CHECK: # %bb.0: ; CHECK-NEXT: vceqf [[REG0:%v[0-9]+]], %v25, %v27 -; CHECK-NEXT: vuphf [[REG1:%v[0-9]+]], [[REG0]] -; CHECK-NEXT: vmrlg [[REG2:%v[0-9]+]], [[REG0]], [[REG0]] +; CHECK-DAG: vuphf [[REG1:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vuplf [[REG2:%v[0-9]+]], [[REG0]] ; CHECK-DAG: vceqg [[REG3:%v[0-9]+]], %v24, %v28 ; CHECK-DAG: vceqg [[REG4:%v[0-9]+]], %v26, %v30 -; CHECK-DAG: vuphf [[REG5:%v[0-9]+]], [[REG2]] ; CHECK-DAG: vl [[REG6:%v[0-9]+]], 176(%r15) ; CHECK-DAG: vl [[REG7:%v[0-9]+]], 160(%r15) -; CHECK-DAG: vx [[REG8:%v[0-9]+]], [[REG4]], [[REG5]] +; CHECK-DAG: vx [[REG8:%v[0-9]+]], [[REG4]], [[REG2]] ; CHECK-DAG: vx [[REG9:%v[0-9]+]], [[REG3]], [[REG1]] ; CHECK-DAG: vsel %v24, %v29, [[REG7]], [[REG9]] ; CHECK-DAG: vsel %v26, %v31, [[REG6]], [[REG8]] @@ -631,8 +627,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va ; CHECK-NEXT: vfchdb %v2, %v3, %v2 ; CHECK-NEXT: vpkg %v1, %v2, %v1 ; CHECK-NEXT: vx %v0, %v0, %v1 -; CHECK-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-NEXT: vuphf %v1, %v1 +; CHECK-NEXT: vuplf %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-NEXT: vsel %v26, %v27, %v31, %v1 @@ -643,8 +638,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va ; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26 ; CHECK-Z14-NEXT: vfchsb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vx %v0, %v0, %v1 -; CHECK-Z14-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-Z14-NEXT: vuphf %v1, %v1 +; CHECK-Z14-NEXT: vuplf %v1, %v0 ; CHECK-Z14-NEXT: vuphf %v0, %v0 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-Z14-NEXT: vsel %v26, %v27, %v31, %v1 @@ -816,11 +810,10 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> % ; CHECK-DAG: vfchdb 
[[REG11:%v[0-9]+]], [[REG9]], [[REG7]] ; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG11]], [[REG4]] ; CHECK-DAG: vuphf [[REG13:%v[0-9]+]], [[REG12]] -; CHECK-DAG: vmrlg [[REG14:%v[0-9]+]], [[REG12]], [[REG12]] -; CHECK-NEXT: vfchdb [[REG15:%v[0-9]+]], %v24, %v28 -; CHECK-NEXT: vfchdb [[REG16:%v[0-9]+]], %v26, %v30 -; CHECK-NEXT: vuphf [[REG17:%v[0-9]+]], [[REG14]] -; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]] +; CHECK-DAG: vuplf [[REG14:%v[0-9]+]], [[REG12]] +; CHECK-DAG: vfchdb [[REG15:%v[0-9]+]], %v24, %v28 +; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v26, %v30 +; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG14]] ; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] ; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]] ; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]] @@ -829,13 +822,12 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> % ; CHECK-Z14-LABEL: fun34: ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27 +; CHECK-Z14-NEXT: vl %v0, 176(%r15) +; CHECK-Z14-NEXT: vl %v1, 160(%r15) +; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28 +; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30 ; CHECK-Z14-NEXT: vuphf %v5, %v4 -; CHECK-Z14-NEXT: vmrlg %v4, %v4, %v4 -; CHECK-Z14-DAG: vfchdb %v2, %v24, %v28 -; CHECK-Z14-DAG: vfchdb %v3, %v26, %v30 -; CHECK-Z14-DAG: vuphf %v4, %v4 -; CHECK-Z14-DAG: vl %v0, 176(%r15) -; CHECK-Z14-DAG: vl %v1, 160(%r15) +; CHECK-Z14-NEXT: vuplf %v4, %v4 ; CHECK-Z14-NEXT: vn %v3, %v3, %v4 ; CHECK-Z14-NEXT: vn %v2, %v2, %v5 ; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2 diff --git a/llvm/test/CodeGen/SystemZ/vec-cmpsel.ll b/llvm/test/CodeGen/SystemZ/vec-cmpsel.ll index 200c25179d38..f93ecc348af6 100644 --- a/llvm/test/CodeGen/SystemZ/vec-cmpsel.ll +++ b/llvm/test/CodeGen/SystemZ/vec-cmpsel.ll @@ -43,8 +43,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16 ; CHECK: # %bb.0: ; CHECK-NEXT: vceqb %v0, %v24, %v26 ; CHECK-DAG: vuphb [[REG0:%v[0-9]+]], %v0 -; CHECK-DAG: vmrlg 
[[REG1:%v[0-9]+]], %v0, %v0 -; CHECK-DAG: vuphb [[REG1]], [[REG1]] +; CHECK-DAG: vuplb [[REG1:%v[0-9]+]], %v0 ; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]] ; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]] ; CHECK-NEXT: br %r14 @@ -129,8 +128,7 @@ define <8 x i32> @fun10(<8 x i16> %val1, <8 x i16> %val2, <8 x i32> %val3, <8 x ; CHECK: # %bb.0: ; CHECK-NEXT: vceqh %v0, %v24, %v26 ; CHECK-DAG: vuphh [[REG0:%v[0-9]+]], %v0 -; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0 -; CHECK-DAG: vuphh [[REG1]], [[REG1]] +; CHECK-DAG: vuplhw [[REG1:%v[0-9]+]], %v0 ; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]] ; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]] ; CHECK-NEXT: br %r14 @@ -228,8 +226,7 @@ define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i64> %val3, <4 x ; CHECK: # %bb.0: ; CHECK-NEXT: vceqf %v0, %v24, %v26 ; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0 -; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0 -; CHECK-DAG: vuphf [[REG1]], [[REG1]] +; CHECK-DAG: vuplf [[REG1]], %v0 ; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]] ; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]] ; CHECK-NEXT: br %r14 @@ -428,8 +425,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v ; CHECK-NEXT: vldeb %v2, %v2 ; CHECK-NEXT: vfchdb %v1, %v2, %v1 ; CHECK-NEXT: vpkg [[REG0:%v[0-9]+]], %v1, %v0 -; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]] -; CHECK-DAG: vuphf [[REG1]], [[REG1]] +; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], [[REG0]] ; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG0]] ; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG2]] ; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]] @@ -439,8 +435,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26 ; CHECK-Z14-DAG: vuphf [[REG0:%v[0-9]+]], %v0 -; CHECK-Z14-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0 -; CHECK-Z14-DAG: vuphf [[REG1]], [[REG1]] +; CHECK-Z14-DAG: vuplf [[REG1:%v[0-9]+]], %v0 ; CHECK-Z14-NEXT: vsel %v24, %v28, %v25, [[REG0]] ; 
CHECK-Z14-NEXT: vsel %v26, %v30, %v27, [[REG1]] ; CHECK-Z14-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/vec-move-23.ll b/llvm/test/CodeGen/SystemZ/vec-move-23.ll index 11ad6f360580..1976e6710ecf 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-23.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-23.ll @@ -143,19 +143,17 @@ define void @fun8(<2 x i64> %dwords, ptr %ptr) { ; Test that this results in vectorized conversions. define void @fun9(ptr %Src, ptr %ptr) { ; CHECK-LABEL: fun9 -; Z15: larl %r1, .LCPI9_0 -; Z15-NEXT: vl %v0, 16(%r2), 4 +; Z15: vl %v0, 16(%r2), 4 ; Z15-NEXT: vl %v1, 0(%r2), 4 -; Z15-NEXT: vl %v2, 0(%r1), 3 -; Z15-NEXT: vperm %v2, %v2, %v1, %v2 -; Z15-NEXT: vuplhh %v1, %v1 +; Z15-NEXT: vuplhh %v2, %v1 +; Z15-NEXT: vupllh %v1, %v1 ; Z15-NEXT: vuplhh %v0, %v0 ; Z15-NEXT: vcelfb %v2, %v2, 0, 0 ; Z15-NEXT: vcelfb %v1, %v1, 0, 0 ; Z15-NEXT: vcelfb %v0, %v0, 0, 0 ; Z15-NEXT: vsteg %v0, 32(%r3), 0 -; Z15-NEXT: vst %v2, 16(%r3), 4 -; Z15-NEXT: vst %v1, 0(%r3), 4 +; Z15-NEXT: vst %v1, 16(%r3), 4 +; Z15-NEXT: vst %v2, 0(%r3), 4 ; Z15-NEXT: br %r14 %Val = load <10 x i16>, ptr %Src diff --git a/llvm/test/CodeGen/SystemZ/vec-shift-07.ll b/llvm/test/CodeGen/SystemZ/vec-shift-07.ll index f229c5e25a46..afb04045957a 100644 --- a/llvm/test/CodeGen/SystemZ/vec-shift-07.ll +++ b/llvm/test/CodeGen/SystemZ/vec-shift-07.ll @@ -180,3 +180,33 @@ define <2 x i64> @f16(<16 x i32> %val) { %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 ret <2 x i64> %vec1 } + +; Test a shufflevector-based v2i8->v2i64 extension. +define <2 x i64> @f17(<16 x i8> %val) { +; CHECK-LABEL: f17: +; CHECK: vsegb %v24, %v24 +; CHECK: br %r14 + %shuf = shufflevector <16 x i8> %val, <16 x i8> poison, <2 x i32> + %ret = sext <2 x i8> %shuf to <2 x i64> + ret <2 x i64> %ret +} + +; Test a shufflevector-based v2i16->v2i64 extension. 
+define <2 x i64> @f18(<8 x i16> %val) { +; CHECK-LABEL: f18: +; CHECK: vsegh %v24, %v24 +; CHECK: br %r14 + %shuf = shufflevector <8 x i16> %val, <8 x i16> poison, <2 x i32> + %ret = sext <2 x i16> %shuf to <2 x i64> + ret <2 x i64> %ret +} + +; Test a shufflevector-based v2i32->v2i64 extension. +define <2 x i64> @f19(<4 x i32> %val) { +; CHECK-LABEL: f19: +; CHECK: vsegf %v24, %v24 +; CHECK: br %r14 + %shuf = shufflevector <4 x i32> %val, <4 x i32> poison, <2 x i32> + %ret = sext <2 x i32> %shuf to <2 x i64> + ret <2 x i64> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-unpack-01.ll b/llvm/test/CodeGen/SystemZ/vec-unpack-01.ll new file mode 100644 index 000000000000..9355341d6ca8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-unpack-01.ll @@ -0,0 +1,270 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define <8 x i16> @f1(<16 x i8> %a) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %1 = sext <8 x i8> %0 to <8 x i16> + ret <8 x i16> %1 +} + +define <8 x i16> @f2(<16 x i8> %a) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %1 = sext <8 x i8> %0 to <8 x i16> + ret <8 x i16> %1 +} + +define <4 x i32> @f3(<8 x i16> %a) { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphh %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> + %1 = sext <4 x i16> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @f4(<8 x i16> %a) { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplhw %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> + %1 = sext <4 x 
i16> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @f5(<16 x i8> %a) { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v0, %v24 +; CHECK-NEXT: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> + %1 = sext <4 x i8> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @f6(<16 x i8> %a) { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v0, %v24 +; CHECK-NEXT: vuplhw %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> + %1 = sext <4 x i8> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @f7(<16 x i8> %a) { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v0, %v24 +; CHECK-NEXT: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> + %1 = sext <4 x i8> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @f8(<16 x i8> %a) { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v0, %v24 +; CHECK-NEXT: vuplhw %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> + %1 = sext <4 x i8> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <2 x i64> @f9(<4 x i32> %a) { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphf %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> + %1 = sext <2 x i32> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f10(<4 x i32> %a) { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplf %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> + %1 = sext <2 x i32> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f11(<8 x i16> %a) { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphh %v0, %v24 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = 
shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> + %1 = sext <2 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f12(<8 x i16> %a) { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphh %v0, %v24 +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> + %1 = sext <2 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f13(<8 x i16> %a) { +; CHECK-LABEL: f13: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplhw %v0, %v24 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> + %1 = sext <2 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f14(<8 x i16> %a) { +; CHECK-LABEL: f14: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplhw %v0, %v24 +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> + %1 = sext <2 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f15(<16 x i8> %a) { +; CHECK-LABEL: f15: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v0, %v24 +; CHECK-NEXT: vuphh %v0, %v0 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f16(<16 x i8> %a) { +; CHECK-LABEL: f16: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v0, %v24 +; CHECK-NEXT: vuphh %v0, %v0 +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f17(<16 x i8> %a) { +; CHECK-LABEL: f17: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v0, %v24 +; CHECK-NEXT: vuplhw %v0, %v0 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x 
i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f18(<16 x i8> %a) { +; CHECK-LABEL: f18: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuphb %v0, %v24 +; CHECK-NEXT: vuplhw %v0, %v0 +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f19(<16 x i8> %a) { +; CHECK-LABEL: f19: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v0, %v24 +; CHECK-NEXT: vuphh %v0, %v0 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f20(<16 x i8> %a) { +; CHECK-LABEL: f20: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v0, %v24 +; CHECK-NEXT: vuphh %v0, %v0 +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f21(<16 x i8> %a) { +; CHECK-LABEL: f21: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v0, %v24 +; CHECK-NEXT: vuplhw %v0, %v0 +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f22(<16 x i8> %a) { +; CHECK-LABEL: f22: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplb %v0, %v24 +; CHECK-NEXT: vuplhw %v0, %v0 +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> + %1 = sext <2 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-unpack-02.ll b/llvm/test/CodeGen/SystemZ/vec-unpack-02.ll new file mode 100644 index 000000000000..301fea1b1958 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-unpack-02.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+define i128 @f1(<2 x i64> %a) {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuphg %v0, %v24
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <2 x i64> %a, i32 0
+  %1 = sext i64 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f2(<2 x i64> %a) {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuplg %v0, %v24
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = sext i64 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f3(<4 x i32> %a) {
+; CHECK-LABEL: f3:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuphf %v0, %v24
+; CHECK-NEXT:    vuphg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 0
+  %1 = sext i32 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f4(<4 x i32> %a) {
+; CHECK-LABEL: f4:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuphf %v0, %v24
+; CHECK-NEXT:    vuplg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 1 ; word 1: high word pair, low doubleword
+  %1 = sext i32 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f5(<4 x i32> %a) {
+; CHECK-LABEL: f5:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuplf %v0, %v24
+; CHECK-NEXT:    vuphg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 2 ; word 2: low word pair, high doubleword
+  %1 = sext i32 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f6(<4 x i32> %a) {
+; CHECK-LABEL: f6:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuplf %v0, %v24
+; CHECK-NEXT:    vuplg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 3
+  %1 = sext i32 %0 to i128
+  ret i128 %1
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-unpack-03.ll b/llvm/test/CodeGen/SystemZ/vec-unpack-03.ll
new file mode 100644
index 
000000000000..de4c51e695bf --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-unpack-03.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define <8 x i16> @f1(<16 x i8> %a) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplhb %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %1 = zext <8 x i8> %0 to <8 x i16> + ret <8 x i16> %1 +} + +define <8 x i16> @f2(<16 x i8> %a) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vupllb %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %1 = zext <8 x i8> %0 to <8 x i16> + ret <8 x i16> %1 +} + +define <4 x i32> @f3(<8 x i16> %a) { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplhh %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> + %1 = zext <4 x i16> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @f4(<8 x i16> %a) { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vupllh %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> + %1 = zext <4 x i16> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <2 x i64> @f5(<4 x i32> %a) { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vuplhf %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> + %1 = zext <2 x i32> %0 to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @f6(<4 x i32> %a) { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vupllf %v24, %v24 +; CHECK-NEXT: br %r14 +start: + %0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> + %1 = zext <2 x i32> %0 to <2 x i64> + ret <2 x i64> %1 +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-unpack-04.ll 
b/llvm/test/CodeGen/SystemZ/vec-unpack-04.ll
new file mode 100644
index 000000000000..2cf8d1325e16
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-unpack-04.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+define i128 @f1(<2 x i64> %a) {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuplhg %v0, %v24
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <2 x i64> %a, i32 0
+  %1 = zext i64 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f2(<2 x i64> %a) {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vupllg %v0, %v24
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = zext i64 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f3(<4 x i32> %a) {
+; CHECK-LABEL: f3:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuplhf %v0, %v24
+; CHECK-NEXT:    vuplhg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 0
+  %1 = zext i32 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f4(<4 x i32> %a) {
+; CHECK-LABEL: f4:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vuplhf %v0, %v24
+; CHECK-NEXT:    vupllg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 1 ; word 1: high word pair, low doubleword
+  %1 = zext i32 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f5(<4 x i32> %a) {
+; CHECK-LABEL: f5:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vupllf %v0, %v24
+; CHECK-NEXT:    vuplhg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = extractelement <4 x i32> %a, i32 2 ; word 2: low word pair, high doubleword
+  %1 = zext i32 %0 to i128
+  ret i128 %1
+}
+
+define i128 @f6(<4 x i32> %a) {
+; CHECK-LABEL: f6:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    vupllf %v0, %v24
+; CHECK-NEXT:    vupllg %v0, %v0
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+start:
+  %0 = 
extractelement <4 x i32> %a, i32 3 + %1 = zext i32 %0 to i128 + ret i128 %1 +}