[SystemZ] Optimize vector zero/sign extensions
Generate more efficient code for zero or sign extensions where the source is a subvector generated via SHUFFLE_VECTOR. Specifically, recognize shuffle patterns that correspond to a (series of) VECTOR UNPACK instruction(s), or to the VECTOR SIGN EXTEND TO DOUBLEWORD instruction.

As a special case, also handle zero or sign extensions of a vector element to i128.

Fixes: https://github.com/llvm/llvm-project/issues/129576
Fixes: https://github.com/llvm/llvm-project/issues/129899
This commit is contained in:
@@ -5800,7 +5800,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
|
||||
namespace {
|
||||
// Describes a general N-operand vector shuffle.
|
||||
struct GeneralShuffle {
|
||||
GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
|
||||
GeneralShuffle(EVT vt)
|
||||
: VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
|
||||
void addUndef();
|
||||
bool add(SDValue, unsigned);
|
||||
SDValue getNode(SelectionDAG &, const SDLoc &);
|
||||
@@ -5821,8 +5822,10 @@ struct GeneralShuffle {
|
||||
|
||||
// Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
|
||||
unsigned UnpackFromEltSize;
|
||||
// True if the final unpack uses the low half.
|
||||
bool UnpackLow;
|
||||
};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Add an extra undefined element to the shuffle.
|
||||
void GeneralShuffle::addUndef() {
|
||||
@@ -6027,11 +6030,21 @@ void GeneralShuffle::tryPrepareForUnpack() {
|
||||
if (MatchUnpack) {
|
||||
if (Ops.size() == 2) {
|
||||
// Don't use unpack if a single source operand needs rearrangement.
|
||||
for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
|
||||
if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
|
||||
bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
|
||||
for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
|
||||
if (SrcBytes[i] == -1)
|
||||
continue;
|
||||
if (SrcBytes[i] % 16 != int(i))
|
||||
CanUseUnpackHigh = false;
|
||||
if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
|
||||
CanUseUnpackLow = false;
|
||||
if (!CanUseUnpackLow && !CanUseUnpackHigh) {
|
||||
UnpackFromEltSize = UINT_MAX;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!CanUseUnpackHigh)
|
||||
UnpackLow = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -6046,13 +6059,19 @@ void GeneralShuffle::tryPrepareForUnpack() {
|
||||
|
||||
// Apply the unpack in reverse to the Bytes array.
|
||||
unsigned B = 0;
|
||||
if (UnpackLow) {
|
||||
while (B < SystemZ::VectorBytes / 2)
|
||||
Bytes[B++] = -1;
|
||||
}
|
||||
for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
|
||||
Elt += UnpackFromEltSize;
|
||||
for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
|
||||
Bytes[B] = Bytes[Elt];
|
||||
}
|
||||
while (B < SystemZ::VectorBytes)
|
||||
Bytes[B++] = -1;
|
||||
if (!UnpackLow) {
|
||||
while (B < SystemZ::VectorBytes)
|
||||
Bytes[B++] = -1;
|
||||
}
|
||||
|
||||
// Remove the zero vector from Ops
|
||||
Ops.erase(&Ops[ZeroVecOpNo]);
|
||||
@@ -6079,7 +6098,9 @@ SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
|
||||
unsigned OutBits = InBits * 2;
|
||||
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
|
||||
SystemZ::VectorBits / OutBits);
|
||||
return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
|
||||
return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
|
||||
: SystemZISD::UNPACKL_HIGH,
|
||||
DL, OutVT, PackedOp);
|
||||
}
|
||||
|
||||
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
|
||||
@@ -6486,12 +6507,55 @@ lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT InVT = PackedOp.getValueType();
|
||||
unsigned ToBits = OutVT.getScalarSizeInBits();
|
||||
unsigned FromBits = InVT.getScalarSizeInBits();
|
||||
unsigned StartOffset = 0;
|
||||
|
||||
// If the input is a VECTOR_SHUFFLE, there are a number of important
|
||||
// cases where we can directly implement the sign-extension of the
|
||||
// original input lanes of the shuffle.
|
||||
if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
|
||||
ArrayRef<int> ShuffleMask = SVN->getMask();
|
||||
int OutNumElts = OutVT.getVectorNumElements();
|
||||
|
||||
// Recognize the special case where the sign-extension can be done
|
||||
// by the VSEG instruction. Handled via the default expander.
|
||||
if (ToBits == 64 && OutNumElts == 2) {
|
||||
int NumElem = ToBits / FromBits;
|
||||
if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Recognize the special case where we can fold the shuffle by
|
||||
// replacing some of the UNPACK_HIGH with UNPACK_LOW.
|
||||
int StartOffsetCandidate = -1;
|
||||
for (int Elt = 0; Elt < OutNumElts; Elt++) {
|
||||
if (ShuffleMask[Elt] == -1)
|
||||
continue;
|
||||
if (ShuffleMask[Elt] % OutNumElts == Elt) {
|
||||
if (StartOffsetCandidate == -1)
|
||||
StartOffsetCandidate = ShuffleMask[Elt] - Elt;
|
||||
if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
|
||||
continue;
|
||||
}
|
||||
StartOffsetCandidate = -1;
|
||||
break;
|
||||
}
|
||||
if (StartOffsetCandidate != -1) {
|
||||
StartOffset = StartOffsetCandidate;
|
||||
PackedOp = PackedOp.getOperand(0);
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
FromBits *= 2;
|
||||
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
|
||||
SystemZ::VectorBits / FromBits);
|
||||
PackedOp =
|
||||
DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
|
||||
unsigned OutNumElts = SystemZ::VectorBits / FromBits;
|
||||
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
|
||||
unsigned Opcode = SystemZISD::UNPACK_HIGH;
|
||||
if (StartOffset >= OutNumElts) {
|
||||
Opcode = SystemZISD::UNPACK_LOW;
|
||||
StartOffset -= OutNumElts;
|
||||
}
|
||||
PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
|
||||
} while (FromBits != ToBits);
|
||||
return PackedOp;
|
||||
}
|
||||
|
||||
@@ -1970,6 +1970,22 @@ let Predicates = [FeatureVector] in {
|
||||
(VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
|
||||
}
|
||||
|
||||
// Zero-extensions from VR element to i128 on arch15.
|
||||
let Predicates = [FeatureVectorEnhancements3] in {
|
||||
def : Pat<(i128 (zext (i64 (z_vector_extract (v2i64 VR128:$src), 0)))),
|
||||
(VUPLHG VR128:$src)>;
|
||||
def : Pat<(i128 (zext (i64 (z_vector_extract (v2i64 VR128:$src), 1)))),
|
||||
(VUPLLG VR128:$src)>;
|
||||
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 0)))),
|
||||
(VUPLHG (VUPLHF VR128:$src))>;
|
||||
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 1)))),
|
||||
(VUPLHG (VUPLLF VR128:$src))>;
|
||||
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 2)))),
|
||||
(VUPLLG (VUPLHF VR128:$src))>;
|
||||
def : Pat<(i128 (zext (i32 (z_vector_extract (v4i32 VR128:$src), 3)))),
|
||||
(VUPLLG (VUPLLF VR128:$src))>;
|
||||
}
|
||||
|
||||
// In-register i128 sign-extensions on arch15.
|
||||
let Predicates = [FeatureVectorEnhancements3] in {
|
||||
def : Pat<(i128 (sext_inreg VR128:$x, i8)), (VUPLG (VSEGB VR128:$x))>;
|
||||
@@ -2034,6 +2050,22 @@ let Predicates = [FeatureVector] in {
|
||||
(VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>;
|
||||
}
|
||||
|
||||
// Sign-extensions from VR element to i128 on arch15.
|
||||
let Predicates = [FeatureVectorEnhancements3] in {
|
||||
def : Pat<(i128 (sext (i64 (z_vector_extract (v2i64 VR128:$src), 0)))),
|
||||
(VUPHG VR128:$src)>;
|
||||
def : Pat<(i128 (sext (i64 (z_vector_extract (v2i64 VR128:$src), 1)))),
|
||||
(VUPLG VR128:$src)>;
|
||||
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 0)))),
|
||||
(VUPHG (VUPHF VR128:$src))>;
|
||||
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 1)))),
|
||||
(VUPHG (VUPLF VR128:$src))>;
|
||||
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 2)))),
|
||||
(VUPLG (VUPHF VR128:$src))>;
|
||||
def : Pat<(i128 (sext (i32 (z_vector_extract (v4i32 VR128:$src), 3)))),
|
||||
(VUPLG (VUPLF VR128:$src))>;
|
||||
}
|
||||
|
||||
// i128 comparison pseudo-instructions.
|
||||
let Predicates = [FeatureVector], Defs = [CC],
|
||||
usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
|
||||
|
||||
@@ -58,8 +58,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-DAG: vceqb [[REG0:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-DAG: vuphb [[REG2:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]]
|
||||
; CHECK-DAG: vuphb [[REG1]], [[REG1]]
|
||||
; CHECK-DAG: vuplb [[REG1:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-DAG: vceqh [[REG3:%v[0-9]+]], %v28, %v25
|
||||
; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27
|
||||
; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15)
|
||||
@@ -186,10 +185,9 @@ define <8 x i32> @fun10(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3, <8 x
|
||||
; CHECK-DAG: vceqh [[REG1:%v[0-9]+]], %v28, %v30
|
||||
; CHECK-NEXT: vx [[REG2:%v[0-9]+]], [[REG0]], [[REG1]]
|
||||
; CHECK-DAG: vuphh [[REG3:%v[0-9]+]], [[REG2]]
|
||||
; CHECK-DAG: vmrlg [[REG4:%v[0-9]+]], [[REG2]], [[REG2]]
|
||||
; CHECK-DAG: vuphh [[REG5:%v[0-9]+]], [[REG4]]
|
||||
; CHECK-DAG: vuplhw [[REG4:%v[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG3]]
|
||||
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG5]]
|
||||
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG4]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp0 = icmp eq <8 x i16> %val1, %val2
|
||||
%cmp1 = icmp eq <8 x i16> %val3, %val4
|
||||
@@ -347,10 +345,9 @@ define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x
|
||||
; CHECK-NEXT: vuphh %v1, %v1
|
||||
; CHECK-NEXT: vn %v0, %v0, %v1
|
||||
; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0
|
||||
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
|
||||
; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG1]]
|
||||
; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], %v0
|
||||
; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG0]]
|
||||
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG2]]
|
||||
; CHECK-NEXT: vsel %v26, %v27, %v31, [[REG1]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp0 = icmp eq <4 x i32> %val1, %val2
|
||||
%cmp1 = icmp eq <4 x i16> %val3, %val4
|
||||
@@ -455,14 +452,13 @@ define <4 x i64> @fun24(<4 x i64> %val1, <4 x i64> %val2, <4 x i32> %val3, <4 x
|
||||
; CHECK-LABEL: fun24:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vceqf [[REG0:%v[0-9]+]], %v25, %v27
|
||||
; CHECK-NEXT: vuphf [[REG1:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-NEXT: vmrlg [[REG2:%v[0-9]+]], [[REG0]], [[REG0]]
|
||||
; CHECK-DAG: vuphf [[REG1:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-DAG: vuplf [[REG2:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-DAG: vceqg [[REG3:%v[0-9]+]], %v24, %v28
|
||||
; CHECK-DAG: vceqg [[REG4:%v[0-9]+]], %v26, %v30
|
||||
; CHECK-DAG: vuphf [[REG5:%v[0-9]+]], [[REG2]]
|
||||
; CHECK-DAG: vl [[REG6:%v[0-9]+]], 176(%r15)
|
||||
; CHECK-DAG: vl [[REG7:%v[0-9]+]], 160(%r15)
|
||||
; CHECK-DAG: vx [[REG8:%v[0-9]+]], [[REG4]], [[REG5]]
|
||||
; CHECK-DAG: vx [[REG8:%v[0-9]+]], [[REG4]], [[REG2]]
|
||||
; CHECK-DAG: vx [[REG9:%v[0-9]+]], [[REG3]], [[REG1]]
|
||||
; CHECK-DAG: vsel %v24, %v29, [[REG7]], [[REG9]]
|
||||
; CHECK-DAG: vsel %v26, %v31, [[REG6]], [[REG8]]
|
||||
@@ -631,8 +627,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va
|
||||
; CHECK-NEXT: vfchdb %v2, %v3, %v2
|
||||
; CHECK-NEXT: vpkg %v1, %v2, %v1
|
||||
; CHECK-NEXT: vx %v0, %v0, %v1
|
||||
; CHECK-NEXT: vmrlg %v1, %v0, %v0
|
||||
; CHECK-NEXT: vuphf %v1, %v1
|
||||
; CHECK-NEXT: vuplf %v1, %v0
|
||||
; CHECK-NEXT: vuphf %v0, %v0
|
||||
; CHECK-NEXT: vsel %v24, %v25, %v29, %v0
|
||||
; CHECK-NEXT: vsel %v26, %v27, %v31, %v1
|
||||
@@ -643,8 +638,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va
|
||||
; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26
|
||||
; CHECK-Z14-NEXT: vfchsb %v1, %v28, %v30
|
||||
; CHECK-Z14-NEXT: vx %v0, %v0, %v1
|
||||
; CHECK-Z14-NEXT: vmrlg %v1, %v0, %v0
|
||||
; CHECK-Z14-NEXT: vuphf %v1, %v1
|
||||
; CHECK-Z14-NEXT: vuplf %v1, %v0
|
||||
; CHECK-Z14-NEXT: vuphf %v0, %v0
|
||||
; CHECK-Z14-NEXT: vsel %v24, %v25, %v29, %v0
|
||||
; CHECK-Z14-NEXT: vsel %v26, %v27, %v31, %v1
|
||||
@@ -816,11 +810,10 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %
|
||||
; CHECK-DAG: vfchdb [[REG11:%v[0-9]+]], [[REG9]], [[REG7]]
|
||||
; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG11]], [[REG4]]
|
||||
; CHECK-DAG: vuphf [[REG13:%v[0-9]+]], [[REG12]]
|
||||
; CHECK-DAG: vmrlg [[REG14:%v[0-9]+]], [[REG12]], [[REG12]]
|
||||
; CHECK-NEXT: vfchdb [[REG15:%v[0-9]+]], %v24, %v28
|
||||
; CHECK-NEXT: vfchdb [[REG16:%v[0-9]+]], %v26, %v30
|
||||
; CHECK-NEXT: vuphf [[REG17:%v[0-9]+]], [[REG14]]
|
||||
; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]]
|
||||
; CHECK-DAG: vuplf [[REG14:%v[0-9]+]], [[REG12]]
|
||||
; CHECK-DAG: vfchdb [[REG15:%v[0-9]+]], %v24, %v28
|
||||
; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v26, %v30
|
||||
; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG14]]
|
||||
; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]]
|
||||
; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]]
|
||||
; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]]
|
||||
@@ -829,13 +822,12 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %
|
||||
; CHECK-Z14-LABEL: fun34:
|
||||
; CHECK-Z14: # %bb.0:
|
||||
; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27
|
||||
; CHECK-Z14-NEXT: vl %v0, 176(%r15)
|
||||
; CHECK-Z14-NEXT: vl %v1, 160(%r15)
|
||||
; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28
|
||||
; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30
|
||||
; CHECK-Z14-NEXT: vuphf %v5, %v4
|
||||
; CHECK-Z14-NEXT: vmrlg %v4, %v4, %v4
|
||||
; CHECK-Z14-DAG: vfchdb %v2, %v24, %v28
|
||||
; CHECK-Z14-DAG: vfchdb %v3, %v26, %v30
|
||||
; CHECK-Z14-DAG: vuphf %v4, %v4
|
||||
; CHECK-Z14-DAG: vl %v0, 176(%r15)
|
||||
; CHECK-Z14-DAG: vl %v1, 160(%r15)
|
||||
; CHECK-Z14-NEXT: vuplf %v4, %v4
|
||||
; CHECK-Z14-NEXT: vn %v3, %v3, %v4
|
||||
; CHECK-Z14-NEXT: vn %v2, %v2, %v5
|
||||
; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2
|
||||
|
||||
@@ -43,8 +43,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vceqb %v0, %v24, %v26
|
||||
; CHECK-DAG: vuphb [[REG0:%v[0-9]+]], %v0
|
||||
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
|
||||
; CHECK-DAG: vuphb [[REG1]], [[REG1]]
|
||||
; CHECK-DAG: vuplb [[REG1:%v[0-9]+]], %v0
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]]
|
||||
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
|
||||
; CHECK-NEXT: br %r14
|
||||
@@ -129,8 +128,7 @@ define <8 x i32> @fun10(<8 x i16> %val1, <8 x i16> %val2, <8 x i32> %val3, <8 x
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vceqh %v0, %v24, %v26
|
||||
; CHECK-DAG: vuphh [[REG0:%v[0-9]+]], %v0
|
||||
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
|
||||
; CHECK-DAG: vuphh [[REG1]], [[REG1]]
|
||||
; CHECK-DAG: vuplhw [[REG1:%v[0-9]+]], %v0
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]]
|
||||
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
|
||||
; CHECK-NEXT: br %r14
|
||||
@@ -228,8 +226,7 @@ define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i64> %val3, <4 x
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vceqf %v0, %v24, %v26
|
||||
; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0
|
||||
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
|
||||
; CHECK-DAG: vuphf [[REG1]], [[REG1]]
|
||||
; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], %v0
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG0]]
|
||||
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
|
||||
; CHECK-NEXT: br %r14
|
||||
@@ -428,8 +425,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v
|
||||
; CHECK-NEXT: vldeb %v2, %v2
|
||||
; CHECK-NEXT: vfchdb %v1, %v2, %v1
|
||||
; CHECK-NEXT: vpkg [[REG0:%v[0-9]+]], %v1, %v0
|
||||
; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]]
|
||||
; CHECK-DAG: vuphf [[REG1]], [[REG1]]
|
||||
; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG0]]
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v25, [[REG2]]
|
||||
; CHECK-NEXT: vsel %v26, %v30, %v27, [[REG1]]
|
||||
@@ -439,8 +435,7 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v
|
||||
; CHECK-Z14: # %bb.0:
|
||||
; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26
|
||||
; CHECK-Z14-DAG: vuphf [[REG0:%v[0-9]+]], %v0
|
||||
; CHECK-Z14-DAG: vmrlg [[REG1:%v[0-9]+]], %v0, %v0
|
||||
; CHECK-Z14-DAG: vuphf [[REG1]], [[REG1]]
|
||||
; CHECK-Z14-DAG: vuplf [[REG1:%v[0-9]+]], %v0
|
||||
; CHECK-Z14-NEXT: vsel %v24, %v28, %v25, [[REG0]]
|
||||
; CHECK-Z14-NEXT: vsel %v26, %v30, %v27, [[REG1]]
|
||||
; CHECK-Z14-NEXT: br %r14
|
||||
|
||||
@@ -143,19 +143,17 @@ define void @fun8(<2 x i64> %dwords, ptr %ptr) {
|
||||
; Test that this results in vectorized conversions.
|
||||
define void @fun9(ptr %Src, ptr %ptr) {
|
||||
; CHECK-LABEL: fun9
|
||||
; Z15: larl %r1, .LCPI9_0
|
||||
; Z15-NEXT: vl %v0, 16(%r2), 4
|
||||
; Z15: vl %v0, 16(%r2), 4
|
||||
; Z15-NEXT: vl %v1, 0(%r2), 4
|
||||
; Z15-NEXT: vl %v2, 0(%r1), 3
|
||||
; Z15-NEXT: vperm %v2, %v2, %v1, %v2
|
||||
; Z15-NEXT: vuplhh %v1, %v1
|
||||
; Z15-NEXT: vuplhh %v2, %v1
|
||||
; Z15-NEXT: vupllh %v1, %v1
|
||||
; Z15-NEXT: vuplhh %v0, %v0
|
||||
; Z15-NEXT: vcelfb %v2, %v2, 0, 0
|
||||
; Z15-NEXT: vcelfb %v1, %v1, 0, 0
|
||||
; Z15-NEXT: vcelfb %v0, %v0, 0, 0
|
||||
; Z15-NEXT: vsteg %v0, 32(%r3), 0
|
||||
; Z15-NEXT: vst %v2, 16(%r3), 4
|
||||
; Z15-NEXT: vst %v1, 0(%r3), 4
|
||||
; Z15-NEXT: vst %v1, 16(%r3), 4
|
||||
; Z15-NEXT: vst %v2, 0(%r3), 4
|
||||
; Z15-NEXT: br %r14
|
||||
|
||||
%Val = load <10 x i16>, ptr %Src
|
||||
|
||||
@@ -180,3 +180,33 @@ define <2 x i64> @f16(<16 x i32> %val) {
|
||||
%vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
|
||||
ret <2 x i64> %vec1
|
||||
}
|
||||
|
||||
; Test a shufflevector-based v2i8->v2i64 extension.
|
||||
define <2 x i64> @f17(<16 x i8> %val) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: vsegb %v24, %v24
|
||||
; CHECK: br %r14
|
||||
%shuf = shufflevector <16 x i8> %val, <16 x i8> poison, <2 x i32> <i32 7, i32 15>
|
||||
%ret = sext <2 x i8> %shuf to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a shufflevector-based v2i16->v2i64 extension.
|
||||
define <2 x i64> @f18(<8 x i16> %val) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: vsegh %v24, %v24
|
||||
; CHECK: br %r14
|
||||
%shuf = shufflevector <8 x i16> %val, <8 x i16> poison, <2 x i32> <i32 3, i32 7>
|
||||
%ret = sext <2 x i16> %shuf to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a shufflevector-based v2i32->v2i64 extension.
|
||||
define <2 x i64> @f19(<4 x i32> %val) {
|
||||
; CHECK-LABEL: f19:
|
||||
; CHECK: vsegf %v24, %v24
|
||||
; CHECK: br %r14
|
||||
%shuf = shufflevector <4 x i32> %val, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
|
||||
%ret = sext <2 x i32> %shuf to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
270
llvm/test/CodeGen/SystemZ/vec-unpack-01.ll
Normal file
270
llvm/test/CodeGen/SystemZ/vec-unpack-01.ll
Normal file
@@ -0,0 +1,270 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
define <8 x i16> @f1(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%1 = sext <8 x i8> %0 to <8 x i16>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i16> @f2(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%1 = sext <8 x i8> %0 to <8 x i16>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f3(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphh %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%1 = sext <4 x i16> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f4(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhw %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%1 = sext <4 x i16> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f5(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v0, %v24
|
||||
; CHECK-NEXT: vuphh %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%1 = sext <4 x i8> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f6(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v0, %v24
|
||||
; CHECK-NEXT: vuplhw %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%1 = sext <4 x i8> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f7(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v0, %v24
|
||||
; CHECK-NEXT: vuphh %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
|
||||
%1 = sext <4 x i8> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v0, %v24
|
||||
; CHECK-NEXT: vuplhw %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
|
||||
%1 = sext <4 x i8> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f9(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphf %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
%1 = sext <2 x i32> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f10(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplf %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
|
||||
%1 = sext <2 x i32> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f11(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphh %v0, %v24
|
||||
; CHECK-NEXT: vuphf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
|
||||
%1 = sext <2 x i16> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f12(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphh %v0, %v24
|
||||
; CHECK-NEXT: vuplf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> <i32 2, i32 3>
|
||||
%1 = sext <2 x i16> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f13(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhw %v0, %v24
|
||||
; CHECK-NEXT: vuphf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> <i32 4, i32 5>
|
||||
%1 = sext <2 x i16> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f14(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhw %v0, %v24
|
||||
; CHECK-NEXT: vuplf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <2 x i32> <i32 6, i32 7>
|
||||
%1 = sext <2 x i16> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f15(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v0, %v24
|
||||
; CHECK-NEXT: vuphh %v0, %v0
|
||||
; CHECK-NEXT: vuphf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 0, i32 1>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f16(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v0, %v24
|
||||
; CHECK-NEXT: vuphh %v0, %v0
|
||||
; CHECK-NEXT: vuplf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 2, i32 3>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f17(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v0, %v24
|
||||
; CHECK-NEXT: vuplhw %v0, %v0
|
||||
; CHECK-NEXT: vuphf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 4, i32 5>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f18(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphb %v0, %v24
|
||||
; CHECK-NEXT: vuplhw %v0, %v0
|
||||
; CHECK-NEXT: vuplf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 6, i32 7>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f19(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f19:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v0, %v24
|
||||
; CHECK-NEXT: vuphh %v0, %v0
|
||||
; CHECK-NEXT: vuphf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 8, i32 9>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f20(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f20:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v0, %v24
|
||||
; CHECK-NEXT: vuphh %v0, %v0
|
||||
; CHECK-NEXT: vuplf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 10, i32 11>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f21(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f21:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v0, %v24
|
||||
; CHECK-NEXT: vuplhw %v0, %v0
|
||||
; CHECK-NEXT: vuphf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 12, i32 13>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f22(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f22:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplb %v0, %v24
|
||||
; CHECK-NEXT: vuplhw %v0, %v0
|
||||
; CHECK-NEXT: vuplf %v24, %v0
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <2 x i32> <i32 14, i32 15>
|
||||
%1 = sext <2 x i8> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
79
llvm/test/CodeGen/SystemZ/vec-unpack-02.ll
Normal file
79
llvm/test/CodeGen/SystemZ/vec-unpack-02.ll
Normal file
@@ -0,0 +1,79 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
|
||||
|
||||
define i128 @f1(<2 x i64> %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphg %v0, %v24
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <2 x i64> %a, i32 0
|
||||
%1 = sext i64 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f2(<2 x i64> %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplg %v0, %v24
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <2 x i64> %a, i32 1
|
||||
%1 = sext i64 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f3(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphf %v0, %v24
|
||||
; CHECK-NEXT: vuphg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 0
|
||||
%1 = sext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f4(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplf %v0, %v24
|
||||
; CHECK-NEXT: vuphg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 1
|
||||
%1 = sext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f5(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuphf %v0, %v24
|
||||
; CHECK-NEXT: vuplg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 2
|
||||
%1 = sext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f6(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplf %v0, %v24
|
||||
; CHECK-NEXT: vuplg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 3
|
||||
%1 = sext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
70
llvm/test/CodeGen/SystemZ/vec-unpack-03.ll
Normal file
70
llvm/test/CodeGen/SystemZ/vec-unpack-03.ll
Normal file
@@ -0,0 +1,70 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
define <8 x i16> @f1(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhb %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%1 = zext <8 x i8> %0 to <8 x i16>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i16> @f2(<16 x i8> %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vupllb %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%1 = zext <8 x i8> %0 to <8 x i16>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f3(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhh %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%1 = zext <4 x i16> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @f4(<8 x i16> %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vupllh %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%1 = zext <4 x i16> %0 to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f5(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhf %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
%1 = zext <2 x i32> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @f6(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vupllf %v24, %v24
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
|
||||
%1 = zext <2 x i32> %0 to <2 x i64>
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
79
llvm/test/CodeGen/SystemZ/vec-unpack-04.ll
Normal file
79
llvm/test/CodeGen/SystemZ/vec-unpack-04.ll
Normal file
@@ -0,0 +1,79 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
|
||||
|
||||
define i128 @f1(<2 x i64> %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhg %v0, %v24
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <2 x i64> %a, i32 0
|
||||
%1 = zext i64 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f2(<2 x i64> %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vupllg %v0, %v24
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <2 x i64> %a, i32 1
|
||||
%1 = zext i64 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f3(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhf %v0, %v24
|
||||
; CHECK-NEXT: vuplhg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 0
|
||||
%1 = zext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f4(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vupllf %v0, %v24
|
||||
; CHECK-NEXT: vuplhg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 1
|
||||
%1 = zext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f5(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vuplhf %v0, %v24
|
||||
; CHECK-NEXT: vupllg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 2
|
||||
%1 = zext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @f6(<4 x i32> %a) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: # %bb.0: # %start
|
||||
; CHECK-NEXT: vupllf %v0, %v24
|
||||
; CHECK-NEXT: vupllg %v0, %v0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
start:
|
||||
%0 = extractelement <4 x i32> %a, i32 3
|
||||
%1 = zext i32 %0 to i128
|
||||
ret i128 %1
|
||||
}
|
||||
Reference in New Issue
Block a user