[PowerPC] Fix i64 to vector lowering on big endian

Lowering for scalar to vector would skip if current subtarget is big endian and the scalar is larger or equal than 64 bits. However there's some issue in implementation that SToVRHS may refer to SToVLHS's scalar size if SToVLHS is present, which leads to some crash.o Reviewed By: nemanjai, shchenz Differential Revision: https://reviews.llvm.org/D105094
2021-07-08 11:05:09 +08:00
parent 31d10ea10e
commit a22ecb4508
2 changed files with 42 additions and 11 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14524,18 +14524,15 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();
-    unsigned InElemSizeInBits =
-        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
-                : SToVRHS.getValueType().getScalarSizeInBits();
-    unsigned OutElemSizeInBits = SToVLHS
-                                     ? LHS.getValueType().getScalarSizeInBits()
-                                     : RHS.getValueType().getScalarSizeInBits();
-
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast.
-    unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
+    unsigned ValidLaneWidth =
+        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
+                      LHS.getValueType().getScalarSizeInBits()
+                : SToVRHS.getValueType().getVectorNumElements() /
+                      RHS.getValueType().getScalarSizeInBits();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is.
@@ -14548,9 +14545,10 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
-    // the value into element zero.
+    // the value into element zero. Since scalar size of LHS and RHS may differ
+    // after isScalarToVec, this should be checked using their own sizes.
    if (SToVLHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      // Set up the values for the shuffle vector fixup.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
@@ -14560,7 +14558,7 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
      LHS = SToVLHS;
    }
    if (SToVRHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2561,3 +2561,36 @@ entry:
  ret double %vecext
 ; FIXME: add check patterns when variable element extraction is implemented
 }
+
+; To check when LHS is i32 to vector and RHS is i64 to vector,
+; the combination should be skipped properly.
+define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
+; CHECK-LABEL: buildi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 32
+; CHECK-NEXT:    mtfprd f1, r3
+; CHECK-NEXT:    mtfprd f0, r4
+; CHECK-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildi2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtfprwz f0, r4
+; CHECK-LE-NEXT:    mtfprd f1, r3
+; CHECK-LE-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildi2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    sldi 4, 4, 32
+; CHECK-AIX-NEXT:    mtfprd 1, 3
+; CHECK-AIX-NEXT:    mtfprd 0, 4
+; CHECK-AIX-NEXT:    xxmrghd 34, 0, 1
+; CHECK-AIX-NEXT:    blr
+entry:
+  %lhs.i32 = insertelement <4 x i32> undef, i32 %arg1, i32 0
+  %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+  %lhs = bitcast <4 x i32> %lhs.i32 to <2 x i64>
+  %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle
+}