From a2d129b792cc49224f98c83d2279b2e19bfa700f Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Fri, 11 Apr 2025 10:53:48 -0700
Subject: [PATCH] [SLP]Fix a crash when trying to reduce in revec after
 minbitwidth analysis

Need to use the original scalar type when building the reduction, and
the scalar element type when performing the cast, to avoid a compiler
crash.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  6 +--
 .../SLPVectorizer/SystemZ/revec-fix-128169.ll | 37 ++++++++++++++++++-
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c854821417c9..b7ee5bfc310d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22141,8 +22141,8 @@ public:
       if (isa<FixedVectorType>(ScalarTy)) {
         assert(SLPReVec && "FixedVectorType is not expected.");
         unsigned ScalarTyNumElements = getNumElements(ScalarTy);
-        Value *ReducedSubTree = PoisonValue::get(getWidenedType(
-            VectorizedRoot->getType()->getScalarType(), ScalarTyNumElements));
+        Value *ReducedSubTree = PoisonValue::get(
+            getWidenedType(ScalarTy->getScalarType(), ScalarTyNumElements));
         for (unsigned I : seq<unsigned>(ScalarTyNumElements)) {
           // Do reduction for each lane.
           // e.g., do reduce add for
@@ -22359,7 +22359,7 @@ private:
                        Type *DestTy) {
     Value *Rdx = emitReduction(Vec, Builder, &TTI, DestTy);
     if (Rdx->getType() != DestTy->getScalarType())
-      Rdx = Builder.CreateIntCast(Rdx, DestTy, IsSigned);
+      Rdx = Builder.CreateIntCast(Rdx, DestTy->getScalarType(), IsSigned);
     // Improved analysis for add/fadd/xor reductions with same scale
     // factor for all operands of reductions. We can emit scalar ops for
     // them instead.
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
index 5320c6b9cb87..b9f35451b02a 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec -slp-threshold=-1000 < %s | FileCheck %s --check-prefix=THRESH
 
 define void @e(<4 x i16> %0) {
 ; CHECK-LABEL: @e(
@@ -22,6 +23,40 @@ define void @e(<4 x i16> %0) {
 ; CHECK-NEXT:    [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY]]
 ;
+; THRESH-LABEL: @e(
+; THRESH-NEXT:  entry:
+; THRESH-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0)
+; THRESH-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP1]], <4 x i16> zeroinitializer, i64 4)
+; THRESH-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 4)
+; THRESH-NEXT:    [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0)
+; THRESH-NEXT:    [[TMP5:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4)
+; THRESH-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP5]], <4 x i16> zeroinitializer, i64 8)
+; THRESH-NEXT:    [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 12)
+; THRESH-NEXT:    br label [[VECTOR_BODY:%.*]]
+; THRESH:       vector.body:
+; THRESH-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
+; THRESH-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
+; THRESH-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP3]], <4 x i16> [[VEC_IND]], i64 0)
+; THRESH-NEXT:    [[TMP9:%.*]] = add <8 x i16> [[TMP2]], [[TMP8]]
+; THRESH-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT:    [[TMP11:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP10]], <4 x i16> [[TMP0:%.*]], i64 4)
+; THRESH-NEXT:    [[TMP12:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP11]], <8 x i16> [[TMP9]], i64 8)
+; THRESH-NEXT:    [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP12]], [[TMP7]]
+; THRESH-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; THRESH-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
+; THRESH-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; THRESH-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]])
+; THRESH-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; THRESH-NEXT:    [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
+; THRESH-NEXT:    [[TMP20:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; THRESH-NEXT:    [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
+; THRESH-NEXT:    [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TMP15]], i32 0
+; THRESH-NEXT:    [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TMP17]], i32 1
+; THRESH-NEXT:    [[TMP24:%.*]] = insertelement <4 x i1> [[TMP23]], i1 [[TMP19]], i32 2
+; THRESH-NEXT:    [[TMP25:%.*]] = insertelement <4 x i1> [[TMP24]], i1 [[TMP21]], i32 3
+; THRESH-NEXT:    [[TMP26]] = zext <4 x i1> [[TMP25]] to <4 x i32>
+; THRESH-NEXT:    br label [[VECTOR_BODY]]
+;
 entry:
   br label %vector.body
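
A note on the crash mechanics, with a minimal sketch. Under revec the "scalar"
type is itself a FixedVectorType, so in the second hunk DestTy is a vector
(<4 x i32> in the test above) while emitReduction returns a scalar per-lane
value (an i1 from llvm.vector.reduce.or.v4i1). The pre-fix call
CreateIntCast(Rdx, DestTy, IsSigned) would therefore request a scalar-to-vector
integer cast, which hits an assertion in CastInst's cast-opcode selection in an
assertions-enabled build; casting to DestTy->getScalarType() is well formed.
The standalone C++ sketch below reproduces just that pattern against the public
IRBuilder API; the file and symbol names are illustrative, not SLPVectorizer
internals:

// revec-cast-demo.cpp - minimal sketch of the scalar-vs-vector cast issue.
// Assumed build: clang++ revec-cast-demo.cpp `llvm-config --cxxflags --ldflags --libs core`
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("revec-cast-demo", Ctx);
  IRBuilder<> Builder(Ctx);

  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "demo", M);
  Builder.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  // Stand-ins for the values in the patch: Rdx plays the scalar per-lane
  // reduction result (i1, as produced by the reduce.or calls in the test),
  // DestTy plays the original revec scalar type (<4 x i32>).
  Value *Rdx = Builder.getInt1(true);
  Type *DestTy = FixedVectorType::get(Builder.getInt32Ty(), 4);

  // Pre-fix shape: Builder.CreateIntCast(Rdx, DestTy, /*isSigned=*/false)
  // would ask for an i1 -> <4 x i32> integer cast, which is not a valid
  // cast. Post-fix shape: cast to the element type instead.
  if (Rdx->getType() != DestTy->getScalarType())
    Rdx = Builder.CreateIntCast(Rdx, DestTy->getScalarType(), /*isSigned=*/false);

  Builder.CreateRetVoid();
  M.print(outs(), nullptr); // dump the well-formed module
  return 0;
}

The first hunk is the same type direction applied when building the reduction
value: the widened poison vector must take its element type from ScalarTy (the
original scalar type) because, after minbitwidth analysis, VectorizedRoot may
already carry a narrower element type than the scalars being reduced.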