This fixes a missed optimization caused by the `foldBitcastExtElt`
pattern interfering with other combine patterns. In the case I was
hitting, we have IR that combines two vectors into a new larger vector
by extracting elements and inserting them into the new vector.
```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
%avec = bitcast i32 %a to <2 x half>
%a0 = extractelement <2 x half> %avec, i32 0
%a1 = extractelement <2 x half> %avec, i32 1
%bvec = bitcast i32 %b to <2 x half>
%b0 = extractelement <2 x half> %bvec, i32 0
%b1 = extractelement <2 x half> %bvec, i32 1
%ins0 = insertelement <4 x half> undef, half %a0, i32 0
%ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
%ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
%ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
ret <4 x half> %ins3
}
```
With the current behavior, `InstCombine` converts each vector extract
sequence to
```llvm
%tmp = trunc i32 %a to i16
%a0 = bitcast i16 %tmp to half
%a1 = extractelement <2 x half> %avec, i32 1
```
where the extraction of `%a0` is now done by truncating the original
integer. While on its own this is fairly reasonable, in this case it
also blocks the pattern that converts `extractelement` /
`insertelement` chains into shuffles, which gives the simpler overall result:
```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
%avec = bitcast i32 %a to <2 x half>
%bvec = bitcast i32 %b to <2 x half>
%ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x half> %ins3
}
```
In this PR I fix the conflict by obeying the `hasOneUse` check even if
there is no shift instruction required. In these cases we can't remove
the vector completely, so the pattern has less benefit anyway.
Also, for what it's worth, I think dropping the `hasOneUse` check for the
0th element might have been a mistake in the first place. Looking at commit
535c5d56a7,
the commit message only mentions loosening the `isDesirableIntType`
requirement and doesn't mention changing the `hasOneUse` check at all.
116 lines
5.0 KiB
LLVM
116 lines
5.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
|
|
|
|
; insertelements should fold to shuffle
;
; Input:    two chained inserts of constants into %x (1.0 at lane 1, then
;           2.0 at lane 2), both with constant indices.
; Expected: a single shufflevector of %x with a constant vector. In the mask
;           <0, 5, 6, 3>, indices 0 and 3 keep the lanes of %x, while 5 and 6
;           select lanes 1 and 2 of the constant operand (1.0 and 2.0).
|
|
define <4 x float> @foo(<4 x float> %x) {
|
|
; CHECK-LABEL: @foo(
|
|
; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> <float poison, float 1.000000e+00, float 2.000000e+00, float poison>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
|
; CHECK-NEXT: ret <4 x float> [[INS2]]
|
|
;
|
|
%ins1 = insertelement<4 x float> %x, float 1.0, i32 1
|
|
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
|
|
ret <4 x float> %ins2
|
|
}
|
|
|
|
; Insert of a constant is canonicalized ahead of insert of a variable.
;
; Input:    the variable %a is inserted at lane 1 first, then the constant
;           2.0 at lane 2.
; Expected: the order is swapped -- the constant insert (lane 2) comes first
;           and the variable insert (lane 1) second. No shuffle is formed.
;           The expected output spells the constant indices as i64.
|
|
|
|
define <4 x float> @bar(<4 x float> %x, float %a) {
|
|
; CHECK-LABEL: @bar(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[X:%.*]], float 2.000000e+00, i64 2
|
|
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A:%.*]], i64 1
|
|
; CHECK-NEXT: ret <4 x float> [[INS2]]
|
|
;
|
|
%ins1 = insertelement<4 x float> %x, float %a, i32 1
|
|
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
|
|
ret <4 x float> %ins2
|
|
}
|
|
|
|
; Negative case for the shuffle fold: the second insert uses the variable
; index %a.
;
; Input:    constant 1.0 inserted at constant lane 1, then constant 2.0
;           inserted at lane %a.
; Expected: both insertelement instructions remain in the same order; only
;           the constant index is re-spelled as i64.
define <4 x float> @baz(<4 x float> %x, i32 %a) {
|
|
; CHECK-LABEL: @baz(
|
|
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[X:%.*]], float 1.000000e+00, i64 1
|
|
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 [[A:%.*]]
|
|
; CHECK-NEXT: ret <4 x float> [[INS2]]
|
|
;
|
|
%ins1 = insertelement<4 x float> %x, float 1.0, i32 1
|
|
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 %a
|
|
ret <4 x float> %ins2
|
|
}
|
|
|
|
; insertelements should fold to shuffle
;
; Input:    six chained inserts: 1.0 at lane 3, 5.0 at lane %a, 3.0 at lane 2,
;           1.0 at lane 1, 2.0 at lane 2, and finally 7.0 at lane %a.
; Expected: the leading constant-index insert and the first variable-index
;           insert stay as they are. The run of constant-index inserts between
;           the two variable-index inserts becomes one shufflevector with
;           mask <0, 5, 6, 3>; the 3.0 written to lane 2 does not appear in
;           the expected output because lane 2 is overwritten by 2.0. The
;           trailing variable-index insert of 7.0 is kept after the shuffle.
|
|
define <4 x float> @bazz(<4 x float> %x, i32 %a) {
|
|
; CHECK-LABEL: @bazz(
|
|
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[X:%.*]], float 1.000000e+00, i64 3
|
|
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 5.000000e+00, i32 [[A:%.*]]
|
|
; CHECK-NEXT: [[INS5:%.*]] = shufflevector <4 x float> [[INS2]], <4 x float> <float poison, float 1.000000e+00, float 2.000000e+00, float poison>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
|
; CHECK-NEXT: [[INS6:%.*]] = insertelement <4 x float> [[INS5]], float 7.000000e+00, i32 [[A]]
|
|
; CHECK-NEXT: ret <4 x float> [[INS6]]
|
|
;
|
|
%ins1 = insertelement<4 x float> %x, float 1.0, i32 3
|
|
%ins2 = insertelement<4 x float> %ins1, float 5.0, i32 %a
|
|
%ins3 = insertelement<4 x float> %ins2, float 3.0, i32 2
|
|
%ins4 = insertelement<4 x float> %ins3, float 1.0, i32 1
|
|
%ins5 = insertelement<4 x float> %ins4, float 2.0, i32 2
|
|
%ins6 = insertelement<4 x float> %ins5, float 7.0, i32 %a
|
|
ret <4 x float> %ins6
|
|
}
|
|
|
|
; Out of bounds index folds to poison
;
; Input:    the first insert uses index 5 on a 4-element vector; the second
;           inserts 2.0 at the in-bounds lane 2.
; Expected: no instructions remain besides the return of a constant vector
;           in which every lane except lane 2 is poison and lane 2 is 2.0.
|
|
define <4 x float> @bazzz(<4 x float> %x) {
|
|
; CHECK-LABEL: @bazzz(
|
|
; CHECK-NEXT: ret <4 x float> <float poison, float poison, float 2.000000e+00, float poison>
|
|
;
|
|
%ins1 = insertelement<4 x float> %x, float 1.0, i32 5
|
|
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
|
|
ret <4 x float> %ins2
|
|
}
|
|
|
|
; Same shape as the out-of-bounds case in @bazzz, but the first insert uses
; an undef index instead.
;
; Input:    insert of 1.0 at index undef, then insert of 2.0 at lane 2.
; Expected: the same constant result as @bazzz -- poison in every lane except
;           lane 2, which holds 2.0.
define <4 x float> @bazzzz(<4 x float> %x) {
|
|
; CHECK-LABEL: @bazzzz(
|
|
; CHECK-NEXT: ret <4 x float> <float poison, float poison, float 2.000000e+00, float poison>
|
|
;
|
|
%ins1 = insertelement<4 x float> %x, float 1.0, i32 undef
|
|
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
|
|
ret <4 x float> %ins2
|
|
}
|
|
|
|
; Fully constant chain, with the base operand written as an insertelement
; constant expression.
;
; Input:    base <1.0, 2.0, 3.0, undef> with 4.0 inserted at lane 3 (constant
;           expression), then 5.0 at lane 1 and 10.0 at lane 2.
; Expected: everything folds to the constant <1.0, 5.0, 10.0, 4.0>.
define <4 x float> @bazzzzz() {
|
|
; CHECK-LABEL: @bazzzzz(
|
|
; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
|
|
;
|
|
%ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
|
|
%ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
|
|
ret <4 x float> %ins2
|
|
}
|
|
|
|
; Constant chain whose base operand is a shufflevector constant expression
; nested inside an insertelement constant expression.
;
; Input:    shuffle of undef with <1.0, 2.0, 3.0, 4.0> using mask
;           <0, 5, undef, 6>, then 4.0 inserted at lane 3 (constant
;           expression) and 5.0 inserted at lane 1.
; Expected: folds to the constant <undef, 5.0, undef, 4.0>; lanes 0 and 2
;           stay undef, lanes 1 and 3 hold the inserted values.
; Note:     the parameters %x and %a are not referenced in the body.
define <4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
|
|
; CHECK-LABEL: @bazzzzzz(
|
|
; CHECK-NEXT: ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
|
|
;
|
|
%ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
|
|
ret <4 x float> %ins1
|
|
}
|
|
|
|
; test that foldBitcastExtElt doesn't interfere with shuffle folding
;
; Input:    %a and %b are each bitcast from i32 to <2 x half>; both lanes of
;           each bitcast are extracted and inserted, in order, into lanes
;           0..3 of a <4 x half> that starts as undef. Each bitcast therefore
;           has two extractelement users.
; Expected: both bitcasts are kept and the whole extract/insert chain becomes
;           a single shufflevector of the two <2 x half> values with mask
;           <0, 1, 2, 3> (i.e. their concatenation). No trunc of the original
;           integers appears in the expected output.
|
|
|
|
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
|
|
; CHECK-LABEL: @bitcast_extract_insert_to_shuffle(
|
|
; CHECK-NEXT: [[AVEC:%.*]] = bitcast i32 [[A:%.*]] to <2 x half>
|
|
; CHECK-NEXT: [[BVEC:%.*]] = bitcast i32 [[B:%.*]] to <2 x half>
|
|
; CHECK-NEXT: [[INS3:%.*]] = shufflevector <2 x half> [[AVEC]], <2 x half> [[BVEC]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; CHECK-NEXT: ret <4 x half> [[INS3]]
|
|
;
|
|
%avec = bitcast i32 %a to <2 x half>
|
|
%a0 = extractelement <2 x half> %avec, i32 0
|
|
%a1 = extractelement <2 x half> %avec, i32 1
|
|
%bvec = bitcast i32 %b to <2 x half>
|
|
%b0 = extractelement <2 x half> %bvec, i32 0
|
|
%b1 = extractelement <2 x half> %bvec, i32 1
|
|
%ins0 = insertelement <4 x half> undef, half %a0, i32 0
|
|
%ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
|
|
%ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
|
|
%ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
|
|
ret <4 x half> %ins3
|
|
}
|
|
|
|
|