Files
clang-p2996/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
peterbell10 a3f2e01c95 [InstCombine] Only fold extract element to trunc if vector hasOneUse (#115627)
This fixes a missed optimization caused by the `foldBitcastExtElt`
pattern interfering with other combine patterns. In the case I was
hitting, we have IR that combines two vectors into a new larger vector
by extracting elements and inserting them into the new vector.

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %a0 = extractelement <2 x half> %avec, i32 0
  %a1 = extractelement <2 x half> %avec, i32 1
  %bvec = bitcast i32 %b to <2 x half>
  %b0 = extractelement <2 x half> %bvec, i32 0
  %b1 = extractelement <2 x half> %bvec, i32 1
  %ins0 = insertelement <4 x half> undef, half %a0, i32 0
  %ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
  %ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
  %ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
  ret <4 x half> %ins3
}
```

With the current behavior, `InstCombine` converts each vector extract
sequence to

```llvm
  %tmp = trunc i32 %a to i16
  %a0 = bitcast i16 %tmp to half
  %a1 = extractelement <2 x half> %avec, i32 1
```

where the extraction of `%a0` is now done by truncating the original
integer. While on it's own this is fairly reasonable, in this case it
also blocks the pattern which converts `extractelement` -
`insertelement` into shuffles which gives the overall simpler result:

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %bvec = bitcast i32 %b to <2 x half>
  %ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %ins3
}
```

In this PR I fix the conflict by obeying the `hasOneUse` check even if
there is no shift instruction required. In these cases we can't remove
the vector completely, so the pattern has less benefit anyway.

Also fwiw, I think dropping the `hasOneUse` check for the 0th element
might have been a mistake in the first place. Looking at
535c5d56a7
the commit message only mentions loosening the `isDesirableIntType`
requirement and doesn't mention changing the `hasOneUse` check at all.
2024-11-20 13:06:57 -08:00

116 lines
5.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; insertelements should fold to shuffle
define <4 x float> @foo(<4 x float> %x) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> <float poison, float 1.000000e+00, float 2.000000e+00, float poison>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 1
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
; Insert of a constant is canonicalized ahead of insert of a variable.
define <4 x float> @bar(<4 x float> %x, float %a) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[X:%.*]], float 2.000000e+00, i64 2
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A:%.*]], i64 1
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float %a, i32 1
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
define <4 x float> @baz(<4 x float> %x, i32 %a) {
; CHECK-LABEL: @baz(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[X:%.*]], float 1.000000e+00, i64 1
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 [[A:%.*]]
; CHECK-NEXT: ret <4 x float> [[INS2]]
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 1
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 %a
ret <4 x float> %ins2
}
; insertelements should fold to shuffle
define <4 x float> @bazz(<4 x float> %x, i32 %a) {
; CHECK-LABEL: @bazz(
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> [[X:%.*]], float 1.000000e+00, i64 3
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 5.000000e+00, i32 [[A:%.*]]
; CHECK-NEXT: [[INS5:%.*]] = shufflevector <4 x float> [[INS2]], <4 x float> <float poison, float 1.000000e+00, float 2.000000e+00, float poison>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[INS6:%.*]] = insertelement <4 x float> [[INS5]], float 7.000000e+00, i32 [[A]]
; CHECK-NEXT: ret <4 x float> [[INS6]]
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 3
%ins2 = insertelement<4 x float> %ins1, float 5.0, i32 %a
%ins3 = insertelement<4 x float> %ins2, float 3.0, i32 2
%ins4 = insertelement<4 x float> %ins3, float 1.0, i32 1
%ins5 = insertelement<4 x float> %ins4, float 2.0, i32 2
%ins6 = insertelement<4 x float> %ins5, float 7.0, i32 %a
ret <4 x float> %ins6
}
; Out of bounds index folds to poison
define <4 x float> @bazzz(<4 x float> %x) {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT: ret <4 x float> <float poison, float poison, float 2.000000e+00, float poison>
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 5
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
define <4 x float> @bazzzz(<4 x float> %x) {
; CHECK-LABEL: @bazzzz(
; CHECK-NEXT: ret <4 x float> <float poison, float poison, float 2.000000e+00, float poison>
;
%ins1 = insertelement<4 x float> %x, float 1.0, i32 undef
%ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
ret <4 x float> %ins2
}
define <4 x float> @bazzzzz() {
; CHECK-LABEL: @bazzzzz(
; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
;
%ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
%ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
ret <4 x float> %ins2
}
define <4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
; CHECK-LABEL: @bazzzzzz(
; CHECK-NEXT: ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
;
%ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
ret <4 x float> %ins1
}
; test that foldBitcastExtElt doesn't interfere with shuffle folding
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
; CHECK-LABEL: @bitcast_extract_insert_to_shuffle(
; CHECK-NEXT: [[AVEC:%.*]] = bitcast i32 [[A:%.*]] to <2 x half>
; CHECK-NEXT: [[BVEC:%.*]] = bitcast i32 [[B:%.*]] to <2 x half>
; CHECK-NEXT: [[INS3:%.*]] = shufflevector <2 x half> [[AVEC]], <2 x half> [[BVEC]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x half> [[INS3]]
;
%avec = bitcast i32 %a to <2 x half>
%a0 = extractelement <2 x half> %avec, i32 0
%a1 = extractelement <2 x half> %avec, i32 1
%bvec = bitcast i32 %b to <2 x half>
%b0 = extractelement <2 x half> %bvec, i32 0
%b1 = extractelement <2 x half> %bvec, i32 1
%ins0 = insertelement <4 x half> undef, half %a0, i32 0
%ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
%ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
%ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
ret <4 x half> %ins3
}