Files
clang-p2996/llvm/test/Transforms/InstCombine/select-extractelement.ll
Philip Reames e6ad9ef4e7 [instcombine] Canonicalize constant index type to i64 for extractelement/insertelement
The basic idea behind this is that (a) having a single canonical type makes CSE easier, and (b) many of our transforms are inconsistent about which types we end up with, depending on visit order.

I'm restricting this to constant indices because, for non-constant indices, we'd have to decide whether the simplicity is worth the extra instructions. For constants, there are no extra instructions.

We chose the canonical type as i64 arbitrarily.  We might consider changing this to something else in the future if we have cause.

Differential Revision: https://reviews.llvm.org/D115387
2021-12-13 16:56:22 -08:00

237 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
; External function taking a whole <4 x float>. The tests below call it to give
; a select result an additional use that is not an extractelement.
declare void @v4float_user(<4 x float>) #0
; A single constant-index extractelement of a scalar-condition select between
; two vectors. The expected output keeps the select on the full vectors,
; replaces the 'icmp ne' with 'icmp eq' while swapping the select arms, and
; changes the constant index type from i32 to i64.
define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i64 2
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
ret float %extract
}
; Multiple extractelements
; Lanes 1 and 2 of a scalar-condition select are extracted and re-inserted into
; a <2 x float>. The expected output replaces the extract/insert chain with one
; shufflevector of the select using mask <1, 2>.
define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_two_select(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> [[BUILD2]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract1 = extractelement <4 x float> %sel, i32 1
%extract2 = extractelement <4 x float> %sel, i32 2
%build1 = insertelement <2 x float> undef, float %extract1, i32 0
%build2 = insertelement <2 x float> %build1, float %extract2, i32 1
ret <2 x float> %build2
}
; Select has an extra non-extractelement user, don't change it
; The select result is also passed whole to @v4float_user. The expected output
; still contains the vector select feeding both the extractelement and the
; call; only the compare/arm order and the index type (i32 -> i64) differ.
define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
; CHECK-LABEL: @extract_one_select_user(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i64 2
; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne i32 %c, 0
%sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
call void @v4float_user(<4 x float> %sel)
ret float %extract
}
; Same shape as the extra-user case above, but the select condition is a
; <4 x i1> vector produced by a vector compare. The select result is used by
; both an extractelement and a call to @v4float_user, and the expected output
; keeps the vector select with both users.
define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect_user(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i64 2
; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %sel, i32 2
call void @v4float_user(<4 x float> %sel)
ret float %extract
}
; Do not convert the vector select into a scalar select. That would increase
; the instruction count and potentially obfuscate a vector min/max idiom.
; Here the select's only user is the extractelement of lane 0; the expected
; output still performs the compare and select on full vectors and extracts
; afterwards.
define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_one_vselect(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i64 0
; CHECK-NEXT: ret float [[EXTRACT]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract = extractelement <4 x float> %select, i32 0
ret float %extract
}
; Multiple extractelements from a vector select
; Lanes 1 and 2 of a vector-condition select are extracted and re-inserted into
; a <2 x float>. The expected output keeps the vector select and replaces the
; extract/insert chain with one shufflevector using mask <1, 2>.
define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @extract_two_vselect(
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x float> [[BUILD2]]
;
%cmp = icmp ne <4 x i32> %c, zeroinitializer
%sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%extract1 = extractelement <4 x float> %sel, i32 1
%extract2 = extractelement <4 x float> %sel, i32 2
%build1 = insertelement <2 x float> undef, float %extract1, i32 0
%build2 = insertelement <2 x float> %build1, float %extract2, i32 1
ret <2 x float> %build2
}
; The vector selects are not decomposed into scalar selects because that would increase
; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
; Test multiple extractelements
; The input repeats one pattern for each of the four lanes: extract lane i of
; %c, compare it against zero, select between the whole vectors %a and %b,
; extract lane i of that select and insert it into lane i of the result.
; In the expected output each per-lane select still operates on whole vectors,
; and each extract/insert pair after the first becomes a shufflevector that
; takes lane i from the corresponding select (mask entries 5, 6 and 7).
define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_vector_select(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[C:%.*]], i64 0
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: [[A_SINK:%.*]] = select i1 [[TOBOOL_NOT]], <4 x float> [[B:%.*]], <4 x float> [[A:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i64 1
; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i64 2
; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i64 3
; CHECK-NEXT: [[TOBOOL11_NOT:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11_NOT]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP6]]
;
entry:
; Lane 0: condition from %c[0], result written to lane 0.
%0 = extractelement <4 x i32> %c, i32 0
%tobool = icmp ne i32 %0, 0
%a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
%1 = extractelement <4 x float> %a.sink, i32 0
%2 = insertelement <4 x float> undef, float %1, i32 0
; Lane 1: condition from %c[1], result written to lane 1.
%3 = extractelement <4 x i32> %c, i32 1
%tobool1 = icmp ne i32 %3, 0
%a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
%4 = extractelement <4 x float> %a.sink1, i32 1
%5 = insertelement <4 x float> %2, float %4, i32 1
; Lane 2: condition from %c[2], result written to lane 2.
%6 = extractelement <4 x i32> %c, i32 2
%tobool6 = icmp ne i32 %6, 0
%a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
%7 = extractelement <4 x float> %a.sink2, i32 2
%8 = insertelement <4 x float> %5, float %7, i32 2
; Lane 3: condition from %c[3], result written to lane 3.
%9 = extractelement <4 x i32> %c, i32 3
%tobool11 = icmp ne i32 %9, 0
%a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
%10 = extractelement <4 x float> %a.sink3, i32 3
%11 = insertelement <4 x float> %8, float %10, i32 3
ret <4 x float> %11
}
; The scalar select condition is lane 3 of a <4 x i1> vector, extracted with a
; constant index and used only by the select. The expected output replaces the
; extractelement with a shufflevector that splats lane 3 and uses that splat as
; a vector select condition.
define <4 x i32> @extract_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond(
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 3
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; The select condition is already a splat of lane 3 of %condv built with
; shufflevector. The expected output is the same two instructions as the input,
; i.e. the splat-condition vector select is left as is.
define <4 x i32> @splat_cond(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @splat_cond(
; CHECK-NEXT: [[SPLATCOND:%.*]] = shufflevector <4 x i1> [[CONDV:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[SPLATCOND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%splatcond = shufflevector <4 x i1> %condv, <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%r = select <4 x i1> %splatcond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; External function taking an i1; called below to give an extracted condition
; a second use besides the select.
declare void @extra_use(i1)
; Negative test
; The extracted condition is also passed to @extra_use, so it has a use other
; than the select. The expected output keeps the extractelement and the
; scalar-condition select; only the constant index type changes to i64.
define <4 x i32> @extract_cond_extra_use(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv) {
; CHECK-LABEL: @extract_cond_extra_use(
; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i64 3
; CHECK-NEXT: call void @extra_use(i1 [[COND]])
; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 3
call void @extra_use(i1 %cond)
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; Negative test
; The extract index is the function argument %index rather than a constant.
; The expected output is the same as the input: the extractelement and the
; scalar-condition select both remain, and the index keeps its i32 type.
define <4 x i32> @extract_cond_variable_index(<4 x i32> %x, <4 x i32> %y, <4 x i1> %condv, i32 %index) {
; CHECK-LABEL: @extract_cond_variable_index(
; CHECK-NEXT: [[COND:%.*]] = extractelement <4 x i1> [[CONDV:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[COND]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <4 x i1> %condv, i32 %index
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; IR shuffle can alter the number of elements in the vector, so this is ok.
; The condition is lane 1 of a <5 x i1> vector while the selected values have
; four elements. The expected output splats lane 1 with a shufflevector whose
; inputs are <5 x i1> and whose result is <4 x i1>, then does a vector select.
define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1> %condv) {
; CHECK-LABEL: @extract_cond_type_mismatch(
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <5 x i1> [[CONDV:%.*]], <5 x i1> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[DOTSPLAT]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%cond = extractelement <5 x i1> %condv, i32 1
%r = select i1 %cond, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %r
}
; This would infinite loop because a select transform would create
; a complete -1 vector constant and demanded elements would change
; it back to partial undef.
; Only lane 0 of the final select is extracted. In the expected output the
; 'icmp slt' and the xor of the condition are gone: the compare is 'icmp sge',
; the false arm of the first select is <true, poison>, and the
; zeroinitializer arm of the second select is <0, poison>.
define i32 @inf_loop_partial_undef(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @inf_loop_partial_undef(
; CHECK-NEXT: [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], <i32 2147483647, i32 2147483647>
; CHECK-NEXT: [[T6:%.*]] = icmp sge <2 x i32> [[T5]], [[X:%.*]]
; CHECK-NEXT: [[AB:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[T7:%.*]] = select <2 x i1> [[AB]], <2 x i1> [[T6]], <2 x i1> <i1 true, i1 poison>
; CHECK-NEXT: [[P:%.*]] = select <2 x i1> [[T7]], <2 x i32> <i32 0, i32 poison>, <2 x i32> [[Y]]
; CHECK-NEXT: [[T11:%.*]] = extractelement <2 x i32> [[P]], i64 0
; CHECK-NEXT: ret i32 [[T11]]
;
%t5 = add nsw <2 x i32> %y, <i32 2147483647, i32 2147483647>
%t6 = icmp slt <2 x i32> %t5, %x
%ab = and <2 x i1> %a, %b
%t7 = select <2 x i1> %ab, <2 x i1> %t6, <2 x i1> <i1 0, i1 poison>
%t10 = xor <2 x i1> %t7, <i1 true, i1 poison>
%p = select <2 x i1> %t10, <2 x i32> zeroinitializer, <2 x i32> %y
%t11 = extractelement <2 x i32> %p, i32 0
ret i32 %t11
}
; Attribute group referenced as #0 by @v4float_user and by the float-vector
; test functions in this file.
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }