After SLP + LTO we may have have reduction(shuffle V, poison, mask). This can be simplified to just reduction(V) if the mask is only for single vector and just all elements from this vector are permuted, without reusing, replacing with undefs and/or other values, etc. Differential Revision: https://reviews.llvm.org/D105053
271 lines
12 KiB
LLVM
271 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
|
|
|
define i32 @reduce_add(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_add(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_or(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_or(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
|
|
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_and(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_and(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
|
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_xor(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_xor(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
|
|
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_umax(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_umax(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
|
|
%res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_umin(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_umin(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
|
|
%res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_smax(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_smax(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
|
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_smin(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_smin(
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define float @reduce_fmax(<4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fmax(
|
|
; CHECK-NEXT: [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
|
%res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
define float @reduce_fmin(<4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fmin(
|
|
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
define float @reduce_fadd(float %a, <4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fadd(
|
|
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
define float @reduce_fmul(float %a, <4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fmul(
|
|
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
; Failed cases
|
|
; TODO: simplify the reductions for shuffles resulting in undef/poison elements.
|
|
|
|
define i32 @reduce_add_failed(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_add_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
|
|
%res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_or_failed(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_or_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
|
|
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_and_failed(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_and_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
|
|
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_xor_failed(<4 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_xor_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
|
|
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_umax_failed(<2 x i32> %x, <2 x i32> %y) {
|
|
; CHECK-LABEL: @reduce_umax_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <4 x i32> <i32 2, i32 1, i32 3, i32 0>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
|
|
%res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_umin_failed(<2 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_umin_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
|
|
%res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_smax_failed(<8 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_smax_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
|
|
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define i32 @reduce_smin_failed(<8 x i32> %x) {
|
|
; CHECK-LABEL: @reduce_smin_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
%shuf = shufflevector <8 x i32> %x, <8 x i32> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
|
|
ret i32 %res
|
|
}
|
|
|
|
define float @reduce_fmax_failed(<4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fmax_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
|
|
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
|
|
%res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
define float @reduce_fmin_failed(<4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fmin_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 3, i32 1, i32 2>
|
|
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 poison, i32 3, i32 1, i32 2>
|
|
%res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
define float @reduce_fadd_failed(float %a, <4 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fadd_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
define float @reduce_fmul_failed(float %a, <2 x float> %x) {
|
|
; CHECK-LABEL: @reduce_fmul_failed(
|
|
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
|
|
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
|
|
; CHECK-NEXT: ret float [[RES]]
|
|
;
|
|
%shuf = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
|
|
%res = call float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf)
|
|
ret float %res
|
|
}
|
|
|
|
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
|
|
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
|
|
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
|
|
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
|
|
declare float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %b)
|
|
declare float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %b)
|