Files
clang-p2996/llvm/test/Analysis/CostModel/SystemZ/vector-reductions.ll
Dominik Steenken 866b9f43a0 [SystemZ] Add realistic cost estimates for vector reduction intrinsics (#118319)
This PR adds more realistic cost estimates for these reduction
intrinsics

- `llvm.vector.reduce.umax`
- `llvm.vector.reduce.umin`
- `llvm.vector.reduce.smax`
- `llvm.vector.reduce.smin`
- `llvm.vector.reduce.fadd`
- `llvm.vector.reduce.fmul`
- `llvm.vector.reduce.fmax`
- `llvm.vector.reduce.fmin`
- `llvm.vector.reduce.fmaximum`
- `llvm.vector.reduce.fminimum`
- `llvm.vector.reduce.mul`

The pre-existing cost estimates for `llvm.vector.reduce.add` are moved
to `getArithmeticReductionCosts` to reduce complexity in
`getVectorIntrinsicInstrCost` and enable other passes, like the SLP
vectorizer, to benefit from these updated calculations.

These are not expected to provide noticeable performance improvements and
are rather provided for the sake of completeness and correctness. This
PR is in draft mode pending benchmark confirmation of this.

This also provides and/or updates cost tests for all of these
intrinsics.

This PR was co-authored by me and @JonPsson1 .
2024-12-03 17:08:51 +01:00

377 lines
25 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes='print<cost-model>' -disable-output -mtriple=s390x-unknown-linux \
; RUN: -mcpu=z15 < %s 2>&1 | FileCheck %s --check-prefix=Z15
; Cost checks for llvm.vector.reduce.fadd called without fast-math flags.
; Covers <4 x float>, <8 x float>, <2 x double>, <4 x double> and
; <4 x fp128> operands. All vector operands are undef: the RUN line only
; prints the cost model and discards the module output.
define void @fadd_reductions() {
; Z15-LABEL: 'fadd_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; Calls under test. The float/double cases start from 0.0; the fp128 case
; uses an undef start value.
%fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
%fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
%fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
%fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Cost checks for llvm.vector.reduce.fadd carrying the `fast` flag, using the
; same operand types as @fadd_reductions. The expected costs here are lower
; than the corresponding non-fast expectations in that function.
; NOTE(review): %src and %dst are not referenced anywhere in this body.
define void @fast_fadd_reductions(ptr %src, ptr %dst) {
; Z15-LABEL: 'fast_fadd_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; Calls under test; each call is marked `fast`.
%fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
%fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
%fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
%fadd_v4f128 = call fast fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Cost checks for llvm.vector.reduce.fmul called without fast-math flags.
; Operand types and expected costs mirror those of @fadd_reductions.
define void @fmul_reductions() {
; Z15-LABEL: 'fmul_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; Calls under test. The float/double cases pass 0.0 as the start value; the
; fp128 case passes undef.
%fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
%fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
%fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
%fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
%fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
ret void
}
; Cost checks for llvm.vector.reduce.fmul carrying the `fast` flag, using the
; same operand types as the non-fast fmul test. All vector operands are undef
; because only the printed cost of each call is checked.
define void @fast_fmul_reductions() {
; Z15-LABEL: 'fast_fmul_reductions'
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; Calls under test; each call is marked `fast`. The fp128 result is named
  ; after the fmul intrinsic it calls, matching the other values here.
  %fmul_v4f32 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
  %fmul_v8f32 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
  %fmul_v2f64 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
  %fmul_v4f64 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
  %fmul_v4f128 = call fast fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
  ret void
}
; Cost checks for llvm.vector.reduce.fmin (no start-value operand, no
; fast-math flags) on <4 x float>, <8 x float>, <2 x double>, <4 x double>
; and <4 x fp128> undef operands.
define void @fmin_reductions() {
; Z15-LABEL: 'fmin_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; Calls under test, one per operand type.
%V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
%V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
%V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
%V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
%V4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
ret void
}
; Cost checks for llvm.vector.reduce.fmax (no start-value operand, no
; fast-math flags). Operand types and expected costs mirror those of
; @fmin_reductions.
define void @fmax_reductions() {
; Z15-LABEL: 'fmax_reductions'
; Z15-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; Calls under test, one per operand type.
%V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
%V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
%V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
%V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
%V4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
ret void
}
; Cost checks for llvm.vector.reduce.umin on <2 x i64>, <4 x i64>, <4 x i32>,
; <8 x i32>, <128 x i8> and <4 x i128> undef operands. Every expectation line
; carries the trailing colon FileCheck requires, so all of them (including
; the final `ret void`) are actually verified.
define void @reduceumin() {
; Z15-LABEL: 'reduceumin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; Calls under test, one per operand type.
  %V2_64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umin.v4i128(<4 x i128> undef)
  ret void
}
; Cost checks for llvm.vector.reduce.umax on <2 x i64>, <4 x i64>, <4 x i32>,
; <8 x i32>, <128 x i8> and <4 x i128> undef operands. Every expectation line
; carries the trailing colon FileCheck requires, so all of them (including
; the final `ret void`) are actually verified.
define void @reduceumax() {
; Z15-LABEL: 'reduceumax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; Calls under test, one per operand type.
  %V2_64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.umax.v4i128(<4 x i128> undef)
  ret void
}
; Cost checks for llvm.vector.reduce.smin on <2 x i64>, <4 x i64>, <4 x i32>,
; <8 x i32>, <128 x i8> and <4 x i128> undef operands. Every expectation line
; carries the trailing colon FileCheck requires, so all of them (including
; the final `ret void`) are actually verified.
define void @reducesmin() {
; Z15-LABEL: 'reducesmin'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; Calls under test, one per operand type.
  %V2_64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smin.v4i128(<4 x i128> undef)
  ret void
}
; Cost checks for llvm.vector.reduce.smax on <2 x i64>, <4 x i64>, <4 x i32>,
; <8 x i32>, <128 x i8> and <4 x i128> undef operands. Every expectation line
; carries the trailing colon FileCheck requires, so all of them (including
; the final `ret void`) are actually verified.
define void @reducesmax() {
; Z15-LABEL: 'reducesmax'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; Calls under test, one per operand type.
  %V2_64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
  %V4_32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
  %V128_8 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
  %V4_128 = call i128 @llvm.vector.reduce.smax.v4i128(<4 x i128> undef)
  ret void
}
; Cost checks for llvm.vector.reduce.add. The calls are grouped by element
; width (i64, i32, i16, i8) with 2, 4, 8 and 16 elements each, followed by
; two outliers: a <128 x i8> vector and a <4 x i256> vector.
; NOTE(review): unlike the floating-point tests in this file, the cost of the
; closing `ret void` is not checked here.
define void @reduceadd() {
; Z15-LABEL: 'reduceadd'
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
;
; Z15-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; Z15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
; REDUCEADD64
%V2_64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
%V4_64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
%V8_64 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
%V16_64 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; REDUCEADD32
%V2_32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
%V4_32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
%V8_32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
%V16_32 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; REDUCEADD16
%V2_16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
%V4_16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
%V8_16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
%V16_16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; REDUCEADD8
%V2_8 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
%V4_8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
%V8_8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
%V16_8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; EXTREME VALUES
%V128_8 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
%V4_256 = call i256 @llvm.vector.reduce.add.v4i256(<4 x i256> undef)
ret void
}
; Cost checks for llvm.vector.reduce.mul. The calls are grouped by element
; width (i64, i32, i16, i8) with 2, 4, 8 and 16 elements each, followed by
; two outliers: a <128 x i8> vector and a <4 x i256> vector.
; The expectations use the Z15 prefix selected by this file's RUN line; a
; different prefix would be skipped by FileCheck without any diagnostic.
define void @reducemul() {
; Z15-LABEL: 'reducemul'
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
; Z15-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  ; REDUCEMUL64
  %V2_64 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> undef)
  %V4_64 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> undef)
  %V8_64 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> undef)
  %V16_64 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> undef)
  ; REDUCEMUL32
  %V2_32 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> undef)
  %V4_32 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> undef)
  %V8_32 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> undef)
  %V16_32 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> undef)
  ; REDUCEMUL16
  %V2_16 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> undef)
  %V4_16 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> undef)
  %V8_16 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> undef)
  %V16_16 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> undef)
  ; REDUCEMUL8
  %V2_8 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
  %V4_8 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
  %V8_8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
  %V16_8 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
  ; EXTREME VALUES
  %V128_8 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
  %V4_256 = call i256 @llvm.vector.reduce.mul.v4i256(<4 x i256> undef)
  ret void
}
; Declarations of every reduction intrinsic overload called by the test
; functions in this file, grouped by intrinsic.

; Floating-point add/mul reductions (start value + vector operand).
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)
; Floating-point min/max reductions (vector operand only).
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)
; Integer unsigned/signed min/max reductions.
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umin.v4i128(<4 x i128>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.umax.v4i128(<4 x i128>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smin.v4i128(<4 x i128>)
declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)
declare i128 @llvm.vector.reduce.smax.v4i128(<4 x i128>)
; Integer add reductions.
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.add.v4i256(<4 x i256>)
; Integer mul reductions.
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>)
declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>)
declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>)
declare i256 @llvm.vector.reduce.mul.v4i256(<4 x i256>)