This is a follow-up to 924907bc6, and is mostly motivated by consistency, but it does include one additional optimization. In general, we prefer 0.0 over -0.0 as the identity value for an fadd. We already use that value in several places, but not in others, so let's be consistent and use the same identity (when nsz allows) everywhere. This creates a bunch of test churn, but due to 924907bc6, most of that churn doesn't actually indicate a change in codegen. The exception is that this change enables the use of 0.0 for fadd reductions that are nsz but *not* reassoc. Said differently, it allows the neutral value of an ordered fadd reduction to be 0.0.
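For context, -0.0 is the strict fadd identity (x + -0.0 == x for every x, including x == -0.0), while -0.0 + 0.0 == +0.0 flips the sign of zero; nsz is what makes that sign difference irrelevant. A minimal sketch of the ordered-reduction case this enables (illustrative only, %v assumed):

  %sum = call nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> %v)

Without reassoc this remains a sequential reduction; only the choice of start value changes.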
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s

define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_mul_by_constant_store(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_mul_by_constant_store(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 10, i32 10>
; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
%l.src.0 = load i32, ptr %gep.src.0, align 4
%mul.0 = mul nsw i32 %l.src.0, 10

%gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
%l.src.1 = load i32, ptr %gep.src.1, align 4
%mul.1 = mul nsw i32 %l.src.1, 10

%gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
%l.src.2 = load i32, ptr %gep.src.2, align 4
%mul.2 = mul nsw i32 %l.src.2, 10

store i32 %mul.0, ptr %dst

%dst.1 = getelementptr i32, ptr %dst, i32 1
store i32 %mul.1, ptr %dst.1

%dst.2 = getelementptr i32, ptr %dst, i32 2
store i32 %mul.2, ptr %dst.2

ret void
}

; Should not be vectorized with an undef/poison element as padding, as
; division by undef/poison may cause UB. Must use VL predication or
; masking instead, which is where RISCV wins.
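; For illustration only (hypothetical names %d, %den, %r; not part of the checks):
; padding the loaded divisors out to a power of two would produce roughly
;   %den = insertelement <4 x i32> %d, i32 poison, i32 3
;   %r = udiv <4 x i32> <i32 10, i32 10, i32 10, i32 10>, %den
; and dividing by that padded lane may be UB.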
define void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_udiv_by_constant_store(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> <i32 10, i32 10, i32 10>, [[TMP0]]
; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_udiv_by_constant_store(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT: [[MUL_0:%.*]] = udiv i32 10, [[L_SRC_0]]
; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; POW2-ONLY-NEXT: [[MUL_1:%.*]] = udiv i32 10, [[L_SRC_1]]
; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = udiv i32 10, [[L_SRC_2]]
; POW2-ONLY-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; POW2-ONLY-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4
; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
%l.src.0 = load i32, ptr %gep.src.0, align 4
%mul.0 = udiv i32 10, %l.src.0

%gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
%l.src.1 = load i32, ptr %gep.src.1, align 4
%mul.1 = udiv i32 10, %l.src.1

%gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
%l.src.2 = load i32, ptr %gep.src.2, align 4
%mul.2 = udiv i32 10, %l.src.2

store i32 %mul.0, ptr %dst

%dst.1 = getelementptr i32, ptr %dst, i32 1
store i32 %mul.1, ptr %dst.1

%dst.2 = getelementptr i32, ptr %dst, i32 2
store i32 %mul.2, ptr %dst.2

ret void
}

define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_mul_store(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; NON-POW2-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_mul_store(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; POW2-ONLY-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
%l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
%gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
%l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
%mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0

%gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
%l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
%gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
%l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
%mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1

%gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
%l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
%gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
%l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
%mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2

store i32 %mul.0, ptr %dst

%dst.1 = getelementptr i32, ptr %dst, i32 1
store i32 %mul.1, ptr %dst.1

%dst.2 = getelementptr i32, ptr %dst, i32 2
store i32 %mul.2, ptr %dst.2

ret void
}

define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_mul_add_const_store(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; NON-POW2-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], <i32 9, i32 9, i32 9>
; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_mul_add_const_store(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; POW2-ONLY-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; POW2-ONLY-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 9, i32 9>
; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
%l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
%gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
%l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
%mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
%add.0 = add i32 %mul.0, 9

%gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
%l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
%gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
%l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
%mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
%add.1 = add i32 %mul.1, 9

%gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
%l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
%gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
%l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
%mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
%add.2 = add i32 %mul.2, 9

store i32 %add.0, ptr %dst

%dst.1 = getelementptr i32, ptr %dst, i32 1
store i32 %add.1, ptr %dst.1

%dst.2 = getelementptr i32, ptr %dst, i32 2
store i32 %add.2, ptr %dst.2

ret void
}

define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
%l.src.0 = load float, ptr %gep.src.0, align 4
%fadd.0 = fadd float %l.src.0, 10.0

%gep.src.1 = getelementptr inbounds float, ptr %src, i32 1
%l.src.1 = load float, ptr %gep.src.1, align 4
%fadd.1 = fadd float %l.src.1, 10.0

%gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
%l.src.2 = load float, ptr %gep.src.2, align 4
%fadd.2 = fadd float %l.src.2, 10.0

store float %fadd.0, ptr %dst

%dst.1 = getelementptr float, ptr %dst, i32 1
store float %fadd.1, ptr %dst.1

%dst.2 = getelementptr float, ptr %dst, i32 2
store float %fadd.2, ptr %dst.2

ret void
}

define void @phi_store3(ptr %dst) {
; NON-POW2-LABEL: @phi_store3(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: br label [[EXIT:%.*]]
; NON-POW2: invoke.cont8.loopexit:
; NON-POW2-NEXT: br label [[EXIT]]
; NON-POW2: exit:
; NON-POW2-NEXT: [[TMP0:%.*]] = phi <3 x i32> [ <i32 1, i32 2, i32 3>, [[ENTRY:%.*]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
; NON-POW2-NEXT: store <3 x i32> [[TMP0]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @phi_store3(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: br label [[EXIT:%.*]]
; POW2-ONLY: invoke.cont8.loopexit:
; POW2-ONLY-NEXT: br label [[EXIT]]
; POW2-ONLY: exit:
; POW2-ONLY-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
; POW2-ONLY-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
; POW2-ONLY-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4
; POW2-ONLY-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
br label %exit

invoke.cont8.loopexit: ; No predecessors!
br label %exit

exit:
%p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
%p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
%p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]

%dst.1 = getelementptr i32, ptr %dst, i32 1
%dst.2 = getelementptr i32, ptr %dst, i32 2

store i32 %p.0, ptr %dst, align 4
store i32 %p.1, ptr %dst.1, align 4
store i32 %p.2, ptr %dst.2, align 4
ret void
}

define void @store_try_reorder(ptr %dst) {
; NON-POW2-LABEL: @store_try_reorder(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%add = add i32 0, 0
store i32 %add, ptr %dst, align 4
%add207 = sub i32 0, 0
%arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
store i32 %add207, ptr %arrayidx.i1887, align 4
%add216 = sub i32 0, 0
%arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
store i32 %add216, ptr %arrayidx.i1891, align 4
ret void
}

define void @vec3_fpext_cost(ptr %Colour, float %0) {
; NON-POW2-LABEL: @vec3_fpext_cost(
; NON-POW2-NEXT: entry:
; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 0
; NON-POW2-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> zeroinitializer
; NON-POW2-NEXT: [[TMP3:%.*]] = fpext <3 x float> [[TMP2]] to <3 x double>
; NON-POW2-NEXT: [[TMP4:%.*]] = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> [[TMP3]], <3 x double> zeroinitializer, <3 x double> zeroinitializer)
; NON-POW2-NEXT: [[TMP5:%.*]] = fptrunc <3 x double> [[TMP4]] to <3 x float>
; NON-POW2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR:%.*]], align 4
; NON-POW2-NEXT: ret void
;
; POW2-ONLY-LABEL: @vec3_fpext_cost(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
; POW2-ONLY-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
; POW2-ONLY-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; POW2-ONLY-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; POW2-ONLY-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
; POW2-ONLY-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
; POW2-ONLY-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double
; POW2-ONLY-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
; POW2-ONLY-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
; POW2-ONLY-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
%arrayidx72 = getelementptr float, ptr %Colour, i64 1
%arrayidx80 = getelementptr float, ptr %Colour, i64 2
%conv62 = fpext float %0 to double
%1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
%conv66 = fptrunc double %1 to float
store float %conv66, ptr %Colour, align 4
%conv70 = fpext float %0 to double
%2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
%conv74 = fptrunc double %2 to float
store float %conv74, ptr %arrayidx72, align 4
%conv78 = fpext float %0 to double
%3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
%conv82 = fptrunc double %3 to float
store float %conv82, ptr %arrayidx80, align 4
ret void
}

define void @fpext_scatter(ptr %dst, double %conv) {
; CHECK-LABEL: @fpext_scatter(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV25:%.*]] = fptrunc double [[CONV:%.*]] to float
; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
; CHECK-NEXT: store float [[CONV25]], ptr [[LENGTHS]], align 4
; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
; CHECK-NEXT: store float [[CONV25]], ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr float, ptr [[DST]], i64 2
; CHECK-NEXT: store float [[CONV25]], ptr [[ARRAYIDX37]], align 4
; CHECK-NEXT: ret void
;
entry:
%conv25 = fptrunc double %conv to float
%Lengths = getelementptr float, ptr %dst, i64 0
store float %conv25, ptr %Lengths, align 4
%arrayidx32 = getelementptr float, ptr %dst, i64 1
store float %conv25, ptr %arrayidx32, align 4
%arrayidx37 = getelementptr float, ptr %dst, i64 2
store float %conv25, ptr %arrayidx37, align 4
ret void
}

define i32 @reduce_add(ptr %src) {
; CHECK-LABEL: @reduce_add(
; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[L_SRC_0]], [[L_SRC_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[L_SRC_2]]
; CHECK-NEXT: ret i32 [[ADD_1]]
;
%gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
%l.src.0 = load i32, ptr %gep.src.0, align 4
%gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
%l.src.1 = load i32, ptr %gep.src.1, align 4
%gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
%l.src.2 = load i32, ptr %gep.src.2, align 4

%add.0 = add i32 %l.src.0, %l.src.1
%add.1 = add i32 %add.0, %l.src.2
ret i32 %add.1
}

define float @reduce_fadd(ptr %src) {
; NON-POW2-LABEL: @reduce_fadd(
; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP1]])
; NON-POW2-NEXT: ret float [[TMP2]]
;
; POW2-ONLY-LABEL: @reduce_fadd(
; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1
; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]]
; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]]
; POW2-ONLY-NEXT: ret float [[ADD_1]]
;
%gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
%l.src.0 = load float, ptr %gep.src.0, align 4
%gep.src.1 = getelementptr inbounds float, ptr %src, i32 1
%l.src.1 = load float, ptr %gep.src.1, align 4
%gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
%l.src.2 = load float, ptr %gep.src.2, align 4

%add.0 = fadd fast float %l.src.0, %l.src.1
%add.1 = fadd fast float %add.0, %l.src.2
ret float %add.1
}

define i32 @reduce_add_after_mul(ptr %src) {
; NON-POW2-LABEL: @reduce_add_after_mul(
; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], <i32 10, i32 10, i32 10>
; NON-POW2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]])
; NON-POW2-NEXT: ret i32 [[TMP3]]
;
; POW2-ONLY-LABEL: @reduce_add_after_mul(
; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
; POW2-ONLY-NEXT: ret i32 [[ADD_1]]
;
%gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
%l.src.0 = load i32, ptr %gep.src.0, align 4
%gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
%l.src.1 = load i32, ptr %gep.src.1, align 4
%gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
%l.src.2 = load i32, ptr %gep.src.2, align 4

%mul.0 = mul nsw i32 %l.src.0, 10
%mul.1 = mul nsw i32 %l.src.1, 10
%mul.2 = mul nsw i32 %l.src.2, 10

%add.0 = add i32 %mul.0, %mul.1
%add.1 = add i32 %add.0, %mul.2
ret i32 %add.1
}

define i32 @dot_product_i32(ptr %a, ptr %b) {
; NON-POW2-LABEL: @dot_product_i32(
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_A_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[GEP_B_0]], align 4
; NON-POW2-NEXT: [[TMP3:%.*]] = mul nsw <3 x i32> [[TMP1]], [[TMP2]]
; NON-POW2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]])
; NON-POW2-NEXT: ret i32 [[TMP4]]
;
; POW2-ONLY-LABEL: @dot_product_i32(
; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
; POW2-ONLY-NEXT: ret i32 [[ADD_1]]
;
%gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
%l.a.0 = load i32, ptr %gep.a.0, align 4
%gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
%l.a.1 = load i32, ptr %gep.a.1, align 4
%gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
%l.a.2 = load i32, ptr %gep.a.2, align 4

%gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
%l.b.0 = load i32, ptr %gep.b.0, align 4
%gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
%l.b.1 = load i32, ptr %gep.b.1, align 4
%gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
%l.b.2 = load i32, ptr %gep.b.2, align 4

%mul.0 = mul nsw i32 %l.a.0, %l.b.0
%mul.1 = mul nsw i32 %l.a.1, %l.b.1
%mul.2 = mul nsw i32 %l.a.2, %l.b.2

%add.0 = add i32 %mul.0, %mul.1
%add.1 = add i32 %add.0, %mul.2
ret i32 %add.1
}

; Same as above, except the reduction order has been perturbed. This
; is checking for our ability to reorder.
define i32 @dot_product_i32_reorder(ptr %a, ptr %b) {
; CHECK-LABEL: @dot_product_i32_reorder(
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
; CHECK-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
; CHECK-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
; CHECK-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
; CHECK-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
; CHECK-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
; CHECK-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
; CHECK-NEXT: ret i32 [[ADD_1]]
;
%gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
%l.a.0 = load i32, ptr %gep.a.0, align 4
%gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
%l.a.1 = load i32, ptr %gep.a.1, align 4
%gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
%l.a.2 = load i32, ptr %gep.a.2, align 4

%gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
%l.b.0 = load i32, ptr %gep.b.0, align 4
%gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
%l.b.1 = load i32, ptr %gep.b.1, align 4
%gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
%l.b.2 = load i32, ptr %gep.b.2, align 4

%mul.0 = mul nsw i32 %l.a.0, %l.b.0
%mul.1 = mul nsw i32 %l.a.1, %l.b.1
%mul.2 = mul nsw i32 %l.a.2, %l.b.2

%add.0 = add i32 %mul.1, %mul.0
%add.1 = add i32 %add.0, %mul.2
ret i32 %add.1
}

define float @dot_product_fp32(ptr %a, ptr %b) {
; NON-POW2-LABEL: @dot_product_fp32(
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_A_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[GEP_B_0]], align 4
; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x float> [[TMP1]], [[TMP2]]
; NON-POW2-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP3]])
; NON-POW2-NEXT: ret float [[TMP4]]
;
; POW2-ONLY-LABEL: @dot_product_fp32(
; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
; POW2-ONLY-NEXT: ret float [[ADD_1]]
;
%gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
%l.a.0 = load float, ptr %gep.a.0, align 4
%gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
%l.a.1 = load float, ptr %gep.a.1, align 4
%gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
%l.a.2 = load float, ptr %gep.a.2, align 4

%gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
%l.b.0 = load float, ptr %gep.b.0, align 4
%gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
%l.b.1 = load float, ptr %gep.b.1, align 4
%gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
%l.b.2 = load float, ptr %gep.b.2, align 4

%mul.0 = fmul fast float %l.a.0, %l.b.0
%mul.1 = fmul fast float %l.a.1, %l.b.1
%mul.2 = fmul fast float %l.a.2, %l.b.2

%add.0 = fadd fast float %mul.0, %mul.1
%add.1 = fadd fast float %add.0, %mul.2
ret float %add.1
}

; Same as above, except the reduction order has been perturbed. This
; is checking for our ability to reorder.
define float @dot_product_fp32_reorder(ptr %a, ptr %b) {
; CHECK-LABEL: @dot_product_fp32_reorder(
; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
; CHECK-NEXT: ret float [[ADD_1]]
;
%gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
%l.a.0 = load float, ptr %gep.a.0, align 4
%gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
%l.a.1 = load float, ptr %gep.a.1, align 4
%gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
%l.a.2 = load float, ptr %gep.a.2, align 4

%gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
%l.b.0 = load float, ptr %gep.b.0, align 4
%gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
%l.b.1 = load float, ptr %gep.b.1, align 4
%gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
%l.b.2 = load float, ptr %gep.b.2, align 4

%mul.0 = fmul fast float %l.a.0, %l.b.0
%mul.1 = fmul fast float %l.a.1, %l.b.1
%mul.2 = fmul fast float %l.a.2, %l.b.2

%add.0 = fadd fast float %mul.1, %mul.0
%add.1 = fadd fast float %add.0, %mul.2
ret float %add.1
}

define double @dot_product_fp64(ptr %a, ptr %b) {
; NON-POW2-LABEL: @dot_product_fp64(
; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x double>, ptr [[GEP_A_0]], align 4
; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr [[GEP_B_0]], align 4
; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x double> [[TMP1]], [[TMP2]]
; NON-POW2-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> [[TMP3]])
; NON-POW2-NEXT: ret double [[TMP4]]
;
; POW2-ONLY-LABEL: @dot_product_fp64(
; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4
; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4
; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]]
; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
; POW2-ONLY-NEXT: ret double [[ADD_1]]
;
%gep.a.0 = getelementptr inbounds double, ptr %a, i32 0
%l.a.0 = load double, ptr %gep.a.0, align 4
%gep.a.1 = getelementptr inbounds double, ptr %a, i32 1
%l.a.1 = load double, ptr %gep.a.1, align 4
%gep.a.2 = getelementptr inbounds double, ptr %a, i32 2
%l.a.2 = load double, ptr %gep.a.2, align 4

%gep.b.0 = getelementptr inbounds double, ptr %b, i32 0
%l.b.0 = load double, ptr %gep.b.0, align 4
%gep.b.1 = getelementptr inbounds double, ptr %b, i32 1
%l.b.1 = load double, ptr %gep.b.1, align 4
%gep.b.2 = getelementptr inbounds double, ptr %b, i32 2
%l.b.2 = load double, ptr %gep.b.2, align 4

%mul.0 = fmul fast double %l.a.0, %l.b.0
%mul.1 = fmul fast double %l.a.1, %l.b.1
%mul.2 = fmul fast double %l.a.2, %l.b.2

%add.0 = fadd fast double %mul.0, %mul.1
%add.1 = fadd fast double %add.0, %mul.2
ret double %add.1
}

declare float @llvm.fmuladd.f32(float, float, float)

declare double @llvm.fmuladd.f64(double, double, double)