This patch canonicalizes getelementptr instructions with constant indices to use the `i8` source element type. This makes it easier for optimizations to recognize that two GEPs are identical, because they don't need to see past many different ways to express the same offset. This is a first step towards https://discourse.llvm.org/t/rfc-replacing-getelementptr-with-ptradd/68699. This is limited to constant GEPs only for now, as they have a clear canonical form, while we're not yet sure how exactly to deal with variable indices. The test llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll gives two representative examples of the kind of optimization improvement we expect from this change. In the first test SimplifyCFG can now realize that all switch branches are actually the same. In the second test it can convert it into simple arithmetic. These are representative of common optimization failures we see in Rust. Fixes https://github.com/llvm/llvm-project/issues/69841.
600 lines
26 KiB
LLVM
600 lines
26 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer,instcombine,dce -slp-threshold=-100 -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine,dce -slp-threshold=-100 -S -mtriple=i386-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2

; The two opt invocations above run the same pass pipeline on this file for
; the same i386 triple. The first selects -mcpu=corei7-avx and is matched with
; the default check prefix; the second selects -mattr=+sse2 and is matched
; with the SSE2 prefix.
;
; The datalayout below declares 32-bit pointers (p:32:32:32) and a 32-bit ABI
; alignment for double (f64:32:64), which is why the loads and stores in the
; expected output carry `align 4` even though the input omits alignment.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
|
|
|
|
; Make sure we order the operands of commutative operations so that we get
; bigger vectorizable trees.
|
|
|
|
; Loads two adjacent doubles from %from, adds %v1 to the first and %v2 to the
; second, and stores both sums to adjacent slots of %to. The loaded value is
; the left fadd operand in lane 0 and the right fadd operand in lane 1.
; Both runs are expected to produce one <2 x double> load, one fadd against a
; vector built from %v1 and %v2, and one <2 x double> store.
define void @shuffle_operands1(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
; CHECK-LABEL: @shuffle_operands1(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2:%.*]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @shuffle_operands1(
; SSE2-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
; SSE2-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2:%.*]], i64 1
; SSE2-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; SSE2-NEXT: store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: ret void
;
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %v0_1, %v1
  %v1_2 = fadd double %v2, %v0_2
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  ret void
}
|
|
|
|
; Loop body computes, from two adjacent doubles loaded from %from:
;   to[0] = from[0] + %p
;   to[1] = from[0] + from[1]
; where %p is a loop-carried phi (0.0 on entry, 1.0 on the back edge).
; Both runs are expected to load <2 x double> once, use that load directly as
; one fadd operand, and build the other operand from a shuffle of the load
; with %p inserted in lane 0.
define void @vecload_vs_broadcast(ptr noalias %from, ptr noalias %to, double %v1, double %v2, i1 %c) {
; CHECK-LABEL: @vecload_vs_broadcast(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LP:%.*]]
; CHECK: lp:
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP0]], [[TMP2]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @vecload_vs_broadcast(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[LP:%.*]]
; SSE2: lp:
; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP0]], [[TMP2]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; SSE2: ext:
; SSE2-NEXT: ret void
;
entry:
  br label %lp

lp:
  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %v0_1, %p
  %v1_2 = fadd double %v0_1, %v0_2
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  br i1 %c, label %lp, label %ext

ext:
  ret void
}
|
|
|
|
; Variant of @vecload_vs_broadcast with both fadds written the other way
; round:
;   to[0] = %p      + from[0]
;   to[1] = from[1] + from[0]
; Both runs are still expected to use a single <2 x double> load; here the
; vector built from the shuffle plus %p is the left fadd operand and the
; plain load is the right one.
define void @vecload_vs_broadcast2(ptr noalias %from, ptr noalias %to, double %v1, double %v2, i1 %c) {
; CHECK-LABEL: @vecload_vs_broadcast2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LP:%.*]]
; CHECK: lp:
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @vecload_vs_broadcast2(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[LP:%.*]]
; SSE2: lp:
; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; SSE2: ext:
; SSE2-NEXT: ret void
;
entry:
  br label %lp

lp:
  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %p, %v0_1
  %v1_2 = fadd double %v0_2, %v0_1
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  br i1 %c, label %lp, label %ext

ext:
  ret void
}
|
|
|
|
; Variant of @vecload_vs_broadcast in which the two lanes disagree on which
; side the first loaded element sits:
;   to[0] = %p      + from[0]
;   to[1] = from[0] + from[1]
; Both runs are expected to produce a single <2 x double> load, with the
; shuffle-plus-%p vector as the left fadd operand and the plain load as the
; right one.
define void @vecload_vs_broadcast3(ptr noalias %from, ptr noalias %to, double %v1, double %v2, i1 %c) {
; CHECK-LABEL: @vecload_vs_broadcast3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LP:%.*]]
; CHECK: lp:
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @vecload_vs_broadcast3(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[LP:%.*]]
; SSE2: lp:
; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; SSE2: ext:
; SSE2-NEXT: ret void
;
entry:
  br label %lp

lp:
  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %p, %v0_1
  %v1_2 = fadd double %v0_1, %v0_2
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  br i1 %c, label %lp, label %ext

ext:
  ret void
}
|
|
|
|
; Loop body computes, from two adjacent doubles loaded from %from:
;   to[0] = from[1] + from[0]
;   to[1] = %p      + from[0]
; Both runs are expected to load <2 x double> once, derive one fadd operand
; by reversing the load (<1, 0> shuffle) and the other by replacing lane 1 of
; the load with %p.
define void @shuffle_nodes_match1(ptr noalias %from, ptr noalias %to, double %v1, double %v2, i1 %c) {
; CHECK-LABEL: @shuffle_nodes_match1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LP:%.*]]
; CHECK: lp:
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP1]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @shuffle_nodes_match1(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[LP:%.*]]
; SSE2: lp:
; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP1]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; SSE2: ext:
; SSE2-NEXT: ret void
;
entry:
  br label %lp

lp:
  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %v0_2, %v0_1
  %v1_2 = fadd double %p, %v0_1
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  br i1 %c, label %lp, label %ext

ext:
  ret void
}
|
|
|
|
; Loop body computes, from two adjacent doubles loaded from %from:
;   to[0] = from[0] + from[1]
;   to[1] = %p      + from[0]
; The two runs are expected to differ. With -mcpu=corei7-avx the loads stay
; scalar (the second address appears as an i8 getelementptr with byte offset
; 8) and the fadd operands are a vector of {from[1], %p} and a splat of
; from[0]. With -mattr=+sse2 a single <2 x double> load is used, combined
; with a reversed shuffle and an insert of %p into lane 1.
define void @vecload_vs_broadcast4(ptr noalias %from, ptr noalias %to, double %v1, double %v2, i1 %c) {
; CHECK-LABEL: @vecload_vs_broadcast4(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LP:%.*]]
; CHECK: lp:
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[FROM_1:%.*]] = getelementptr i8, ptr [[FROM:%.*]], i32 8
; CHECK-NEXT: [[V0_1:%.*]] = load double, ptr [[FROM]], align 4
; CHECK-NEXT: [[V0_2:%.*]] = load double, ptr [[FROM_1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @vecload_vs_broadcast4(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[LP:%.*]]
; SSE2: lp:
; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP1]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; SSE2: ext:
; SSE2-NEXT: ret void
;
entry:
  br label %lp

lp:
  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %v0_1, %v0_2
  %v1_2 = fadd double %p, %v0_1
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  br i1 %c, label %lp, label %ext

ext:
  ret void
}
|
|
|
|
|
|
; Loop body computes, from two adjacent doubles loaded from %from:
;   to[0] = from[0] + from[1]
;   to[1] = from[0] + %p
; The two runs are expected to differ. With -mcpu=corei7-avx the loads stay
; scalar (the second address appears as an i8 getelementptr with byte offset
; 8); the left fadd operand is a splat of from[0] and the right one is
; {from[1], %p}. With -mattr=+sse2 a single <2 x double> load is used; the
; left operand is the reversed load and the right one is the load with %p
; inserted into lane 1.
define void @shuffle_nodes_match2(ptr noalias %from, ptr noalias %to, double %v1, double %v2, i1 %c) {
; CHECK-LABEL: @shuffle_nodes_match2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LP:%.*]]
; CHECK: lp:
; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[FROM_1:%.*]] = getelementptr i8, ptr [[FROM:%.*]], i32 8
; CHECK-NEXT: [[V0_1:%.*]] = load double, ptr [[FROM]], align 4
; CHECK-NEXT: [[V0_2:%.*]] = load double, ptr [[FROM_1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; CHECK: ext:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @shuffle_nodes_match2(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[LP:%.*]]
; SSE2: lp:
; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
; SSE2-NEXT: br i1 [[C:%.*]], label [[LP]], label [[EXT:%.*]]
; SSE2: ext:
; SSE2-NEXT: ret void
;
entry:
  br label %lp

lp:
  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
  %from_1 = getelementptr double, ptr %from, i64 1
  %v0_1 = load double, ptr %from
  %v0_2 = load double, ptr %from_1
  %v1_1 = fadd double %v0_1, %v0_2
  %v1_2 = fadd double %v0_1, %p
  %to_2 = getelementptr double, ptr %to, i64 1
  store double %v1_1, ptr %to
  store double %v1_2, ptr %to_2
  br i1 %c, label %lp, label %ext

ext:
  ret void
}
|
|
|
|
; Make sure that reordering operands does not scramble them and destroy a
; 'good' source order.
|
|
|
|
; Zero-initialized array of 32000 floats, 16-byte aligned; @good_load_order
; reads and writes its elements.
@a = common global [32000 x float] zeroinitializer, align 16
|
|
|
|
; Walks @a in steps of five elements. With i = %indvars.iv, one iteration
; performs:
;   a[i]   = a[i+1] * carried   (carried = a[0] on entry, then the a[i+5]
;                                loaded by the previous iteration)
;   a[i+1] = a[i+2] * a[i+1]
;   a[i+2] = a[i+3] * a[i+2]
;   a[i+3] = a[i+4] * a[i+3]
;   a[i+4] = a[i+5] * a[i+4]
; Each newly loaded element is always the left fmul operand. The loop exits
; once the truncated next index is no longer below 31995.
; Both runs are expected to turn the first four products into one
; <4 x float> load from a[i+1], one <4 x float> fmul and one store to a[i],
; leaving the fifth product scalar. The expected getelementptrs use i32
; indices obtained by truncating the i64 induction variable.
define void @good_load_order() {
; CHECK-LABEL: @good_load_order(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK: for.cond1.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @a, align 16
; CHECK-NEXT: br label [[FOR_BODY3:%.*]]
; CHECK: for.body3:
; CHECK-NEXT: [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP12:%.*]], [[FOR_BODY3]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 4
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i64 0
; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP7]], [[TMP9]]
; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP11]]
; CHECK-NEXT: [[TMP12]] = load float, ptr [[ARRAYIDX41]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP7]], i64 3
; CHECK-NEXT: [[MUL45:%.*]] = fmul float [[TMP12]], [[TMP13]]
; CHECK-NEXT: store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP14]], 31995
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @good_load_order(
; SSE2-NEXT: entry:
; SSE2-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
; SSE2: for.cond1.preheader:
; SSE2-NEXT: [[TMP0:%.*]] = load float, ptr @a, align 16
; SSE2-NEXT: br label [[FOR_BODY3:%.*]]
; SSE2: for.body3:
; SSE2-NEXT: [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP12:%.*]], [[FOR_BODY3]] ]
; SSE2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
; SSE2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; SSE2-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
; SSE2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP3]]
; SSE2-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; SSE2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP4]]
; SSE2-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; SSE2-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 4
; SSE2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]]
; SSE2-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
; SSE2-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i64 0
; SSE2-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP7]], [[TMP9]]
; SSE2-NEXT: store <4 x float> [[TMP10]], ptr [[ARRAYIDX5]], align 4
; SSE2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
; SSE2-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; SSE2-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP11]]
; SSE2-NEXT: [[TMP12]] = load float, ptr [[ARRAYIDX41]], align 4
; SSE2-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP7]], i64 3
; SSE2-NEXT: [[MUL45:%.*]] = fmul float [[TMP12]], [[TMP13]]
; SSE2-NEXT: store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
; SSE2-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; SSE2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP14]], 31995
; SSE2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
; SSE2: for.end:
; SSE2-NEXT: ret void
;
entry:
  br label %for.cond1.preheader

for.cond1.preheader:
  %0 = load float, ptr @a, align 16
  br label %for.body3

for.body3:
  %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ]
  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
  %2 = add nsw i64 %indvars.iv, 1
  %arrayidx = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %2
  %3 = load float, ptr %arrayidx, align 4
  %arrayidx5 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv
  %mul6 = fmul float %3, %1
  store float %mul6, ptr %arrayidx5, align 4
  %4 = add nsw i64 %indvars.iv, 2
  %arrayidx11 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %4
  %5 = load float, ptr %arrayidx11, align 4
  %mul15 = fmul float %5, %3
  store float %mul15, ptr %arrayidx, align 4
  %6 = add nsw i64 %indvars.iv, 3
  %arrayidx21 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %6
  %7 = load float, ptr %arrayidx21, align 4
  %mul25 = fmul float %7, %5
  store float %mul25, ptr %arrayidx11, align 4
  %8 = add nsw i64 %indvars.iv, 4
  %arrayidx31 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %8
  %9 = load float, ptr %arrayidx31, align 4
  %mul35 = fmul float %9, %7
  store float %mul35, ptr %arrayidx21, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
  %arrayidx41 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv.next
  %10 = load float, ptr %arrayidx41, align 4
  %mul45 = fmul float %10, %9
  store float %mul45, ptr %arrayidx31, align 4
  %11 = trunc i64 %indvars.iv.next to i32
  %cmp2 = icmp slt i32 %11, 31995
  br i1 %cmp2, label %for.body3, label %for.end

for.end:
  ret void
}
|
|
|
|
; Check vectorization of the following code for the double data type:
; c[0] = a[0]+b[0];
; c[1] = b[1]+a[1]; // swapped b[1] and a[1]
|
|
|
|
; Two-lane double add where the second lane has its operands swapped:
;   c[0] = a[0] + b[0]
;   c[1] = b[1] + a[1]
; Both runs are expected to produce one <2 x double> load from %b, one from
; %a, a single vector fadd and a single vector store to %c.
define void @load_reorder_double(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b) {
; CHECK-LABEL: @load_reorder_double(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @load_reorder_double(
; SSE2-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[C:%.*]], align 4
; SSE2-NEXT: ret void
;
  %1 = load double, ptr %a
  %2 = load double, ptr %b
  %3 = fadd double %1, %2
  store double %3, ptr %c
  %4 = getelementptr inbounds double, ptr %b, i64 1
  %5 = load double, ptr %4
  %6 = getelementptr inbounds double, ptr %a, i64 1
  %7 = load double, ptr %6
  %8 = fadd double %5, %7
  %9 = getelementptr inbounds double, ptr %c, i64 1
  store double %8, ptr %9
  ret void
}
|
|
|
|
; Check vectorization of the following code for the float data type:
; c[0] = a[0]+b[0];
; c[1] = b[1]+a[1]; // swapped b[1] and a[1]
; c[2] = a[2]+b[2];
; c[3] = a[3]+b[3];
|
|
|
|
; Four-lane float add where only lane 1 has its operands swapped:
;   c[0] = a[0] + b[0]
;   c[1] = b[1] + a[1]
;   c[2] = a[2] + b[2]
;   c[3] = a[3] + b[3]
; Both runs are expected to produce one <4 x float> load from %a, one from
; %b, a single vector fadd and a single vector store to %c.
define void @load_reorder_float(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b) {
; CHECK-LABEL: @load_reorder_float(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @load_reorder_float(
; SSE2-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
; SSE2-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; SSE2-NEXT: store <4 x float> [[TMP3]], ptr [[C:%.*]], align 4
; SSE2-NEXT: ret void
;
  %1 = load float, ptr %a
  %2 = load float, ptr %b
  %3 = fadd float %1, %2
  store float %3, ptr %c
  %4 = getelementptr inbounds float, ptr %b, i64 1
  %5 = load float, ptr %4
  %6 = getelementptr inbounds float, ptr %a, i64 1
  %7 = load float, ptr %6
  %8 = fadd float %5, %7
  %9 = getelementptr inbounds float, ptr %c, i64 1
  store float %8, ptr %9
  %10 = getelementptr inbounds float, ptr %a, i64 2
  %11 = load float, ptr %10
  %12 = getelementptr inbounds float, ptr %b, i64 2
  %13 = load float, ptr %12
  %14 = fadd float %11, %13
  %15 = getelementptr inbounds float, ptr %c, i64 2
  store float %14, ptr %15
  %16 = getelementptr inbounds float, ptr %a, i64 3
  %17 = load float, ptr %16
  %18 = getelementptr inbounds float, ptr %b, i64 3
  %19 = load float, ptr %18
  %20 = fadd float %17, %19
  %21 = getelementptr inbounds float, ptr %c, i64 3
  store float %20, ptr %21
  ret void
}
|
|
|
|
; Check that we properly reorder the code below so that it gets vectorized
; optimally:
; a[0] = (b[0]+c[0])+d[0];
; a[1] = d[1]+(b[1]+c[1]);
; a[2] = (b[2]+c[2])+d[2];
; a[3] = (b[3]+c[3])+d[3];
|
|
|
|
; Four-lane float computation a[k] = b[k] + c[k] + d[k], where lane 1 places
; the d[1] load on the left of the outer fadd and the inner (b+c) sum on the
; right, while lanes 0, 2 and 3 do the opposite:
;   a[0] = (b[0] + c[0]) + d[0]
;   a[1] = d[1] + (b[1] + c[1])
;   a[2] = (b[2] + c[2]) + d[2]
;   a[3] = (b[3] + c[3]) + d[3]
; Both runs are expected to produce three <4 x float> loads (from %b, %c and
; %d), two vector fadds and one vector store to %a.
define void @opcode_reorder(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, ptr noalias nocapture readonly %d) {
; CHECK-LABEL: @opcode_reorder(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], [[TMP3]]
; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[A:%.*]], align 4
; CHECK-NEXT: ret void
;
; SSE2-LABEL: @opcode_reorder(
; SSE2-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
; SSE2-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
; SSE2-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], [[TMP3]]
; SSE2-NEXT: store <4 x float> [[TMP5]], ptr [[A:%.*]], align 4
; SSE2-NEXT: ret void
;
  %1 = load float, ptr %b
  %2 = load float, ptr %c
  %3 = fadd float %1, %2
  %4 = load float, ptr %d
  %5 = fadd float %3, %4
  store float %5, ptr %a
  %6 = getelementptr inbounds float, ptr %d, i64 1
  %7 = load float, ptr %6
  %8 = getelementptr inbounds float, ptr %b, i64 1
  %9 = load float, ptr %8
  %10 = getelementptr inbounds float, ptr %c, i64 1
  %11 = load float, ptr %10
  %12 = fadd float %9, %11
  %13 = fadd float %7, %12
  %14 = getelementptr inbounds float, ptr %a, i64 1
  store float %13, ptr %14
  %15 = getelementptr inbounds float, ptr %b, i64 2
  %16 = load float, ptr %15
  %17 = getelementptr inbounds float, ptr %c, i64 2
  %18 = load float, ptr %17
  %19 = fadd float %16, %18
  %20 = getelementptr inbounds float, ptr %d, i64 2
  %21 = load float, ptr %20
  %22 = fadd float %19, %21
  %23 = getelementptr inbounds float, ptr %a, i64 2
  store float %22, ptr %23
  %24 = getelementptr inbounds float, ptr %b, i64 3
  %25 = load float, ptr %24
  %26 = getelementptr inbounds float, ptr %c, i64 3
  %27 = load float, ptr %26
  %28 = fadd float %25, %27
  %29 = getelementptr inbounds float, ptr %d, i64 3
  %30 = load float, ptr %29
  %31 = fadd float %28, %30
  %32 = getelementptr inbounds float, ptr %a, i64 3
  store float %31, ptr %32
  ret void
}
|