Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
David Green 2a859b2014 [AArch64] Change the cost of vector insert/extract to 2
The cost of vector instructions has always been high under AArch64, in order to
add a high cost for inserts/extracts, shuffles and scalarization. This is a
conservative approach to limit the scope of unusual SLP vectorization where the
codegen ends up being quite poor, but has always been higher than the correct
costs would be for any specific core.

This relaxes that, reducing the vector insert/extract cost from 3 to 2. It is a
generalization of D142359 to all AArch64 cpus. The ScalarizationOverhead is
also overridden for integer vectors at the same time, to remove the effect of
lane 0 being considered free for integer vectors (something that should only be
true for float when scalarizing).

The lower insert/extract cost will reduce the cost of inserts, extracts,
shuffling and scalarization. The adjustments of ScalarizationOverhead will
increase the cost on integer, especially for small vectors. The end result will
be lower cost for float and long-integer types, some higher cost for some
smaller vectors. This, along with the raw insert/extract cost being lower, will
generally mean more vectorization from the Loop and SLP vectorizer.

We may end up regretting this, as that vectorization is not always profitable.
In all the benchmarking I have done this is generally an improvement in the
overall performance, and I've attempted to address the places where it wasn't
with other costmodel adjustments.

Differential Revision: https://reviews.llvm.org/D155459
2023-07-28 21:26:50 +01:00

1289 lines
57 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -aa-pipeline='basic-aa,scoped-noalias-aa' -passes=slp-vectorizer -mtriple=arm64-apple-darwin -S %s | FileCheck %s
; Two i32 lanes where src/dst may alias, so vectorizing would need runtime
; checks (versioning). The CHECK lines show the IR is left fully scalar:
; at this width, versioning is not considered profitable.
define void @needs_versioning_not_profitable(ptr %dst, ptr %src) {
; CHECK-LABEL: @needs_versioning_not_profitable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: ret void
;
entry:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
ret void
}
; Four i32 lanes of the same load/ashr/store pattern, again with possibly
; aliasing src/dst. Despite the "profitable" name, the CHECK lines show the
; output is still scalar under the current cost model / versioning support.
define void @needs_versioning_profitable(ptr %dst, ptr %src) {
; CHECK-LABEL: @needs_versioning_profitable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, ptr [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], ptr [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, ptr [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], ptr [[DST_GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, ptr %src, i64 2
%src.2 = load i32, ptr %src.gep.2, align 4
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, ptr %dst, i64 2
store i32 %r.2, ptr %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, ptr %src, i64 3
%src.3 = load i32, ptr %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, ptr %dst, i64 3
store i32 %r.3, ptr %dst.gep.3, align 4
ret void
}
; Same four-lane pattern, but with two independent source pointers (%A, %B)
; feeding an add+mul per lane, so versioning would need checks against both
; sources. The CHECK lines show the IR remains scalar.
define void @needs_versioning_profitable_2_sources(ptr %dst, ptr %A, ptr %B) {
; CHECK-LABEL: @needs_versioning_profitable_2_sources(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_0:%.*]] = load i32, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[B_0:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = add i32 [[A_0]], [[B_0]]
; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[R_0]], 2
; CHECK-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
; CHECK-NEXT: [[A_1:%.*]] = load i32, ptr [[A_GEP_1]], align 4
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 1
; CHECK-NEXT: [[B_1:%.*]] = load i32, ptr [[B_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = add i32 [[A_1]], [[B_1]]
; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[R_1]], 2
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[MUL_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2
; CHECK-NEXT: [[A_2:%.*]] = load i32, ptr [[A_GEP_2]], align 4
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 2
; CHECK-NEXT: [[B_2:%.*]] = load i32, ptr [[B_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = add i32 [[A_2]], [[B_2]]
; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[R_2]], 2
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[MUL_2]], ptr [[DST_GEP_2]], align 4
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3
; CHECK-NEXT: [[A_3:%.*]] = load i32, ptr [[A_GEP_3]], align 4
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 3
; CHECK-NEXT: [[B_3:%.*]] = load i32, ptr [[B_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = add i32 [[A_3]], [[B_3]]
; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[R_3]], 2
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT: store i32 [[MUL_3]], ptr [[DST_GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
%A.0 = load i32, ptr %A, align 4
%B.0 = load i32, ptr %B, align 4
%r.0 = add i32 %A.0, %B.0
%mul.0 = mul i32 %r.0, 2
store i32 %mul.0, ptr %dst, align 4
%A.gep.1 = getelementptr inbounds i32, ptr %A, i64 1
%A.1 = load i32, ptr %A.gep.1, align 4
%B.gep.1 = getelementptr inbounds i32, ptr %B, i64 1
%B.1 = load i32, ptr %B.gep.1, align 4
%r.1 = add i32 %A.1, %B.1
%mul.1 = mul i32 %r.1, 2
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %mul.1, ptr %dst.gep.1, align 4
%A.gep.2 = getelementptr inbounds i32, ptr %A, i64 2
%A.2 = load i32, ptr %A.gep.2, align 4
%B.gep.2 = getelementptr inbounds i32, ptr %B, i64 2
%B.2 = load i32, ptr %B.gep.2, align 4
%r.2 = add i32 %A.2, %B.2
%mul.2 = mul i32 %r.2, 2
%dst.gep.2 = getelementptr inbounds i32, ptr %dst, i64 2
store i32 %mul.2, ptr %dst.gep.2, align 4
%A.gep.3 = getelementptr inbounds i32, ptr %A, i64 3
%A.3 = load i32, ptr %A.gep.3, align 4
%B.gep.3 = getelementptr inbounds i32, ptr %B, i64 3
%B.3 = load i32, ptr %B.gep.3, align 4
%r.3 = add i32 %A.3, %B.3
%mul.3 = mul i32 %r.3, 2
%dst.gep.3 = getelementptr inbounds i32, ptr %dst, i64 3
store i32 %mul.3, ptr %dst.gep.3, align 4
ret void
}
declare void @use(i32)
declare void @bar()
; The four-lane region is bracketed by calls to @bar, exercising how the
; candidate region is split around calls. CHECK lines show the calls are
; kept in place and the memory operations remain scalar.
define void @needs_versioning_profitable_split_points(ptr %dst, ptr %src) {
; CHECK-LABEL: @needs_versioning_profitable_split_points(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, ptr [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], ptr [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, ptr [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], ptr [[DST_GEP_3]], align 4
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: ret void
;
entry:
call void @bar()
call void @bar()
call void @bar()
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, ptr %src, i64 2
%src.2 = load i32, ptr %src.gep.2, align 4
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, ptr %dst, i64 2
store i32 %r.2, ptr %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, ptr %src, i64 3
%src.3 = load i32, ptr %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, ptr %dst, i64 3
store i32 %r.3, ptr %dst.gep.3, align 4
call void @bar()
ret void
}
; Like the four-lane case, but an extra load (%l at src index 5) follows the
; region and its value escapes via @use. CHECK lines show everything stays
; scalar, with the extra load kept after the four lanes.
define void @needs_versioning_profitable_load_used_outside_region1(ptr %dst, ptr %src, i1 %c) {
; CHECK-LABEL: @needs_versioning_profitable_load_used_outside_region1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK: then:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, ptr [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], ptr [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, ptr [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], ptr [[DST_GEP_3]], align 4
; CHECK-NEXT: [[SRC_GEP_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SRC_GEP_5]], align 4
; CHECK-NEXT: call void @use(i32 [[L]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %then, label %exit
then:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, ptr %src, i64 2
%src.2 = load i32, ptr %src.gep.2, align 4
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, ptr %dst, i64 2
store i32 %r.2, ptr %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, ptr %src, i64 3
%src.3 = load i32, ptr %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, ptr %dst, i64 3
store i32 %r.3, ptr %dst.gep.3, align 4
%src.gep.5 = getelementptr inbounds i32, ptr %src, i64 5
%l = load i32, ptr %src.gep.5
call void @use(i32 %l)
br label %exit
exit:
ret void
}
; Variant of region1: the escaping load (%l at src index 5) is interleaved in
; the MIDDLE of the four-lane region rather than after it. CHECK lines show
; the original scalar order is preserved.
define void @needs_versioning_profitable_load_used_outside_region2(ptr %dst, ptr %src, i1 %c) {
; CHECK-LABEL: @needs_versioning_profitable_load_used_outside_region2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK: then:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, ptr [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SRC_GEP_5]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], ptr [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, ptr [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], ptr [[DST_GEP_3]], align 4
; CHECK-NEXT: call void @use(i32 [[L]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %then, label %exit
then:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, ptr %src, i64 2
%src.2 = load i32, ptr %src.gep.2, align 4
%src.gep.5 = getelementptr inbounds i32, ptr %src, i64 5
%l = load i32, ptr %src.gep.5
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, ptr %dst, i64 2
store i32 %r.2, ptr %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, ptr %src, i64 3
%src.3 = load i32, ptr %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, ptr %dst, i64 3
store i32 %r.3, ptr %dst.gep.3, align 4
call void @use(i32 %l)
br label %exit
exit:
ret void
}
; With nocapture/readonly attributes on the pointers, no versioning is
; needed: the CHECK lines show the two lanes ARE vectorized into a single
; <2 x i32> load / ashr / store.
define void @no_version(ptr nocapture %dst, ptr nocapture readonly %src) {
; CHECK-LABEL: @no_version(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], <i32 16, i32 16>
; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%src.0 = load i32, ptr %src, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.0 = ashr i32 %src.0, 16
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.0, ptr %dst, align 4
store i32 %r.1, ptr %dst.gep.1, align 4
ret void
}
; Read-modify-write xor across four lanes of %out_block using data from
; %counter; both pointers would need versioning against each other. CHECK
; lines show the IR is left scalar.
define void @version_multiple(ptr nocapture %out_block, ptr nocapture readonly %counter) {
; CHECK-LABEL: @version_multiple(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COUNTER:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[OUT_BLOCK:%.*]], align 4
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: store i32 [[XOR]], ptr [[OUT_BLOCK]], align 4
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[COUNTER]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[OUT_BLOCK]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: store i32 [[XOR_1]], ptr [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[COUNTER]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[OUT_BLOCK]], i64 2
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: [[XOR_2:%.*]] = xor i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT: store i32 [[XOR_2]], ptr [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[COUNTER]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[OUT_BLOCK]], i64 3
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: [[XOR_3:%.*]] = xor i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: store i32 [[XOR_3]], ptr [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: ret void
;
entry:
%0 = load i32, ptr %counter, align 4
%1 = load i32, ptr %out_block, align 4
%xor = xor i32 %1, %0
store i32 %xor, ptr %out_block, align 4
%arrayidx.1 = getelementptr inbounds i32, ptr %counter, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx2.1 = getelementptr inbounds i32, ptr %out_block, i64 1
%3 = load i32, ptr %arrayidx2.1, align 4
%xor.1 = xor i32 %3, %2
store i32 %xor.1, ptr %arrayidx2.1, align 4
%arrayidx.2 = getelementptr inbounds i32, ptr %counter, i64 2
%4 = load i32, ptr %arrayidx.2, align 4
%arrayidx2.2 = getelementptr inbounds i32, ptr %out_block, i64 2
%5 = load i32, ptr %arrayidx2.2, align 4
%xor.2 = xor i32 %5, %4
store i32 %xor.2, ptr %arrayidx2.2, align 4
%arrayidx.3 = getelementptr inbounds i32, ptr %counter, i64 3
%6 = load i32, ptr %arrayidx.3, align 4
%arrayidx2.3 = getelementptr inbounds i32, ptr %out_block, i64 3
%7 = load i32, ptr %arrayidx2.3, align 4
%xor.3 = xor i32 %7, %6
store i32 %xor.3, ptr %arrayidx2.3, align 4
ret void
}
; %r.0 is computed in the entry block but returned from a different block
; (%exit), i.e. a value from the candidate region is used outside its block.
; CHECK lines show the region stays scalar.
define i32 @use_outside_version_bb(ptr %dst, ptr %src, i1 %c.1) {
; CHECK-LABEL: @use_outside_version_bb(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[R_0]]
;
entry:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
br label %exit
exit:
ret i32 %r.0
}
; The returned value (%add) is unrelated to the memory region, so only the
; load/ashr/store lanes are candidates. CHECK lines show they stay scalar
; and the trailing add is untouched.
define i32 @value_used_in_return(ptr %dst, ptr %src, i32 %x) {
; CHECK-LABEL: @value_used_in_return(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 20
; CHECK-NEXT: ret i32 [[ADD]]
;
entry:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
%add = add i32 %x, 20
ret i32 %add
}
; The candidate region sits in a conditionally-executed block (%then) whose
; terminator is a return. CHECK lines show both branch targets and the
; scalar region are preserved unchanged.
define i32 @needs_versioning2_cond_br(ptr %dst, ptr %src, i1 %c.1) {
; CHECK-LABEL: @needs_versioning2_cond_br(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: ret i32 10
; CHECK: else:
; CHECK-NEXT: ret i32 0
;
entry:
br i1 %c.1, label %then, label %else
then:
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
ret i32 10
else:
ret i32 0
}
; The source base pointer is itself loaded from memory (%src.p) inside the
; same block as the candidate region. CHECK lines show the region remains
; scalar.
define void @pointer_defined_in_bb(ptr %dst, ptr %src.p) {
; CHECK-LABEL: @pointer_defined_in_bb(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC:%.*]] = load ptr, ptr [[SRC_P:%.*]], align 8
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, ptr [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], ptr [[DST_GEP_1]], align 4
; CHECK-NEXT: ret void
;
entry:
%src = load ptr, ptr %src.p
%src.0 = load i32, ptr %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, ptr %dst, align 4
%src.gep.1 = getelementptr inbounds i32, ptr %src, i64 1
%src.1 = load i32, ptr %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
store i32 %r.1, ptr %dst.gep.1, align 4
ret void
}
; Two adjacent i32 stores into the same underlying object (%this) separated
; by a call to @clobber and an intervening load. CHECK lines show nothing is
; vectorized; the call and load stay between the stores.
define void @clobber_same_underlying_object(ptr %this) {
; CHECK-LABEL: @clobber_same_underlying_object(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P_3:%.*]] = getelementptr inbounds i32, ptr [[THIS:%.*]], i32 3
; CHECK-NEXT: store i32 10, ptr [[P_3]], align 8
; CHECK-NEXT: tail call void @clobber()
; CHECK-NEXT: [[P_4:%.*]] = getelementptr inbounds i32, ptr [[THIS]], i32 4
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P_4]], align 8
; CHECK-NEXT: store i32 20, ptr [[P_4]], align 8
; CHECK-NEXT: ret void
;
entry:
%p.3 = getelementptr inbounds i32, ptr %this, i32 3
store i32 10, ptr %p.3, align 8
tail call void @clobber()
%p.4 = getelementptr inbounds i32, ptr %this, i32 4
%l2 = load i32, ptr %p.4, align 8
store i32 20, ptr %p.4, align 8
ret void
}
declare void @clobber()
; Two adjacent stores to %A, but one is a constant store and the other copies
; a value loaded from %B. CHECK lines show the SLP vectorizer leaves the
; scalar IR as-is.
define void @slp_not_beneficial(ptr %A, ptr %B) {
; CHECK-LABEL: @slp_not_beneficial(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 4
; CHECK-NEXT: store i32 0, ptr [[TMP]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 5
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8
; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8
; CHECK-NEXT: ret void
;
bb:
%tmp = getelementptr inbounds i32, ptr %A, i32 4
store i32 0, ptr %tmp, align 8
%tmp3 = getelementptr inbounds i32, ptr %A, i32 5
%tmp4 = getelementptr inbounds i32, ptr %B, i32 4
%tmp5 = load i32, ptr %tmp4, align 8
store i32 %tmp5, ptr %tmp3, align 8
ret void
}
; Two fmul/fadd/store lanes whose loads come from different base pointers
; (ptr null / %ptr / %ptr.2), followed by an infinite self-loop block.
; CHECK lines show the doubles are not vectorized and the CFG is preserved.
define void @widget(ptr %ptr, ptr %ptr.2) {
; CHECK-LABEL: @widget(
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[TMP4:%.*]] = fmul double undef, [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = fadd double [[TMP6]], [[TMP4]]
; CHECK-NEXT: store double [[TMP7]], ptr [[PTR]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[PTR_2:%.*]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = fmul double undef, [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[PTR]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = fadd double [[TMP12]], [[TMP10]]
; CHECK-NEXT: store double [[TMP13]], ptr [[TMP11]], align 8
; CHECK-NEXT: br label [[BB15:%.*]]
; CHECK: bb15:
; CHECK-NEXT: br label [[BB15]]
;
bb1: ; preds = %bb
%tmp3 = load double, ptr null, align 8
%tmp4 = fmul double undef, %tmp3
%tmp6 = load double, ptr %ptr, align 8
%tmp7 = fadd double %tmp6, %tmp4
store double %tmp7, ptr %ptr, align 8
%tmp9 = load double, ptr %ptr.2, align 8
%tmp10 = fmul double undef, %tmp9
%tmp11 = getelementptr inbounds double, ptr %ptr, i32 1
%tmp12 = load double, ptr %tmp11, align 8
%tmp13 = fadd double %tmp12, %tmp10
store double %tmp13, ptr %tmp11, align 8
br label %bb15
bb15: ; preds = %bb15, %bb14
br label %bb15
}
%struct = type { i32, i32, float, float }
; Some points we collected as candidates for runtime checks have been removed
; before generating runtime checks. Make sure versioning is skipped.
; Per the preceding file comment: some runtime-check candidate bounds are
; removed before check generation, and versioning must then be skipped.
; CHECK lines show the entry-block constant stores are folded/vectorized
; into `store <2 x i32> <i32 10, i32 300>` while bb14 stays scalar.
define void @test_bounds_removed_before_runtime_checks(ptr %A, ptr %B, i1 %c) {
; CHECK-LABEL: @test_bounds_removed_before_runtime_checks(
; CHECK-NEXT: entry:
; CHECK-NEXT: store <2 x i32> <i32 10, i32 300>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[B:%.*]], align 8
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB23:%.*]], label [[BB14:%.*]]
; CHECK: bb14:
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 10 to i64
; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 2, [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 3
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT:%.*]], ptr [[A]], i64 0, i32 2
; CHECK-NEXT: store float 0.000000e+00, ptr [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP19]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT]], ptr [[A]], i64 0, i32 3
; CHECK-NEXT: store float 0.000000e+00, ptr [[TMP22]], align 4
; CHECK-NEXT: br label [[BB23]]
; CHECK: bb23:
; CHECK-NEXT: ret void
;
entry:
%tmp1 = fmul float 10.0, 20.0
%tmp2 = fptosi float %tmp1 to i32
%tmp3 = fmul float 30.0, 20.0
%tmp4 = fptosi float %tmp3 to i32
%tmp5 = icmp sgt i32 100, %tmp2
%tmp6 = select i1 %tmp5, i32 %tmp2, i32 10
%tmp7 = select i1 false, i32 0, i32 %tmp6
%tmp8 = icmp sgt i32 200, %tmp4
%tmp9 = select i1 %tmp8, i32 %tmp4, i32 300
%tmp10 = select i1 false, i32 0, i32 %tmp9
store i32 %tmp7, ptr %A, align 8
%tmp12 = getelementptr inbounds %struct, ptr %A, i64 0, i32 1
store i32 %tmp10, ptr %tmp12, align 4
%tmp13 = load ptr, ptr %B, align 8
br i1 %c, label %bb23, label %bb14
bb14:
%tmp15 = sext i32 %tmp7 to i64
%tmp16 = add nsw i64 2, %tmp15
%tmp17 = getelementptr inbounds i32, ptr %tmp13, i64 %tmp16
%tmp19 = getelementptr inbounds i8, ptr %tmp17, i64 3
%tmp20 = getelementptr inbounds %struct, ptr %A, i64 0, i32 2
store float 0.0, ptr %tmp20, align 8
%tmp21 = load i8, ptr %tmp19, align 1
%tmp22 = getelementptr inbounds %struct, ptr %A, i64 0, i32 3
store float 0.0, ptr %tmp22, align 4
br label %bb23
bb23:
ret void
}
; In this test there's a single bound, do not generate runtime checks.
; Per the preceding file comment: only a single memory bound exists, so no
; runtime checks should be generated. CHECK lines show the two fmuls/stores
; in bb15 ARE vectorized into a <2 x double> sequence without versioning.
define void @single_membound(ptr %arg, ptr %arg1, double %x) {
; CHECK-LABEL: @single_membound(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[ARG:%.*]], i64 1
; CHECK-NEXT: [[TMP:%.*]] = fsub double [[X:%.*]], 9.900000e+01
; CHECK-NEXT: store double [[TMP]], ptr [[TMP9]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARG1:%.*]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = fsub double 1.000000e+00, [[TMP12]]
; CHECK-NEXT: br label [[BB15:%.*]]
; CHECK: bb15:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[TMP]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP13]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 2.000000e+01, double 3.000000e+01>
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[TMP9]], align 8
; CHECK-NEXT: ret void
;
entry:
%tmp = fsub double %x, 99.0
%tmp9 = getelementptr inbounds double, ptr %arg, i64 1
store double %tmp, ptr %tmp9, align 8
%tmp12 = load double, ptr %arg1, align 8
%tmp13 = fsub double 1.0, %tmp12
%tmp14 = getelementptr inbounds double, ptr %arg, i64 2
br label %bb15
bb15:
%tmp16 = fmul double %tmp, 20.0
store double %tmp16, ptr %tmp9, align 8
%tmp17 = fmul double %tmp13, 30.0
store double %tmp17, ptr %tmp14, align 8
ret void
}
%struct.2 = type { [4 x float] }
; Make sure we do not crash when we encounter a SCEVCouldNotCompute.
; Per the preceding file comment: the store base is a phi of %A and null
; coming out of a loop (no LCSSA phi), which makes SCEV return
; SCEVCouldNotCompute — the test guards against a crash. CHECK lines show
; the four float lanes in %exit remain scalar.
define void @no_lcssa_phi(ptr %A, ptr %B, i1 %c) {
; CHECK-LABEL: @no_lcssa_phi(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[A:%.*]], [[BB:%.*]] ], [ null, [[LOOP]] ]
; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[B:%.*]], align 8
; CHECK-NEXT: [[ADD_0:%.*]] = fadd float [[L_0]], 1.000000e+01
; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[ADD_0]], 3.000000e+01
; CHECK-NEXT: store float [[MUL_0]], ptr [[PTR_PHI]], align 8
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[B_GEP_1]], align 8
; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], 1.000000e+01
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds [[STRUCT_2:%.*]], ptr [[PTR_PHI]], i64 0, i32 0, i32 1
; CHECK-NEXT: store float [[MUL_1]], ptr [[A_GEP_1]], align 8
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[B_GEP_2]], align 8
; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_2]], 1.000000e+01
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr inbounds [[STRUCT_2]], ptr [[PTR_PHI]], i64 0, i32 0, i32 2
; CHECK-NEXT: store float [[MUL_2]], ptr [[A_GEP_2]], align 8
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 3
; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[B_GEP_3]], align 8
; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_3]], 1.000000e+01
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr inbounds [[STRUCT_2]], ptr [[PTR_PHI]], i64 0, i32 0, i32 3
; CHECK-NEXT: store float [[MUL_3]], ptr [[A_GEP_3]], align 8
; CHECK-NEXT: ret void
;
bb:
br label %loop
loop:
%ptr.phi = phi ptr [ %A, %bb ], [ null, %loop ]
br i1 %c, label %exit, label %loop
exit:
%l.0 = load float, ptr %B, align 8
%add.0 = fadd float %l.0, 10.0
%mul.0 = fmul float %add.0, 30.0
store float %mul.0, ptr %ptr.phi, align 8
%B.gep.1 = getelementptr inbounds float, ptr %B, i64 1
%l.1 = load float, ptr %B.gep.1, align 8
%add.1 = fadd float %l.1, 10.0
%mul.1 = fmul float %add.1, 30.0
%A.gep.1 = getelementptr inbounds %struct.2, ptr %ptr.phi, i64 0, i32 0, i32 1
store float %mul.1, ptr %A.gep.1, align 8
%B.gep.2 = getelementptr inbounds float, ptr %B, i64 2
%l.2 = load float, ptr %B.gep.2, align 8
%add.2 = fadd float %l.2, 10.0
%mul.2 = fmul float %add.2, 30.0
%A.gep.2 = getelementptr inbounds %struct.2, ptr %ptr.phi, i64 0, i32 0, i32 2
store float %mul.2, ptr %A.gep.2, align 8
%B.gep.3 = getelementptr inbounds float, ptr %B, i64 3
%l.3 = load float, ptr %B.gep.3, align 8
%add.3 = fadd float %l.3, 10.0
%mul.3 = fmul float %add.3, 30.0
%A.gep.3 = getelementptr inbounds %struct.2, ptr %ptr.phi, i64 0, i32 0, i32 3
store float %mul.3, ptr %A.gep.3, align 8
ret void
}
; Make sure lcssa phis as pointer bases are handled properly.
define void @lcssa_phi(ptr %A, ptr %B, i1 %c) {
; CHECK-LABEL: @lcssa_phi(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[A:%.*]], [[BB:%.*]] ], [ null, [[LOOP]] ]
; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[PTR_PHI_LCSSA:%.*]] = phi ptr [ [[PTR_PHI]], [[LOOP]] ]
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[B:%.*]], align 8
; CHECK-NEXT: [[ADD_0:%.*]] = fadd float [[L_0]], 1.000000e+01
; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[ADD_0]], 3.000000e+01
; CHECK-NEXT: store float [[MUL_0]], ptr [[PTR_PHI_LCSSA]], align 8
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[B_GEP_1]], align 8
; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], 1.000000e+01
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds [[STRUCT_2:%.*]], ptr [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 1
; CHECK-NEXT: store float [[MUL_1]], ptr [[A_GEP_1]], align 8
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[B_GEP_2]], align 8
; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_2]], 1.000000e+01
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr inbounds [[STRUCT_2]], ptr [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 2
; CHECK-NEXT: store float [[MUL_2]], ptr [[A_GEP_2]], align 8
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 3
; CHECK-NEXT: [[L_3:%.*]] = load float, ptr [[B_GEP_3]], align 8
; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_3]], 1.000000e+01
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr inbounds [[STRUCT_2]], ptr [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 3
; CHECK-NEXT: store float [[MUL_3]], ptr [[A_GEP_3]], align 8
; CHECK-NEXT: ret void
;
bb:
br label %loop
loop:
%ptr.phi = phi ptr [ %A, %bb ], [ null, %loop ]
br i1 %c, label %exit, label %loop
exit:
; Same shape as @no_lcssa_phi above, but here the store base goes through a
; single-operand LCSSA phi (%ptr.phi.lcssa), the case this test exercises.
; The CHECK lines show the accesses still stay scalar.
%ptr.phi.lcssa = phi ptr [ %ptr.phi, %loop ]
%l.0 = load float, ptr %B, align 8
%add.0 = fadd float %l.0, 10.0
%mul.0 = fmul float %add.0, 30.0
store float %mul.0, ptr %ptr.phi.lcssa, align 8
%B.gep.1 = getelementptr inbounds float, ptr %B, i64 1
%l.1 = load float, ptr %B.gep.1, align 8
%add.1 = fadd float %l.1, 10.0
%mul.1 = fmul float %add.1, 30.0
%A.gep.1 = getelementptr inbounds %struct.2, ptr %ptr.phi.lcssa, i64 0, i32 0, i32 1
store float %mul.1, ptr %A.gep.1, align 8
%B.gep.2 = getelementptr inbounds float, ptr %B, i64 2
%l.2 = load float, ptr %B.gep.2, align 8
%add.2 = fadd float %l.2, 10.0
%mul.2 = fmul float %add.2, 30.0
%A.gep.2 = getelementptr inbounds %struct.2, ptr %ptr.phi.lcssa, i64 0, i32 0, i32 2
store float %mul.2, ptr %A.gep.2, align 8
%B.gep.3 = getelementptr inbounds float, ptr %B, i64 3
%l.3 = load float, ptr %B.gep.3, align 8
%add.3 = fadd float %l.3, 10.0
%mul.3 = fmul float %add.3, 30.0
%A.gep.3 = getelementptr inbounds %struct.2, ptr %ptr.phi.lcssa, i64 0, i32 0, i32 3
store float %mul.3, ptr %A.gep.3, align 8
ret void
}
%struct.spam = type { [60 x i32], i32, [12 x i8] }
declare void @foo(ptr)
; Test case with a basic block where parts can be vectorized without versioning.
define i32 @block_partly_vectorized_without_versioning(ptr readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, ptr nocapture readonly %arg3, ptr %A, ptr %B) {
; CHECK-LABEL: @block_partly_vectorized_without_versioning(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T:%.*]] = alloca <16 x i8>, align 16
; CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds i8, ptr [[ARG3:%.*]], i64 1
; CHECK-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 2
; CHECK-NEXT: [[T7:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 3
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 4
; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 5
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 6
; CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 7
; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 8
; CHECK-NEXT: [[T13:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 9
; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 10
; CHECK-NEXT: [[T15:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 11
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 12
; CHECK-NEXT: [[T17:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 13
; CHECK-NEXT: [[T18:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 14
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[ARG1:%.*]], align 1
; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds i8, ptr [[ARG3]], i64 15
; CHECK-NEXT: call void @foo(ptr nonnull [[T]])
; CHECK-NEXT: [[T26:%.*]] = load i8, ptr [[ARG3]], align 1
; CHECK-NEXT: [[T27:%.*]] = load i8, ptr [[ARG2:%.*]], align 1
; CHECK-NEXT: [[T28:%.*]] = xor i8 [[T27]], [[T26]]
; CHECK-NEXT: store i8 [[T28]], ptr [[ARG2]], align 1
; CHECK-NEXT: [[T29:%.*]] = load i8, ptr [[T5]], align 1
; CHECK-NEXT: [[T30:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 1
; CHECK-NEXT: [[T31:%.*]] = load i8, ptr [[T30]], align 1
; CHECK-NEXT: [[T32:%.*]] = xor i8 [[T31]], [[T29]]
; CHECK-NEXT: store i8 [[T32]], ptr [[T30]], align 1
; CHECK-NEXT: [[T33:%.*]] = load i8, ptr [[T6]], align 1
; CHECK-NEXT: [[T34:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 2
; CHECK-NEXT: [[T35:%.*]] = load i8, ptr [[T34]], align 1
; CHECK-NEXT: [[T36:%.*]] = xor i8 [[T35]], [[T33]]
; CHECK-NEXT: store i8 [[T36]], ptr [[T34]], align 1
; CHECK-NEXT: [[T37:%.*]] = load i8, ptr [[T7]], align 1
; CHECK-NEXT: [[T38:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 3
; CHECK-NEXT: [[T39:%.*]] = load i8, ptr [[T38]], align 1
; CHECK-NEXT: [[T40:%.*]] = xor i8 [[T39]], [[T37]]
; CHECK-NEXT: store i8 [[T40]], ptr [[T38]], align 1
; CHECK-NEXT: [[T41:%.*]] = load i8, ptr [[T8]], align 1
; CHECK-NEXT: [[T42:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 4
; CHECK-NEXT: [[T43:%.*]] = load i8, ptr [[T42]], align 1
; CHECK-NEXT: [[T44:%.*]] = xor i8 [[T43]], [[T41]]
; CHECK-NEXT: store i8 [[T44]], ptr [[T42]], align 1
; CHECK-NEXT: [[T45:%.*]] = load i8, ptr [[T9]], align 1
; CHECK-NEXT: [[T46:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 5
; CHECK-NEXT: [[T47:%.*]] = load i8, ptr [[T46]], align 1
; CHECK-NEXT: [[T48:%.*]] = xor i8 [[T47]], [[T45]]
; CHECK-NEXT: store i8 [[T48]], ptr [[T46]], align 1
; CHECK-NEXT: [[T49:%.*]] = load i8, ptr [[T10]], align 1
; CHECK-NEXT: [[T50:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 6
; CHECK-NEXT: [[T51:%.*]] = load i8, ptr [[T50]], align 1
; CHECK-NEXT: [[T52:%.*]] = xor i8 [[T51]], [[T49]]
; CHECK-NEXT: store i8 [[T52]], ptr [[T50]], align 1
; CHECK-NEXT: [[T53:%.*]] = load i8, ptr [[T11]], align 1
; CHECK-NEXT: [[T54:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 7
; CHECK-NEXT: [[T55:%.*]] = load i8, ptr [[T54]], align 1
; CHECK-NEXT: [[T56:%.*]] = xor i8 [[T55]], [[T53]]
; CHECK-NEXT: store i8 [[T56]], ptr [[T54]], align 1
; CHECK-NEXT: [[T57:%.*]] = load i8, ptr [[T12]], align 1
; CHECK-NEXT: [[T58:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 8
; CHECK-NEXT: [[T59:%.*]] = load i8, ptr [[T58]], align 1
; CHECK-NEXT: [[T60:%.*]] = xor i8 [[T59]], [[T57]]
; CHECK-NEXT: store i8 [[T60]], ptr [[T58]], align 1
; CHECK-NEXT: [[T61:%.*]] = load i8, ptr [[T13]], align 1
; CHECK-NEXT: [[T62:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 9
; CHECK-NEXT: [[T63:%.*]] = load i8, ptr [[T62]], align 1
; CHECK-NEXT: [[T64:%.*]] = xor i8 [[T63]], [[T61]]
; CHECK-NEXT: store i8 [[T64]], ptr [[T62]], align 1
; CHECK-NEXT: [[T65:%.*]] = load i8, ptr [[T14]], align 1
; CHECK-NEXT: [[T66:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 10
; CHECK-NEXT: [[T67:%.*]] = load i8, ptr [[T66]], align 1
; CHECK-NEXT: [[T68:%.*]] = xor i8 [[T67]], [[T65]]
; CHECK-NEXT: store i8 [[T68]], ptr [[T66]], align 1
; CHECK-NEXT: [[T69:%.*]] = load i8, ptr [[T15]], align 1
; CHECK-NEXT: [[T70:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 11
; CHECK-NEXT: [[T71:%.*]] = load i8, ptr [[T70]], align 1
; CHECK-NEXT: [[T72:%.*]] = xor i8 [[T71]], [[T69]]
; CHECK-NEXT: store i8 [[T72]], ptr [[T70]], align 1
; CHECK-NEXT: [[T73:%.*]] = load i8, ptr [[T16]], align 1
; CHECK-NEXT: [[T74:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 12
; CHECK-NEXT: [[T75:%.*]] = load i8, ptr [[T74]], align 1
; CHECK-NEXT: [[T76:%.*]] = xor i8 [[T75]], [[T73]]
; CHECK-NEXT: store i8 [[T76]], ptr [[T74]], align 1
; CHECK-NEXT: [[T77:%.*]] = load i8, ptr [[T17]], align 1
; CHECK-NEXT: [[T78:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 13
; CHECK-NEXT: [[T79:%.*]] = load i8, ptr [[T78]], align 1
; CHECK-NEXT: [[T80:%.*]] = xor i8 [[T79]], [[T77]]
; CHECK-NEXT: store i8 [[T80]], ptr [[T78]], align 1
; CHECK-NEXT: [[T81:%.*]] = load i8, ptr [[T18]], align 1
; CHECK-NEXT: [[T82:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 14
; CHECK-NEXT: [[T83:%.*]] = load i8, ptr [[T82]], align 1
; CHECK-NEXT: [[T84:%.*]] = xor i8 [[T83]], [[T81]]
; CHECK-NEXT: store i8 [[T84]], ptr [[T82]], align 1
; CHECK-NEXT: [[T85:%.*]] = load i8, ptr [[T21]], align 1
; CHECK-NEXT: [[T86:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 15
; CHECK-NEXT: [[T87:%.*]] = load i8, ptr [[T86]], align 1
; CHECK-NEXT: [[T88:%.*]] = xor i8 [[T87]], [[T85]]
; CHECK-NEXT: store i8 [[T88]], ptr [[T86]], align 1
; CHECK-NEXT: ret i32 1
;
bb:
; Scratch buffer later passed to @foo; the call splits the block's memory ops.
%t = alloca <16 x i8>, align 16
%t5 = getelementptr inbounds i8, ptr %arg3, i64 1
%t6 = getelementptr inbounds i8, ptr %arg3, i64 2
%t7 = getelementptr inbounds i8, ptr %arg3, i64 3
%t8 = getelementptr inbounds i8, ptr %arg3, i64 4
%t9 = getelementptr inbounds i8, ptr %arg3, i64 5
%t10 = getelementptr inbounds i8, ptr %arg3, i64 6
%t11 = getelementptr inbounds i8, ptr %arg3, i64 7
%t12 = getelementptr inbounds i8, ptr %arg3, i64 8
%t13 = getelementptr inbounds i8, ptr %arg3, i64 9
%t14 = getelementptr inbounds i8, ptr %arg3, i64 10
%t15 = getelementptr inbounds i8, ptr %arg3, i64 11
%t16 = getelementptr inbounds i8, ptr %arg3, i64 12
%t17 = getelementptr inbounds i8, ptr %arg3, i64 13
%t18 = getelementptr inbounds i8, ptr %arg3, i64 14
; First part: 16 consecutive i8 loads from %A and %B, xor'ed lane-wise.
; Per the CHECK lines above this is SLP-vectorized into a single
; <16 x i8> load/xor/store without runtime checks.
%A.0 = load i8, ptr %A
%B.0 = load i8, ptr %B
%xor.0 = xor i8 %A.0, %B.0
%A.gep.1 = getelementptr i8, ptr %A, i64 1
%A.1 = load i8, ptr %A.gep.1
%B.gep.1 = getelementptr i8, ptr %B, i64 1
%B.1 = load i8, ptr %B.gep.1
%xor.1 = xor i8 %A.1, %B.1
%A.gep.2 = getelementptr i8, ptr %A, i64 2
%A.2 = load i8, ptr %A.gep.2
%B.gep.2 = getelementptr i8, ptr %B, i64 2
%B.2 = load i8, ptr %B.gep.2
%xor.2 = xor i8 %A.2, %B.2
%A.gep.3 = getelementptr i8, ptr %A, i64 3
%A.3 = load i8, ptr %A.gep.3
%B.gep.3 = getelementptr i8, ptr %B, i64 3
%B.3 = load i8, ptr %B.gep.3
%xor.3 = xor i8 %A.3, %B.3
%A.gep.4 = getelementptr i8, ptr %A, i64 4
%A.4 = load i8, ptr %A.gep.4
%B.gep.4 = getelementptr i8, ptr %B, i64 4
%B.4 = load i8, ptr %B.gep.4
%xor.4 = xor i8 %A.4, %B.4
%A.gep.5 = getelementptr i8, ptr %A, i64 5
%A.5 = load i8, ptr %A.gep.5
%B.gep.5 = getelementptr i8, ptr %B, i64 5
%B.5 = load i8, ptr %B.gep.5
%xor.5 = xor i8 %A.5, %B.5
%A.gep.6 = getelementptr i8, ptr %A, i64 6
%A.6 = load i8, ptr %A.gep.6
%B.gep.6 = getelementptr i8, ptr %B, i64 6
%B.6 = load i8, ptr %B.gep.6
%xor.6 = xor i8 %A.6, %B.6
%A.gep.7 = getelementptr i8, ptr %A, i64 7
%A.7 = load i8, ptr %A.gep.7
%B.gep.7 = getelementptr i8, ptr %B, i64 7
%B.7 = load i8, ptr %B.gep.7
%xor.7 = xor i8 %A.7, %B.7
%A.gep.8 = getelementptr i8, ptr %A, i64 8
%A.8 = load i8, ptr %A.gep.8
%B.gep.8 = getelementptr i8, ptr %B, i64 8
%B.8 = load i8, ptr %B.gep.8
%xor.8 = xor i8 %A.8, %B.8
%A.gep.9 = getelementptr i8, ptr %A, i64 9
%A.9 = load i8, ptr %A.gep.9
%B.gep.9 = getelementptr i8, ptr %B, i64 9
%B.9 = load i8, ptr %B.gep.9
%xor.9 = xor i8 %A.9, %B.9
%A.gep.10 = getelementptr i8, ptr %A, i64 10
%A.10 = load i8, ptr %A.gep.10
%B.gep.10 = getelementptr i8, ptr %B, i64 10
%B.10 = load i8, ptr %B.gep.10
%xor.10 = xor i8 %A.10, %B.10
%A.gep.11 = getelementptr i8, ptr %A, i64 11
%A.11 = load i8, ptr %A.gep.11
%B.gep.11 = getelementptr i8, ptr %B, i64 11
%B.11 = load i8, ptr %B.gep.11
%xor.11 = xor i8 %A.11, %B.11
%A.gep.12 = getelementptr i8, ptr %A, i64 12
%A.12 = load i8, ptr %A.gep.12
%B.gep.12 = getelementptr i8, ptr %B, i64 12
%B.12 = load i8, ptr %B.gep.12
%xor.12 = xor i8 %A.12, %B.12
%A.gep.13 = getelementptr i8, ptr %A, i64 13
%A.13 = load i8, ptr %A.gep.13
%B.gep.13 = getelementptr i8, ptr %B, i64 13
%B.13 = load i8, ptr %B.gep.13
%xor.13 = xor i8 %A.13, %B.13
%A.gep.14 = getelementptr i8, ptr %A, i64 14
%A.14 = load i8, ptr %A.gep.14
%B.gep.14 = getelementptr i8, ptr %B, i64 14
%B.14 = load i8, ptr %B.gep.14
%xor.14 = xor i8 %A.14, %B.14
%A.gep.15 = getelementptr i8, ptr %A, i64 15
%A.15 = load i8, ptr %A.gep.15
%B.gep.15 = getelementptr i8, ptr %B, i64 15
%B.15 = load i8, ptr %B.gep.15
%xor.15 = xor i8 %A.15, %B.15
; Consecutive stores of the 16 xor results to %arg1 (vectorized with the above).
store i8 %xor.0, ptr %arg1
%R.gep.1 = getelementptr i8, ptr %arg1, i64 1
store i8 %xor.1, ptr %R.gep.1
%R.gep.2 = getelementptr i8, ptr %arg1, i64 2
store i8 %xor.2, ptr %R.gep.2
%R.gep.3 = getelementptr i8, ptr %arg1, i64 3
store i8 %xor.3, ptr %R.gep.3
%R.gep.4 = getelementptr i8, ptr %arg1, i64 4
store i8 %xor.4, ptr %R.gep.4
%R.gep.5 = getelementptr i8, ptr %arg1, i64 5
store i8 %xor.5, ptr %R.gep.5
%R.gep.6 = getelementptr i8, ptr %arg1, i64 6
store i8 %xor.6, ptr %R.gep.6
%R.gep.7 = getelementptr i8, ptr %arg1, i64 7
store i8 %xor.7, ptr %R.gep.7
%R.gep.8 = getelementptr i8, ptr %arg1, i64 8
store i8 %xor.8, ptr %R.gep.8
%R.gep.9 = getelementptr i8, ptr %arg1, i64 9
store i8 %xor.9, ptr %R.gep.9
%R.gep.10 = getelementptr i8, ptr %arg1, i64 10
store i8 %xor.10, ptr %R.gep.10
%R.gep.11 = getelementptr i8, ptr %arg1, i64 11
store i8 %xor.11, ptr %R.gep.11
%R.gep.12 = getelementptr i8, ptr %arg1, i64 12
store i8 %xor.12, ptr %R.gep.12
%R.gep.13 = getelementptr i8, ptr %arg1, i64 13
store i8 %xor.13, ptr %R.gep.13
%R.gep.14 = getelementptr i8, ptr %arg1, i64 14
store i8 %xor.14, ptr %R.gep.14
%R.gep.15 = getelementptr i8, ptr %arg1, i64 15
store i8 %xor.15, ptr %R.gep.15
%t21 = getelementptr inbounds i8, ptr %arg3, i64 15
call void @foo(ptr nonnull %t)
; Second part: in-place byte-wise xor of %arg2 with %arg3. Per the CHECK
; lines above it remains scalar (presumably it would require versioning;
; verify against the pass behavior if the checks are regenerated).
%t26 = load i8, ptr %arg3, align 1
%t27 = load i8, ptr %arg2, align 1
%t28 = xor i8 %t27, %t26
store i8 %t28, ptr %arg2, align 1
%t29 = load i8, ptr %t5, align 1
%t30 = getelementptr inbounds i8, ptr %arg2, i64 1
%t31 = load i8, ptr %t30, align 1
%t32 = xor i8 %t31, %t29
store i8 %t32, ptr %t30, align 1
%t33 = load i8, ptr %t6, align 1
%t34 = getelementptr inbounds i8, ptr %arg2, i64 2
%t35 = load i8, ptr %t34, align 1
%t36 = xor i8 %t35, %t33
store i8 %t36, ptr %t34, align 1
%t37 = load i8, ptr %t7, align 1
%t38 = getelementptr inbounds i8, ptr %arg2, i64 3
%t39 = load i8, ptr %t38, align 1
%t40 = xor i8 %t39, %t37
store i8 %t40, ptr %t38, align 1
%t41 = load i8, ptr %t8, align 1
%t42 = getelementptr inbounds i8, ptr %arg2, i64 4
%t43 = load i8, ptr %t42, align 1
%t44 = xor i8 %t43, %t41
store i8 %t44, ptr %t42, align 1
%t45 = load i8, ptr %t9, align 1
%t46 = getelementptr inbounds i8, ptr %arg2, i64 5
%t47 = load i8, ptr %t46, align 1
%t48 = xor i8 %t47, %t45
store i8 %t48, ptr %t46, align 1
%t49 = load i8, ptr %t10, align 1
%t50 = getelementptr inbounds i8, ptr %arg2, i64 6
%t51 = load i8, ptr %t50, align 1
%t52 = xor i8 %t51, %t49
store i8 %t52, ptr %t50, align 1
%t53 = load i8, ptr %t11, align 1
%t54 = getelementptr inbounds i8, ptr %arg2, i64 7
%t55 = load i8, ptr %t54, align 1
%t56 = xor i8 %t55, %t53
store i8 %t56, ptr %t54, align 1
%t57 = load i8, ptr %t12, align 1
%t58 = getelementptr inbounds i8, ptr %arg2, i64 8
%t59 = load i8, ptr %t58, align 1
%t60 = xor i8 %t59, %t57
store i8 %t60, ptr %t58, align 1
%t61 = load i8, ptr %t13, align 1
%t62 = getelementptr inbounds i8, ptr %arg2, i64 9
%t63 = load i8, ptr %t62, align 1
%t64 = xor i8 %t63, %t61
store i8 %t64, ptr %t62, align 1
%t65 = load i8, ptr %t14, align 1
%t66 = getelementptr inbounds i8, ptr %arg2, i64 10
%t67 = load i8, ptr %t66, align 1
%t68 = xor i8 %t67, %t65
store i8 %t68, ptr %t66, align 1
%t69 = load i8, ptr %t15, align 1
%t70 = getelementptr inbounds i8, ptr %arg2, i64 11
%t71 = load i8, ptr %t70, align 1
%t72 = xor i8 %t71, %t69
store i8 %t72, ptr %t70, align 1
%t73 = load i8, ptr %t16, align 1
%t74 = getelementptr inbounds i8, ptr %arg2, i64 12
%t75 = load i8, ptr %t74, align 1
%t76 = xor i8 %t75, %t73
store i8 %t76, ptr %t74, align 1
%t77 = load i8, ptr %t17, align 1
%t78 = getelementptr inbounds i8, ptr %arg2, i64 13
%t79 = load i8, ptr %t78, align 1
%t80 = xor i8 %t79, %t77
store i8 %t80, ptr %t78, align 1
%t81 = load i8, ptr %t18, align 1
%t82 = getelementptr inbounds i8, ptr %arg2, i64 14
%t83 = load i8, ptr %t82, align 1
%t84 = xor i8 %t83, %t81
store i8 %t84, ptr %t82, align 1
%t85 = load i8, ptr %t21, align 1
%t86 = getelementptr inbounds i8, ptr %arg2, i64 15
%t87 = load i8, ptr %t86, align 1
%t88 = xor i8 %t87, %t85
store i8 %t88, ptr %t86, align 1
ret i32 1
}
; A test case where instructions required to compute the pointer bounds get
; vectorized before versioning. Make sure there is no crash.
define void @crash_instructions_deleted(ptr %t, ptr %a, ptr noalias %ptr) {
; CHECK-LABEL: @crash_instructions_deleted(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 2
; CHECK-NEXT: store <2 x i32> <i32 0, i32 10>, ptr [[T15]], align 8
; CHECK-NEXT: [[T17:%.*]] = load ptr, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: br label [[BB18:%.*]]
; CHECK: bb18:
; CHECK-NEXT: [[T19:%.*]] = sext i32 0 to i64
; CHECK-NEXT: [[T20:%.*]] = add nsw i64 1, [[T19]]
; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds i32, ptr [[T17]], i64 [[T20]]
; CHECK-NEXT: [[T23:%.*]] = getelementptr inbounds i8, ptr [[T21]], i64 1
; CHECK-NEXT: [[T24:%.*]] = getelementptr inbounds i8, ptr [[T21]], i64 2
; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds i8, ptr [[T21]], i64 3
; CHECK-NEXT: [[T26:%.*]] = load i8, ptr [[T21]], align 1
; CHECK-NEXT: [[T27:%.*]] = uitofp i8 [[T26]] to float
; CHECK-NEXT: [[T28:%.*]] = fdiv float [[T27]], 2.550000e+02
; CHECK-NEXT: store float [[T28]], ptr [[T:%.*]], align 8
; CHECK-NEXT: [[T30:%.*]] = load i8, ptr [[T23]], align 1
; CHECK-NEXT: [[T31:%.*]] = uitofp i8 [[T30]] to float
; CHECK-NEXT: [[T32:%.*]] = fdiv float [[T31]], 2.550000e+02
; CHECK-NEXT: [[T33:%.*]] = getelementptr inbounds float, ptr [[T]], i64 1
; CHECK-NEXT: store float [[T32]], ptr [[T33]], align 4
; CHECK-NEXT: [[T34:%.*]] = load i8, ptr [[T24]], align 1
; CHECK-NEXT: [[T35:%.*]] = uitofp i8 [[T34]] to float
; CHECK-NEXT: [[T36:%.*]] = fdiv float [[T35]], 2.550000e+02
; CHECK-NEXT: [[T37:%.*]] = getelementptr inbounds float, ptr [[T]], i64 2
; CHECK-NEXT: store float [[T36]], ptr [[T37]], align 8
; CHECK-NEXT: [[T38:%.*]] = load i8, ptr [[T25]], align 1
; CHECK-NEXT: [[T39:%.*]] = uitofp i8 [[T38]] to float
; CHECK-NEXT: [[T40:%.*]] = fdiv float [[T39]], 2.550000e+02
; CHECK-NEXT: [[T41:%.*]] = getelementptr inbounds float, ptr [[T]], i64 3
; CHECK-NEXT: store float [[T40]], ptr [[T41]], align 4
; CHECK-NEXT: ret void
;
bb:
; %t10/%t14 fold to the constants 0 and 10; the two i32 stores of them are
; vectorized (see the <2 x i32> store in the CHECK lines), deleting the
; scalar instructions that the pointer-bound computation for %t21 refers to.
%t6 = icmp slt i32 10, 0
%t7 = icmp sgt i32 20, 20
%t9 = select i1 %t7, i32 5, i32 0
%t10 = select i1 %t6, i32 0, i32 %t9
%t11 = icmp slt i32 10, 0
%t12 = icmp sgt i32 20, 20
%t13 = select i1 %t12, i32 5, i32 10
%t14 = select i1 %t11, i32 0, i32 %t13
%t15 = getelementptr inbounds i32, ptr %a, i32 2
store i32 %t10, ptr %t15, align 8
%t16 = getelementptr inbounds i32, ptr %a, i32 3
store i32 %t14, ptr %t16, align 4
%t17 = load ptr, ptr %ptr, align 8
br label %bb18
bb18: ; preds = %bb5
; Four i8 loads from consecutive offsets of %t21, converted with uitofp,
; divided by 255.0 and stored to consecutive floats at %t. The CHECK lines
; show these stay scalar; the test only guards against a crash.
%t19 = sext i32 %t10 to i64
%t20 = add nsw i64 1, %t19
%t21 = getelementptr inbounds i32, ptr %t17, i64 %t20
%t23 = getelementptr inbounds i8, ptr %t21, i64 1
%t24 = getelementptr inbounds i8, ptr %t21, i64 2
%t25 = getelementptr inbounds i8, ptr %t21, i64 3
%t26 = load i8, ptr %t21, align 1
%t27 = uitofp i8 %t26 to float
%t28 = fdiv float %t27, 2.550000e+02
store float %t28, ptr %t, align 8
%t30 = load i8, ptr %t23, align 1
%t31 = uitofp i8 %t30 to float
%t32 = fdiv float %t31, 2.550000e+02
%t33 = getelementptr inbounds float, ptr %t, i64 1
store float %t32, ptr %t33, align 4
%t34 = load i8, ptr %t24, align 1
%t35 = uitofp i8 %t34 to float
%t36 = fdiv float %t35, 2.550000e+02
%t37 = getelementptr inbounds float, ptr %t, i64 2
store float %t36, ptr %t37, align 8
%t38 = load i8, ptr %t25, align 1
%t39 = uitofp i8 %t38 to float
%t40 = fdiv float %t39, 2.550000e+02
%t41 = getelementptr inbounds float, ptr %t, i64 3
store float %t40, ptr %t41, align 4
ret void
}
; A test case where there are no instructions accessing a tracked object in a
; block for which versioning was requested.
define void @crash_no_tracked_instructions(ptr %arg, ptr %arg.2, ptr %arg.3, i1 %c) {
; CHECK-LABEL: @crash_no_tracked_instructions(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T19:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
; CHECK-NEXT: [[T20:%.*]] = load float, ptr [[ARG_3:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[T20]], i32 1
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB22:%.*]], label [[BB30:%.*]]
; CHECK: bb22:
; CHECK-NEXT: [[T23:%.*]] = fmul float [[T20]], 9.900000e+01
; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds float, ptr [[T19]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[T23]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], <float 9.900000e+01, float 1.000000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT: store float [[TMP4]], ptr [[T25]], align 4
; CHECK-NEXT: [[T27:%.*]] = load float, ptr [[ARG_2:%.*]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP3]], <float 2.000000e+01, float 2.000000e+01>
; CHECK-NEXT: br label [[BB30]]
; CHECK: bb30:
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[BB22]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[BB36:%.*]]
; CHECK: bb36:
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], <float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT: store <2 x float> [[TMP7]], ptr [[ARG_3]], align 4
; CHECK-NEXT: br label [[BB41:%.*]]
; CHECK: bb41:
; CHECK-NEXT: ret void
;
entry:
%t19 = load ptr, ptr %arg
%t20 = load float, ptr %arg.3, align 4
br i1 %c, label %bb22, label %bb30
bb22:
; The fmul/fadd chain feeding the phis in %bb30 is vectorized to <2 x float>
; (see CHECK lines), so %bb30 itself ends up containing no scalar memory
; instructions that access a tracked object, the situation this test covers.
%t23 = fmul float %t20, 99.0
%t24 = fmul float %t23, 99.0
%t25 = getelementptr inbounds float, ptr %t19, i64 2
%t26 = fmul float %t23, 10.0
store float %t26, ptr %t25, align 4
%t27 = load float, ptr %arg.2, align 8
%t28 = fadd float %t24, 20.0
%t29 = fadd float %t26, 20.0
br label %bb30
bb30:
%t31 = phi float [ %t28, %bb22 ], [ 0.0, %entry ]
%t32 = phi float [ %t29, %bb22 ], [ %t20, %entry ]
br label %bb36
bb36:
; Two consecutive float stores to %arg.3, vectorized per the CHECK lines.
%t37 = fmul float %t31, 3.0
store float %t37, ptr %arg.3, align 4
%t39 = fmul float %t32, 3.0
%t40 = getelementptr inbounds float, ptr %arg.3, i64 1
store float %t39, ptr %t40, align 4
br label %bb41
bb41:
ret void
}