This patch updates VectorCombine to use a worklist to allow iterative
simplifications where a combine enables other combines. Suggested in D100302.
The main use case at the moment is foldSingleElementStore and
scalarizeLoadExtract working together to improve scalarization.

Note that we now also do not run SimplifyInstructionsInBlock on the whole
function if there have been changes. This means we fail to remove/simplify
instructions not related to any of the vector combines. IMO this is fine, as
simplifying the whole function seems more like a workaround for not tracking
the changed instructions.

Compile-time impact looks neutral:
NewPM-O3: +0.02%
NewPM-ReleaseThinLTO: -0.00%
NewPM-ReleaseLTO-g: -0.02%

http://llvm-compile-time-tracker.com/compare.php?from=52832cd917af00e2b9c6a9d1476ba79754dcabff&to=e66520a4637290550a945d528e3e59573485dd40&stat=instructions

Reviewed By: spatel, lebedev.ri

Differential Revision: https://reviews.llvm.org/D110171
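
For illustration, the shape of the change is roughly the worklist fixpoint
loop below. This is a minimal standalone C++ sketch, not the actual
VectorCombine code; Inst, runWorklist, and TryCombine are invented names.
The point is that when a combine rewrites an instruction, the users of the
result are re-queued, so a fold enabled by an earlier fold is revisited
without rescanning the whole function.

#include <deque>
#include <functional>
#include <unordered_set>
#include <vector>

// Toy "instruction": an id plus the instructions that use its result.
struct Inst {
  int Id = 0;
  std::vector<Inst *> Users;
};

// Visit every instruction once, then keep revisiting the users of anything
// a combine changed until no combine makes further progress.
bool runWorklist(std::vector<Inst *> &Block,
                 const std::function<bool(Inst &)> &TryCombine) {
  std::deque<Inst *> Worklist(Block.begin(), Block.end());
  std::unordered_set<Inst *> InList(Block.begin(), Block.end());
  bool Changed = false;
  while (!Worklist.empty()) {
    Inst *I = Worklist.front();
    Worklist.pop_front();
    InList.erase(I);
    if (!TryCombine(*I))
      continue; // nothing folded; leave the users alone
    Changed = true;
    for (Inst *U : I->Users)       // a fold here may enable a fold on a
      if (InList.insert(U).second) // user, so queue each user exactly once
        Worklist.push_back(U);
  }
  return Changed;
}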

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -vector-combine -data-layout=e < %s | FileCheck %s
; RUN: opt -S -vector-combine -data-layout=E < %s | FileCheck %s
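
; These tests exercise VectorCombine's scalarization of single-element
; vector updates: a load / insertelement / store sequence that changes one
; lane is rewritten into a scalar store of that lane when it is safe to do
; so (foldSingleElementStore, per the patch description above).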

define void @insert_store(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  store <16 x i8> %vecins, <16 x i8>* %q, align 16
  ret void
}

define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_i16_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
; CHECK-NEXT:    store i16 [[S:%.*]], i16* [[TMP0]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q, align 1
  ret void
}

; Verify the case where the index is out of bounds.
define void @insert_store_outofbounds(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_outofbounds(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 9
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 9
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}
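
; Scalable vectors are not scalarized, even with an in-range constant index.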
define void @insert_store_vscale(<vscale x 8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_vscale(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], <vscale x 8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <vscale x 8 x i16>, <vscale x 8 x i16>* %q
  %vecins = insertelement <vscale x 8 x i16> %0, i16 %s, i32 3
  store <vscale x 8 x i16> %vecins, <vscale x 8 x i16>* %q
  ret void
}
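
; Vectors with non-byte-sized element types are left as-is.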
define void @insert_store_v9i4(<9 x i4>* %q, i4 zeroext %s) {
; CHECK-LABEL: @insert_store_v9i4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <9 x i4>, <9 x i4>* [[Q:%.*]], align 8
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <9 x i4> [[TMP0]], i4 [[S:%.*]], i32 3
; CHECK-NEXT:    store <9 x i4> [[VECINS]], <9 x i4>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <9 x i4>, <9 x i4>* %q
  %vecins = insertelement <9 x i4> %0, i4 %s, i32 3
  store <9 x i4> %vecins, <9 x i4>* %q, align 1
  ret void
}

define void @insert_store_v4i27(<4 x i27>* %q, i27 zeroext %s) {
; CHECK-LABEL: @insert_store_v4i27(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i27>, <4 x i27>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x i27> [[TMP0]], i27 [[S:%.*]], i32 3
; CHECK-NEXT:    store <4 x i27> [[VECINS]], <4 x i27>* [[Q]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <4 x i27>, <4 x i27>* %q
  %vecins = insertelement <4 x i27> %0, i27 %s, i32 3
  store <4 x i27> %vecins, <4 x i27>* %q, align 1
  ret void
}
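
; The load and the insert/store live in different basic blocks, so the
; sequence is left alone.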
define void @insert_store_blk_differ(<8 x i16>* %q, i16 zeroext %s) {
; CHECK-LABEL: @insert_store_blk_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[Q:%.*]], align 16
; CHECK-NEXT:    br label [[CONT:%.*]]
; CHECK:       cont:
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
; CHECK-NEXT:    store <8 x i16> [[VECINS]], <8 x i16>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x i16>, <8 x i16>* %q
  br label %cont
cont:
  %vecins = insertelement <8 x i16> %0, i16 %s, i32 3
  store <8 x i16> %vecins, <8 x i16>* %q
  ret void
}
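
; A variable index with no known bound cannot be proven in range, so the
; store is not scalarized.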
define void @insert_store_nonconst(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

; Verify that the alignment here is narrowed to the scalar store size.
define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_large_alignment(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i32 [[S:%.*]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %i = load <4 x i32>, <4 x i32>* %q, align 128
  %vecins = insertelement <4 x i32> %i, i32 %s, i32 %idx
  store <4 x i32> %vecins, <4 x i32>* %q, align 128
  ret void
}

define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_8(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 8
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 8
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 8
  ret void
}

define void @insert_store_nonconst_align_maximum_4(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_maximum_4(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 4
  ret void
}

define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_align_larger(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i64 [[S:%.*]], i64* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
  %cmp = icmp ult i32 %idx, 2
  call void @llvm.assume(i1 %cmp)
  %i = load <8 x i64>, <8 x i64>* %q, align 4
  %vecins = insertelement <8 x i64> %i, i64 %s, i32 %idx
  store <8 x i64> %vecins, <8 x i64>* %q, align 2
  ret void
}
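
; An llvm.assume on the index can establish that the access is in bounds,
; as long as the assume is available at the point of the load.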
define void @insert_store_nonconst_index_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @maythrow() readnone

define void @insert_store_nonconst_index_not_known_valid_by_assume_after_load(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 4
  %0 = load <16 x i8>, <16 x i8>* %q
  call void @maythrow()
  call void @llvm.assume(i1 %cmp)
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_assume(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 17
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp ult i32 %idx, 17
  call void @llvm.assume(i1 %cmp)
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

declare void @llvm.assume(i1)
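
; Clamping the index with 'and' (and with 'urem' further below) can prove it
; in range, but the index must also be known not to be poison or undef:
; either the argument is noundef, or the clamped value is frozen.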
define void @insert_store_nonconst_index_known_noundef_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_base_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX_FROZEN]], 7
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.frozen = freeze i32 %idx
  %idx.clamped = and i32 %idx.frozen, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_frozen_and_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
; CHECK-NEXT:    [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %idx.clamped.frozen = freeze i32 %idx.clamped
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_and_but_may_be_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[TMP0]], 7
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP1]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 7
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = and i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_noundef_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_base_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_base_frozen_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX_FROZEN]], 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.frozen = freeze i32 %idx
  %idx.clamped = urem i32 %idx.frozen, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_frozen_and_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_frozen_and_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
; CHECK-NEXT:    [[IDX_CLAMPED_FROZEN:%.*]] = freeze i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED_FROZEN]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %idx.clamped.frozen = freeze i32 %idx.clamped
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped.frozen
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_valid_by_urem_but_may_be_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[TMP0]], 16
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP1]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 16
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}

define void @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(<16 x i8>* %q, i8 zeroext %s, i32 noundef %idx) {
; CHECK-LABEL: @insert_store_nonconst_index_known_noundef_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 17
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
; CHECK-NEXT:    store <16 x i8> [[VECINS]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %idx.clamped = urem i32 %idx, 17
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 %idx.clamped
  store <16 x i8> %vecins, <16 x i8>* %q
  ret void
}
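
; Pointer bitcasts and zero-index GEPs on the store address are stripped
; when matching the load and store addresses.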
define void @insert_store_ptr_strip(<16 x i8>* %q, i8 zeroext %s) {
; CHECK-LABEL: @insert_store_ptr_strip(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR0:%.*]] = bitcast <16 x i8>* [[Q:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[ADDR0]], i64 0
; CHECK-NEXT:    [[ADDR2:%.*]] = bitcast <2 x i64>* [[ADDR1]] to <16 x i8>*
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[ADDR2]], i32 0, i32 3
; CHECK-NEXT:    store i8 [[S:%.*]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
  %addr0 = bitcast <16 x i8>* %q to <2 x i64>*
  %addr1 = getelementptr <2 x i64>, <2 x i64>* %addr0, i64 0
  %addr2 = bitcast <2 x i64>* %addr1 to <16 x i8>*
  store <16 x i8> %vecins, <16 x i8>* %addr2
  ret void
}
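
; Volatile accesses must be preserved, so neither update below is scalarized.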
define void @volatile_update(<16 x i8>* %q, <16 x i8>* %p, i8 zeroext %s) {
; CHECK-LABEL: @volatile_update(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store volatile <16 x i8> [[VECINS0]], <16 x i8>* [[Q]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load volatile <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[S]], i32 1
; CHECK-NEXT:    store <16 x i8> [[VECINS1]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, <16 x i8>* %q
  %vecins0 = insertelement <16 x i8> %0, i8 %s, i32 3
  store volatile <16 x i8> %vecins0, <16 x i8>* %q

  %1 = load volatile <16 x i8>, <16 x i8>* %p
  %vecins1 = insertelement <16 x i8> %1, i8 %s, i32 1
  store <16 x i8> %vecins1, <16 x i8>* %p
  ret void
}
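
; The load and store addresses differ, so this is not a single-lane update
; of the same memory.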
define void @insert_store_addr_differ(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_addr_differ(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %q
  ret void
}

; We can't transform if any instr could modify memory in between.
define void @insert_store_mem_modify(<16 x i8>* %p, <16 x i8>* %q, <16 x i8>* noalias %r, i8 %s, i32 %m) {
; CHECK-LABEL: @insert_store_mem_modify(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[Q:%.*]], align 16
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[R:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[Q]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    [[PTR0:%.*]] = bitcast <16 x i8>* [[P]] to <4 x i32>*
; CHECK-NEXT:    [[LD3:%.*]] = load <4 x i32>, <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    store <16 x i8> zeroinitializer, <16 x i8>* [[P]], align 16
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x i32> [[LD3]], i32 [[M:%.*]], i32 0
; CHECK-NEXT:    store <4 x i32> [[INS3]], <4 x i32>* [[PTR0]], align 16
; CHECK-NEXT:    ret void
;
entry:
  ; p may alias q
  %ld = load <16 x i8>, <16 x i8>* %p
  store <16 x i8> zeroinitializer, <16 x i8>* %q
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p

  ; p never aliases r
  %ld2 = load <16 x i8>, <16 x i8>* %q
  store <16 x i8> zeroinitializer, <16 x i8>* %r
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %q

  ; p must alias ptr0
  %ptr0 = bitcast <16 x i8>* %p to <4 x i32>*
  %ld3 = load <4 x i32>, <4 x i32>* %ptr0
  store <16 x i8> zeroinitializer, <16 x i8>* %p
  %ins3 = insertelement <4 x i32> %ld3, i32 %m, i32 0
  store <4 x i32> %ins3, <4 x i32>* %ptr0

  ret void
}

; Check cases where calls may modify memory.
define void @insert_store_with_call(<16 x i8>* %p, <16 x i8>* %q, i8 %s) {
; CHECK-LABEL: @insert_store_with_call(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[P:%.*]], align 16
; CHECK-NEXT:    call void @maywrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[INS:%.*]] = insertelement <16 x i8> [[LD]], i8 [[S:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[INS]], <16 x i8>* [[P]], align 16
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    call void @nowrite(<16 x i8>* [[P]])
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P]], i32 0, i32 7
; CHECK-NEXT:    store i8 [[S]], i8* [[TMP0]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %ld = load <16 x i8>, <16 x i8>* %p
  call void @maywrite(<16 x i8>* %p)
  %ins = insertelement <16 x i8> %ld, i8 %s, i32 3
  store <16 x i8> %ins, <16 x i8>* %p
  call void @foo() ; Barrier
  %ld2 = load <16 x i8>, <16 x i8>* %p
  call void @nowrite(<16 x i8>* %p)
  %ins2 = insertelement <16 x i8> %ld2, i8 %s, i32 7
  store <16 x i8> %ins2, <16 x i8>* %p
  ret void
}

declare void @foo()
declare void @maywrite(<16 x i8>*)
declare void @nowrite(<16 x i8>*) readonly

; Test that the combine gives up when the number of instructions scanned
; between the load and the store exceeds the limit (default 30).
define i32 @insert_store_maximum_scan_instrs(i32 %arg, i16* %arg1, <16 x i8>* %arg2, i8 zeroext %arg3) {
; CHECK-LABEL: @insert_store_maximum_scan_instrs(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I:%.*]] = or i32 [[ARG:%.*]], 1
; CHECK-NEXT:    [[I4:%.*]] = load <16 x i8>, <16 x i8>* [[ARG2:%.*]], align 16
; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @bar(i32 [[I]], i1 true)
; CHECK-NEXT:    [[I6:%.*]] = shl i32 [[ARG]], [[I5]]
; CHECK-NEXT:    [[I7:%.*]] = lshr i32 [[I6]], 26
; CHECK-NEXT:    [[I8:%.*]] = trunc i32 [[I7]] to i8
; CHECK-NEXT:    [[I9:%.*]] = and i8 [[I8]], 31
; CHECK-NEXT:    [[I10:%.*]] = lshr i32 [[I6]], 11
; CHECK-NEXT:    [[I11:%.*]] = and i32 [[I10]], 32767
; CHECK-NEXT:    [[I12:%.*]] = zext i8 [[I9]] to i64
; CHECK-NEXT:    [[I13:%.*]] = getelementptr inbounds i16, i16* [[ARG1:%.*]], i64 [[I12]]
; CHECK-NEXT:    [[I14:%.*]] = load i16, i16* [[I13]], align 2
; CHECK-NEXT:    [[I15:%.*]] = zext i16 [[I14]] to i32
; CHECK-NEXT:    [[I16:%.*]] = add nuw nsw i8 [[I9]], 1
; CHECK-NEXT:    [[I17:%.*]] = zext i8 [[I16]] to i64
; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds i16, i16* [[ARG1]], i64 [[I17]]
; CHECK-NEXT:    [[I19:%.*]] = load i16, i16* [[I18]], align 2
; CHECK-NEXT:    [[I20:%.*]] = zext i16 [[I19]] to i32
; CHECK-NEXT:    [[I21:%.*]] = sub nsw i32 [[I20]], [[I15]]
; CHECK-NEXT:    [[I22:%.*]] = mul nsw i32 [[I11]], [[I21]]
; CHECK-NEXT:    [[I23:%.*]] = ashr i32 [[I22]], 15
; CHECK-NEXT:    [[I24:%.*]] = shl nuw nsw i32 [[I5]], 15
; CHECK-NEXT:    [[I25:%.*]] = xor i32 [[I24]], 1015808
; CHECK-NEXT:    [[I26:%.*]] = add nuw nsw i32 [[I25]], [[I15]]
; CHECK-NEXT:    [[I27:%.*]] = add nsw i32 [[I26]], [[I23]]
; CHECK-NEXT:    [[I28:%.*]] = sitofp i32 [[ARG]] to double
; CHECK-NEXT:    [[I29:%.*]] = tail call double @llvm.log2.f64(double [[I28]])
; CHECK-NEXT:    [[I30:%.*]] = fptosi double [[I29]] to i32
; CHECK-NEXT:    [[I31:%.*]] = shl nsw i32 [[I30]], 15
; CHECK-NEXT:    [[I32:%.*]] = or i32 [[I31]], 4
; CHECK-NEXT:    [[I33:%.*]] = icmp eq i32 [[I27]], [[I32]]
; CHECK-NEXT:    [[I34:%.*]] = select i1 [[I33]], i32 [[ARG]], i32 [[I31]]
; CHECK-NEXT:    [[I35:%.*]] = lshr i32 [[I34]], 1
; CHECK-NEXT:    [[I36:%.*]] = insertelement <16 x i8> [[I4]], i8 [[ARG3:%.*]], i32 3
; CHECK-NEXT:    store <16 x i8> [[I36]], <16 x i8>* [[ARG2]], align 16
; CHECK-NEXT:    ret i32 [[I35]]
;
bb:
  %i = or i32 %arg, 1
  %i4 = load <16 x i8>, <16 x i8>* %arg2, align 16
  %i5 = tail call i32 @bar(i32 %i, i1 true)
  %i6 = shl i32 %arg, %i5
  %i7 = lshr i32 %i6, 26
  %i8 = trunc i32 %i7 to i8
  %i9 = and i8 %i8, 31
  %i10 = lshr i32 %i6, 11
  %i11 = and i32 %i10, 32767
  %i12 = zext i8 %i9 to i64
  %i13 = getelementptr inbounds i16, i16* %arg1, i64 %i12
  %i14 = load i16, i16* %i13, align 2
  %i15 = zext i16 %i14 to i32
  %i16 = add nuw nsw i8 %i9, 1
  %i17 = zext i8 %i16 to i64
  %i18 = getelementptr inbounds i16, i16* %arg1, i64 %i17
  %i19 = load i16, i16* %i18, align 2
  %i20 = zext i16 %i19 to i32
  %i21 = sub nsw i32 %i20, %i15
  %i22 = mul nsw i32 %i11, %i21
  %i23 = ashr i32 %i22, 15
  %i24 = shl nuw nsw i32 %i5, 15
  %i25 = xor i32 %i24, 1015808
  %i26 = add nuw nsw i32 %i25, %i15
  %i27 = add nsw i32 %i26, %i23
  %i28 = sitofp i32 %arg to double
  %i29 = tail call double @llvm.log2.f64(double %i28)
  %i30 = fptosi double %i29 to i32
  %i31 = shl nsw i32 %i30, 15
  %i32 = or i32 %i31, 4
  %i33 = icmp eq i32 %i27, %i32
  %i34 = select i1 %i33, i32 %arg, i32 %i31
  %i35 = lshr i32 %i34, 1
  %i36 = insertelement <16 x i8> %i4, i8 %arg3, i32 3
  store <16 x i8> %i36, <16 x i8>* %arg2, align 16
  ret i32 %i35
}

declare i32 @bar(i32, i1) readonly
declare double @llvm.log2.f64(double)