Files
clang-p2996/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
Benjamin Maxwell fc28971fb9 Add nocapture to pointer parameters of masked stores/loads
The lack of this attribute (particularly on the load intrinsics)
prevented InstCombine from optimizing away allocas and memcpys
for arrays that could be read directly from rodata.

This now also includes a new test to check the masked load/store
intrinsics have the expected attributes (specifically nocapture).

Differential Revision: https://reviews.llvm.org/D135656
2022-10-24 11:15:55 +00:00

41 lines
2.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
@contant_int_array = private unnamed_addr constant [10 x i64] [i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9]
; InstCombine should be able to optimize out the alloca and memcpy:
define void @combine_masked_load_store_from_constant_array(ptr %ptr) {
; CHECK-LABEL: @combine_masked_load_store_from_constant_array(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 8, <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP2]], ptr [[PTR:%.*]], i32 1, <vscale x 2 x i1> [[TMP1]])
; CHECK-NEXT: ret void
;
%1 = alloca [10 x i64]
call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr @contant_int_array, i64 80, i1 false)
%2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
%3 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull %1, i32 8, <vscale x 2 x i1> %2, <vscale x 2 x i64> zeroinitializer)
call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> %3, ptr %ptr, i32 1, <vscale x 2 x i1> %2)
ret void
}
define void @combine_masked_expandload_compressstore_from_constant_array(ptr %ptr) {
; CHECK-LABEL: @combine_masked_expandload_compressstore_from_constant_array(
; CHECK-NEXT: [[TMP1:%.*]] = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <10 x i64> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: ret void
;
%1 = alloca [10 x i64]
call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr @contant_int_array, i64 80, i1 false)
%2 = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull %1, <10 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <10 x i64> zeroinitializer)
call void @llvm.masked.compressstore.nxv10i64.p0(<10 x i64> %2, ptr %ptr, <10 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>)
ret void
}
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64>, ptr, i32, <vscale x 2 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32, i32)
declare <10 x i64> @llvm.masked.expandload.v10i64(ptr, <10 x i1>, <10 x i64>)
declare void @llvm.masked.compressstore.nxv10i64.p0(<10 x i64>, ptr, <10 x i1>)