clang-p2996/llvm/test/Transforms/MemCpyOpt/sret.ll
Nikita Popov 369c9b791b [MemCpyOpt] Require writable object during call slot optimization (#71542)
Call slot optimization may introduce writes to the destination object
that occur earlier than in the original function. We currently check
that the destination is dereferenceable and aligned, but we
do not make sure that it is writable. As such, we might introduce a
write to read-only memory, or introduce a data race.

Fix this by checking that the object is writable. For arguments, this is
indicated by the new writable attribute. Tests using
sret/dereferenceable are updated to use it.
2023-11-09 15:55:44 +01:00
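
For context, call slot optimization targets the pattern where a call fills a temporary and a memcpy then copies that temporary into the real destination; the transform makes the call write into the destination directly and drops the memcpy. The following is a minimal, hypothetical sketch of that idea (the names @caller and @fill are invented for illustration and are not part of the test below), showing the kind of IR the pass looks for and the rewrite one would expect when the destination carries dereferenceable, align, and writable:

; Before: @fill writes into the temporary %tmp, which is then copied into %dst.
define void @caller(ptr noalias writable dereferenceable(8) align 8 %dst) {
entry:
  %tmp = alloca i64, align 8
  call void @fill(ptr nocapture writeonly %tmp)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 8, i1 false)
  ret void
}

; After call slot optimization the call is expected to write into %dst directly
; and the memcpy to disappear:
;   call void @fill(ptr nocapture writeonly %dst)

declare void @fill(ptr nocapture writeonly)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)

The sret.ll test below exercises exactly this pattern through an sret temporary; the writable attribute on %agg.result is what now licenses the rewrite.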


; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"

%0 = type { x86_fp80, x86_fp80 }
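
; Call slot optimization should forward the writable sret argument %agg.result
; directly into the call to @ccoshl and delete the trailing memcpy from
; %memtmp (see the CHECK lines below).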
define void @ccosl(ptr noalias writable sret(%0) %agg.result, ptr byval(%0) align 8 %z) nounwind {
; CHECK-LABEL: @ccosl(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IZ:%.*]] = alloca [[TMP0:%.*]], align 16
; CHECK-NEXT:    [[MEMTMP:%.*]] = alloca [[TMP0]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[TMP0]], ptr [[Z:%.*]], i32 0, i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[TMP1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[TMP0]], ptr [[IZ]], i32 0, i32 1
; CHECK-NEXT:    [[TMP8:%.*]] = load x86_fp80, ptr [[Z]], align 16
; CHECK-NEXT:    store x86_fp80 [[TMP3]], ptr [[IZ]], align 16
; CHECK-NEXT:    store x86_fp80 [[TMP8]], ptr [[TMP4]], align 16
; CHECK-NEXT:    call void @ccoshl(ptr noalias sret([[TMP0]]) [[AGG_RESULT:%.*]], ptr byval([[TMP0]]) align 8 [[IZ]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT:    ret void
;
entry:
  %iz = alloca %0
  %memtmp = alloca %0, align 16
  %tmp1 = getelementptr %0, ptr %z, i32 0, i32 1
  %tmp2 = load x86_fp80, ptr %tmp1, align 16
  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
  %tmp4 = getelementptr %0, ptr %iz, i32 0, i32 1
  %tmp8 = load x86_fp80, ptr %z, align 16
  store x86_fp80 %tmp3, ptr %iz, align 16
  store x86_fp80 %tmp8, ptr %tmp4, align 16
  call void @ccoshl(ptr noalias sret(%0) %memtmp, ptr byval(%0) align 8 %iz) nounwind
  call void @llvm.memcpy.p0.p0.i32(ptr align 16 %agg.result, ptr align 16 %memtmp, i32 32, i1 false)
  ret void
}

declare void @ccoshl(ptr noalias nocapture sret(%0), ptr byval(%0)) nounwind

declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind