Call slot optimization may introduce writes to the destination object that occur earlier than in the original function. We already check that the destination is dereferenceable and aligned, but we do not make sure that it is writable. As such, we might introduce a write to read-only memory, or introduce a data race. Fix this by checking that the object is writable. For arguments, this is indicated by the new writable attribute. Tests using sret/dereferenceable are updated to use it.
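For illustration only, here is a minimal sketch of the case the new check guards against; the function @no_writable, the callee @callee and the %pair type are hypothetical and not part of the committed test below. Because %agg.result carries sret but not writable, forwarding it into the callee as the call slot would introduce a store earlier than the original final memcpy, so the expectation is that the transform is skipped and the memcpy stays.

%pair = type { x86_fp80, x86_fp80 }

; Hypothetical negative case: %agg.result lacks the writable attribute, so
; memcpyopt should not substitute it for %memtmp at the call site.
define void @no_writable(ptr noalias sret(%pair) %agg.result, ptr byval(%pair) align 8 %z) nounwind {
entry:
  %memtmp = alloca %pair, align 16
  call void @callee(ptr noalias sret(%pair) %memtmp, ptr byval(%pair) align 8 %z) nounwind
  ; This copy is expected to remain: eliminating it would add an earlier write
  ; through %agg.result, which is only known dereferenceable, not writable.
  call void @llvm.memcpy.p0.p0.i32(ptr align 16 %agg.result, ptr align 16 %memtmp, i32 32, i1 false)
  ret void
}

declare void @callee(ptr noalias nocapture sret(%pair), ptr byval(%pair)) nounwind
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind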
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"

%0 = type { x86_fp80, x86_fp80 }

define void @ccosl(ptr noalias writable sret(%0) %agg.result, ptr byval(%0) align 8 %z) nounwind {
; CHECK-LABEL: @ccosl(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IZ:%.*]] = alloca [[TMP0:%.*]], align 16
; CHECK-NEXT:    [[MEMTMP:%.*]] = alloca [[TMP0]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[TMP0]], ptr [[Z:%.*]], i32 0, i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[TMP1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[TMP0]], ptr [[IZ]], i32 0, i32 1
; CHECK-NEXT:    [[TMP8:%.*]] = load x86_fp80, ptr [[Z]], align 16
; CHECK-NEXT:    store x86_fp80 [[TMP3]], ptr [[IZ]], align 16
; CHECK-NEXT:    store x86_fp80 [[TMP8]], ptr [[TMP4]], align 16
; CHECK-NEXT:    call void @ccoshl(ptr noalias sret([[TMP0]]) [[AGG_RESULT:%.*]], ptr byval([[TMP0]]) align 8 [[IZ]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT:    ret void
;
entry:
  %iz = alloca %0
  %memtmp = alloca %0, align 16
  %tmp1 = getelementptr %0, ptr %z, i32 0, i32 1
  %tmp2 = load x86_fp80, ptr %tmp1, align 16
  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
  %tmp4 = getelementptr %0, ptr %iz, i32 0, i32 1
  %tmp8 = load x86_fp80, ptr %z, align 16
  store x86_fp80 %tmp3, ptr %iz, align 16
  store x86_fp80 %tmp8, ptr %tmp4, align 16
  call void @ccoshl(ptr noalias sret(%0) %memtmp, ptr byval(%0) align 8 %iz) nounwind
  call void @llvm.memcpy.p0.p0.i32(ptr align 16 %agg.result, ptr align 16 %memtmp, i32 32, i1 false)
  ret void
}

declare void @ccoshl(ptr noalias nocapture sret(%0), ptr byval(%0)) nounwind

declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind