Reapply "[msan] Automatically print shadow for failing outlined checks" (#145611) (#145615)

This reverts commit 5eb5f0d876, i.e., it
relands 1b71ea411a.

Test case was failing on aarch64 because the long double type is
implemented differently on x86 vs aarch64. This reland restricts the
test to x86.

----

Original CL description:
    
A commonly used aid for debugging MSan reports is
`__msan_print_shadow()`, which requires manual app code annotations
(typically of the variable in the UUM report or nearby). This is in
contrast to ASan, which automatically prints out the shadow map when a
check fails.
    
This patch changes MSan to print the shadow that failed an outlined
check (checks are outlined per function after the
`-msan-instrumentation-with-call-threshold` is exceeded) if verbosity >=
1. Note that we do not print out the shadow map of "neighboring"
variables because this is technically infeasible; see "Caveat" below.
    
This patch can be easier to use than `__msan_print_shadow()` because
this does not require manual app code annotations. Additionally, due to
optimizations, `__msan_print_shadow()` calls can sometimes spuriously
affect whether a variable is initialized.
    
As a side effect, this patch also enables outlined checks for
arbitrary-sized shadows (vs. the current hardcoded handlers for
{1,2,4,8}-byte shadows).
    
Caveat: the shadow does not necessarily correspond to an individual user
variable, because MSan instrumentation may combine and/or truncate
multiple shadows prior to emitting a check that the mangled shadow is
zero (e.g., `convertShadowToScalar()`,
`handleSSEVectorConvertIntrinsic()`, `materializeInstructionChecks()`).
On the other hand, it is arguably a strength that this feature emits the
shadow that directly matters for the MSan check, which cannot otherwise be
obtained using the MSan API.
This commit is contained in:
Thurston Dang
2025-06-24 20:33:11 -07:00
committed by GitHub
parent 3de2af3ef5
commit c85466dcd4
5 changed files with 145 additions and 22 deletions

View File

@@ -352,16 +352,38 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(
using namespace __msan;
#define MSAN_MAYBE_WARNING(type, size) \
void __msan_maybe_warning_##size(type s, u32 o) { \
GET_CALLER_PC_BP; \
if (UNLIKELY(s)) { \
PrintWarningWithOrigin(pc, bp, o); \
if (__msan::flags()->halt_on_error) { \
Printf("Exiting\n"); \
Die(); \
} \
} \
// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
// an MSan shadow region.
static void print_shadow_value(void *shadow, u64 size) {
Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s");
for (unsigned int i = 0; i < size; i++) {
if (i % 4 == 0)
Printf(" ");
unsigned char x = ((unsigned char *)shadow)[i];
Printf("%x%x", x >> 4, x & 0xf);
}
Printf("\n");
Printf(
"Caveat: the shadow value does not necessarily directly correspond to a "
"single user variable. The correspondence is stronger, but not always "
"perfect, when origin tracking is enabled.\n");
Printf("\n");
}
// Defines __msan_maybe_warning_<size>(type s, u32 o), the outlined check for a
// shadow value `s` passed by value with origin `o`.
// If `s` is non-zero, the generated function:
//   - prints the shadow value (the bytes of the by-value argument `s`) when
//     Verbosity() >= 1,
//   - reports the warning via PrintWarningWithOrigin() using the caller's
//     pc/bp captured by GET_CALLER_PC_BP,
//   - prints "Exiting" and calls Die() when the halt_on_error flag is set.
// If `s` is zero, the generated function does nothing.
#define MSAN_MAYBE_WARNING(type, size) \
void __msan_maybe_warning_##size(type s, u32 o) { \
GET_CALLER_PC_BP; \
\
if (UNLIKELY(s)) { \
if (Verbosity() >= 1) \
print_shadow_value((void *)(&s), sizeof(s)); \
PrintWarningWithOrigin(pc, bp, o); \
if (__msan::flags()->halt_on_error) { \
Printf("Exiting\n"); \
Die(); \
} \
} \
}
MSAN_MAYBE_WARNING(u8, 1)
@@ -369,6 +391,30 @@ MSAN_MAYBE_WARNING(u16, 2)
MSAN_MAYBE_WARNING(u32, 4)
MSAN_MAYBE_WARNING(u64, 8)
// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
// an MSan shadow region.
void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) {
GET_CALLER_PC_BP;
bool allZero = true;
for (unsigned int i = 0; i < size; i++) {
if (((char *)shadow)[i]) {
allZero = false;
break;
}
}
if (UNLIKELY(!allZero)) {
if (Verbosity() >= 1)
print_shadow_value(shadow, size);
PrintWarningWithOrigin(pc, bp, o);
if (__msan::flags()->halt_on_error) {
Printf("Exiting\n");
Die();
}
}
}
#define MSAN_MAYBE_STORE_ORIGIN(type, size) \
void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \
if (UNLIKELY(s)) { \

View File

@@ -60,6 +60,8 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_4(u32 s, u32 o);
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_8(u64 s, u32 o);
// Variable-size variant of the __msan_maybe_warning_* checks. `shadow` points
// to `size` bytes holding the shadow value to test (it is not a pointer into
// an MSan shadow region); `o` is the origin used for the report.
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_N(void *shadow, u64 size, u32 o);
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_store_origin_1(u8 s, void *p, u32 o);

View File

@@ -0,0 +1,42 @@
// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \
// RUN: && not env MSAN_OPTIONS=verbosity=1 %run %t 2>&1 | FileCheck %s
// REQUIRES: x86_64-target-arch
// 'long double' implementation varies between platforms.
#include <ctype.h>
#include <stdio.h>
#include <sanitizer/msan_interface.h>
// Reads several uninitialized variables so that, with verbosity=1 and
// outlined checks (call threshold 0), the runtime prints the shadow value
// for each failing check. The expected output lines follow each use.
int main(int argc, char *argv[]) {
// Never initialized; the expected shadow below shows the first 10 of the
// 16 bytes as poisoned.
long double a;
printf("a: %Lf\n", a);
// CHECK: Shadow value (16 bytes): ffffffff ffffffff ffff0000 00000000
// Never initialized: all 8 shadow bytes are expected to be set.
unsigned long long b;
printf("b: %llu\n", b);
// CHECK: Shadow value (8 bytes): ffffffff ffffffff
// Initialize only the byte at offset 2 of b; its shadow byte is expected to
// become 00 while the others stay ff.
char *p = (char *)(&b);
p[2] = 36;
printf("b: %lld\n", b);
// CHECK: Shadow value (8 bytes): ffff00ff ffffffff
// Shifting b left by 8 bits is expected to shift the printed shadow by one
// byte as well, with a 00 byte appearing at offset 0.
b = b << 8;
printf("b: %lld\n", b);
// Also dump b's shadow through the public interface function.
__msan_print_shadow(&b, sizeof(b));
// CHECK: Shadow value (8 bytes): 00ffff00 ffffffff
// Never initialized: all 4 shadow bytes are expected to be set.
unsigned int c;
printf("c: %u\n", c);
// CHECK: Shadow value (4 bytes): ffffffff
// Converted to boolean
if (c) {
// CHECK: Shadow value (1 byte): 01
printf("Hello\n");
}
return 0;
}

View File

@@ -665,6 +665,7 @@ private:
// These arrays are indexed by log2(AccessSize).
FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
FunctionCallee MaybeWarningVarSizeFn;
FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
/// Run-time helper that generates a new origin value for a stack
@@ -939,7 +940,9 @@ void MemorySanitizer::createUserspaceApi(Module &M,
MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
MaybeWarningVarSizeFn = M.getOrInsertFunction(
"__msan_maybe_warning_N", TLI.getAttrList(C, {}, /*Signed=*/false),
IRB.getVoidTy(), PtrTy, IRB.getInt64Ty(), IRB.getInt32Ty());
FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
@@ -1248,7 +1251,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Constants likely will be eliminated by follow-up passes.
if (isa<Constant>(V))
return false;
++SplittableBlocksCount;
return ClInstrumentationWithCallThreshold >= 0 &&
SplittableBlocksCount > ClInstrumentationWithCallThreshold;
@@ -1447,18 +1449,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getDataLayout();
TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (instrumentWithCalls(ConvertedShadow) &&
SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
if (instrumentWithCalls(ConvertedShadow) && !MS.CompileKernel) {
// ZExt cannot convert between vector and scalar
ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
CallBase *CB = IRB.CreateCall(
Fn, {ConvertedShadow2,
MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
CB->addParamAttr(0, Attribute::ZExt);
CB->addParamAttr(1, Attribute::ZExt);
if (SizeIndex < kNumberOfAccessSizes) {
FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
CallBase *CB = IRB.CreateCall(
Fn,
{ConvertedShadow2,
MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
CB->addParamAttr(0, Attribute::ZExt);
CB->addParamAttr(1, Attribute::ZExt);
} else {
FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u);
IRB.CreateStore(ConvertedShadow2, ShadowAlloca);
unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType());
CallBase *CB = IRB.CreateCall(
Fn,
{ShadowAlloca, ConstantInt::get(IRB.getInt64Ty(), ShadowSize),
MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
CB->addParamAttr(1, Attribute::ZExt);
CB->addParamAttr(2, Attribute::ZExt);
}
} else {
Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(

View File

@@ -73,13 +73,30 @@ define <4 x i32> @test64(<4 x i32> %vec, i64 %idx, i32 %x) sanitize_memory {
; CHECK: call void @__msan_maybe_warning_8(i64 zeroext %{{.*}}, i32 zeroext 0)
; CHECK: ret <4 x i32>
; Type size too large => inline check.
; Type size too large => use variable-size handler.
define <4 x i32> @test65(<4 x i32> %vec, i65 %idx, i32 %x) sanitize_memory {
%vec1 = insertelement <4 x i32> %vec, i32 %x, i65 %idx
ret <4 x i32> %vec1
}
; CHECK-LABEL: @test65(
; CHECK: call void @__msan_warning_noreturn
; CHECK: %[[A:.*]] = zext i65 %1 to i128
; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0)
; CHECK: ret <4 x i32>
define <4 x i32> @test128(<4 x i32> %vec, i128 %idx, i32 %x) sanitize_memory {
%vec1 = insertelement <4 x i32> %vec, i32 %x, i128 %idx
ret <4 x i32> %vec1
}
; CHECK-LABEL: @test128(
; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0)
; CHECK: ret <4 x i32>
define <4 x i32> @test256(<4 x i32> %vec, i256 %idx, i32 %x) sanitize_memory {
%vec1 = insertelement <4 x i32> %vec, i32 %x, i256 %idx
ret <4 x i32> %vec1
}
; CHECK-LABEL: @test256(
; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 32, i32 zeroext 0)
; CHECK: ret <4 x i32>
define <4 x i32> @testUndef(<4 x i32> %vec, i32 %x) sanitize_memory {