From c85466dcd485d4a1320eec9cbf32bd47ff90464f Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 24 Jun 2025 20:33:11 -0700 Subject: [PATCH] Reapply "[msan] Automatically print shadow for failing outlined checks" (#145611) (#145615) This reverts commit 5eb5f0d8760c6b757c1da22682b5cf722efee489 i.e., relands 1b71ea411a9d36705663b1724ececbdfec7cc98c. Test case was failing on aarch64 because the long double type is implemented differently on x86 vs aarch64. This reland restricts the test to x86. ---- Original CL description: A commonly used aid for debugging MSan reports is `__msan_print_shadow()`, which requires manual app code annotations (typically of the variable in the UUM report or nearby). This is in contrast to ASan, which automatically prints out the shadow map when a check fails. This patch changes MSan to print the shadow that failed an outlined check (checks are outlined per function after the `-msan-instrumentation-with-call-threshold` is exceeded) if verbosity >= 1. Note that we do not print out the shadow map of "neighboring" variables because this is technically infeasible; see "Caveat" below. This patch can be easier to use than `__msan_print_shadow()` because this does not require manual app code annotations. Additionally, due to optimizations, `__msan_print_shadow()` calls can sometimes spuriously affect whether a variable is initialized. As a side effect, this patch also enables outlined checks for arbitrary-sized shadows (vs. the current hardcoded handlers for {1,2,4,8}-byte shadows). Caveat: the shadow does not necessarily correspond to an individual user variable, because MSan instrumentation may combine and/or truncate multiple shadows prior to emitting a check that the mangled shadow is zero (e.g., `convertShadowToScalar()`, `handleSSEVectorConvertIntrinsic()`, `materializeInstructionChecks()`). 
OTOH it is arguably a strength that this feature emits the shadow that directly matters for the MSan check, but which cannot be obtained using the MSan API. --- compiler-rt/lib/msan/msan.cpp | 66 ++++++++++++++++--- .../lib/msan/msan_interface_internal.h | 2 + .../msan_print_shadow_on_outlined_check.cpp | 42 ++++++++++++ .../Instrumentation/MemorySanitizer.cpp | 36 +++++++--- .../MemorySanitizer/with-call-type-size.ll | 21 +++++- 5 files changed, 145 insertions(+), 22 deletions(-) create mode 100644 compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index a3c0c2e485af..67879e37fad5 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -352,16 +352,38 @@ void __sanitizer::BufferedStackTrace::UnwindImpl( using namespace __msan; -#define MSAN_MAYBE_WARNING(type, size) \ - void __msan_maybe_warning_##size(type s, u32 o) { \ - GET_CALLER_PC_BP; \ - if (UNLIKELY(s)) { \ - PrintWarningWithOrigin(pc, bp, o); \ - if (__msan::flags()->halt_on_error) { \ - Printf("Exiting\n"); \ - Die(); \ - } \ - } \ +// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into +// an MSan shadow region. +static void print_shadow_value(void *shadow, u64 size) { + Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s"); + for (unsigned int i = 0; i < size; i++) { + if (i % 4 == 0) + Printf(" "); + + unsigned char x = ((unsigned char *)shadow)[i]; + Printf("%x%x", x >> 4, x & 0xf); + } + Printf("\n"); + Printf( + "Caveat: the shadow value does not necessarily directly correspond to a " + "single user variable. 
The correspondence is stronger, but not always " + "perfect, when origin tracking is enabled.\n"); + Printf("\n"); +} + +#define MSAN_MAYBE_WARNING(type, size) \ + void __msan_maybe_warning_##size(type s, u32 o) { \ + GET_CALLER_PC_BP; \ + \ + if (UNLIKELY(s)) { \ + if (Verbosity() >= 1) \ + print_shadow_value((void *)(&s), sizeof(s)); \ + PrintWarningWithOrigin(pc, bp, o); \ + if (__msan::flags()->halt_on_error) { \ + Printf("Exiting\n"); \ + Die(); \ + } \ + } \ } MSAN_MAYBE_WARNING(u8, 1) @@ -369,6 +391,30 @@ MSAN_MAYBE_WARNING(u16, 2) MSAN_MAYBE_WARNING(u32, 4) MSAN_MAYBE_WARNING(u64, 8) +// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into +// an MSan shadow region. +void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) { + GET_CALLER_PC_BP; + + bool allZero = true; + for (unsigned int i = 0; i < size; i++) { + if (((char *)shadow)[i]) { + allZero = false; + break; + } + } + + if (UNLIKELY(!allZero)) { + if (Verbosity() >= 1) + print_shadow_value(shadow, size); + PrintWarningWithOrigin(pc, bp, o); + if (__msan::flags()->halt_on_error) { + Printf("Exiting\n"); + Die(); + } + } +} + #define MSAN_MAYBE_STORE_ORIGIN(type, size) \ void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \ if (UNLIKELY(s)) { \ diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index c2eead13c20c..75425b98166a 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -60,6 +60,8 @@ SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_warning_4(u32 s, u32 o); SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_warning_8(u64 s, u32 o); +SANITIZER_INTERFACE_ATTRIBUTE +void __msan_maybe_warning_N(void *shadow, u64 size, u32 o); SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_store_origin_1(u8 s, void *p, u32 o); diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp 
b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp new file mode 100644 index 000000000000..74de0c322f60 --- /dev/null +++ b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp @@ -0,0 +1,42 @@ +// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \ +// RUN: && not env MSAN_OPTIONS=verbosity=1 %run %t 2>&1 | FileCheck %s + +// REQUIRES: x86_64-target-arch +// 'long double' implementation varies between platforms. + +#include +#include + +#include + +int main(int argc, char *argv[]) { + long double a; + printf("a: %Lf\n", a); + // CHECK: Shadow value (16 bytes): ffffffff ffffffff ffff0000 00000000 + + unsigned long long b; + printf("b: %llu\n", b); + // CHECK: Shadow value (8 bytes): ffffffff ffffffff + + char *p = (char *)(&b); + p[2] = 36; + printf("b: %lld\n", b); + // CHECK: Shadow value (8 bytes): ffff00ff ffffffff + + b = b << 8; + printf("b: %lld\n", b); + __msan_print_shadow(&b, sizeof(b)); + // CHECK: Shadow value (8 bytes): 00ffff00 ffffffff + + unsigned int c; + printf("c: %u\n", c); + // CHECK: Shadow value (4 bytes): ffffffff + + // Converted to boolean + if (c) { + // CHECK: Shadow value (1 byte): 01 + printf("Hello\n"); + } + + return 0; +} diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 3941bed37eba..a6f9992383cd 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -665,6 +665,7 @@ private: // These arrays are indexed by log2(AccessSize). 
FunctionCallee MaybeWarningFn[kNumberOfAccessSizes]; + FunctionCallee MaybeWarningVarSizeFn; FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes]; /// Run-time helper that generates a new origin value for a stack @@ -939,7 +940,9 @@ void MemorySanitizer::createUserspaceApi(Module &M, MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false), IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty()); - + MaybeWarningVarSizeFn = M.getOrInsertFunction( + "__msan_maybe_warning_N", TLI.getAttrList(C, {}, /*Signed=*/false), + IRB.getVoidTy(), PtrTy, IRB.getInt64Ty(), IRB.getInt32Ty()); FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false), @@ -1248,7 +1251,6 @@ struct MemorySanitizerVisitor : public InstVisitor { // Constants likely will be eliminated by follow-up passes. if (isa(V)) return false; - ++SplittableBlocksCount; return ClInstrumentationWithCallThreshold >= 0 && SplittableBlocksCount > ClInstrumentationWithCallThreshold; @@ -1447,18 +1449,32 @@ struct MemorySanitizerVisitor : public InstVisitor { const DataLayout &DL = F.getDataLayout(); TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType()); unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); - if (instrumentWithCalls(ConvertedShadow) && - SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) { - FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex]; + if (instrumentWithCalls(ConvertedShadow) && !MS.CompileKernel) { // ZExt cannot convert between vector and scalar ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB); Value *ConvertedShadow2 = IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); - CallBase *CB = IRB.CreateCall( - Fn, {ConvertedShadow2, - MS.TrackOrigins && Origin ? 
Origin : (Value *)IRB.getInt32(0)}); - CB->addParamAttr(0, Attribute::ZExt); - CB->addParamAttr(1, Attribute::ZExt); + + if (SizeIndex < kNumberOfAccessSizes) { + FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex]; + CallBase *CB = IRB.CreateCall( + Fn, + {ConvertedShadow2, + MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)}); + CB->addParamAttr(0, Attribute::ZExt); + CB->addParamAttr(1, Attribute::ZExt); + } else { + FunctionCallee Fn = MS.MaybeWarningVarSizeFn; + Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u); + IRB.CreateStore(ConvertedShadow2, ShadowAlloca); + unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType()); + CallBase *CB = IRB.CreateCall( + Fn, + {ShadowAlloca, ConstantInt::get(IRB.getInt64Ty(), ShadowSize), + MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)}); + CB->addParamAttr(1, Attribute::ZExt); + CB->addParamAttr(2, Attribute::ZExt); + } } else { Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( diff --git a/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll b/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll index 0b81e5682062..3b1ab3364b31 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll @@ -73,13 +73,30 @@ define <4 x i32> @test64(<4 x i32> %vec, i64 %idx, i32 %x) sanitize_memory { ; CHECK: call void @__msan_maybe_warning_8(i64 zeroext %{{.*}}, i32 zeroext 0) ; CHECK: ret <4 x i32> -; Type size too large => inline check. +; Type size too large => use variable-size handler. 
define <4 x i32> @test65(<4 x i32> %vec, i65 %idx, i32 %x) sanitize_memory { %vec1 = insertelement <4 x i32> %vec, i32 %x, i65 %idx ret <4 x i32> %vec1 } ; CHECK-LABEL: @test65( -; CHECK: call void @__msan_warning_noreturn +; CHECK: %[[A:.*]] = zext i65 %1 to i128 +; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0) +; CHECK: ret <4 x i32> + +define <4 x i32> @test128(<4 x i32> %vec, i128 %idx, i32 %x) sanitize_memory { + %vec1 = insertelement <4 x i32> %vec, i32 %x, i128 %idx + ret <4 x i32> %vec1 +} +; CHECK-LABEL: @test128( +; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0) +; CHECK: ret <4 x i32> + +define <4 x i32> @test256(<4 x i32> %vec, i256 %idx, i32 %x) sanitize_memory { + %vec1 = insertelement <4 x i32> %vec, i32 %x, i256 %idx + ret <4 x i32> %vec1 +} +; CHECK-LABEL: @test256( +; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 32, i32 zeroext 0) ; CHECK: ret <4 x i32> define <4 x i32> @testUndef(<4 x i32> %vec, i32 %x) sanitize_memory {