[msan] Add optional flag to improve instrumentation of disjoint OR (#145990)

The disjoint OR (https://github.com/llvm/llvm-project/pull/72583) of two '1's is poison, hence the MSan ought to consider the result uninitialized (rather than initialized - i.e. a false negative - as per the existing instrumentation which ignores disjointedness). This patch adds a flag, `-msan-precise-disjoint-or`, which defaults to false (the legacy behavior). A future patch will default this flag to true.

Updates the test from https://github.com/llvm/llvm-project/pull/145982
This commit is contained in:
Thurston Dang
2025-06-26 22:55:55 -07:00
committed by GitHub
parent 56b2c7d988
commit afe6af14ff
2 changed files with 36 additions and 11 deletions

View File

@@ -282,6 +282,12 @@ static cl::opt<bool> ClPoisonUndefVectors(
"unaffected by this flag (see -msan-poison-undef)."),
cl::Hidden, cl::init(false));
static cl::opt<bool> ClPreciseDisjointOr(
"msan-precise-disjoint-or",
cl::desc("Precisely poison disjoint OR. If false (legacy behavior), "
"disjointedness is ignored (i.e., 1|1 is initialized)."),
cl::Hidden, cl::init(false));
static cl::opt<bool>
ClHandleICmp("msan-handle-icmp",
cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
@@ -2497,11 +2503,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitOr(BinaryOperator &I) {
IRBuilder<> IRB(&I);
// "Or" of 1 and a poisoned value results in unpoisoned value.
// 1|1 => 1; 0|1 => 1; p|1 => 1;
// 1|0 => 1; 0|0 => 0; p|0 => p;
// 1|p => 1; 0|p => p; p|p => p;
// S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
// "Or" of 1 and a poisoned value results in unpoisoned value:
// 1|1 => 1; 0|1 => 1; p|1 => 1;
// 1|0 => 1; 0|0 => 0; p|0 => p;
// 1|p => 1; 0|p => p; p|p => p;
//
// S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
//
// Addendum if the "Or" is "disjoint":
// 1|1 => p;
// S = S | (V1 & V2)
Value *S1 = getShadow(&I, 0);
Value *S2 = getShadow(&I, 1);
Value *V1 = IRB.CreateNot(I.getOperand(0));
@@ -2513,7 +2524,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *S1S2 = IRB.CreateAnd(S1, S2);
Value *V1S2 = IRB.CreateAnd(V1, S2);
Value *S1V2 = IRB.CreateAnd(S1, V2);
setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
Value *S = IRB.CreateOr({S1S2, V1S2, S1V2});
if (ClPreciseDisjointOr && cast<PossiblyDisjointInst>(&I)->isDisjoint()) {
Value *V1V2 = IRB.CreateAnd(V1, V2);
S = IRB.CreateOr({S, V1V2});
}
setShadow(&I, S);
setOriginForNaryOp(I);
}

View File

@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
; RUN: opt < %s -S -passes=msan -msan-precise-disjoint-or=false 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-IMPRECISE
; RUN: opt < %s -S -passes=msan -msan-precise-disjoint-or=true 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE
;
; Test bitwise OR instructions, especially the "disjoint OR", which is
; currently handled incorrectly by MSan (as if it was a regular OR).
; Test bitwise OR instructions, including "disjoint OR".
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -41,8 +41,15 @@ define i8 @test_disjoint_or(i8 %a, i8 %b) sanitize_memory {
; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = or i8 [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[C:%.*]] = or disjoint i8 [[A]], [[B]]
; CHECK-NEXT: store i8 [[TMP11]], ptr @__msan_retval_tls, align 8
;
; CHECK-IMPRECISE: [[C:%.*]] = or disjoint i8 [[A]], [[B]]
; CHECK-IMPRECISE-NEXT: store i8 [[TMP11]], ptr @__msan_retval_tls, align 8
;
; CHECK-PRECISE: [[TMP10:%.*]] = and i8 [[TMP3]], [[TMP4]]
; CHECK-PRECISE-NEXT: [[TMP12:%.*]] = or i8 [[TMP11]], [[TMP10]]
; CHECK-PRECISE-NEXT: [[C:%.*]] = or disjoint i8 [[A]], [[B]]
; CHECK-PRECISE-NEXT: store i8 [[TMP12]], ptr @__msan_retval_tls, align 8
;
; CHECK-NEXT: ret i8 [[C]]
;
%c = or disjoint i8 %a, %b