tsan: fix deadlock in pthread_atfork callbacks
We take report/thread_registry locks around fork. This means we cannot report any bugs in atfork handlers. We resolved this by enabling per-thread ignores around fork. This resolved some of the cases, but not all. The added test triggers a race report from a signal handler called from atfork callback, we reset per-thread ignores around signal handlers, so we tried to report it and deadlocked. But there are more cases: a signal handler can be called synchronously if it's sent to itself. Or any other report types would cause deadlocks as well: mutex misuse, signal handler spoiling errno, etc. Disable all reports for the duration of fork with thr->suppress_reports and don't re-enable them around signal handlers. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D101154
This commit is contained in:
@@ -1976,7 +1976,8 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
|
||||
// because in async signal processing case (when handler is called directly
|
||||
// from rtl_generic_sighandler) we have not yet received the reraised
|
||||
// signal; and it looks too fragile to intercept all ways to reraise a signal.
|
||||
if (flags()->report_bugs && !sync && sig != SIGTERM && errno != 99) {
|
||||
if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM &&
|
||||
errno != 99) {
|
||||
VarSizeStackTrace stack;
|
||||
// StackTrace::GetNestInstructionPc(pc) is used because return address is
|
||||
// expected, OutputReport() will undo this.
|
||||
|
||||
@@ -145,7 +145,7 @@ void AllocatorPrintStats() {
|
||||
|
||||
static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
|
||||
if (atomic_load_relaxed(&thr->in_signal_handler) == 0 ||
|
||||
!flags()->report_signal_unsafe)
|
||||
!ShouldReport(thr, ReportTypeSignalUnsafe))
|
||||
return;
|
||||
VarSizeStackTrace stack;
|
||||
ObtainCurrentStack(thr, pc, &stack);
|
||||
|
||||
@@ -519,23 +519,22 @@ int Finalize(ThreadState *thr) {
|
||||
void ForkBefore(ThreadState *thr, uptr pc) {
|
||||
ctx->thread_registry->Lock();
|
||||
ctx->report_mtx.Lock();
|
||||
// Ignore memory accesses in the pthread_atfork callbacks.
|
||||
// If any of them triggers a data race we will deadlock
|
||||
// on the report_mtx.
|
||||
// Suppress all reports in the pthread_atfork callbacks.
|
||||
// Reports will deadlock on the report_mtx.
|
||||
// We could ignore interceptors and sync operations as well,
|
||||
// but so far it's unclear if it will do more good or harm.
|
||||
// Unnecessarily ignoring things can lead to false positives later.
|
||||
ThreadIgnoreBegin(thr, pc);
|
||||
thr->suppress_reports++;
|
||||
}
|
||||
|
||||
void ForkParentAfter(ThreadState *thr, uptr pc) {
|
||||
ThreadIgnoreEnd(thr, pc); // Begin is in ForkBefore.
|
||||
thr->suppress_reports--; // Enabled in ForkBefore.
|
||||
ctx->report_mtx.Unlock();
|
||||
ctx->thread_registry->Unlock();
|
||||
}
|
||||
|
||||
void ForkChildAfter(ThreadState *thr, uptr pc) {
|
||||
ThreadIgnoreEnd(thr, pc); // Begin is in ForkBefore.
|
||||
thr->suppress_reports--; // Enabled in ForkBefore.
|
||||
ctx->report_mtx.Unlock();
|
||||
ctx->thread_registry->Unlock();
|
||||
|
||||
|
||||
@@ -624,6 +624,7 @@ class ScopedReport : public ScopedReportBase {
|
||||
ScopedErrorReportLock lock_;
|
||||
};
|
||||
|
||||
bool ShouldReport(ThreadState *thr, ReportType typ);
|
||||
ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
|
||||
void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
|
||||
MutexSet *mset, uptr *tag = nullptr);
|
||||
|
||||
@@ -51,6 +51,8 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
|
||||
// or false positives (e.g. unlock in a different thread).
|
||||
if (SANITIZER_GO)
|
||||
return;
|
||||
if (!ShouldReport(thr, typ))
|
||||
return;
|
||||
ThreadRegistryLock l(ctx->thread_registry);
|
||||
ScopedReport rep(typ);
|
||||
rep.AddMutex(mid);
|
||||
@@ -107,7 +109,7 @@ void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
|
||||
if (!unlock_locked)
|
||||
s->Reset(thr->proc()); // must not reset it before the report is printed
|
||||
s->mtx.Unlock();
|
||||
if (unlock_locked) {
|
||||
if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) {
|
||||
ThreadRegistryLock l(ctx->thread_registry);
|
||||
ScopedReport rep(ReportTypeMutexDestroyLocked);
|
||||
rep.AddMutex(mid);
|
||||
@@ -534,7 +536,7 @@ void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
|
||||
}
|
||||
|
||||
void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
|
||||
if (r == 0)
|
||||
if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
|
||||
return;
|
||||
ThreadRegistryLock l(ctx->thread_registry);
|
||||
ScopedReport rep(ReportTypeDeadlock);
|
||||
|
||||
@@ -142,6 +142,28 @@ static ReportStack *SymbolizeStack(StackTrace trace) {
|
||||
return stack;
|
||||
}
|
||||
|
||||
bool ShouldReport(ThreadState *thr, ReportType typ) {
|
||||
// We set thr->suppress_reports in the fork context.
|
||||
// Taking any locking in the fork context can lead to deadlocks.
|
||||
// If any locks are already taken, it's too late to do this check.
|
||||
CheckNoLocks(thr);
|
||||
// For the same reason check we didn't lock thread_registry yet.
|
||||
if (SANITIZER_DEBUG)
|
||||
ThreadRegistryLock l(ctx->thread_registry);
|
||||
if (!flags()->report_bugs || thr->suppress_reports)
|
||||
return false;
|
||||
switch (typ) {
|
||||
case ReportTypeSignalUnsafe:
|
||||
return flags()->report_signal_unsafe;
|
||||
case ReportTypeThreadLeak:
|
||||
return flags()->report_thread_leaks;
|
||||
case ReportTypeMutexDestroyLocked:
|
||||
return flags()->report_destroy_locked;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) {
|
||||
ctx->thread_registry->CheckLocked();
|
||||
void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
|
||||
@@ -497,8 +519,10 @@ static bool HandleRacyAddress(ThreadState *thr, uptr addr_min, uptr addr_max) {
|
||||
}
|
||||
|
||||
bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
|
||||
if (!flags()->report_bugs || thr->suppress_reports)
|
||||
return false;
|
||||
// These should have been checked in ShouldReport.
|
||||
// It's too late to check them here, we have already taken locks.
|
||||
CHECK(flags()->report_bugs);
|
||||
CHECK(!thr->suppress_reports);
|
||||
atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
|
||||
const ReportDesc *rep = srep.GetReport();
|
||||
CHECK_EQ(thr->current_report, nullptr);
|
||||
@@ -589,7 +613,7 @@ void ReportRace(ThreadState *thr) {
|
||||
// at best it will cause deadlocks on internal mutexes.
|
||||
ScopedIgnoreInterceptors ignore;
|
||||
|
||||
if (!flags()->report_bugs)
|
||||
if (!ShouldReport(thr, ReportTypeRace))
|
||||
return;
|
||||
if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr))
|
||||
return;
|
||||
|
||||
@@ -210,7 +210,7 @@ static void ThreadCheckIgnore(ThreadState *thr) {}
|
||||
void ThreadFinalize(ThreadState *thr) {
|
||||
ThreadCheckIgnore(thr);
|
||||
#if !SANITIZER_GO
|
||||
if (!flags()->report_thread_leaks)
|
||||
if (!ShouldReport(thr, ReportTypeThreadLeak))
|
||||
return;
|
||||
ThreadRegistryLock l(ctx->thread_registry);
|
||||
Vector<ThreadLeak> leaks;
|
||||
|
||||
71
compiler-rt/test/tsan/pthread_atfork_deadlock3.c
Normal file
71
compiler-rt/test/tsan/pthread_atfork_deadlock3.c
Normal file
@@ -0,0 +1,71 @@
|
||||
// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
|
||||
// Regression test for
|
||||
// https://groups.google.com/g/thread-sanitizer/c/TQrr4-9PRYo/m/HFR4FMi6AQAJ
|
||||
#include "test.h"
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
|
||||
long glob = 0;
|
||||
|
||||
void *worker(void *main) {
|
||||
glob++;
|
||||
barrier_wait(&barrier);
|
||||
barrier_wait(&barrier);
|
||||
pthread_kill((pthread_t)main, SIGPROF);
|
||||
barrier_wait(&barrier);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void atfork() {
|
||||
barrier_wait(&barrier);
|
||||
barrier_wait(&barrier);
|
||||
write(2, "in atfork\n", strlen("in atfork\n"));
|
||||
static volatile long a;
|
||||
__atomic_fetch_add(&a, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
void handler(int sig) {
|
||||
write(2, "in handler\n", strlen("in handler\n"));
|
||||
glob++;
|
||||
}
|
||||
|
||||
int main() {
|
||||
barrier_init(&barrier, 2);
|
||||
struct sigaction act = {};
|
||||
act.sa_handler = &handler;
|
||||
if (sigaction(SIGPROF, &act, 0)) {
|
||||
perror("sigaction");
|
||||
exit(1);
|
||||
}
|
||||
pthread_atfork(atfork, NULL, NULL);
|
||||
pthread_t t;
|
||||
pthread_create(&t, NULL, worker, (void*)pthread_self());
|
||||
barrier_wait(&barrier);
|
||||
pid_t pid = fork();
|
||||
if (pid < 0) {
|
||||
fprintf(stderr, "fork failed: %d\n", errno);
|
||||
return 1;
|
||||
}
|
||||
if (pid == 0) {
|
||||
fprintf(stderr, "CHILD\n");
|
||||
return 0;
|
||||
}
|
||||
if (pid != waitpid(pid, NULL, 0)) {
|
||||
fprintf(stderr, "waitpid failed: %d\n", errno);
|
||||
return 1;
|
||||
}
|
||||
pthread_join(t, NULL);
|
||||
fprintf(stderr, "PARENT\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: in atfork
|
||||
// CHECK: in handler
|
||||
// Note: There is a race, but we won't report it
|
||||
// to not deadlock.
|
||||
// CHECK-NOT: ThreadSanitizer: data race
|
||||
// CHECK: CHILD
|
||||
// CHECK: PARENT
|
||||
Reference in New Issue
Block a user