-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[rtsan] Support legacy pthread_cond variables #152947
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-compiler-rt-sanitizer Author: Chris Apple (cjappl) Changes(hopefully) fixes #146120 Follows a pattern put forward in tsan: llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp Lines 1366 to 1371 in 71ffa2a
llvm-project/compiler-rt/lib/tsan/dd/dd_interceptors.cpp Lines 204 to 208 in 71ffa2a
To properly deal with memory corruption on older versions of pthread_cond variables. I was never able to repro this problem, but I'm hopeful it's in the right direction considering the crash report. Seeing as it's basically behind a feature flag, I think introducing it to see if it fixes the problem is low risk. Full diff: https://github.com/llvm/llvm-project/pull/152947.diff 1 Files Affected:
diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
index a9d864e9fe926..2f9a80f7ebeda 100644
--- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
@@ -46,12 +46,45 @@
using namespace __sanitizer;
+
+#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 || \
+ defined(__s390x__)
+#define PTHREAD_ABI_BASE "GLIBC_2.3.2"
+#elif defined(__aarch64__) || SANITIZER_PPC64V2
+#define PTHREAD_ABI_BASE "GLIBC_2.17"
+#elif SANITIZER_LOONGARCH64
+#define PTHREAD_ABI_BASE "GLIBC_2.36"
+#elif SANITIZER_RISCV64
+# define PTHREAD_ABI_BASE "GLIBC_2.27"
+#endif
+
+DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, usize size)
+DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+
namespace {
struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
static bool UseImpl() { return !__rtsan_is_initialized(); }
};
} // namespace
+// See note in tsan or ddsan as to why this is necessary
+static pthread_cond_t *init_cond(pthread_cond_t *c, bool force = false) {
+ if (!common_flags()->legacy_pthread_cond)
+ return c;
+
+ atomic_uintptr_t *p = (atomic_uintptr_t *)c;
+ uptr cond = atomic_load(p, memory_order_acquire);
+ if (!force && cond != 0)
+ return (pthread_cond_t *)cond;
+ void *newcond = WRAP(malloc)(sizeof(pthread_cond_t));
+ internal_memset(newcond, 0, sizeof(pthread_cond_t));
+ if (atomic_compare_exchange_strong(p, &cond, (uptr)newcond,
+ memory_order_acq_rel))
+ return (pthread_cond_t *)newcond;
+ WRAP(free)(newcond);
+ return (pthread_cond_t *)cond;
+}
+
// Filesystem
INTERCEPTOR(int, open, const char *path, int oflag, ...) {
@@ -766,26 +799,49 @@ INTERCEPTOR(int, pthread_join, pthread_t thread, void **value_ptr) {
return REAL(pthread_join)(thread, value_ptr);
}
+INTERCEPTOR(int, pthread_cond_init, pthread_cond_t *cond,
+ const pthread_condattr_t *a) {
+ __rtsan_notify_intercepted_call("pthread_cond_init");
+ pthread_cond_t *c = init_cond(cond, true);
+ return REAL(pthread_cond_init)(c, a);
+}
+
INTERCEPTOR(int, pthread_cond_signal, pthread_cond_t *cond) {
__rtsan_notify_intercepted_call("pthread_cond_signal");
- return REAL(pthread_cond_signal)(cond);
+ pthread_cond_t *c = init_cond(cond);
+ return REAL(pthread_cond_signal)(c);
}
INTERCEPTOR(int, pthread_cond_broadcast, pthread_cond_t *cond) {
__rtsan_notify_intercepted_call("pthread_cond_broadcast");
- return REAL(pthread_cond_broadcast)(cond);
+ pthread_cond_t *c = init_cond(cond);
+ return REAL(pthread_cond_broadcast)(c);
}
INTERCEPTOR(int, pthread_cond_wait, pthread_cond_t *cond,
pthread_mutex_t *mutex) {
__rtsan_notify_intercepted_call("pthread_cond_wait");
- return REAL(pthread_cond_wait)(cond, mutex);
+ pthread_cond_t *c = init_cond(cond);
+ return REAL(pthread_cond_wait)(c, mutex);
}
INTERCEPTOR(int, pthread_cond_timedwait, pthread_cond_t *cond,
pthread_mutex_t *mutex, const timespec *ts) {
__rtsan_notify_intercepted_call("pthread_cond_timedwait");
- return REAL(pthread_cond_timedwait)(cond, mutex, ts);
+ pthread_cond_t *c = init_cond(cond);
+ return REAL(pthread_cond_timedwait)(c, mutex, ts);
+}
+
+INTERCEPTOR(int, pthread_cond_destroy, pthread_cond_t *cond) {
+ __rtsan_notify_intercepted_call("pthread_cond_destroy");
+ pthread_cond_t *c = init_cond(cond);
+ int res = REAL(pthread_cond_destroy)(c);
+ if (common_flags()->legacy_pthread_cond) {
+ // Free our aux cond and zero the pointer to not leave dangling pointers.
+ WRAP(free)(c);
+ atomic_store((atomic_uintptr_t *)c, 0, memory_order_relaxed);
+ }
+ return res;
}
INTERCEPTOR(int, pthread_rwlock_rdlock, pthread_rwlock_t *lock) {
@@ -1641,10 +1697,14 @@ void __rtsan::InitializeInterceptors() {
INTERCEPT_FUNCTION(pthread_mutex_lock);
INTERCEPT_FUNCTION(pthread_mutex_unlock);
INTERCEPT_FUNCTION(pthread_join);
- INTERCEPT_FUNCTION(pthread_cond_signal);
- INTERCEPT_FUNCTION(pthread_cond_broadcast);
- INTERCEPT_FUNCTION(pthread_cond_wait);
- INTERCEPT_FUNCTION(pthread_cond_timedwait);
+
+ INTERCEPT_FUNCTION_VER(pthread_cond_init, PTHREAD_ABI_BASE);
+ INTERCEPT_FUNCTION_VER(pthread_cond_signal, PTHREAD_ABI_BASE);
+ INTERCEPT_FUNCTION_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE);
+ INTERCEPT_FUNCTION_VER(pthread_cond_wait, PTHREAD_ABI_BASE);
+ INTERCEPT_FUNCTION_VER(pthread_cond_timedwait, PTHREAD_ABI_BASE);
+ INTERCEPT_FUNCTION_VER(pthread_cond_destroy, PTHREAD_ABI_BASE);
+
INTERCEPT_FUNCTION(pthread_rwlock_rdlock);
INTERCEPT_FUNCTION(pthread_rwlock_unlock);
INTERCEPT_FUNCTION(pthread_rwlock_wrlock);
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
5f9843f
to
c9708f8
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is similar to compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp. Looks good if @adrew0809 can confirm that this fixes the issue.
Thanks all. I will try to confirm fix soon. |
I just hopped off a call with @adrew0809 and we confirmed this fixes the issue! The surprising part is his code no longer crashes when the flag is omitted, or when it is it's default value (false)
I'm trying to reason about why this might be. Maybe it has something to do with the PTHREAD_ABI_BASE? I'm not entirely sure. Considering this fixes the issue, I'm inclined to merge. Any objections @fmayer or @davidtrevelyan ? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No strong objections to merging this from me. If it fixes the issue on @adrew0809's machine and the CI is happy then I think it's low-enough risk to go for it. I do think we should consider whether it would be feasible to get this issue and its solution under meaningful test - if it's not too hard I think it would be wise to do it soon.
INTERCEPT_FUNCTION(pthread_cond_wait); | ||
INTERCEPT_FUNCTION(pthread_cond_timedwait); | ||
|
||
INTERCEPT_FUNCTION_VER(pthread_cond_init, PTHREAD_ABI_BASE); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because pthread_cond_init
and pthread_cond_destroy
are new, I think we would benefit from unit tests for these interceptors. Sorry I just noticed this now. What do you think? Add them now or post-merge?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea, added
Copying from our private chain, for future posterity: The problem with a bigger functional test is that some systems will need this flag, and others will not. Systems that need the new flag will crash when it isn't applied. Systems that don't need the new flag will crash when it is applied This means in an ideal world, we would figure out the exact specs of a system that needs the flag, determine it in cmake land, and apply or not apply the test All that's to say...complicated Tsan does not have a test for this flag, I suspect because of this complexity |
namespace { | ||
struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> { | ||
static bool UseImpl() { return !__rtsan_is_initialized(); } | ||
}; | ||
} // namespace | ||
|
||
// See note in tsan as to why this is necessary | ||
static pthread_cond_t *init_cond(pthread_cond_t *c, bool force = false) { | ||
if (!common_flags()->legacy_pthread_cond) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The option is for DSO built against glibc 2.2.5 (released in 2002). For rtsan we can reasonably assume newer shared objects and don't allow this at all.
So, just delete this code path?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I delete this codepath, I get a crash on almost all tests on my mac, with this stack trace:
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x3cb0b1bb)
* frame #0: 0x000000018d619e4c libsystem_pthread.dylib`pthread_cond_broadcast + 48
frame #1: 0x000000018d54ae70 libc++.1.dylib`std::__1::__call_once(unsigned long volatile&, void*, void (*)(void*)) + 224
frame #2: 0x000000018d582914 libc++.1.dylib`std::__1::locale::id::__get() + 104
frame #3: 0x000000018d57fb94 libc++.1.dylib`std::__1::locale::__imp::__imp(unsigned long) + 148
frame #4: 0x000000018d580c10 libc++.1.dylib`std::__1::locale::classic() + 84
frame #5: 0x000000018d582140 libc++.1.dylib`std::__1::locale::__global() + 72
frame #6: 0x000000018d5821a0 libc++.1.dylib`std::__1::locale::locale() + 24
frame #7: 0x000000018d55875c libc++.1.dylib`std::__1::basic_streambuf<char, std::__1::char_traits<char>>::basic_streambuf() + 52
frame #8: 0x000000018d56aac8 libc++.1.dylib`std::__1::__stdinbuf<char>::__stdinbuf(__sFILE*, __mbstate_t*) + 40
frame #9: 0x000000018d56a45c libc++.1.dylib`std::__1::DoIOSInit::DoIOSInit() + 68
frame #10: 0x000000018d56aa50 libc++.1.dylib`std::__1::ios_base::Init::Init() + 80
frame #11: 0x000000018d56be70 libc++.1.dylib`_GLOBAL__I_000100 + 32
frame #12: 0x000000018d2b105c dyld`invocation function for block in dyld4::Loader::findAndRunAllInitializers(dyld4::RuntimeState&) const::$_0::operator()() const + 168
frame #13: 0x000000018d2ef308 dyld`invocation function for block in dyld3::MachOAnalyzer::forEachInitializer(Diagnostics&, dyld3::MachOAnalyzer::VMAddrConverter const&, void (unsigned int) block_pointer, void const*) const + 340
frame #14: 0x000000018d2e299c dyld`invocation function for block in dyld3::MachOFile::forEachSection(void (dyld3::MachOFile::SectionInfo const&, bool, bool&) block_pointer) const + 496
frame #15: 0x000000018d2922fc dyld`dyld3::MachOFile::forEachLoadCommand(Diagnostics&, void (load_command const*, bool&) block_pointer) const + 300
frame #16: 0x000000018d2e1930 dyld`dyld3::MachOFile::forEachSection(void (dyld3::MachOFile::SectionInfo const&, bool, bool&) block_pointer) const + 192
frame #17: 0x000000018d2eee1c dyld`dyld3::MachOAnalyzer::forEachInitializer(Diagnostics&, dyld3::MachOAnalyzer::VMAddrConverter const&, void (unsigned int) block_pointer, void const*) const + 516
frame #18: 0x000000018d2ad070 dyld`dyld4::Loader::findAndRunAllInitializers(dyld4::RuntimeState&) const + 524
frame #19: 0x000000018d2b6d28 dyld`dyld4::PrebuiltLoader::runInitializers(dyld4::RuntimeState&) const + 44
frame #20: 0x000000018d2ad45c dyld`dyld4::Loader::runInitializersBottomUp(dyld4::RuntimeState&, dyld3::Array<dyld4::Loader const*>&) const + 220
frame #21: 0x000000018d2ad400 dyld`dyld4::Loader::runInitializersBottomUp(dyld4::RuntimeState&, dyld3::Array<dyld4::Loader const*>&) const + 128
frame #22: 0x000000018d2b10ec dyld`dyld4::Loader::runInitializersBottomUpPlusUpwardLinks(dyld4::RuntimeState&) const::$_1::operator()() const + 116
frame #23: 0x000000018d2ad628 dyld`dyld4::Loader::runInitializersBottomUpPlusUpwardLinks(dyld4::RuntimeState&) const + 380
frame #24: 0x000000018d2d04d8 dyld`dyld4::APIs::runAllInitializersForMain() + 464
frame #25: 0x000000018d296f7c dyld`dyld4::prepare(dyld4::APIs&, dyld3::MachOAnalyzer const*) + 3156
frame #26: 0x000000018d295edc dyld`start + 1844
Specifically, the pthread_cond_broadcast interceptor gets called, but seemingly our pthread_cond_init
function is never called. This means:
INTERCEPTOR(int, pthread_cond_broadcast, pthread_cond_t *cond) {
__rtsan_notify_intercepted_call("pthread_cond_broadcast");
// This function returns a cond not initialized by us,
pthread_cond_t *c = init_cond(cond);
// this call crashes, as it operates on a pthread_cond that wasn't initialized properly
return REAL(pthread_cond_broadcast)(c);
}
Any thoughts on getting around this? I've tried a few things, but nothing seems to work.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should assume common_flags()->legacy_pthread_cond
is always false for rtsan and delete the unreachable code path. Can you double check macOS really crashes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I misunderstood your original comment. This does not crash mac.
See: #155181
I have created #154268 to simplify the tsan https://maskray.me/blog/2023-01-08-all-about-sanitizer-interceptors
https://maskray.me/blog/2022-05-29-glibc
|
Thanks @MaskRay ! After that's merged, I'll update to follow that pattern. |
01fee83
to
73d3968
Compare
73d3968
to
6d33293
Compare
After I get confirmation that the tests are stable on main, I will cherry pick this to the LLVM 21 release branch |
|
This reverts commit aa4bc2e.
fixes #146120
Follows a pattern put forward in tsan:
llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
Lines 1366 to 1371 in 71ffa2a
llvm-project/compiler-rt/lib/tsan/dd/dd_interceptors.cpp
Lines 204 to 208 in 71ffa2a
To properly deal with memory corruption on older versions of pthread_cond variables.
I was never able to repro this problem, but I'm hopeful it's in the right direction considering the crash report. Seeing as it's basically behind a feature flag, I think introducing it to see if it fixes the problem is low risk.