Close https://github.com/llvm/llvm-project/issues/59221.

The root cause of the problem is that we previously marked the parameter of the resume/destroy functions as noalias, but this is not correct. See https://github.com/llvm/llvm-project/issues/59221 for the details.

Long story short, for this C++ program (https://compiler-explorer.com/z/6qGcozG93), the optimized frame will be something like:

```
struct test_frame {
    void (*__resume_)(); // a function pointer that points to the `test.resume` function, which can be imagined as the test() function in the example.
    ....
    struct a_frame {
        ...
        void **caller; // may point to test_frame at runtime.
    };
};
```

And the function a and the function test look just like:

```
define i32 @a(ptr noalias %alloc_8) {
  %alloc_8_16 = getelementptr ptr, ptr %alloc_8, i64 16
  store i32 42, ptr %alloc_8_16, align 8
  %alloc_8_8 = getelementptr ptr, ptr %alloc_8, i64 8
  %alloc = load ptr, ptr %alloc_8_8, align 8
  %p = load ptr, ptr %alloc, align 8
  %r = call i32 %p(ptr %alloc)
  ret i32 %r
}

define i32 @b(ptr %p) {
entry:
  %alloc = alloca [128 x i8], align 8
  %alloc_8 = getelementptr ptr, ptr %alloc, i64 8
  %alloc_8_8 = getelementptr ptr, ptr %alloc_8, i64 8
  store ptr %alloc, ptr %alloc_8_8, align 8
  store ptr %p, ptr %alloc, align 8
  %r = call i32 @a(ptr nonnull %alloc_8)
  ret i32 %r
}
```

Here, inside the function `a`, we can access the parameter `%alloc_8` through `%alloc`, and we pass `%alloc` to an unknown function. So it breaks the assumption of the `noalias` parameter.

Note that although only the CoroElide optimization can put a frame directly inside another frame, the following case is not valid either:

```
struct test_frame {
    ....
    void **a_frame; // may point to a_frame at runtime.
};
struct a_frame {
    void **caller; // may point to test_frame at runtime.
};
```

Since the C++ language allows the programmer to get the address of coroutine frames, we can't assume the above case wouldn't happen in the source code.
So we can't set the parameter as noalias no matter if CoroElide applies or not. And for other languages, it may be safe if they don't allow the programmers to get the address of coroutine frames. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D139295
89 lines
2.0 KiB
C++
89 lines
2.0 KiB
C++
// Test for PR59221. Tests that the compiler doesn't misoptimize the final result.
//
// REQUIRES: x86-registered-target
//
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 %s -O3 -S -emit-llvm -o - | FileCheck %s
|
|
|
|
#include "Inputs/coroutine.h"
|
|
|
|
template <typename T> struct task {
|
|
struct promise_type {
|
|
T value{123};
|
|
std::coroutine_handle<> caller{std::noop_coroutine()};
|
|
|
|
struct final_awaiter: std::suspend_always {
|
|
auto await_suspend(std::coroutine_handle<promise_type> me) const noexcept {
|
|
return me.promise().caller;
|
|
}
|
|
};
|
|
|
|
constexpr auto initial_suspend() const noexcept {
|
|
return std::suspend_always();
|
|
}
|
|
constexpr auto final_suspend() const noexcept {
|
|
return final_awaiter{};
|
|
}
|
|
auto unhandled_exception() noexcept {
|
|
// ignore
|
|
}
|
|
constexpr void return_value(T v) noexcept {
|
|
value = v;
|
|
}
|
|
constexpr auto & get_return_object() noexcept {
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
using coroutine_handle = std::coroutine_handle<promise_type>;
|
|
|
|
promise_type & promise{nullptr};
|
|
|
|
task(promise_type & p) noexcept: promise{p} { }
|
|
|
|
~task() noexcept {
|
|
coroutine_handle::from_promise(promise).destroy();
|
|
}
|
|
|
|
auto await_ready() noexcept {
|
|
return false;
|
|
}
|
|
|
|
auto await_suspend(std::coroutine_handle<> caller) noexcept {
|
|
promise.caller = caller;
|
|
return coroutine_handle::from_promise(promise);
|
|
}
|
|
|
|
constexpr auto await_resume() const noexcept {
|
|
return promise.value;
|
|
}
|
|
|
|
// non-coroutine access to result
|
|
auto get() noexcept {
|
|
const auto handle = coroutine_handle::from_promise(promise);
|
|
|
|
if (!handle.done()) {
|
|
handle.resume();
|
|
}
|
|
|
|
return promise.value;
|
|
}
|
|
};
|
|
|
|
|
|
// Inner coroutine: its only statement is `co_return 42;`, so the value 42 is
// delivered to the promise through promise_type::return_value().
static inline auto a() noexcept -> task<int> {
    co_return 42;
}
|
|
|
|
// Outer coroutine: awaits a() and co_returns the awaited result as its own.
// The await and the return are kept in one expression on purpose; splitting
// them would change when the temporary task returned by a() is destroyed.
static inline auto test() noexcept -> task<int> {
    co_return co_await a();
}
|
|
|
|
int foo() {
|
|
return test().get();
|
|
}
|
|
|
|
// Checks that the store of the result value 42 is not optimized away.
// CHECK: define{{.*}}_Z3foov(
// CHECK: store i32 42, ptr %{{.*}}
// CHECK: }
|