From 63b4c946a2fea2cbae956ed1afdf5ba653157574 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 6 May 2025 14:03:16 +0200 Subject: [PATCH 01/36] Fix exec context for future coroutines --- lib/Async/include/Async/async.h | 1 + lib/Futures/include/Futures/coro-helper.h | 515 +++++++++------------- tests/Futures/CMakeLists.txt | 1 + tests/Futures/FutureCoroutineTest.cpp | 81 +++- 4 files changed, 280 insertions(+), 318 deletions(-) diff --git a/lib/Async/include/Async/async.h b/lib/Async/include/Async/async.h index 324b8955324a..9bc72b53e10f 100644 --- a/lib/Async/include/Async/async.h +++ b/lib/Async/include/Async/async.h @@ -103,6 +103,7 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { } void unhandled_exception() { _value.set_exception(std::current_exception()); + ExecContext::set(_callerExecContext); *async_registry::get_current_coroutine() = _requester; ExecContext::set(_callerExecContext); } diff --git a/lib/Futures/include/Futures/coro-helper.h b/lib/Futures/include/Futures/coro-helper.h index 0a9b93e2e2a9..4565917a7114 100644 --- a/lib/Futures/include/Futures/coro-helper.h +++ b/lib/Futures/include/Futures/coro-helper.h @@ -38,77 +38,232 @@ namespace std_coro = std; #include "Try.h" #include "Utils/ExecContext.h" -/// This file contains helper classes and tools for coroutines. We use -/// coroutines for asynchronous operations. Every function, method or -/// closure which contains at least one of the keywords -/// - co_await -/// - co_yield -/// - co_return -/// is a coroutine and is thus compiled differently by the C++ compiler -/// than normal. Essentially, the compiler creates a state machine for -/// each such functions. All instances of co_await and co_yield are potential -/// suspension points. The code before and after a co_await/co_yield can -/// be executed by different threads! -/// The return type of a coroutine plays a very special role. For us, it -/// will usually be `Future` for some type T. 
The code in this file -/// uses this fact and essentially implements the magic for coroutines -/// by providing a few helper classes. See below for details. - -/// See below at (*) for an explanation why we need this class -/// `FutureAwaitable` here! +/// This file contains helper classes and tools for coroutines. -namespace arangodb::futures { template -class Future; +struct future_promise; + +/** + Promise type for a future coroutine + + The type holds two pieces of data: + - first an `arangodb::futures::Promise` (not to be confused with the + promise_type!), and + - second an `arangodb::futures::Try` + After all, we want that the coroutine "returns" an empty `Future` + when it suspends, and it is supposed to set the return value (or + exception) via the corresponding `Promise` object to trigger + potential callbacks which are attached to the Future. + */ +template +struct future_promise_base { + using promise_type = future_promise; + + future_promise_base(std::source_location loc) + : promise{std::move(loc)}, + callerExecContext{arangodb::ExecContext::currentAsShared()}, + requester{*arangodb::async_registry::get_current_coroutine()} { + *arangodb::async_registry::get_current_coroutine() = {promise.id()}; + } + ~future_promise_base() {} + + auto initial_suspend() noexcept { + promise.update_state(arangodb::async_registry::State::Running); + return std_coro::suspend_never{}; + } + auto final_suspend() noexcept { + // TODO use symmetric transfer here + struct awaitable { + bool await_ready() noexcept { return false; } + bool await_suspend(std::coroutine_handle self) noexcept { + arangodb::ExecContext::set(_promise->callerExecContext); + *arangodb::async_registry::get_current_coroutine() = + _promise->requester; + // we have to destroy the coroutine frame before + // we resolve the promise + _promise->promise.setTry(std::move(_promise->result)); + return false; + } + void await_resume() noexcept {} + + promise_type* _promise; + }; + + return 
awaitable{static_cast(this)}; + } + + template + auto await_transform( + U&& co_awaited_expression, + std::source_location loc = std::source_location::current()) noexcept { + using inner_awaitable_type = decltype(arangodb::get_awaitable_object( + std::forward(co_awaited_expression))); + + struct awaitable { + bool await_ready() { return inner_awaitable.await_ready(); } + auto await_suspend(std::coroutine_handle<> handle) { + outer_promise->promise.update_state( + arangodb::async_registry::State::Suspended); + arangodb::ExecContext::set(outer_promise->callerExecContext); + *arangodb::async_registry::get_current_coroutine() = + outer_promise->requester; + return inner_awaitable.await_suspend(handle); + } + auto await_resume() { + auto old_state = outer_promise->promise.update_state( + arangodb::async_registry::State::Running); + if (old_state.has_value() && + old_state.value() == arangodb::async_registry::State::Suspended) { + outer_promise->callerExecContext = + arangodb::ExecContext::currentAsShared(); + outer_promise->requester = + *arangodb::async_registry::get_current_coroutine(); + } + arangodb::ExecContext::set(myExecContext); + *arangodb::async_registry::get_current_coroutine() = { + outer_promise->promise.id()}; + return inner_awaitable.await_resume(); + } + + promise_type* outer_promise; + inner_awaitable_type inner_awaitable; + std::shared_ptr myExecContext; + }; + + // update promises in registry + if constexpr (arangodb::CanUpdateRequester) { + co_awaited_expression.update_requester({promise.id()}); + } + promise.update_source_location(std::move(loc)); + + return awaitable{.outer_promise = static_cast(this), + .inner_awaitable = arangodb::get_awaitable_object( + std::forward(co_awaited_expression)), + .myExecContext = arangodb::ExecContext::currentAsShared()}; + } + + auto get_return_object() -> arangodb::futures::Future { + return promise.getFuture(); + } + + auto unhandled_exception() noexcept { + result.set_exception(std::current_exception()); + 
arangodb::ExecContext::set(callerExecContext); + *arangodb::async_registry::get_current_coroutine() = requester; + } + + arangodb::futures::Promise promise; + arangodb::futures::Try result; + std::shared_ptr callerExecContext; + arangodb::async_registry::Requester requester; +}; template -struct FutureAwaitable { - [[nodiscard]] auto await_ready() const noexcept -> bool { return false; } - bool await_suspend(std_coro::coroutine_handle<> coro) noexcept { - // returning false resumes `coro` - _execContext = ExecContext::currentAsShared(); - std::move(_future).thenFinal( - [coro, this](futures::Try&& result) mutable noexcept { - _result = std::move(result); - if (_counter.fetch_sub(1) == 1) { - ExecContextScope exec(_execContext); - coro.resume(); - } - }); - return _counter.fetch_sub(1) != 1; +struct future_promise : future_promise_base { + future_promise(std::source_location loc = std::source_location::current()) + : future_promise_base(std::move(loc)) {} + auto return_value( + T const& t, + std::source_location loc = std::source_location:: + current()) noexcept(std::is_nothrow_copy_constructible_v) { + static_assert(std::is_copy_constructible_v); + future_promise_base::promise.update_state( + arangodb::async_registry::State::Resolved); + future_promise_base::promise.update_source_location(std::move(loc)); + future_promise_base::result.emplace(t); } - auto await_resume() -> T { return std::move(_result.value().get()); } - explicit FutureAwaitable(Future fut) : _future(std::move(fut)) {} - private: - std::atomic_uint8_t _counter{2}; - Future _future; - std::optional> _result; - std::shared_ptr _execContext; + auto return_value( + T&& t, std::source_location loc = std::source_location:: + current()) noexcept(std::is_nothrow_move_constructible_v) { + static_assert(std::is_move_constructible_v); + future_promise_base::promise.update_state( + arangodb::async_registry::State::Resolved); + future_promise_base::promise.update_source_location(std::move(loc)); + 
future_promise_base::result.emplace(std::move(t)); + } }; -/// See below at (*) for an explanation why we need this operator co_await -/// here! +template<> +struct future_promise + : future_promise_base { + future_promise(std::source_location loc = std::source_location::current()) + : future_promise_base(std::move(loc)) {} + auto return_void( + std::source_location loc = std::source_location::current()) noexcept { + promise.update_state(arangodb::async_registry::State::Resolved); + promise.update_source_location(std::move(loc)); + result.emplace(); + } +}; +/** + With this definition, Future can be used as a coroutine + */ +template +struct std_coro::coroutine_traits, Args...> { + using promise_type = future_promise; +}; + +/** + With this definition, Future can be used as a + coroutine + */ +template +struct std_coro::coroutine_traits< + arangodb::futures::Future, Args...> { + using promise_type = future_promise; +}; + +namespace arangodb::futures { + +/** + Be able to call co_await on a future + */ template auto operator co_await(Future&& f) noexcept { - return FutureAwaitable{std::move(f)}; + struct FutureAwaitable { + [[nodiscard]] auto await_ready() const noexcept -> bool { return false; } + bool await_suspend(std_coro::coroutine_handle<> coro) noexcept { + std::move(_future).thenFinal( + [coro, this](futures::Try&& result) mutable noexcept { + _result = std::move(result); + if (_counter.fetch_sub(1) == 1) { + coro.resume(); + } + }); + // returning false resumes `coro` + return _counter.fetch_sub(1) != 1; + } + auto await_resume() -> T { return std::move(_result.value().get()); } + explicit FutureAwaitable(Future fut) : _future(std::move(fut)) {} + + private: + std::atomic_uint8_t _counter{2}; + Future _future; + std::optional> _result; + }; + + return FutureAwaitable{std::move(f)}; } +/** + Be able to call co_await on some transformation of a future + + Transformations are defined below + */ template struct FutureTransformAwaitable : F { [[nodiscard]] auto 
await_ready() const noexcept -> bool { return false; } bool await_suspend(std_coro::coroutine_handle<> coro) noexcept { - // returning false resumes `coro` - _execContext = ExecContext::currentAsShared(); std::move(_future).thenFinal( [coro, this](futures::Try&& result) noexcept { _result = F::operator()(std::move(result)); if (_counter.fetch_sub(1) == 1) { - ExecContextScope exec(_execContext); coro.resume(); } }); + // returning false resumes `coro` return _counter.fetch_sub(1) != 1; } using ResultType = std::invoke_result_t&&>; @@ -123,7 +278,6 @@ struct FutureTransformAwaitable : F { std::atomic_uint8_t _counter{2}; Future _future; std::optional _result; - std::shared_ptr _execContext; }; template @@ -154,269 +308,4 @@ auto asResult(Future>&& f) noexcept { return basics::catchToResult([&] { return res.get(); }); }}; } - } // namespace arangodb::futures - -/// For every coroutine, there must be a so-called `promise_type`, which -/// is a helper class providing a few methods to configure the behaviour -/// of the coroutine. This can either be a member type called `promise_type` -/// of the return type of the coroutine, or, as in our case, it is determined -/// using the `std_coro::coroutine_traits` template with template parameters -/// using the return type (see -/// https://en.cppreference.com/w/cpp/language/coroutines -/// under "Promise") and then some. Since our return type for coroutines -/// is `arangodb::futures::Future`, we specialize this template here -/// to configure our coroutines (for an explanation see below the class): - -template -struct std_coro::coroutine_traits, Args...> { - struct promise_type { - // For some reason, non-maintainer compilation fails with a linker error - // if these are missing or defaulted. 
- promise_type(std::source_location loc = std::source_location::current()) - : promise{std::move(loc)}, - requester{*arangodb::async_registry::get_current_coroutine()} { - *arangodb::async_registry::get_current_coroutine() = {promise.id()}; - } - ~promise_type() {} - - arangodb::futures::Promise promise; - arangodb::futures::Try result; - arangodb::async_registry::Requester requester; - - auto initial_suspend() noexcept { - promise.update_state(arangodb::async_registry::State::Running); - return std_coro::suspend_never{}; - } - auto final_suspend() noexcept { - // TODO use symmetric transfer here - struct awaitable { - bool await_ready() noexcept { return false; } - bool await_suspend(std::coroutine_handle self) noexcept { - *arangodb::async_registry::get_current_coroutine() = - _promise->requester; - // we have to destroy the coroutine frame before - // we resolve the promise - _promise->promise.setTry(std::move(_promise->result)); - return false; - } - void await_resume() noexcept {} - - promise_type* _promise; - }; - - return awaitable{this}; - } - - auto get_return_object() -> arangodb::futures::Future { - return promise.getFuture(); - } - - auto return_value( - T const& t, - std::source_location loc = std::source_location:: - current()) noexcept(std::is_nothrow_copy_constructible_v) { - static_assert(std::is_copy_constructible_v); - promise.update_state(arangodb::async_registry::State::Resolved); - promise.update_source_location(std::move(loc)); - result.emplace(t); - } - - auto return_value( - T&& t, - std::source_location loc = std::source_location:: - current()) noexcept(std::is_nothrow_move_constructible_v) { - static_assert(std::is_move_constructible_v); - promise.update_state(arangodb::async_registry::State::Resolved); - promise.update_source_location(std::move(loc)); - result.emplace(std::move(t)); - } - - auto unhandled_exception() noexcept { - result.set_exception(std::current_exception()); - *arangodb::async_registry::get_current_coroutine() = 
requester; - } - - template - auto await_transform( - U&& co_awaited_expression, - std::source_location loc = std::source_location::current()) noexcept { - using inner_awaitable_type = decltype(arangodb::get_awaitable_object( - std::forward(co_awaited_expression))); - - struct awaitable { - bool await_ready() { return inner_awaitable.await_ready(); } - auto await_suspend(std::coroutine_handle<> handle) { - *arangodb::async_registry::get_current_coroutine() = - outer_promise->requester; - outer_promise->promise.update_state( - arangodb::async_registry::State::Suspended); - return inner_awaitable.await_suspend(handle); - } - auto await_resume() { - auto old_state = outer_promise->promise.update_state( - arangodb::async_registry::State::Running); - if (old_state.has_value() && - old_state.value() == arangodb::async_registry::State::Suspended) { - outer_promise->requester = - *arangodb::async_registry::get_current_coroutine(); - } - *arangodb::async_registry::get_current_coroutine() = { - outer_promise->promise.id()}; - return inner_awaitable.await_resume(); - } - - promise_type* outer_promise; - inner_awaitable_type inner_awaitable; - }; - - // update promises in registry - if constexpr (arangodb::CanUpdateRequester) { - co_awaited_expression.update_requester({promise.id()}); - } - promise.update_source_location(std::move(loc)); - - return awaitable{this, arangodb::get_awaitable_object( - std::forward(co_awaited_expression))}; - } - }; -}; - -/// (*) Explanation for the details: -/// The `promise_type` holds two pieces of data: -/// - first an `arangodb::futures::Promise` (not to be confused with the -/// promise_type!), and -/// - second an `arangodb::futures::Try` -/// After all, we want that the coroutine "returns" an empty `Future` -/// when it suspends, and it is supposed to set the return value (or -/// exception) via the corresponding `Promise` object to trigger -/// potential callbacks which are attached to the Future. -/// So how does this all work? 
-/// When the coroutine is first called an object of type `promise_type` -/// is contructed, which constructs its member `promise` of type -/// `Promise`. Then, early in the life of the coroutine, the method -/// `get_return_object` is called, which builds an object of type -/// `Future` from the `promise` member, so that it is associated with -/// the `promise` member. This is what will be returned when the coroutine -/// is first suspended. -/// Since `initial_suspend` returns `std_coro::suspend_never{}` no -/// suspension happens before the first code of the coroutine is run. -/// When the coroutine reaches a `co_await`, the expression behind it is -/// first evaluated. It is then the "awaitable" object (unless there is -/// a method `await_transform` in the current coroutines promise object, -/// which we haven't). In most cases, this will be another `Future` -/// which is returned from another coroutine. -/// The "awaitable" is now transformed to an "awaiter". This is done by -/// means of an `operator co_await` defined earlier in this file. It -/// essentially wraps our `Future` into a `FutureAwaitable` class -/// also defined above. -/// The C++ coroutine framework will then cal methods on the "awaiter" -/// for events to unfold: First it calls `await_ready` to see if we have -/// to suspend after all. We always return `false` there. -/// Then it calls `await_suspend` to suspend and later `await_resume` to -/// resume. The `FutureAwaitable` class essentially attaches a closure -/// to the `Future` which resumes the coroutine. - -/// The following is the version for return type `Future`, -/// corresponding to coroutines which return nothing. The differences -/// are purely technical (`return_void` instead of `return_value`, -/// basically). 
- -template -struct std_coro::coroutine_traits< - arangodb::futures::Future, Args...> { - struct promise_type { - arangodb::futures::Promise promise; - arangodb::futures::Try result; - arangodb::async_registry::Requester requester; - - promise_type(std::source_location loc = std::source_location::current()) - : promise{std::move(loc)}, - requester{*arangodb::async_registry::get_current_coroutine()} { - *arangodb::async_registry::get_current_coroutine() = {promise.id()}; - } - auto initial_suspend() noexcept { - promise.update_state(arangodb::async_registry::State::Running); - return std_coro::suspend_never{}; - } - auto final_suspend() noexcept { - // TODO use symmetric transfer here - struct awaitable { - bool await_ready() noexcept { return false; } - bool await_suspend(std::coroutine_handle self) noexcept { - *arangodb::async_registry::get_current_coroutine() = - _promise->requester; - // we have to destroy the coroutine frame before - // we resolve the promise - _promise->promise.setTry(std::move(_promise->result)); - return false; - } - void await_resume() noexcept {} - - promise_type* _promise; - }; - - return awaitable{this}; - } - - auto get_return_object() - -> arangodb::futures::Future { - return promise.getFuture(); - } - - auto return_void( - std::source_location loc = std::source_location::current()) noexcept { - promise.update_state(arangodb::async_registry::State::Resolved); - promise.update_source_location(std::move(loc)); - result.emplace(); - } - - auto unhandled_exception() noexcept { - result.set_exception(std::current_exception()); - *arangodb::async_registry::get_current_coroutine() = requester; - } - - template - auto await_transform( - U&& co_awaited_expression, - std::source_location loc = std::source_location::current()) noexcept { - using inner_awaitable_type = decltype(arangodb::get_awaitable_object( - std::forward(co_awaited_expression))); - - struct awaitable { - bool await_ready() { return inner_awaitable.await_ready(); } - auto 
await_suspend(std::coroutine_handle<> handle) { - *arangodb::async_registry::get_current_coroutine() = - outer_promise->requester; - outer_promise->promise.update_state( - arangodb::async_registry::State::Suspended); - return inner_awaitable.await_suspend(handle); - } - auto await_resume() { - auto old_state = outer_promise->promise.update_state( - arangodb::async_registry::State::Running); - if (old_state.has_value() && - old_state.value() == arangodb::async_registry::State::Suspended) { - outer_promise->requester = - *arangodb::async_registry::get_current_coroutine(); - } - *arangodb::async_registry::get_current_coroutine() = { - outer_promise->promise.id()}; - return inner_awaitable.await_resume(); - } - - promise_type* outer_promise; - inner_awaitable_type inner_awaitable; - }; - - // update promises in registry - if constexpr (arangodb::CanUpdateRequester) { - co_awaited_expression.update_requester({promise.id()}); - } - promise.update_source_location(std::move(loc)); - - return awaitable{this, arangodb::get_awaitable_object( - std::forward(co_awaited_expression))}; - } - }; -}; diff --git a/tests/Futures/CMakeLists.txt b/tests/Futures/CMakeLists.txt index bd2607d16f19..63c2c7247dbb 100644 --- a/tests/Futures/CMakeLists.txt +++ b/tests/Futures/CMakeLists.txt @@ -5,6 +5,7 @@ add_library(arango_tests_futures OBJECT TryTest.cpp) target_link_libraries(arango_tests_futures PRIVATE + arango arango_futures gtest velocypack diff --git a/tests/Futures/FutureCoroutineTest.cpp b/tests/Futures/FutureCoroutineTest.cpp index 43db4f01b49a..78e842ecdc2e 100644 --- a/tests/Futures/FutureCoroutineTest.cpp +++ b/tests/Futures/FutureCoroutineTest.cpp @@ -10,6 +10,7 @@ #include +using namespace arangodb; using namespace arangodb::futures; namespace { @@ -107,7 +108,7 @@ auto expect_all_promises_in_state(arangodb::async_registry::State state, } // namespace template -struct FutureCoroutineTest : ::testing::Test { +struct FutureTest : ::testing::Test { void SetUp() override { 
arangodb::async_registry::get_thread_registry().garbage_collect(); EXPECT_TRUE(std::holds_alternative( @@ -123,9 +124,9 @@ struct FutureCoroutineTest : ::testing::Test { }; using MyTypes = ::testing::Types; -TYPED_TEST_SUITE(FutureCoroutineTest, MyTypes); +TYPED_TEST_SUITE(FutureTest, MyTypes); -TYPED_TEST(FutureCoroutineTest, promises_in_async_registry_know_their_state) { +TYPED_TEST(FutureTest, promises_in_async_registry_know_their_state) { { auto coro = [&]() -> Future { co_await this->wait; @@ -162,7 +163,7 @@ auto find_promise_by_name(std::string_view name) } // namespace TYPED_TEST( - FutureCoroutineTest, + FutureTest, promises_in_async_registry_know_their_requester_with_nested_coroutines) { using TestType = decltype(this); struct Functions { @@ -231,7 +232,7 @@ TYPED_TEST( this->wait.await(); } -TYPED_TEST(FutureCoroutineTest, +TYPED_TEST(FutureTest, promises_in_async_registry_know_their_requester_with_move) { using TestType = decltype(this); struct Functions { @@ -279,3 +280,73 @@ TYPED_TEST(FutureCoroutineTest, this->wait.resume(); this->wait.await(); } + +struct ExecContext_Waiting : public arangodb::ExecContext { + ExecContext_Waiting() + : arangodb::ExecContext(arangodb::ExecContext::ConstructorToken{}, + arangodb::ExecContext::Type::Default, "Waiting", + "", arangodb::auth::Level::RW, + arangodb::auth::Level::NONE, true) {} +}; +struct ExecContext_Calling : public arangodb::ExecContext { + ExecContext_Calling() + : arangodb::ExecContext(arangodb::ExecContext::ConstructorToken{}, + arangodb::ExecContext::Type::Default, "Calling", + "", arangodb::auth::Level::RW, + arangodb::auth::Level::NONE, true) {} +}; +struct ExecContext_Begin : public arangodb::ExecContext { + ExecContext_Begin() + : arangodb::ExecContext(arangodb::ExecContext::ConstructorToken{}, + arangodb::ExecContext::Type::Default, "Begin", "", + arangodb::auth::Level::RW, + arangodb::auth::Level::NONE, true) {} +}; +struct ExecContext_End : public arangodb::ExecContext { + ExecContext_End() + 
: arangodb::ExecContext(arangodb::ExecContext::ConstructorToken{}, + arangodb::ExecContext::Type::Default, "End", "", + arangodb::auth::Level::RW, + arangodb::auth::Level::NONE, true) {} +}; +TYPED_TEST(FutureTest, execution_context_is_local_to_coroutine) { + ExecContextScope exec(std::make_shared()); + EXPECT_EQ(ExecContext::current().user(), "Begin"); + + auto waiting_coro = [&]() -> Future { + EXPECT_EQ(ExecContext::current().user(), "Begin"); + ExecContextScope exec(std::make_shared()); + EXPECT_EQ(ExecContext::current().user(), "Waiting"); + co_await this->wait; + EXPECT_EQ(ExecContext::current().user(), "Waiting"); + co_return; + }(); + EXPECT_EQ(ExecContext::current().user(), "Begin"); + + auto trivial_coro = []() -> Future { + EXPECT_EQ(ExecContext::current().user(), "Begin"); + co_return; + }(); + + auto calling_coro = [&]() -> Future { + EXPECT_EQ(ExecContext::current().user(), "Begin"); + ExecContextScope exec(std::make_shared()); + EXPECT_EQ(ExecContext::current().user(), "Calling"); + co_await std::move(waiting_coro); + EXPECT_EQ(ExecContext::current().user(), "Calling"); + co_await std::move(trivial_coro); + EXPECT_EQ(ExecContext::current().user(), "Calling"); + co_return; + }; + EXPECT_EQ(ExecContext::current().user(), "Begin"); + + std::ignore = calling_coro(); + EXPECT_EQ(ExecContext::current().user(), "Begin"); + + ExecContextScope new_exec(std::make_shared()); + EXPECT_EQ(ExecContext::current().user(), "End"); + + this->wait.resume(); + this->wait.await(); + EXPECT_EQ(ExecContext::current().user(), "End"); +} From ed28bb59be431917141e5de3ba9cdb59af1960cf Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 6 May 2025 14:45:43 +0200 Subject: [PATCH 02/36] Extract context variables into struct --- lib/Async/include/Async/async.h | 41 ++++++++++------------ lib/Async/include/Async/context.h | 40 +++++++++++++++++++++ lib/Futures/include/Futures/coro-helper.h | 42 ++++++++++------------- 3 files changed, 77 insertions(+), 46 deletions(-) create 
mode 100644 lib/Async/include/Async/context.h diff --git a/lib/Async/include/Async/async.h b/lib/Async/include/Async/async.h index 9bc72b53e10f..d97bbcd1d1ac 100644 --- a/lib/Async/include/Async/async.h +++ b/lib/Async/include/Async/async.h @@ -1,9 +1,10 @@ #pragma once -#include "Async/Registry/promise.h" -#include "Async/Registry/registry_variable.h" +#include "Async/context.h" #include "Async/coro-utils.h" #include "Async/expected.h" +#include "Async/Registry/promise.h" +#include "Async/Registry/registry_variable.h" #include "Utils/ExecContext.h" #include "Inspection/Format.h" @@ -27,8 +28,8 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { async_promise_base(std::source_location loc) : async_registry::AddToAsyncRegistry{std::move(loc)}, - _callerExecContext{ExecContext::currentAsShared()}, - _requester{*async_registry::get_current_coroutine()} { + _context{ExecContext::currentAsShared(), + *async_registry::get_current_coroutine()} { *async_registry::get_current_coroutine() = {id()}; } @@ -41,8 +42,7 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { bool await_ready() noexcept { return false; } std::coroutine_handle<> await_suspend( std::coroutine_handle<> self) noexcept { - ExecContext::set(_promise->_callerExecContext); - *async_registry::get_current_coroutine() = _promise->_requester; + _promise->_context.set(); auto addr = _promise->_continuation.exchange(self.address(), std::memory_order_acq_rel); if (addr == nullptr) { @@ -70,24 +70,23 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { bool await_ready() { return inner_awaitable.await_ready(); } auto await_suspend(std::coroutine_handle<> handle) { outer_promise->update_state(async_registry::State::Suspended); - ExecContext::set(outer_promise->_callerExecContext); - *async_registry::get_current_coroutine() = outer_promise->_requester; + outer_promise->_context.set(); return inner_awaitable.await_suspend(handle); } auto await_resume() { auto old_state = 
outer_promise->update_state(async_registry::State::Running); if (old_state.value() == async_registry::State::Suspended) { - outer_promise->_callerExecContext = ExecContext::currentAsShared(); - outer_promise->_requester = *async_registry::get_current_coroutine(); + outer_promise->_context = + Context{ExecContext::currentAsShared(), + *async_registry::get_current_coroutine()}; } - *async_registry::get_current_coroutine() = {outer_promise->id()}; - ExecContext::set(_myExecContext); + myContext.set(); return inner_awaitable.await_resume(); } async_promise_base* outer_promise; inner_awaitable_type inner_awaitable; - std::shared_ptr _myExecContext; + Context myContext; }; // update promises in registry @@ -96,16 +95,15 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { } update_source_location(loc); - return awaitable{.outer_promise = this, - .inner_awaitable = get_awaitable_object( - std::forward(co_awaited_expression)), - ._myExecContext = ExecContext::currentAsShared()}; + return awaitable{ + .outer_promise = this, + .inner_awaitable = + get_awaitable_object(std::forward(co_awaited_expression)), + .myContext = Context{ExecContext::currentAsShared(), {id()}}}; } void unhandled_exception() { _value.set_exception(std::current_exception()); - ExecContext::set(_callerExecContext); - *async_registry::get_current_coroutine() = _requester; - ExecContext::set(_callerExecContext); + _context.set(); } auto get_return_object() { return async{std::coroutine_handle::from_promise( @@ -114,8 +112,7 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { std::atomic _continuation = nullptr; expected _value; - std::shared_ptr _callerExecContext; - async_registry::Requester _requester; + Context _context; }; template diff --git a/lib/Async/include/Async/context.h b/lib/Async/include/Async/context.h new file mode 100644 index 000000000000..292d315df0ae --- /dev/null +++ b/lib/Async/include/Async/context.h @@ -0,0 +1,40 @@ 
+//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "Async/Registry/promise.h" +#include "Utils/ExecContext.h" + +namespace arangodb { + +struct Context { + std::shared_ptr _callerExecContext; + async_registry::Requester _requester; + + auto set() -> void { + ExecContext::set(_callerExecContext); + *async_registry::get_current_coroutine() = _requester; + } +}; + +} // namespace arangodb diff --git a/lib/Futures/include/Futures/coro-helper.h b/lib/Futures/include/Futures/coro-helper.h index 4565917a7114..3c2a55225c72 100644 --- a/lib/Futures/include/Futures/coro-helper.h +++ b/lib/Futures/include/Futures/coro-helper.h @@ -32,6 +32,7 @@ namespace std_coro = std; #endif #include +#include "Async/context.h" #include "Basics/Exceptions.h" #include "Basics/Result.h" #include "Promise.h" @@ -61,8 +62,8 @@ struct future_promise_base { future_promise_base(std::source_location loc) : promise{std::move(loc)}, - callerExecContext{arangodb::ExecContext::currentAsShared()}, - requester{*arangodb::async_registry::get_current_coroutine()} { + 
context{arangodb::ExecContext::currentAsShared(), + *arangodb::async_registry::get_current_coroutine()} { *arangodb::async_registry::get_current_coroutine() = {promise.id()}; } ~future_promise_base() {} @@ -76,9 +77,7 @@ struct future_promise_base { struct awaitable { bool await_ready() noexcept { return false; } bool await_suspend(std::coroutine_handle self) noexcept { - arangodb::ExecContext::set(_promise->callerExecContext); - *arangodb::async_registry::get_current_coroutine() = - _promise->requester; + _promise->context.set(); // we have to destroy the coroutine frame before // we resolve the promise _promise->promise.setTry(std::move(_promise->result)); @@ -104,9 +103,7 @@ struct future_promise_base { auto await_suspend(std::coroutine_handle<> handle) { outer_promise->promise.update_state( arangodb::async_registry::State::Suspended); - arangodb::ExecContext::set(outer_promise->callerExecContext); - *arangodb::async_registry::get_current_coroutine() = - outer_promise->requester; + outer_promise->context.set(); return inner_awaitable.await_suspend(handle); } auto await_resume() { @@ -114,20 +111,17 @@ struct future_promise_base { arangodb::async_registry::State::Running); if (old_state.has_value() && old_state.value() == arangodb::async_registry::State::Suspended) { - outer_promise->callerExecContext = - arangodb::ExecContext::currentAsShared(); - outer_promise->requester = - *arangodb::async_registry::get_current_coroutine(); + outer_promise->context = arangodb::Context{ + arangodb::ExecContext::currentAsShared(), + *arangodb::async_registry::get_current_coroutine()}; } - arangodb::ExecContext::set(myExecContext); - *arangodb::async_registry::get_current_coroutine() = { - outer_promise->promise.id()}; + myContext.set(); return inner_awaitable.await_resume(); } promise_type* outer_promise; inner_awaitable_type inner_awaitable; - std::shared_ptr myExecContext; + arangodb::Context myContext; }; // update promises in registry @@ -136,10 +130,12 @@ struct 
future_promise_base { } promise.update_source_location(std::move(loc)); - return awaitable{.outer_promise = static_cast(this), - .inner_awaitable = arangodb::get_awaitable_object( - std::forward(co_awaited_expression)), - .myExecContext = arangodb::ExecContext::currentAsShared()}; + return awaitable{ + .outer_promise = static_cast(this), + .inner_awaitable = arangodb::get_awaitable_object( + std::forward(co_awaited_expression)), + .myContext = arangodb::Context{arangodb::ExecContext::currentAsShared(), + {promise.id()}}}; } auto get_return_object() -> arangodb::futures::Future { @@ -148,14 +144,12 @@ struct future_promise_base { auto unhandled_exception() noexcept { result.set_exception(std::current_exception()); - arangodb::ExecContext::set(callerExecContext); - *arangodb::async_registry::get_current_coroutine() = requester; + context.set(); } arangodb::futures::Promise promise; arangodb::futures::Try result; - std::shared_ptr callerExecContext; - arangodb::async_registry::Requester requester; + arangodb::Context context; }; template From 5a6f3036a502a315009ab1d5e07b879b30002cff Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 6 May 2025 21:46:44 +0200 Subject: [PATCH 03/36] Add constructor to context --- lib/Async/include/Async/async.h | 17 ++++++----------- lib/Async/include/Async/context.h | 8 ++++++-- lib/Futures/include/Futures/coro-helper.h | 18 ++++++------------ 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/lib/Async/include/Async/async.h b/lib/Async/include/Async/async.h index d97bbcd1d1ac..2ab04cdb5f4e 100644 --- a/lib/Async/include/Async/async.h +++ b/lib/Async/include/Async/async.h @@ -27,9 +27,7 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { using promise_type = async_promise; async_promise_base(std::source_location loc) - : async_registry::AddToAsyncRegistry{std::move(loc)}, - _context{ExecContext::currentAsShared(), - *async_registry::get_current_coroutine()} { + : 
async_registry::AddToAsyncRegistry{std::move(loc)}, _context{} { *async_registry::get_current_coroutine() = {id()}; } @@ -77,9 +75,7 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { auto old_state = outer_promise->update_state(async_registry::State::Running); if (old_state.value() == async_registry::State::Suspended) { - outer_promise->_context = - Context{ExecContext::currentAsShared(), - *async_registry::get_current_coroutine()}; + outer_promise->_context = Context{}; } myContext.set(); return inner_awaitable.await_resume(); @@ -95,11 +91,10 @@ struct async_promise_base : async_registry::AddToAsyncRegistry { } update_source_location(loc); - return awaitable{ - .outer_promise = this, - .inner_awaitable = - get_awaitable_object(std::forward(co_awaited_expression)), - .myContext = Context{ExecContext::currentAsShared(), {id()}}}; + return awaitable{.outer_promise = this, + .inner_awaitable = get_awaitable_object( + std::forward(co_awaited_expression)), + .myContext = Context{}}; } void unhandled_exception() { _value.set_exception(std::current_exception()); diff --git a/lib/Async/include/Async/context.h b/lib/Async/include/Async/context.h index 292d315df0ae..ff2d35b74408 100644 --- a/lib/Async/include/Async/context.h +++ b/lib/Async/include/Async/context.h @@ -28,11 +28,15 @@ namespace arangodb { struct Context { - std::shared_ptr _callerExecContext; + std::shared_ptr _execContext; async_registry::Requester _requester; + Context() + : _execContext{ExecContext::currentAsShared()}, + _requester{*async_registry::get_current_coroutine()} {} + auto set() -> void { - ExecContext::set(_callerExecContext); + ExecContext::set(_execContext); *async_registry::get_current_coroutine() = _requester; } }; diff --git a/lib/Futures/include/Futures/coro-helper.h b/lib/Futures/include/Futures/coro-helper.h index 3c2a55225c72..e6ac22a3e630 100644 --- a/lib/Futures/include/Futures/coro-helper.h +++ b/lib/Futures/include/Futures/coro-helper.h @@ -61,9 +61,7 @@ struct 
future_promise_base { using promise_type = future_promise; future_promise_base(std::source_location loc) - : promise{std::move(loc)}, - context{arangodb::ExecContext::currentAsShared(), - *arangodb::async_registry::get_current_coroutine()} { + : promise{std::move(loc)}, context{} { *arangodb::async_registry::get_current_coroutine() = {promise.id()}; } ~future_promise_base() {} @@ -111,9 +109,7 @@ struct future_promise_base { arangodb::async_registry::State::Running); if (old_state.has_value() && old_state.value() == arangodb::async_registry::State::Suspended) { - outer_promise->context = arangodb::Context{ - arangodb::ExecContext::currentAsShared(), - *arangodb::async_registry::get_current_coroutine()}; + outer_promise->context = arangodb::Context{}; } myContext.set(); return inner_awaitable.await_resume(); @@ -130,12 +126,10 @@ struct future_promise_base { } promise.update_source_location(std::move(loc)); - return awaitable{ - .outer_promise = static_cast(this), - .inner_awaitable = arangodb::get_awaitable_object( - std::forward(co_awaited_expression)), - .myContext = arangodb::Context{arangodb::ExecContext::currentAsShared(), - {promise.id()}}}; + return awaitable{.outer_promise = static_cast(this), + .inner_awaitable = arangodb::get_awaitable_object( + std::forward(co_awaited_expression)), + .myContext = arangodb::Context{}}; } auto get_return_object() -> arangodb::futures::Future { From 892a501cbb8a834ac692174438c6ea6c61acbf9a Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Fri, 4 Apr 2025 09:37:32 +0200 Subject: [PATCH 04/36] Extract source location --- lib/Async/Registry/promise.h | 34 +----------- lib/Containers/Concurrent/source_location.h | 61 +++++++++++++++++++++ tests/Async/Registry/RegistryTest.cpp | 4 +- 3 files changed, 66 insertions(+), 33 deletions(-) create mode 100644 lib/Containers/Concurrent/source_location.h diff --git a/lib/Async/Registry/promise.h b/lib/Async/Registry/promise.h index 2b40c8426a5d..8f1a4ee178e6 100644 --- 
a/lib/Async/Registry/promise.h +++ b/lib/Async/Registry/promise.h @@ -31,6 +31,7 @@ #include #include "Containers/Concurrent/ThreadOwnedList.h" #include "Containers/Concurrent/thread.h" +#include "Containers/Concurrent/source_location.h" #include "fmt/format.h" #include "fmt/std.h" @@ -46,34 +47,6 @@ overloaded(Ts...) -> overloaded; namespace arangodb::async_registry { -struct SourceLocationSnapshot { - std::string_view file_name; - std::string_view function_name; - std::uint_least32_t line; - bool operator==(SourceLocationSnapshot const&) const = default; - static auto from(std::source_location loc) -> SourceLocationSnapshot { - return SourceLocationSnapshot{.file_name = loc.file_name(), - .function_name = loc.function_name(), - .line = loc.line()}; - } -}; -template -auto inspect(Inspector& f, SourceLocationSnapshot& x) { - return f.object(x).fields(f.field("file_name", x.file_name), - f.field("line", x.line), - f.field("function_name", x.function_name)); -} -struct SourceLocation { - auto snapshot() -> SourceLocationSnapshot { - return SourceLocationSnapshot{.file_name = file_name, - .function_name = function_name, - .line = line.load()}; - } - const std::string_view file_name; - const std::string_view function_name; - std::atomic line; -}; - enum class State { Running = 0, Suspended, Resolved, Deleted }; template auto inspect(Inspector& f, State& x) { @@ -130,7 +103,7 @@ auto inspect(Inspector& f, Requester& x) { struct PromiseSnapshot { void* id; basics::ThreadId thread; - SourceLocationSnapshot source_location; + basics::SourceLocationSnapshot source_location; Requester requester; State state; bool operator==(PromiseSnapshot const&) const = default; @@ -162,8 +135,7 @@ struct Promise { } basics::ThreadId thread; - - SourceLocation source_location; + basics::VariableSourceLocation source_location; std::atomic requester; std::atomic state = State::Running; }; diff --git a/lib/Containers/Concurrent/source_location.h b/lib/Containers/Concurrent/source_location.h 
new file mode 100644 index 000000000000..87f8fd28e01d --- /dev/null +++ b/lib/Containers/Concurrent/source_location.h @@ -0,0 +1,61 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include + +namespace arangodb::basics { + +struct SourceLocationSnapshot { + std::string_view file_name; + std::string_view function_name; + std::uint_least32_t line; + bool operator==(SourceLocationSnapshot const&) const = default; + static auto from(std::source_location loc) -> SourceLocationSnapshot { + return SourceLocationSnapshot{.file_name = loc.file_name(), + .function_name = loc.function_name(), + .line = loc.line()}; + } +}; +template +auto inspect(Inspector& f, SourceLocationSnapshot& x) { + return f.object(x).fields(f.field("file_name", x.file_name), + f.field("line", x.line), + f.field("function_name", x.function_name)); +} + +struct VariableSourceLocation { + auto snapshot() -> SourceLocationSnapshot { + return SourceLocationSnapshot{.file_name = file_name, + .function_name = function_name, + .line = 
line.load()}; + } + const std::string_view file_name; + const std::string_view function_name; + std::atomic line; +}; + +} // namespace arangodb::basics diff --git a/tests/Async/Registry/RegistryTest.cpp b/tests/Async/Registry/RegistryTest.cpp index b75bde7d366d..b9a7258d34df 100644 --- a/tests/Async/Registry/RegistryTest.cpp +++ b/tests/Async/Registry/RegistryTest.cpp @@ -40,11 +40,11 @@ auto promises_in_registry() -> std::vector { } struct MyPromise : public AddToAsyncRegistry { - SourceLocationSnapshot source_location; + basics::SourceLocationSnapshot source_location; basics::ThreadId thread; MyPromise(std::source_location loc = std::source_location::current()) : AddToAsyncRegistry{loc}, - source_location{SourceLocationSnapshot::from(std::move(loc))}, + source_location{basics::SourceLocationSnapshot::from(std::move(loc))}, thread{basics::ThreadId::current()} {} auto snapshot(State state = State::Running) -> PromiseSnapshot { return PromiseSnapshot{.id = id(), From eab010c868681af15414a90fa3a16a4a212363ec Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Fri, 4 Apr 2025 10:56:23 +0200 Subject: [PATCH 05/36] Add task registry --- lib/CMakeLists.txt | 1 + lib/Tasks/CMakeLists.txt | 9 + lib/Tasks/include/Tasks/task.h | 204 ++++++++++++++++++ .../include/Tasks/task_registry_variable.h | 37 ++++ lib/Tasks/task.cpp | 155 +++++++++++++ lib/Tasks/task_registry_variable.cpp | 29 +++ 6 files changed, 435 insertions(+) create mode 100644 lib/Tasks/CMakeLists.txt create mode 100644 lib/Tasks/include/Tasks/task.h create mode 100644 lib/Tasks/include/Tasks/task_registry_variable.h create mode 100644 lib/Tasks/task.cpp create mode 100644 lib/Tasks/task_registry_variable.cpp diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index dd04f5f1488c..b55a7d98f015 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -451,3 +451,4 @@ add_subdirectory(CrashHandler) add_subdirectory(Assertions) add_subdirectory(Inspection) add_subdirectory(BuildId) +add_subdirectory(Tasks) diff 
--git a/lib/Tasks/CMakeLists.txt b/lib/Tasks/CMakeLists.txt new file mode 100644 index 000000000000..fe33944774dd --- /dev/null +++ b/lib/Tasks/CMakeLists.txt @@ -0,0 +1,9 @@ +add_library(arango_task_registry STATIC + task.cpp) +target_include_directories(arango_task_registry PUBLIC + include) +target_link_libraries(arango_task_registry PUBLIC fmt arango) + +add_library(arango_task_registry_global STATIC + task_registry_variable.cpp) +target_link_libraries(arango_task_registry_global PUBLIC arango_task_registry) diff --git a/lib/Tasks/include/Tasks/task.h b/lib/Tasks/include/Tasks/task.h new file mode 100644 index 000000000000..0329fc653c7c --- /dev/null +++ b/lib/Tasks/include/Tasks/task.h @@ -0,0 +1,204 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "Containers/Concurrent/ThreadOwnedList.h" +#include "Containers/Concurrent/source_location.h" +#include "Containers/Concurrent/thread.h" +#include "Inspection/Types.h" +#include "fmt/format.h" + +#include +#include +#include + +namespace arangodb::task_registry { + +struct RootTask { + bool operator==(RootTask const&) const = default; +}; +template +auto inspect(Inspector& f, RootTask& x) { + return f.object(x).fields(); +} + +struct TransactionId { + std::uint64_t id; + bool operator==(TransactionId const&) const = default; +}; +template +auto inspect(Inspector& f, TransactionId& x) { + return f.object(x).fields(f.field("tid", x.id)); +} + +struct TransactionTask { + std::string name; + TransactionId tid; + bool operator==(TransactionTask const&) const = default; +}; +template +auto inspect(Inspector& f, TransactionTask& x) { + return f.object(x).fields(f.field("name", x.name), f.embedFields(x.tid)); +} + +struct TaskIdWrapper { + void* id; + bool operator==(TaskIdWrapper const&) const = default; +}; +template +auto inspect(Inspector& f, TaskIdWrapper& x) { + return f.object(x).fields(f.field("id", fmt::format("{}", x.id))); +} + +struct ParentTaskSnapshot + : std::variant {}; +template +auto inspect(Inspector& f, ParentTaskSnapshot& x) { + return f.variant(x).unqualified().alternatives( + inspection::inlineType(), + inspection::inlineType(), + inspection::inlineType()); +} + +struct TaskSnapshot { + std::string name; + std::string state; + void* id; + ParentTaskSnapshot parent; + std::optional transaction; + std::optional thread; + basics::SourceLocationSnapshot source_location; + bool operator==(TaskSnapshot const&) const = default; +}; +template +auto inspect(Inspector& f, TaskSnapshot& x) { + return f.object(x).fields( + f.field("id", fmt::format("{}", x.id)), 
f.field("name", x.name), + f.field("state", x.state), f.field("parent", x.parent), + f.field("transaction", x.transaction), f.field("thread", x.thread), + f.field("source_location", x.source_location)); +} + +struct TaskInRegistry; +struct ParentTask + : std::variant, TransactionId> {}; + +struct TaskScope; +struct ScheduledTaskScope; + +struct TaskInRegistry { + using Snapshot = TaskSnapshot; + static auto create(std::string name, std::source_location loc) + -> std::shared_ptr; + static auto subtask(TaskScope& parent, std::string name, + std::optional transaction, + std::source_location loc) + -> std::shared_ptr; + static auto scheduled(TaskScope& parent, std::string name, + std::source_location) + -> std::shared_ptr; + static auto transaction_task(TransactionId transaction, std::string name, + std::source_location loc) + -> std::shared_ptr; + ~TaskInRegistry(); + auto id() -> void* { return this; } + auto snapshot() -> TaskSnapshot; + auto set_to_deleted() -> void {} + + friend TaskScope; + friend ScheduledTaskScope; + + private: + TaskInRegistry(ParentTask parent, std::string name, std::string state, + std::optional transaction, + std::source_location loc); + /** + Update the state + + Can only be called on its own running thread, throws otherwise. + */ + auto update_state(std::string_view state, + std::source_location loc = std::source_location::current()) + -> void; // should only be called from scope + + std::string const _name; + std::string _state; // has to probably be atomic (for reading and writing + // concurrently on different threads), but is string... + ParentTask _parent; + std::optional _transaction; // stays constant + std::optional + _running_thread; // proably has to also be atomic because + // changes for scheduled task + std::source_location const _source_location; + // possibly interesting other properties: + // std::chrono::time_point creation = std:; +}; + +/** + A task in scope in a running task. 
+ + The TaskScope sets the state of the corresponding task. + */ +struct TaskScope { + // TODO possibly update source location of task in this constructor, + TaskScope(std::shared_ptr task) : _task{task} { + if (task) { + _task->_running_thread = basics::ThreadId::current(); + _task->update_state("running"); + } + } + TaskScope() : _task{nullptr} {} + TaskScope(TaskScope const&) = delete; + TaskScope(TaskScope&&) = default; + TaskScope& operator=(TaskScope&& other) = default; + ~TaskScope() { + if (_task) { + _task->update_state("done"); + } + } + auto update_state(std::string_view state, + std::source_location loc = std::source_location::current()) + -> void { + if (_task) { + _task->update_state(std::move(state), std::move(loc)); + } + } + + friend TaskInRegistry; + + private: + auto task() -> std::shared_ptr { return _task; } + std::shared_ptr _task; +}; + +struct ScheduledTaskScope { + ScheduledTaskScope(std::shared_ptr task) : _task{task} {} + ScheduledTaskScope(ScheduledTaskScope&&) = default; + ScheduledTaskScope(ScheduledTaskScope const&) = delete; + auto start() && -> TaskScope { return TaskScope{std::move(_task)}; } + + private: + std::shared_ptr _task; +}; + +} // namespace arangodb::task_registry diff --git a/lib/Tasks/include/Tasks/task_registry_variable.h b/lib/Tasks/include/Tasks/task_registry_variable.h new file mode 100644 index 000000000000..98c83a6549e9 --- /dev/null +++ b/lib/Tasks/include/Tasks/task_registry_variable.h @@ -0,0 +1,37 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "Containers/Concurrent/ListOfNonOwnedLists.h" +#include "Containers/Concurrent/ThreadOwnedList.h" +#include "Tasks/task.h" + +namespace arangodb::task_registry { + +using ThreadRegistry = containers::ThreadOwnedList; +struct Registry : public containers::ListOfNonOwnedLists { +}; + +extern Registry registry; + +} // namespace arangodb::task_registry diff --git a/lib/Tasks/task.cpp b/lib/Tasks/task.cpp new file mode 100644 index 000000000000..2da9123535b2 --- /dev/null +++ b/lib/Tasks/task.cpp @@ -0,0 +1,155 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#include "Tasks/task.h" + +#include "Assertions/ProdAssert.h" +#include "Containers/Concurrent/source_location.h" +#include "Inspection/Format.h" +#include + +// helper type for the visitor +namespace { +template +struct overloaded : Ts... { + using Ts::operator()...; +}; +template +overloaded(Ts...) -> overloaded; +} // namespace + +namespace arangodb::task_registry { + +auto TaskInRegistry::create(std::string name, std::source_location loc) + -> std::shared_ptr { + struct MakeSharedTask : TaskInRegistry { + MakeSharedTask(ParentTask parent, std::string name, std::string state, + std::source_location loc) + : TaskInRegistry(parent, std::move(name), std::move(state), + std::nullopt, std::move(loc)) {} + }; + return std::make_shared( + ParentTask{RootTask{}}, std::move(name), "created", std::move(loc)); +} +auto TaskInRegistry::subtask(TaskScope& parent, std::string name, + std::optional transaction, + std::source_location loc) + -> std::shared_ptr { + struct MakeSharedTask : TaskInRegistry { + MakeSharedTask(ParentTask parent, std::string name, std::string state, + std::optional transaction, + std::source_location loc) + : TaskInRegistry(parent, std::move(name), std::move(state), + std::move(transaction), std::move(loc)) {} + }; + return std::make_shared( + ParentTask{parent.task()}, std::move(name), "created", + std::move(transaction), std::move(loc)); +} +auto TaskInRegistry::scheduled(TaskScope& parent, std::string name, + std::source_location loc) + -> std::shared_ptr { + struct MakeSharedTask : TaskInRegistry { + MakeSharedTask(ParentTask parent, std::string name, std::string state, + std::source_location loc) + : TaskInRegistry(parent, std::move(name), std::move(state), + std::nullopt, std::move(loc)) {} + }; + return std::make_shared( + ParentTask{parent.task()}, std::move(name), 
"scheduled", std::move(loc)); +} + +auto TaskInRegistry::transaction_task(TransactionId transaction, + std::string name, + std::source_location loc) + -> std::shared_ptr { + struct MakeSharedTask : TaskInRegistry { + MakeSharedTask(ParentTask parent, std::string name, std::string state, + std::source_location loc) + : TaskInRegistry(parent, std::move(name), std::move(state), + std::nullopt, std::move(loc)) {} + }; + return std::make_shared(ParentTask{std::move(transaction)}, + std::move(name), "created", + std::move(loc)); +} + +TaskInRegistry::TaskInRegistry(ParentTask parent, std::string name, + std::string state, + std::optional transaction, + std::source_location loc) + : _name{std::move(name)}, + _state{std::move(state)}, + _parent{std::move(parent)}, + _transaction{std::move(transaction)}, + _source_location{std::move(loc)} {} + +// TODO +// Task::~Task() { +// if (_registry) { +// _registry->garbage_collect(); +// } +// } + +auto TaskInRegistry::update_state(std::string_view state, + std::source_location loc) -> void { + auto current_thread = basics::ThreadId::current(); + ADB_PROD_ASSERT(current_thread == _running_thread) << fmt::format( + "TaskRegistry::update_state was called from thread {} but needs to be " + "called from its owning thread {}. Called at {}. 
Task: {} ({}), {}", + fmt::format("{}", inspection::json(current_thread)), + fmt::format("{}", inspection::json(_running_thread)), + inspection::json(basics::SourceLocationSnapshot::from(loc)), _name, + _state, + inspection::json(basics::SourceLocationSnapshot::from(_source_location))); + _state = state; +} + +auto TaskInRegistry::snapshot() -> TaskSnapshot { + return TaskSnapshot{ + .name = _name, + .state = _state, + .id = id(), + .parent = std::visit( + overloaded{[&](RootTask root) { return ParentTaskSnapshot{root}; }, + [&](std::shared_ptr parent) { + return ParentTaskSnapshot{TaskIdWrapper{parent.get()}}; + }, + [&](TransactionId transaction) { + return ParentTaskSnapshot{transaction}; + }}, + _parent), + .transaction = _transaction, + .thread = _running_thread, + .source_location = basics::SourceLocationSnapshot{ + .file_name = _source_location.file_name(), + .function_name = _source_location.function_name(), + .line = _source_location.line()}}; +} + +} // namespace arangodb::task_registry + +std::ostream& operator<<(std::ostream& os, + arangodb::task_registry::TaskSnapshot const& t) { + os << fmt::format("{}", arangodb::inspection::json(t)); + return os; +} diff --git a/lib/Tasks/task_registry_variable.cpp b/lib/Tasks/task_registry_variable.cpp new file mode 100644 index 000000000000..7d0b79bfc242 --- /dev/null +++ b/lib/Tasks/task_registry_variable.cpp @@ -0,0 +1,29 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#include "Tasks/task_registry_variable.h" + +namespace arangodb::task_registry { + +Registry registry; + +} From d05e5673393bc4c2c18473105a36744faddb3140 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 22 Apr 2025 18:34:39 +0200 Subject: [PATCH 06/36] Add global task registry variable --- lib/CMakeLists.txt | 1 + lib/Tasks/include/Tasks/task_registry_variable.h | 2 ++ lib/Tasks/task_registry_variable.cpp | 12 ++++++++++++ 3 files changed, 15 insertions(+) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index b55a7d98f015..bf3d0d000b67 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -330,6 +330,7 @@ target_link_libraries(arango_lightweight absl::flat_hash_set absl::synchronization arango_async + arango_task_registry_global PRIVATE date_interface fmt arango_assertions diff --git a/lib/Tasks/include/Tasks/task_registry_variable.h b/lib/Tasks/include/Tasks/task_registry_variable.h index 98c83a6549e9..d965462395e9 100644 --- a/lib/Tasks/include/Tasks/task_registry_variable.h +++ b/lib/Tasks/include/Tasks/task_registry_variable.h @@ -34,4 +34,6 @@ struct Registry : public containers::ListOfNonOwnedLists { extern Registry registry; +auto get_thread_registry() noexcept -> ThreadRegistry&; + } // namespace arangodb::task_registry diff --git a/lib/Tasks/task_registry_variable.cpp b/lib/Tasks/task_registry_variable.cpp index 
7d0b79bfc242..9c268a2e35ed 100644 --- a/lib/Tasks/task_registry_variable.cpp +++ b/lib/Tasks/task_registry_variable.cpp @@ -26,4 +26,16 @@ namespace arangodb::task_registry { Registry registry; +auto get_thread_registry() noexcept -> ThreadRegistry& { + struct ThreadRegistryGuard { + ThreadRegistryGuard() : _registry{ThreadRegistry::make()} { + registry.add(_registry); + } + + std::shared_ptr _registry; + }; + static thread_local auto registry_guard = ThreadRegistryGuard{}; + return *registry_guard._registry; } + +} // namespace arangodb::task_registry From 0e3f1b6945f6f47e4b6fdfa069836ed6dcd4573d Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 22 Apr 2025 20:39:20 +0200 Subject: [PATCH 07/36] Pass first test for a root task --- lib/Async/Registry/promise.h | 11 ++ lib/Tasks/include/Tasks/task.h | 99 ++++-------- .../include/Tasks/task_registry_variable.h | 10 ++ lib/Tasks/task.cpp | 151 +++++++----------- tests/CMakeLists.txt | 1 + tests/Tasks/CMakeLists.txt | 5 + tests/Tasks/TaskRegistryTest.cpp | 64 ++++++++ 7 files changed, 180 insertions(+), 161 deletions(-) create mode 100644 tests/Tasks/CMakeLists.txt create mode 100644 tests/Tasks/TaskRegistryTest.cpp diff --git a/lib/Async/Registry/promise.h b/lib/Async/Registry/promise.h index 8f1a4ee178e6..ddec2fff4761 100644 --- a/lib/Async/Registry/promise.h +++ b/lib/Async/Registry/promise.h @@ -117,6 +117,9 @@ auto inspect(Inspector& f, PromiseSnapshot& x) { f.field("state", x.state)); } +/** + Promise in the registry + */ struct Promise { using Snapshot = PromiseSnapshot; Promise(Requester requester, std::source_location location); @@ -146,6 +149,14 @@ struct Promise { */ auto get_current_coroutine() noexcept -> Requester*; +/** + Wrapper promise for easier usage in the code + + This is a wrapper around the promise: On construction, it creates a promise + and registers it in the global register. On destruction, it marks the promise + for deletion in the register. 
Therefore it has a shorter lifetime than the + promise itself. + */ struct AddToAsyncRegistry { AddToAsyncRegistry() = default; AddToAsyncRegistry(std::source_location loc); diff --git a/lib/Tasks/include/Tasks/task.h b/lib/Tasks/include/Tasks/task.h index 0329fc653c7c..a54053450300 100644 --- a/lib/Tasks/include/Tasks/task.h +++ b/lib/Tasks/include/Tasks/task.h @@ -98,6 +98,7 @@ auto inspect(Inspector& f, TaskSnapshot& x) { f.field("transaction", x.transaction), f.field("thread", x.thread), f.field("source_location", x.source_location)); } +void PrintTo(const TaskSnapshot& task, std::ostream* os); struct TaskInRegistry; struct ParentTask @@ -106,32 +107,15 @@ struct ParentTask struct TaskScope; struct ScheduledTaskScope; +/** + The task object inside the registry + */ struct TaskInRegistry { using Snapshot = TaskSnapshot; - static auto create(std::string name, std::source_location loc) - -> std::shared_ptr; - static auto subtask(TaskScope& parent, std::string name, - std::optional transaction, - std::source_location loc) - -> std::shared_ptr; - static auto scheduled(TaskScope& parent, std::string name, - std::source_location) - -> std::shared_ptr; - static auto transaction_task(TransactionId transaction, std::string name, - std::source_location loc) - -> std::shared_ptr; - ~TaskInRegistry(); auto id() -> void* { return this; } auto snapshot() -> TaskSnapshot; auto set_to_deleted() -> void {} - friend TaskScope; - friend ScheduledTaskScope; - - private: - TaskInRegistry(ParentTask parent, std::string name, std::string state, - std::optional transaction, - std::source_location loc); /** Update the state @@ -141,64 +125,49 @@ struct TaskInRegistry { std::source_location loc = std::source_location::current()) -> void; // should only be called from scope - std::string const _name; - std::string _state; // has to probably be atomic (for reading and writing - // concurrently on different threads), but is string... 
- ParentTask _parent; - std::optional _transaction; // stays constant + std::string const name; + std::string state; // has to probably be atomic (for reading and writing + // concurrently on different threads), but is string... + ParentTask parent; + std::optional transaction; // stays constant std::optional - _running_thread; // proably has to also be atomic because - // changes for scheduled task - std::source_location const _source_location; + running_thread; // proably has to also be atomic because + // changes for scheduled task + std::source_location const source_location; // possibly interesting other properties: // std::chrono::time_point creation = std:; }; /** - A task in scope in a running task. - - The TaskScope sets the state of the corresponding task. + This task adds an entry to the task registry on construction and mark the + entry for deletion on destruction. */ -struct TaskScope { - // TODO possibly update source location of task in this constructor, - TaskScope(std::shared_ptr task) : _task{task} { - if (task) { - _task->_running_thread = basics::ThreadId::current(); - _task->update_state("running"); - } - } - TaskScope() : _task{nullptr} {} - TaskScope(TaskScope const&) = delete; - TaskScope(TaskScope&&) = default; - TaskScope& operator=(TaskScope&& other) = default; - ~TaskScope() { - if (_task) { - _task->update_state("done"); - } - } +struct Task { + Task(Task const&) = delete; + Task& operator=(Task const&) = delete; + Task(Task&&) = delete; + Task& operator=(Task&&) = delete; + ~Task(); + + Task(TaskInRegistry task_in_registry); + + auto id() -> void*; auto update_state(std::string_view state, std::source_location loc = std::source_location::current()) - -> void { - if (_task) { - _task->update_state(std::move(state), std::move(loc)); - } - } - - friend TaskInRegistry; + -> void; private: - auto task() -> std::shared_ptr { return _task; } - std::shared_ptr _task; + struct noop { + void operator()(void*) {} + }; + std::unique_ptr::Node, 
noop> + _node_in_registry = nullptr; }; -struct ScheduledTaskScope { - ScheduledTaskScope(std::shared_ptr task) : _task{task} {} - ScheduledTaskScope(ScheduledTaskScope&&) = default; - ScheduledTaskScope(ScheduledTaskScope const&) = delete; - auto start() && -> TaskScope { return TaskScope{std::move(_task)}; } - - private: - std::shared_ptr _task; +/** Helper type to create a basic task */ +struct BaseTask : public Task { + BaseTask(std::string name, + std::source_location = std::source_location::current()); }; } // namespace arangodb::task_registry diff --git a/lib/Tasks/include/Tasks/task_registry_variable.h b/lib/Tasks/include/Tasks/task_registry_variable.h index d965462395e9..510db87d903d 100644 --- a/lib/Tasks/include/Tasks/task_registry_variable.h +++ b/lib/Tasks/include/Tasks/task_registry_variable.h @@ -25,11 +25,21 @@ #include "Containers/Concurrent/ListOfNonOwnedLists.h" #include "Containers/Concurrent/ThreadOwnedList.h" #include "Tasks/task.h" +#include "Logger/LogMacros.h" namespace arangodb::task_registry { using ThreadRegistry = containers::ThreadOwnedList; struct Registry : public containers::ListOfNonOwnedLists { + // TODO just here for debugging purpose + auto log(std::string_view message) -> void { + std::vector tasks; + for_node([&](task_registry::TaskSnapshot task) { + tasks.emplace_back(std::move(task)); + }); + LOG_DEVEL << fmt::format("{}: {}", message, + arangodb::inspection::json(tasks)); + } }; extern Registry registry; diff --git a/lib/Tasks/task.cpp b/lib/Tasks/task.cpp index 2da9123535b2..bae804861b50 100644 --- a/lib/Tasks/task.cpp +++ b/lib/Tasks/task.cpp @@ -24,7 +24,10 @@ #include "Assertions/ProdAssert.h" #include "Containers/Concurrent/source_location.h" +#include "Containers/Concurrent/thread.h" #include "Inspection/Format.h" +#include "Tasks/task_registry_variable.h" +#include #include // helper type for the visitor @@ -39,95 +42,14 @@ overloaded(Ts...) 
-> overloaded; namespace arangodb::task_registry { -auto TaskInRegistry::create(std::string name, std::source_location loc) - -> std::shared_ptr { - struct MakeSharedTask : TaskInRegistry { - MakeSharedTask(ParentTask parent, std::string name, std::string state, - std::source_location loc) - : TaskInRegistry(parent, std::move(name), std::move(state), - std::nullopt, std::move(loc)) {} - }; - return std::make_shared( - ParentTask{RootTask{}}, std::move(name), "created", std::move(loc)); -} -auto TaskInRegistry::subtask(TaskScope& parent, std::string name, - std::optional transaction, - std::source_location loc) - -> std::shared_ptr { - struct MakeSharedTask : TaskInRegistry { - MakeSharedTask(ParentTask parent, std::string name, std::string state, - std::optional transaction, - std::source_location loc) - : TaskInRegistry(parent, std::move(name), std::move(state), - std::move(transaction), std::move(loc)) {} - }; - return std::make_shared( - ParentTask{parent.task()}, std::move(name), "created", - std::move(transaction), std::move(loc)); -} -auto TaskInRegistry::scheduled(TaskScope& parent, std::string name, - std::source_location loc) - -> std::shared_ptr { - struct MakeSharedTask : TaskInRegistry { - MakeSharedTask(ParentTask parent, std::string name, std::string state, - std::source_location loc) - : TaskInRegistry(parent, std::move(name), std::move(state), - std::nullopt, std::move(loc)) {} - }; - return std::make_shared( - ParentTask{parent.task()}, std::move(name), "scheduled", std::move(loc)); -} - -auto TaskInRegistry::transaction_task(TransactionId transaction, - std::string name, - std::source_location loc) - -> std::shared_ptr { - struct MakeSharedTask : TaskInRegistry { - MakeSharedTask(ParentTask parent, std::string name, std::string state, - std::source_location loc) - : TaskInRegistry(parent, std::move(name), std::move(state), - std::nullopt, std::move(loc)) {} - }; - return std::make_shared(ParentTask{std::move(transaction)}, - std::move(name), 
"created", - std::move(loc)); -} - -TaskInRegistry::TaskInRegistry(ParentTask parent, std::string name, - std::string state, - std::optional transaction, - std::source_location loc) - : _name{std::move(name)}, - _state{std::move(state)}, - _parent{std::move(parent)}, - _transaction{std::move(transaction)}, - _source_location{std::move(loc)} {} - -// TODO -// Task::~Task() { -// if (_registry) { -// _registry->garbage_collect(); -// } -// } - -auto TaskInRegistry::update_state(std::string_view state, - std::source_location loc) -> void { - auto current_thread = basics::ThreadId::current(); - ADB_PROD_ASSERT(current_thread == _running_thread) << fmt::format( - "TaskRegistry::update_state was called from thread {} but needs to be " - "called from its owning thread {}. Called at {}. Task: {} ({}), {}", - fmt::format("{}", inspection::json(current_thread)), - fmt::format("{}", inspection::json(_running_thread)), - inspection::json(basics::SourceLocationSnapshot::from(loc)), _name, - _state, - inspection::json(basics::SourceLocationSnapshot::from(_source_location))); - _state = state; +void PrintTo(const TaskSnapshot& task, std::ostream* os) { + *os << inspection::json(task); } auto TaskInRegistry::snapshot() -> TaskSnapshot { return TaskSnapshot{ - .name = _name, - .state = _state, + .name = name, + .state = state, .id = id(), .parent = std::visit( overloaded{[&](RootTask root) { return ParentTaskSnapshot{root}; }, @@ -137,19 +59,56 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { [&](TransactionId transaction) { return ParentTaskSnapshot{transaction}; }}, - _parent), - .transaction = _transaction, - .thread = _running_thread, + parent), + .transaction = transaction, + .thread = running_thread, .source_location = basics::SourceLocationSnapshot{ - .file_name = _source_location.file_name(), - .function_name = _source_location.function_name(), - .line = _source_location.line()}}; + .file_name = source_location.file_name(), + .function_name = 
source_location.function_name(), + .line = source_location.line()}}; } -} // namespace arangodb::task_registry +Task::Task(TaskInRegistry task_in_registry) + : _node_in_registry{get_thread_registry().add( + [&]() { return std::move(task_in_registry); })} {} + +Task::~Task() { + if (_node_in_registry != nullptr) { + _node_in_registry->list->mark_for_deletion(_node_in_registry.get()); + } +} + +auto Task::id() -> void* { + if (_node_in_registry != nullptr) { + return _node_in_registry->data.id(); + } else { + return nullptr; + } +} -std::ostream& operator<<(std::ostream& os, - arangodb::task_registry::TaskSnapshot const& t) { - os << fmt::format("{}", arangodb::inspection::json(t)); - return os; +auto Task::update_state(std::string_view state, std::source_location loc) + -> void { + if (_node_in_registry) { + auto& task_data = _node_in_registry->data; + auto current_thread = basics::ThreadId::current(); + ADB_PROD_ASSERT(current_thread == task_data.running_thread) << fmt::format( + "TaskRegistry::update_state was called from thread {} but needs to be " + "called from its owning thread {}. Called at {}. 
Task: {} ({}), {}", + fmt::format("{}", inspection::json(current_thread)), + fmt::format("{}", inspection::json(task_data.running_thread)), + inspection::json(basics::SourceLocationSnapshot::from(loc)), + task_data.name, state, + inspection::json( + basics::SourceLocationSnapshot::from(task_data.source_location))); + task_data.state = state; + } } + +BaseTask::BaseTask(std::string name, std::source_location loc) + : Task{TaskInRegistry{.name = std::move(name), + .state = "created", + .parent = {ParentTask{RootTask{}}}, + .running_thread = basics::ThreadId::current(), + .source_location = std::move(loc)}} {} + +} // namespace arangodb::task_registry diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e1163282254b..1c7049da6a0d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -423,3 +423,4 @@ add_subdirectory(AsyncRegistryServer) add_subdirectory(Containers) add_subdirectory(sepp) add_subdirectory(VocBase/Properties) +add_subdirectory(Tasks) diff --git a/tests/Tasks/CMakeLists.txt b/tests/Tasks/CMakeLists.txt new file mode 100644 index 000000000000..540c58ca4c07 --- /dev/null +++ b/tests/Tasks/CMakeLists.txt @@ -0,0 +1,5 @@ +add_executable(task_registry_tests EXCLUDE_FROM_ALL + TaskRegistryTest.cpp) +target_link_libraries(task_registry_tests + arango_task_registry_global + gtest_main) diff --git a/tests/Tasks/TaskRegistryTest.cpp b/tests/Tasks/TaskRegistryTest.cpp new file mode 100644 index 000000000000..a6051b8876da --- /dev/null +++ b/tests/Tasks/TaskRegistryTest.cpp @@ -0,0 +1,64 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// + +#include "Containers/Concurrent/thread.h" +#include "Tasks/task.h" +#include "Tasks/task_registry_variable.h" +#include "Inspection/JsonPrintInspector.h" +#include + +#include + +using namespace arangodb; +using namespace arangodb::task_registry; + +namespace { +auto get_all_tasks(Registry& registry) -> std::vector { + std::vector tasks; + registry.for_node( + [&](TaskSnapshot task) { tasks.emplace_back(std::move(task)); }); + return tasks; +} + +struct MyBaseTask : public BaseTask { + basics::SourceLocationSnapshot source_location; + MyBaseTask(std::string name, + std::source_location loc = std::source_location::current()) + : BaseTask{std::move(name), loc}, + source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} +}; +} // namespace + +TEST(TaskRegistryTest, + a_root_task_is_saved_as_a_parent_of_an_entry_point_task) { + auto task = MyBaseTask{"test task"}; + + auto all_tasks = get_all_tasks(registry); + EXPECT_EQ(all_tasks, (std::vector{(TaskSnapshot{ + .name = "test task", + .state = "created", + .id = task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = task.source_location})})); +} From bcf4d447b3f77621df8183571671d8c4add6057d Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 28 Apr 2025 15:49:53 +0200 Subject: [PATCH 08/36] Be able to set a parent node as a shared ref --- 
lib/Tasks/include/Tasks/task.h | 29 ++++++++++++----- lib/Tasks/task.cpp | 31 +++++++++++------- tests/Tasks/TaskRegistryTest.cpp | 56 +++++++++++++++++++++++++------- 3 files changed, 84 insertions(+), 32 deletions(-) diff --git a/lib/Tasks/include/Tasks/task.h b/lib/Tasks/include/Tasks/task.h index a54053450300..57430613db8c 100644 --- a/lib/Tasks/include/Tasks/task.h +++ b/lib/Tasks/include/Tasks/task.h @@ -100,9 +100,11 @@ auto inspect(Inspector& f, TaskSnapshot& x) { } void PrintTo(const TaskSnapshot& task, std::ostream* os); -struct TaskInRegistry; -struct ParentTask - : std::variant, TransactionId> {}; +struct Node; +struct ParentNode { + std::shared_ptr node; +}; +struct ParentTask : std::variant {}; struct TaskScope; struct ScheduledTaskScope; @@ -138,16 +140,22 @@ struct TaskInRegistry { // std::chrono::time_point creation = std:; }; +struct Node : public containers::ThreadOwnedList::Node { + using containers::ThreadOwnedList::Node::Node; +}; + +struct ChildTask; + /** This task adds an entry to the task registry on construction and mark the entry for deletion on destruction. 
*/ struct Task { + friend ChildTask; Task(Task const&) = delete; Task& operator=(Task const&) = delete; Task(Task&&) = delete; Task& operator=(Task&&) = delete; - ~Task(); Task(TaskInRegistry task_in_registry); @@ -157,17 +165,20 @@ struct Task { -> void; private: - struct noop { - void operator()(void*) {} - }; - std::unique_ptr::Node, noop> - _node_in_registry = nullptr; + std::shared_ptr _node_in_registry = nullptr; }; /** Helper type to create a basic task */ +// TODO automatically detect current task create +// - a base task if there is not current task on current thread +// - a child task if there exists a current task struct BaseTask : public Task { BaseTask(std::string name, std::source_location = std::source_location::current()); }; +struct ChildTask : public Task { + ChildTask(std::string name, Task& parent, + std::source_location = std::source_location::current()); +}; } // namespace arangodb::task_registry diff --git a/lib/Tasks/task.cpp b/lib/Tasks/task.cpp index bae804861b50..23aa1df80457 100644 --- a/lib/Tasks/task.cpp +++ b/lib/Tasks/task.cpp @@ -53,8 +53,9 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { .id = id(), .parent = std::visit( overloaded{[&](RootTask root) { return ParentTaskSnapshot{root}; }, - [&](std::shared_ptr parent) { - return ParentTaskSnapshot{TaskIdWrapper{parent.get()}}; + [&](ParentNode parent) { + return ParentTaskSnapshot{ + TaskIdWrapper{parent.node->data.id()}}; }, [&](TransactionId transaction) { return ParentTaskSnapshot{transaction}; @@ -69,14 +70,12 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { } Task::Task(TaskInRegistry task_in_registry) - : _node_in_registry{get_thread_registry().add( - [&]() { return std::move(task_in_registry); })} {} - -Task::~Task() { - if (_node_in_registry != nullptr) { - _node_in_registry->list->mark_for_deletion(_node_in_registry.get()); - } -} + : _node_in_registry{std::shared_ptr( + reinterpret_cast(get_thread_registry().add( + [&]() { return std::move(task_in_registry); })), + 
[](containers::ThreadOwnedList::Node* ptr) { + ptr->list->mark_for_deletion(ptr); + })} {} auto Task::id() -> void* { if (_node_in_registry != nullptr) { @@ -106,9 +105,17 @@ auto Task::update_state(std::string_view state, std::source_location loc) BaseTask::BaseTask(std::string name, std::source_location loc) : Task{TaskInRegistry{.name = std::move(name), - .state = "created", - .parent = {ParentTask{RootTask{}}}, + .state = "running", + .parent = ParentTask{RootTask{}}, .running_thread = basics::ThreadId::current(), .source_location = std::move(loc)}} {} +ChildTask::ChildTask(std::string name, Task& parent, std::source_location loc) + : Task{TaskInRegistry{ + .name = std::move(name), + .state = "running", + .parent = ParentTask{ParentNode{parent._node_in_registry}}, + .running_thread = basics::ThreadId::current(), + .source_location = std::move(loc)}} {} + } // namespace arangodb::task_registry diff --git a/tests/Tasks/TaskRegistryTest.cpp b/tests/Tasks/TaskRegistryTest.cpp index a6051b8876da..4c1a0bf99eaa 100644 --- a/tests/Tasks/TaskRegistryTest.cpp +++ b/tests/Tasks/TaskRegistryTest.cpp @@ -33,7 +33,7 @@ using namespace arangodb; using namespace arangodb::task_registry; namespace { -auto get_all_tasks(Registry& registry) -> std::vector { +auto get_all_tasks() -> std::vector { std::vector tasks; registry.for_node( [&](TaskSnapshot task) { tasks.emplace_back(std::move(task)); }); @@ -47,18 +47,52 @@ struct MyBaseTask : public BaseTask { : BaseTask{std::move(name), loc}, source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} }; +struct MyChildTask : public ChildTask { + basics::SourceLocationSnapshot source_location; + MyChildTask(std::string name, Task& task, + std::source_location loc = std::source_location::current()) + : ChildTask{std::move(name), task, loc}, + source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} +}; } // namespace -TEST(TaskRegistryTest, - a_root_task_is_saved_as_a_parent_of_an_entry_point_task) { 
+struct TaskRegistryTest : ::testing::Test { + void TearDown() override { + // execute garbage collection on current thread + get_thread_registry().garbage_collect(); + } +}; + +// TODO fix naming of base task vs root task +TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { auto task = MyBaseTask{"test task"}; - auto all_tasks = get_all_tasks(registry); - EXPECT_EQ(all_tasks, (std::vector{(TaskSnapshot{ - .name = "test task", - .state = "created", - .id = task.id(), - .parent = {RootTask{}}, - .thread = basics::ThreadId::current(), - .source_location = task.source_location})})); + EXPECT_EQ(get_all_tasks(), (std::vector{(TaskSnapshot{ + .name = "test task", + .state = "running", + .id = task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = task.source_location})})); +} + +TEST_F(TaskRegistryTest, create_child_task) { + auto parent_task = MyBaseTask{"parent task"}; + auto child_task = MyChildTask{"child task", parent_task}; + + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); } From b6a78acd6371ebda3dcadeb296fff53d69362657 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 28 Apr 2025 17:24:14 +0200 Subject: [PATCH 09/36] Add tests for parent lifetime --- lib/Tasks/include/Tasks/task.h | 4 +- tests/Tasks/TaskRegistryTest.cpp | 213 ++++++++++++++++++++++++++++--- 2 files changed, 195 insertions(+), 22 deletions(-) diff --git a/lib/Tasks/include/Tasks/task.h b/lib/Tasks/include/Tasks/task.h index 57430613db8c..9b77eb5efa84 100644 --- a/lib/Tasks/include/Tasks/task.h +++ 
b/lib/Tasks/include/Tasks/task.h @@ -152,10 +152,10 @@ struct ChildTask; */ struct Task { friend ChildTask; + Task(Task&& other) = default; + Task& operator=(Task&& other) = default; Task(Task const&) = delete; Task& operator=(Task const&) = delete; - Task(Task&&) = delete; - Task& operator=(Task&&) = delete; Task(TaskInRegistry task_in_registry); diff --git a/tests/Tasks/TaskRegistryTest.cpp b/tests/Tasks/TaskRegistryTest.cpp index 4c1a0bf99eaa..f2dc7a61714d 100644 --- a/tests/Tasks/TaskRegistryTest.cpp +++ b/tests/Tasks/TaskRegistryTest.cpp @@ -21,12 +21,14 @@ /// @author Julia Volmer //////////////////////////////////////////////////////////////////////////////// +#include "Async/async.h" #include "Containers/Concurrent/thread.h" #include "Tasks/task.h" #include "Tasks/task_registry_variable.h" #include "Inspection/JsonPrintInspector.h" #include +#include #include using namespace arangodb; @@ -60,10 +62,10 @@ struct TaskRegistryTest : ::testing::Test { void TearDown() override { // execute garbage collection on current thread get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 0); } }; -// TODO fix naming of base task vs root task TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { auto task = MyBaseTask{"test task"}; @@ -76,23 +78,194 @@ TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { .source_location = task.source_location})})); } -TEST_F(TaskRegistryTest, create_child_task) { - auto parent_task = MyBaseTask{"parent task"}; - auto child_task = MyChildTask{"child task", parent_task}; - - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "child task", - .state = "running", - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location}), - (TaskSnapshot{.name = "parent task", - .state = "running", - .id = parent_task.id(), - .parent = {RootTask{}}, - .thread = basics::ThreadId::current(), - 
.source_location = parent_task.source_location})})); +TEST_F(TaskRegistryTest, creates_a_child_task) { + { + auto parent_task = MyBaseTask{"parent task"}; + auto child_task = MyChildTask{"child task", parent_task}; + + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + } + // here child task-in-registry is marked for deletion + // but parent task-in-registry is not because it is referenced inside the + // child task-in-registry as its parent + // therefore garbage collection has to run twice to clean up everything + // - in the first run, the child task-in-registry is destroyed + // now the shared ptr to parent is empty, therefore its destructor is called + // this marks the parent task in registry for deletion + // - in second run, the parent task-in-registry is destroyed + get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 0); +} + +TEST_F(TaskRegistryTest, creates_a_child_task_hierarchy) { + { + auto parent_task = MyBaseTask{"parent task"}; + auto child_task = MyChildTask{"child task", parent_task}; + auto child_of_child_task = MyChildTask{"child of child task", child_task}; + + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{ + .name = "child of child task", + .state = "running", + .id = child_of_child_task.id(), + .parent = {TaskIdWrapper{child_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_of_child_task.source_location}), + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = 
{TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + } + get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 0); +} + +struct WaitSlot { + void resume() { + ready = true; + _continuation.resume(); + } + + std::coroutine_handle<> _continuation; + + bool await_ready() { return ready; } + void await_resume() {} + void await_suspend(std::coroutine_handle<> continuation) { + _continuation = continuation; + } + + bool ready = false; +}; +TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { + WaitSlot wait; + { + auto parent_task = MyBaseTask{"parent task"}; + auto a = [&parent_task, &wait]() -> async { + auto child_task = MyChildTask{"child task", parent_task}; + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + co_await wait; + co_return; + }(); + } + + // both task-in-registries still exist: + // child lives in suspended coroutine and references parent + get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 2); + + // resume coroutine, mark child for deletion and therefore also parent + wait.resume(); + get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 
0); +} + +TEST_F(TaskRegistryTest, + a_base_task_lives_as_long_as_its_longest_living_child) { + WaitSlot first_wait; + WaitSlot second_wait; + TaskSnapshot first_child_task; + { + auto parent_task = MyBaseTask{"parent task"}; + auto a = [&parent_task, &first_wait, &first_child_task]() -> async { + auto child_task = MyChildTask{"first child task", parent_task}; + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "first child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + first_child_task = get_all_tasks()[0]; + co_await first_wait; + co_return; + }(); + auto b = [&parent_task, &second_wait, &first_child_task]() -> async { + auto child_task = MyChildTask{"second child task", parent_task}; + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "second child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + first_child_task, + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + co_await second_wait; + co_return; + }(); + } + + // all three task-in-registries still exist: + // childs live in suspended coroutines and reference parent + get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 3); + + // resume second coroutine, mark child for deletion + second_wait.resume(); + get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); + 
EXPECT_EQ(get_all_tasks().size(), 2); + + // resume first coroutine, mark child for deletion and therefore also parent + first_wait.resume(); + get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); + EXPECT_EQ(get_all_tasks().size(), 0); } From 6a530c4cd835a03e0e29cfc87a9a917d0eebcb95 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 28 Apr 2025 21:21:19 +0200 Subject: [PATCH 10/36] Add tests to arangodbtests --- tests/CMakeLists.txt | 1 + tests/Tasks/CMakeLists.txt | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1c7049da6a0d..9ad94eda0847 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -331,6 +331,7 @@ target_link_libraries(arangodbtests arango_tests_basics arango_tests_replication2 arango_tests_replication2_pure + arango_tests_task_registry arango_tests_graph arango_tests_futures arango_tests_zkd diff --git a/tests/Tasks/CMakeLists.txt b/tests/Tasks/CMakeLists.txt index 540c58ca4c07..1b7fe3e86702 100644 --- a/tests/Tasks/CMakeLists.txt +++ b/tests/Tasks/CMakeLists.txt @@ -1,5 +1,12 @@ -add_executable(task_registry_tests EXCLUDE_FROM_ALL +add_library(arango_tests_task_registry OBJECT TaskRegistryTest.cpp) -target_link_libraries(task_registry_tests +target_link_libraries(arango_tests_task_registry PRIVATE arango_task_registry_global + gtest +) + +add_executable(arangodbtests_task_registry EXCLUDE_FROM_ALL) +target_link_libraries(arangodbtests_task_registry + arango_tests_task_registry gtest_main) + From a6540591f7cbeb9c90435234d17f285ed9818e93 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 28 Apr 2025 21:29:34 +0200 Subject: [PATCH 11/36] Get rid of currently unused stuff --- lib/Tasks/include/Tasks/task.h | 54 +++---------------- .../include/Tasks/task_registry_variable.h | 13 +---- lib/Tasks/task.cpp | 22 -------- 3 files changed, 9 insertions(+), 80 deletions(-) diff --git a/lib/Tasks/include/Tasks/task.h 
b/lib/Tasks/include/Tasks/task.h index 9b77eb5efa84..a4bca8819d86 100644 --- a/lib/Tasks/include/Tasks/task.h +++ b/lib/Tasks/include/Tasks/task.h @@ -42,25 +42,6 @@ auto inspect(Inspector& f, RootTask& x) { return f.object(x).fields(); } -struct TransactionId { - std::uint64_t id; - bool operator==(TransactionId const&) const = default; -}; -template -auto inspect(Inspector& f, TransactionId& x) { - return f.object(x).fields(f.field("tid", x.id)); -} - -struct TransactionTask { - std::string name; - TransactionId tid; - bool operator==(TransactionTask const&) const = default; -}; -template -auto inspect(Inspector& f, TransactionTask& x) { - return f.object(x).fields(f.field("name", x.name), f.embedFields(x.tid)); -} - struct TaskIdWrapper { void* id; bool operator==(TaskIdWrapper const&) const = default; @@ -70,14 +51,12 @@ auto inspect(Inspector& f, TaskIdWrapper& x) { return f.object(x).fields(f.field("id", fmt::format("{}", x.id))); } -struct ParentTaskSnapshot - : std::variant {}; +struct ParentTaskSnapshot : std::variant {}; template auto inspect(Inspector& f, ParentTaskSnapshot& x) { return f.variant(x).unqualified().alternatives( inspection::inlineType(), - inspection::inlineType(), - inspection::inlineType()); + inspection::inlineType()); } struct TaskSnapshot { @@ -85,18 +64,17 @@ struct TaskSnapshot { std::string state; void* id; ParentTaskSnapshot parent; - std::optional transaction; std::optional thread; basics::SourceLocationSnapshot source_location; bool operator==(TaskSnapshot const&) const = default; }; template auto inspect(Inspector& f, TaskSnapshot& x) { - return f.object(x).fields( - f.field("id", fmt::format("{}", x.id)), f.field("name", x.name), - f.field("state", x.state), f.field("parent", x.parent), - f.field("transaction", x.transaction), f.field("thread", x.thread), - f.field("source_location", x.source_location)); + return f.object(x).fields(f.field("id", fmt::format("{}", x.id)), + f.field("name", x.name), f.field("state", x.state), + 
f.field("parent", x.parent), + f.field("thread", x.thread), + f.field("source_location", x.source_location)); } void PrintTo(const TaskSnapshot& task, std::ostream* os); @@ -104,7 +82,7 @@ struct Node; struct ParentNode { std::shared_ptr node; }; -struct ParentTask : std::variant {}; +struct ParentTask : std::variant {}; struct TaskScope; struct ScheduledTaskScope; @@ -118,20 +96,10 @@ struct TaskInRegistry { auto snapshot() -> TaskSnapshot; auto set_to_deleted() -> void {} - /** - Update the state - - Can only be called on its own running thread, throws otherwise. - */ - auto update_state(std::string_view state, - std::source_location loc = std::source_location::current()) - -> void; // should only be called from scope - std::string const name; std::string state; // has to probably be atomic (for reading and writing // concurrently on different threads), but is string... ParentTask parent; - std::optional transaction; // stays constant std::optional running_thread; // proably has to also be atomic because // changes for scheduled task @@ -160,18 +128,12 @@ struct Task { Task(TaskInRegistry task_in_registry); auto id() -> void*; - auto update_state(std::string_view state, - std::source_location loc = std::source_location::current()) - -> void; private: std::shared_ptr _node_in_registry = nullptr; }; /** Helper type to create a basic task */ -// TODO automatically detect current task create -// - a base task if there is not current task on current thread -// - a child task if there exists a current task struct BaseTask : public Task { BaseTask(std::string name, std::source_location = std::source_location::current()); diff --git a/lib/Tasks/include/Tasks/task_registry_variable.h b/lib/Tasks/include/Tasks/task_registry_variable.h index 510db87d903d..67ea3ecc92ec 100644 --- a/lib/Tasks/include/Tasks/task_registry_variable.h +++ b/lib/Tasks/include/Tasks/task_registry_variable.h @@ -25,22 +25,11 @@ #include "Containers/Concurrent/ListOfNonOwnedLists.h" #include 
"Containers/Concurrent/ThreadOwnedList.h" #include "Tasks/task.h" -#include "Logger/LogMacros.h" namespace arangodb::task_registry { using ThreadRegistry = containers::ThreadOwnedList; -struct Registry : public containers::ListOfNonOwnedLists { - // TODO just here for debugging purpose - auto log(std::string_view message) -> void { - std::vector tasks; - for_node([&](task_registry::TaskSnapshot task) { - tasks.emplace_back(std::move(task)); - }); - LOG_DEVEL << fmt::format("{}: {}", message, - arangodb::inspection::json(tasks)); - } -}; +struct Registry : public containers::ListOfNonOwnedLists {}; extern Registry registry; diff --git a/lib/Tasks/task.cpp b/lib/Tasks/task.cpp index 23aa1df80457..e114f4d1e6ff 100644 --- a/lib/Tasks/task.cpp +++ b/lib/Tasks/task.cpp @@ -56,12 +56,8 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { [&](ParentNode parent) { return ParentTaskSnapshot{ TaskIdWrapper{parent.node->data.id()}}; - }, - [&](TransactionId transaction) { - return ParentTaskSnapshot{transaction}; }}, parent), - .transaction = transaction, .thread = running_thread, .source_location = basics::SourceLocationSnapshot{ .file_name = source_location.file_name(), @@ -85,24 +81,6 @@ auto Task::id() -> void* { } } -auto Task::update_state(std::string_view state, std::source_location loc) - -> void { - if (_node_in_registry) { - auto& task_data = _node_in_registry->data; - auto current_thread = basics::ThreadId::current(); - ADB_PROD_ASSERT(current_thread == task_data.running_thread) << fmt::format( - "TaskRegistry::update_state was called from thread {} but needs to be " - "called from its owning thread {}. Called at {}. 
Task: {} ({}), {}", - fmt::format("{}", inspection::json(current_thread)), - fmt::format("{}", inspection::json(task_data.running_thread)), - inspection::json(basics::SourceLocationSnapshot::from(loc)), - task_data.name, state, - inspection::json( - basics::SourceLocationSnapshot::from(task_data.source_location))); - task_data.state = state; - } -} - BaseTask::BaseTask(std::string name, std::source_location loc) : Task{TaskInRegistry{.name = std::move(name), .state = "running", From fe9d1a9cf4ac743bb75267c54de1fa8a1b6e0e97 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 5 May 2025 11:26:39 +0200 Subject: [PATCH 12/36] Rename directory and namespace --- lib/CMakeLists.txt | 2 +- lib/{Tasks => TaskMonitoring}/CMakeLists.txt | 0 .../include/TaskMonitoring}/task.h | 4 ++-- .../include/TaskMonitoring}/task_registry_variable.h | 6 +++--- lib/{Tasks => TaskMonitoring}/task.cpp | 8 ++++---- lib/{Tasks => TaskMonitoring}/task_registry_variable.cpp | 6 +++--- tests/CMakeLists.txt | 2 +- tests/{Tasks => TaskMonitoring}/CMakeLists.txt | 0 tests/{Tasks => TaskMonitoring}/TaskRegistryTest.cpp | 5 ++--- 9 files changed, 16 insertions(+), 17 deletions(-) rename lib/{Tasks => TaskMonitoring}/CMakeLists.txt (100%) rename lib/{Tasks/include/Tasks => TaskMonitoring/include/TaskMonitoring}/task.h (98%) rename lib/{Tasks/include/Tasks => TaskMonitoring/include/TaskMonitoring}/task_registry_variable.h (92%) rename lib/{Tasks => TaskMonitoring}/task.cpp (95%) rename lib/{Tasks => TaskMonitoring}/task_registry_variable.cpp (91%) rename tests/{Tasks => TaskMonitoring}/CMakeLists.txt (100%) rename tests/{Tasks => TaskMonitoring}/TaskRegistryTest.cpp (99%) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index bf3d0d000b67..b01d92b4f20c 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -452,4 +452,4 @@ add_subdirectory(CrashHandler) add_subdirectory(Assertions) add_subdirectory(Inspection) add_subdirectory(BuildId) -add_subdirectory(Tasks) 
+add_subdirectory(TaskMonitoring) diff --git a/lib/Tasks/CMakeLists.txt b/lib/TaskMonitoring/CMakeLists.txt similarity index 100% rename from lib/Tasks/CMakeLists.txt rename to lib/TaskMonitoring/CMakeLists.txt diff --git a/lib/Tasks/include/Tasks/task.h b/lib/TaskMonitoring/include/TaskMonitoring/task.h similarity index 98% rename from lib/Tasks/include/Tasks/task.h rename to lib/TaskMonitoring/include/TaskMonitoring/task.h index a4bca8819d86..2fe7a6e7515e 100644 --- a/lib/Tasks/include/Tasks/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -32,7 +32,7 @@ #include #include -namespace arangodb::task_registry { +namespace arangodb::task_monitoring { struct RootTask { bool operator==(RootTask const&) const = default; @@ -143,4 +143,4 @@ struct ChildTask : public Task { std::source_location = std::source_location::current()); }; -} // namespace arangodb::task_registry +} // namespace arangodb::task_monitoring diff --git a/lib/Tasks/include/Tasks/task_registry_variable.h b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h similarity index 92% rename from lib/Tasks/include/Tasks/task_registry_variable.h rename to lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h index 67ea3ecc92ec..603dfca05c47 100644 --- a/lib/Tasks/include/Tasks/task_registry_variable.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h @@ -24,9 +24,9 @@ #include "Containers/Concurrent/ListOfNonOwnedLists.h" #include "Containers/Concurrent/ThreadOwnedList.h" -#include "Tasks/task.h" +#include "TaskMonitoring/task.h" -namespace arangodb::task_registry { +namespace arangodb::task_monitoring { using ThreadRegistry = containers::ThreadOwnedList; struct Registry : public containers::ListOfNonOwnedLists {}; @@ -35,4 +35,4 @@ extern Registry registry; auto get_thread_registry() noexcept -> ThreadRegistry&; -} // namespace arangodb::task_registry +} // namespace arangodb::task_monitoring diff --git a/lib/Tasks/task.cpp 
b/lib/TaskMonitoring/task.cpp similarity index 95% rename from lib/Tasks/task.cpp rename to lib/TaskMonitoring/task.cpp index e114f4d1e6ff..51812a444bb1 100644 --- a/lib/Tasks/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -20,13 +20,13 @@ /// /// @author Julia Volmer //////////////////////////////////////////////////////////////////////////////// -#include "Tasks/task.h" +#include "TaskMonitoring/task.h" #include "Assertions/ProdAssert.h" #include "Containers/Concurrent/source_location.h" #include "Containers/Concurrent/thread.h" #include "Inspection/Format.h" -#include "Tasks/task_registry_variable.h" +#include "TaskMonitoring/task_registry_variable.h" #include #include @@ -40,7 +40,7 @@ template overloaded(Ts...) -> overloaded; } // namespace -namespace arangodb::task_registry { +namespace arangodb::task_monitoring { void PrintTo(const TaskSnapshot& task, std::ostream* os) { *os << inspection::json(task); @@ -96,4 +96,4 @@ ChildTask::ChildTask(std::string name, Task& parent, std::source_location loc) .running_thread = basics::ThreadId::current(), .source_location = std::move(loc)}} {} -} // namespace arangodb::task_registry +} // namespace arangodb::task_monitoring diff --git a/lib/Tasks/task_registry_variable.cpp b/lib/TaskMonitoring/task_registry_variable.cpp similarity index 91% rename from lib/Tasks/task_registry_variable.cpp rename to lib/TaskMonitoring/task_registry_variable.cpp index 9c268a2e35ed..03fc02aaec6b 100644 --- a/lib/Tasks/task_registry_variable.cpp +++ b/lib/TaskMonitoring/task_registry_variable.cpp @@ -20,9 +20,9 @@ /// /// @author Julia Volmer //////////////////////////////////////////////////////////////////////////////// -#include "Tasks/task_registry_variable.h" +#include "TaskMonitoring/task_registry_variable.h" -namespace arangodb::task_registry { +namespace arangodb::task_monitoring { Registry registry; @@ -38,4 +38,4 @@ auto get_thread_registry() noexcept -> ThreadRegistry& { return *registry_guard._registry; } -} // namespace 
arangodb::task_registry +} // namespace arangodb::task_monitoring diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9ad94eda0847..ed47f83aa162 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -424,4 +424,4 @@ add_subdirectory(AsyncRegistryServer) add_subdirectory(Containers) add_subdirectory(sepp) add_subdirectory(VocBase/Properties) -add_subdirectory(Tasks) +add_subdirectory(TaskMonitoring) diff --git a/tests/Tasks/CMakeLists.txt b/tests/TaskMonitoring/CMakeLists.txt similarity index 100% rename from tests/Tasks/CMakeLists.txt rename to tests/TaskMonitoring/CMakeLists.txt diff --git a/tests/Tasks/TaskRegistryTest.cpp b/tests/TaskMonitoring/TaskRegistryTest.cpp similarity index 99% rename from tests/Tasks/TaskRegistryTest.cpp rename to tests/TaskMonitoring/TaskRegistryTest.cpp index f2dc7a61714d..484454abf0e2 100644 --- a/tests/Tasks/TaskRegistryTest.cpp +++ b/tests/TaskMonitoring/TaskRegistryTest.cpp @@ -23,8 +23,7 @@ #include "Async/async.h" #include "Containers/Concurrent/thread.h" -#include "Tasks/task.h" -#include "Tasks/task_registry_variable.h" +#include "TaskMonitoring/task_registry_variable.h" #include "Inspection/JsonPrintInspector.h" #include @@ -32,7 +31,7 @@ #include using namespace arangodb; -using namespace arangodb::task_registry; +using namespace arangodb::task_monitoring; namespace { auto get_all_tasks() -> std::vector { From 358ca7e90858e7872cf60b8208777694b4d2bd1f Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Wed, 30 Apr 2025 11:16:34 +0200 Subject: [PATCH 13/36] Do reference counting manually --- .../include/TaskMonitoring/shared_reference.h | 96 +++++++++++++++++++ .../include/TaskMonitoring/task.h | 17 ++-- .../TaskMonitoring/task_registry_variable.h | 13 +++ lib/TaskMonitoring/task.cpp | 31 +++--- 4 files changed, 130 insertions(+), 27 deletions(-) create mode 100644 lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h diff --git a/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h 
b/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h new file mode 100644 index 000000000000..b2905d5feee1 --- /dev/null +++ b/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h @@ -0,0 +1,96 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include + +/** + Reference counting wrapper for a resource + + Destroys itself and calls a custom cleanup function on the resource when the + reference count decrements to zero. 
+ */ +template +struct Shared { + static auto create(T* resource, std::function cleanup) -> Shared* { + if (resource == nullptr) { + std::abort(); + } + return new Shared{resource, cleanup}; + } + auto get_ref() const -> T& { return *_resource; } + auto get() const -> T* { return _resource; } + auto increment() -> void { ref_count.fetch_add(1); } + auto decrement() -> void { + auto old = ref_count.fetch_sub(1); + if (old == 1) { + _cleanup(_resource); + delete this; + } + } + + private: + T* _resource; + std::function _cleanup; + std::atomic ref_count = 0; + Shared(T* node, std::function cleanup) + : _resource{node}, _cleanup{cleanup} {} +}; + +/** + Shared reference to a resource + + Increases reference counter on construction and decreases it on destruction. + */ +template +struct SharedReference { + SharedReference(SharedReference const& other) + : _shared_node{other._shared_node} { + _shared_node->increment(); + } + auto operator=(SharedReference const& other) -> SharedReference { + _shared_node = other._shared_node; + _shared_node->increment(); + } + ~SharedReference() { _shared_node->decrement(); } + static auto create(Shared* node) -> SharedReference { + if (node == nullptr) { + std::abort(); + } + return SharedReference{node}; + } + static auto create(T* resource, std::function cleanup) + -> SharedReference { + return SharedReference{Shared::create(resource, cleanup)}; + } + auto operator*() const -> T& { return _shared_node->get_ref(); } + auto operator->() const -> T* { return _shared_node->get(); } + + private: + Shared* _shared_node; + SharedReference(Shared* node) : _shared_node{node} { + _shared_node->increment(); + } +}; diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 2fe7a6e7515e..722ee59d3002 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -27,6 +27,7 @@ #include "Containers/Concurrent/thread.h" #include 
"Inspection/Types.h" #include "fmt/format.h" +#include "shared_reference.h" #include #include @@ -79,10 +80,8 @@ auto inspect(Inspector& f, TaskSnapshot& x) { void PrintTo(const TaskSnapshot& task, std::ostream* os); struct Node; -struct ParentNode { - std::shared_ptr node; -}; -struct ParentTask : std::variant {}; + +struct ParentTask : std::variant> {}; struct TaskScope; struct ScheduledTaskScope; @@ -108,12 +107,12 @@ struct TaskInRegistry { // std::chrono::time_point creation = std:; }; -struct Node : public containers::ThreadOwnedList::Node { - using containers::ThreadOwnedList::Node::Node; -}; +/** + Use inheritance to circumvent problems with non-satified constraints for Node + */ +struct Node : public containers::ThreadOwnedList::Node {}; struct ChildTask; - /** This task adds an entry to the task registry on construction and mark the entry for deletion on destruction. @@ -130,7 +129,7 @@ struct Task { auto id() -> void*; private: - std::shared_ptr _node_in_registry = nullptr; + SharedReference _node_in_registry; }; /** Helper type to create a basic task */ diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h index 603dfca05c47..94528374fdbf 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h @@ -31,8 +31,21 @@ namespace arangodb::task_monitoring { using ThreadRegistry = containers::ThreadOwnedList; struct Registry : public containers::ListOfNonOwnedLists {}; + +/** + Global variable that holds all active tasks. + + Includes a list of thread owned lists, one for each initialized + thread. + */ extern Registry registry; +/** + Get thread registry of all active tasks on current thread. + + Creates the thread registry when called for the first time and adds it to the + global registry. 
+ */ auto get_thread_registry() noexcept -> ThreadRegistry&; } // namespace arangodb::task_monitoring diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index 51812a444bb1..18bb6f94a6a0 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -53,9 +53,9 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { .id = id(), .parent = std::visit( overloaded{[&](RootTask root) { return ParentTaskSnapshot{root}; }, - [&](ParentNode parent) { + [&](SharedReference parent) { return ParentTaskSnapshot{ - TaskIdWrapper{parent.node->data.id()}}; + TaskIdWrapper{parent->data.id()}}; }}, parent), .thread = running_thread, @@ -66,20 +66,16 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { } Task::Task(TaskInRegistry task_in_registry) - : _node_in_registry{std::shared_ptr( + : _node_in_registry{SharedReference::create( reinterpret_cast(get_thread_registry().add( [&]() { return std::move(task_in_registry); })), - [](containers::ThreadOwnedList::Node* ptr) { - ptr->list->mark_for_deletion(ptr); + [](Node* ptr) { + auto specific_node = reinterpret_cast< + containers::ThreadOwnedList::Node*>(ptr); + specific_node->list->mark_for_deletion(specific_node); })} {} -auto Task::id() -> void* { - if (_node_in_registry != nullptr) { - return _node_in_registry->data.id(); - } else { - return nullptr; - } -} +auto Task::id() -> void* { return _node_in_registry->data.id(); } BaseTask::BaseTask(std::string name, std::source_location loc) : Task{TaskInRegistry{.name = std::move(name), @@ -89,11 +85,10 @@ BaseTask::BaseTask(std::string name, std::source_location loc) .source_location = std::move(loc)}} {} ChildTask::ChildTask(std::string name, Task& parent, std::source_location loc) - : Task{TaskInRegistry{ - .name = std::move(name), - .state = "running", - .parent = ParentTask{ParentNode{parent._node_in_registry}}, - .running_thread = basics::ThreadId::current(), - .source_location = std::move(loc)}} {} + : Task{TaskInRegistry{.name = std::move(name), + 
.state = "running", + .parent = ParentTask{parent._node_in_registry}, + .running_thread = basics::ThreadId::current(), + .source_location = std::move(loc)}} {} } // namespace arangodb::task_monitoring From 35708396049eaa3b4387dc8299e850006be97404 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Wed, 30 Apr 2025 22:43:44 +0200 Subject: [PATCH 14/36] Mark for deletion up the hierarchy --- .../include/TaskMonitoring/shared_reference.h | 22 +- .../include/TaskMonitoring/task.h | 9 +- lib/TaskMonitoring/task.cpp | 66 +++++- tests/TaskMonitoring/TaskRegistryTest.cpp | 216 ++++++++++++------ 4 files changed, 221 insertions(+), 92 deletions(-) diff --git a/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h b/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h index b2905d5feee1..2903dc221081 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h @@ -42,19 +42,20 @@ struct Shared { } auto get_ref() const -> T& { return *_resource; } auto get() const -> T* { return _resource; } - auto increment() -> void { ref_count.fetch_add(1); } + auto increment() -> void { _count.fetch_add(1, std::memory_order_acq_rel); } auto decrement() -> void { - auto old = ref_count.fetch_sub(1); + auto old = _count.fetch_sub(1, std::memory_order_acq_rel); if (old == 1) { _cleanup(_resource); delete this; } } + auto ref_count() -> size_t { return _count.load(std::memory_order_release); } private: T* _resource; std::function _cleanup; - std::atomic ref_count = 0; + std::atomic _count = 0; Shared(T* node, std::function cleanup) : _resource{node}, _cleanup{cleanup} {} }; @@ -74,7 +75,18 @@ struct SharedReference { _shared_node = other._shared_node; _shared_node->increment(); } - ~SharedReference() { _shared_node->decrement(); } + SharedReference(SharedReference&& other) : _shared_node{other._shared_node} { + other._shared_node = nullptr; + } + auto operator=(SharedReference&& other) -> 
SharedReference { + _shared_node = other._shared_node; + other._shared_node = nullptr; + } + ~SharedReference() { + if (_shared_node) { + _shared_node->decrement(); + } + } static auto create(Shared* node) -> SharedReference { if (node == nullptr) { std::abort(); @@ -87,6 +99,8 @@ struct SharedReference { } auto operator*() const -> T& { return _shared_node->get_ref(); } auto operator->() const -> T* { return _shared_node->get(); } + auto get() const -> T* { return _shared_node->get(); } + auto ref_count() -> size_t { return _shared_node->ref_count(); } private: Shared* _shared_node; diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 722ee59d3002..590a0e751e5d 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -80,8 +80,8 @@ auto inspect(Inspector& f, TaskSnapshot& x) { void PrintTo(const TaskSnapshot& task, std::ostream* os); struct Node; - -struct ParentTask : std::variant> {}; +using NodeReference = SharedReference; +struct ParentTask : std::variant {}; struct TaskScope; struct ScheduledTaskScope; @@ -93,11 +93,12 @@ struct TaskInRegistry { using Snapshot = TaskSnapshot; auto id() -> void* { return this; } auto snapshot() -> TaskSnapshot; - auto set_to_deleted() -> void {} + auto set_to_deleted() -> void { deleted = true; } std::string const name; std::string state; // has to probably be atomic (for reading and writing // concurrently on different threads), but is string... 
+ bool deleted = false; ParentTask parent; std::optional running_thread; // proably has to also be atomic because @@ -129,7 +130,7 @@ struct Task { auto id() -> void*; private: - SharedReference _node_in_registry; + NodeReference _node_in_registry; }; /** Helper type to create a basic task */ diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index 18bb6f94a6a0..f29ac06d0dcc 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -29,6 +29,7 @@ #include "TaskMonitoring/task_registry_variable.h" #include #include +#include // helper type for the visitor namespace { @@ -52,11 +53,11 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { .state = state, .id = id(), .parent = std::visit( - overloaded{[&](RootTask root) { return ParentTaskSnapshot{root}; }, - [&](SharedReference parent) { - return ParentTaskSnapshot{ - TaskIdWrapper{parent->data.id()}}; - }}, + overloaded{ + [&](RootTask const& root) { return ParentTaskSnapshot{root}; }, + [&](NodeReference const& parent) { + return ParentTaskSnapshot{TaskIdWrapper{parent->data.id()}}; + }}, parent), .thread = running_thread, .source_location = basics::SourceLocationSnapshot{ @@ -65,15 +66,58 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { .line = source_location.line()}}; } +namespace { +/** + Gives a stack of nodes that can be marked for deletion + + We first have to go up in the hierarchy and collect all nodes that can be + marked for deletion: Otherwise a garbage collection could run in between and + destroy an already marked for deletion node while we are working on its + parent ptr. 
+ */ +auto deletable_nodes_dependent_on_node(Node* node) + -> std::vector::Node*> { + auto stack = + std::vector::Node*>{}; + auto current_node = node; + while (true) { + auto specific_node = + reinterpret_cast::Node*>( + current_node); + // make sure that we don't mark a node twice for deletion + if (specific_node->data.deleted) { + break; + } + stack.push_back(specific_node); + + auto& parent = specific_node->data.parent; + if (not std::holds_alternative(parent)) { + break; + } + auto& parent_ref = std::get(parent); + if (parent_ref.ref_count() != 1) { + break; + } + // node is last reference to parent, therefore it can be marked for deletion + current_node = parent_ref.get(); + } + return stack; +} +auto mark_finished_nodes_for_deletion(Node* node) { + auto stack = deletable_nodes_dependent_on_node(node); + while (!stack.empty()) { + auto specific_node = stack.back(); + stack.pop_back(); + specific_node->list->mark_for_deletion(specific_node); + } +} +} // namespace + Task::Task(TaskInRegistry task_in_registry) - : _node_in_registry{SharedReference::create( + : _node_in_registry{NodeReference::create( reinterpret_cast(get_thread_registry().add( [&]() { return std::move(task_in_registry); })), - [](Node* ptr) { - auto specific_node = reinterpret_cast< - containers::ThreadOwnedList::Node*>(ptr); - specific_node->list->mark_for_deletion(specific_node); - })} {} + mark_finished_nodes_for_deletion)} {} auto Task::id() -> void* { return _node_in_registry->data.id(); } diff --git a/tests/TaskMonitoring/TaskRegistryTest.cpp b/tests/TaskMonitoring/TaskRegistryTest.cpp index 484454abf0e2..2471c4707a4d 100644 --- a/tests/TaskMonitoring/TaskRegistryTest.cpp +++ b/tests/TaskMonitoring/TaskRegistryTest.cpp @@ -59,7 +59,24 @@ struct MyChildTask : public ChildTask { struct TaskRegistryTest : ::testing::Test { void TearDown() override { - // execute garbage collection on current thread + // garbage collection has to run at most twice in order to clean everything + // up on the 
current thread: + // - when a child task scope is deleted, the child's task-in-registry is + // marked for deletion + // - at this point its parent task scope can still exist, therefore it is + // not marked for deletion inside the child task scope destructor + // - when then the parent task scope is deleted, the parent's + // task-in-registry is still referenced by the child's task-in-registry + // (which is not yet deleted), therefore it is not yet marked for deletion + + // the first gc run destroys the child's task-in-registry + // which destroys the last reference to the parent's task-in-registry, which + // is therfore marked for deletion (together with all remaining + // task-in-registries higher up in the hierarchy that are not referenced by + // any other tasks) + get_thread_registry().garbage_collect(); + // the second gc run destroys the parent's task-in-registry (and possibly + // other marked for deletion items) get_thread_registry().garbage_collect(); EXPECT_EQ(get_all_tasks().size(), 0); } @@ -78,72 +95,62 @@ TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { } TEST_F(TaskRegistryTest, creates_a_child_task) { - { - auto parent_task = MyBaseTask{"parent task"}; - auto child_task = MyChildTask{"child task", parent_task}; + auto parent_task = MyBaseTask{"parent task"}; + auto child_task = MyChildTask{"child task", parent_task}; - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "child task", - .state = "running", - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location}), - (TaskSnapshot{.name = "parent task", - .state = "running", - .id = parent_task.id(), - .parent = {RootTask{}}, - .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); - } - // here child task-in-registry is marked for deletion - // but parent task-in-registry is not because it is referenced inside the - // child 
task-in-registry as its parent - // therefore garbage collection has to run twice to clean up everything - // - in the first run, the child task-in-registry is destroyed - // now the shared ptr to parent is empty, therefore its destructor is called - // this marks the parent task in registry for deletion - // - in second run, the parent task-in-registry is destroyed - get_thread_registry().garbage_collect(); - get_thread_registry().garbage_collect(); - EXPECT_EQ(get_all_tasks().size(), 0); + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); } TEST_F(TaskRegistryTest, creates_a_child_task_hierarchy) { - { - auto parent_task = MyBaseTask{"parent task"}; - auto child_task = MyChildTask{"child task", parent_task}; - auto child_of_child_task = MyChildTask{"child of child task", child_task}; + auto parent_task = MyBaseTask{"parent task"}; + auto child_task = MyChildTask{"child task", parent_task}; + auto child_of_child_task = MyChildTask{"child of child task", child_task}; + auto child_of_child_of_child_task = + MyChildTask{"child of child of child task", child_of_child_task}; - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{ - .name = "child of child task", - .state = "running", - .id = child_of_child_task.id(), - .parent = {TaskIdWrapper{child_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_of_child_task.source_location}), - (TaskSnapshot{.name = "child task", - .state = "running", - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - 
.source_location = child_task.source_location}), - (TaskSnapshot{.name = "parent task", - .state = "running", - .id = parent_task.id(), - .parent = {RootTask{}}, - .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); - } - get_thread_registry().garbage_collect(); - get_thread_registry().garbage_collect(); - get_thread_registry().garbage_collect(); - EXPECT_EQ(get_all_tasks().size(), 0); + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{ + .name = "child of child of child task", + .state = "running", + .id = child_of_child_of_child_task.id(), + .parent = {TaskIdWrapper{child_of_child_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_of_child_of_child_task.source_location}), + (TaskSnapshot{ + .name = "child of child task", + .state = "running", + .id = child_of_child_task.id(), + .parent = {TaskIdWrapper{child_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_of_child_task.source_location}), + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); } struct WaitSlot { @@ -162,11 +169,12 @@ struct WaitSlot { bool ready = false; }; + TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { WaitSlot wait; { auto parent_task = MyBaseTask{"parent task"}; - auto a = [&parent_task, &wait]() -> async { + std::ignore = [&parent_task, &wait]() -> async { auto child_task = MyChildTask{"child task", parent_task}; EXPECT_EQ( get_all_tasks(), @@ -190,14 +198,74 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { // both task-in-registries still exist: // child lives in 
suspended coroutine and references parent - get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); // does not do anything EXPECT_EQ(get_all_tasks().size(), 2); - // resume coroutine, mark child for deletion and therefore also parent + // resume coroutine, mark child for deletion at end of coroutine and mark + // parent for deletion at end of scope + wait.resume(); +} + +TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { + WaitSlot wait; + { + auto parent_task = MyBaseTask{"parent task"}; + + std::ignore = [&parent_task, &wait]() -> async { + auto child_task = MyChildTask{"child task", parent_task}; + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "child task", + .state = "running", + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + + co_await [&parent_task, &child_task, &wait, + first_child_task = get_all_tasks()[0]]() -> async { + auto child_of_child_task = + MyChildTask{"child of child task", child_task}; + EXPECT_EQ(get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "child of child task", + .state = "running", + .id = child_of_child_task.id(), + .parent = {TaskIdWrapper{child_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = + child_of_child_task.source_location}), + first_child_task, + (TaskSnapshot{ + .name = "parent task", + .state = "running", + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + co_await wait; + co_return; + }(); + + co_return; + }(); + } + + // both task-in-registries still exist: + // child lives in suspended coroutine and references 
parent + get_thread_registry().garbage_collect(); // does not do anything + EXPECT_EQ(get_all_tasks().size(), 3); + + // resume coroutine, mark child of child for deletion, child and parent are + // marked for deletion when child of child is destroyed wait.resume(); - get_thread_registry().garbage_collect(); - get_thread_registry().garbage_collect(); - EXPECT_EQ(get_all_tasks().size(), 0); } TEST_F(TaskRegistryTest, @@ -253,18 +321,20 @@ TEST_F(TaskRegistryTest, // all three task-in-registries still exist: // childs live in suspended coroutines and reference parent - get_thread_registry().garbage_collect(); + get_thread_registry().garbage_collect(); // does not do anything EXPECT_EQ(get_all_tasks().size(), 3); - // resume second coroutine, mark child for deletion + // marks second child for deletion, parent is still in scope second_wait.resume(); get_thread_registry().garbage_collect(); - get_thread_registry().garbage_collect(); EXPECT_EQ(get_all_tasks().size(), 2); + EXPECT_EQ(get_all_tasks()[0].name, "first child task"); + EXPECT_EQ(get_all_tasks()[1].name, "parent task"); - // resume first coroutine, mark child for deletion and therefore also parent + // marks first child for deletion, parent is still referenced by both first + // and second task-in-registry and will only be marked for deleted when both + // are deleted first_wait.resume(); get_thread_registry().garbage_collect(); - get_thread_registry().garbage_collect(); EXPECT_EQ(get_all_tasks().size(), 0); } From 661119cb9f3ecc944590ee393854cb570f89cbc6 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 5 May 2025 12:34:28 +0200 Subject: [PATCH 15/36] Make deleted property atomic --- .../include/TaskMonitoring/task.h | 42 ++++++++++------ lib/TaskMonitoring/task.cpp | 32 ++++++------- tests/TaskMonitoring/TaskRegistryTest.cpp | 48 +++++++++---------- 3 files changed, 63 insertions(+), 59 deletions(-) diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h 
b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 590a0e751e5d..2b58f3886fa2 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -29,8 +29,10 @@ #include "fmt/format.h" #include "shared_reference.h" +#include #include #include +#include #include namespace arangodb::task_monitoring { @@ -83,8 +85,7 @@ struct Node; using NodeReference = SharedReference; struct ParentTask : std::variant {}; -struct TaskScope; -struct ScheduledTaskScope; +struct Task; /** The task object inside the registry @@ -93,12 +94,30 @@ struct TaskInRegistry { using Snapshot = TaskSnapshot; auto id() -> void* { return this; } auto snapshot() -> TaskSnapshot; - auto set_to_deleted() -> void { deleted = true; } + auto set_to_deleted() -> void { + deleted.store(true, std::memory_order_release); + } + static auto root(std::string name, std::source_location loc) + -> TaskInRegistry { + return TaskInRegistry{.name = std::move(name), + .state = "running", + .parent = ParentTask{RootTask{}}, + .running_thread = basics::ThreadId::current(), + .source_location = std::move(loc)}; + } + static auto child(std::string name, NodeReference parent, + std::source_location loc) -> TaskInRegistry { + return TaskInRegistry{.name = std::move(name), + .state = "running", + .parent = ParentTask{parent}, + .running_thread = basics::ThreadId::current(), + .source_location = std::move(loc)}; + } std::string const name; std::string state; // has to probably be atomic (for reading and writing // concurrently on different threads), but is string... 
- bool deleted = false; + std::atomic deleted = false; ParentTask parent; std::optional running_thread; // proably has to also be atomic because @@ -125,7 +144,10 @@ struct Task { Task(Task const&) = delete; Task& operator=(Task const&) = delete; - Task(TaskInRegistry task_in_registry); + Task(std::string name, + std::source_location loc = std::source_location::current()); + Task(std::string name, Task& parent, + std::source_location loc = std::source_location::current()); auto id() -> void*; @@ -133,14 +155,4 @@ struct Task { NodeReference _node_in_registry; }; -/** Helper type to create a basic task */ -struct BaseTask : public Task { - BaseTask(std::string name, - std::source_location = std::source_location::current()); -}; -struct ChildTask : public Task { - ChildTask(std::string name, Task& parent, - std::source_location = std::source_location::current()); -}; - } // namespace arangodb::task_monitoring diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index f29ac06d0dcc..b7b889a845a0 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -27,6 +27,7 @@ #include "Containers/Concurrent/thread.h" #include "Inspection/Format.h" #include "TaskMonitoring/task_registry_variable.h" +#include #include #include #include @@ -85,7 +86,7 @@ auto deletable_nodes_dependent_on_node(Node* node) reinterpret_cast::Node*>( current_node); // make sure that we don't mark a node twice for deletion - if (specific_node->data.deleted) { + if (specific_node->data.deleted.load(std::memory_order_acquire)) { break; } stack.push_back(specific_node); @@ -113,26 +114,21 @@ auto mark_finished_nodes_for_deletion(Node* node) { } } // namespace -Task::Task(TaskInRegistry task_in_registry) +Task::Task(std::string name, std::source_location loc) : _node_in_registry{NodeReference::create( - reinterpret_cast(get_thread_registry().add( - [&]() { return std::move(task_in_registry); })), + reinterpret_cast(get_thread_registry().add([&]() { + return 
TaskInRegistry::root(std::move(name), std::move(loc)); + })), mark_finished_nodes_for_deletion)} {} -auto Task::id() -> void* { return _node_in_registry->data.id(); } - -BaseTask::BaseTask(std::string name, std::source_location loc) - : Task{TaskInRegistry{.name = std::move(name), - .state = "running", - .parent = ParentTask{RootTask{}}, - .running_thread = basics::ThreadId::current(), - .source_location = std::move(loc)}} {} +Task::Task(std::string name, Task& parent, std::source_location loc) + : _node_in_registry{NodeReference::create( + reinterpret_cast(get_thread_registry().add([&]() { + return TaskInRegistry::child( + std::move(name), parent._node_in_registry, std::move(loc)); + })), + mark_finished_nodes_for_deletion)} {} -ChildTask::ChildTask(std::string name, Task& parent, std::source_location loc) - : Task{TaskInRegistry{.name = std::move(name), - .state = "running", - .parent = ParentTask{parent._node_in_registry}, - .running_thread = basics::ThreadId::current(), - .source_location = std::move(loc)}} {} +auto Task::id() -> void* { return _node_in_registry->data.id(); } } // namespace arangodb::task_monitoring diff --git a/tests/TaskMonitoring/TaskRegistryTest.cpp b/tests/TaskMonitoring/TaskRegistryTest.cpp index 2471c4707a4d..4afb5711c5d1 100644 --- a/tests/TaskMonitoring/TaskRegistryTest.cpp +++ b/tests/TaskMonitoring/TaskRegistryTest.cpp @@ -41,18 +41,15 @@ auto get_all_tasks() -> std::vector { return tasks; } -struct MyBaseTask : public BaseTask { +struct MyTask : public Task { basics::SourceLocationSnapshot source_location; - MyBaseTask(std::string name, - std::source_location loc = std::source_location::current()) - : BaseTask{std::move(name), loc}, + MyTask(std::string name, + std::source_location loc = std::source_location::current()) + : Task{std::move(name), loc}, source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} -}; -struct MyChildTask : public ChildTask { - basics::SourceLocationSnapshot source_location; - 
MyChildTask(std::string name, Task& task, - std::source_location loc = std::source_location::current()) - : ChildTask{std::move(name), task, loc}, + MyTask(std::string name, Task& task, + std::source_location loc = std::source_location::current()) + : Task{std::move(name), task, loc}, source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} }; } // namespace @@ -83,7 +80,7 @@ struct TaskRegistryTest : ::testing::Test { }; TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { - auto task = MyBaseTask{"test task"}; + auto task = MyTask{"test task"}; EXPECT_EQ(get_all_tasks(), (std::vector{(TaskSnapshot{ .name = "test task", @@ -95,8 +92,8 @@ TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { } TEST_F(TaskRegistryTest, creates_a_child_task) { - auto parent_task = MyBaseTask{"parent task"}; - auto child_task = MyChildTask{"child task", parent_task}; + auto parent_task = MyTask{"parent task"}; + auto child_task = MyTask{"child task", parent_task}; EXPECT_EQ( get_all_tasks(), @@ -116,11 +113,11 @@ TEST_F(TaskRegistryTest, creates_a_child_task) { } TEST_F(TaskRegistryTest, creates_a_child_task_hierarchy) { - auto parent_task = MyBaseTask{"parent task"}; - auto child_task = MyChildTask{"child task", parent_task}; - auto child_of_child_task = MyChildTask{"child of child task", child_task}; + auto parent_task = MyTask{"parent task"}; + auto child_task = MyTask{"child task", parent_task}; + auto child_of_child_task = MyTask{"child of child task", child_task}; auto child_of_child_of_child_task = - MyChildTask{"child of child of child task", child_of_child_task}; + MyTask{"child of child of child task", child_of_child_task}; EXPECT_EQ( get_all_tasks(), @@ -173,9 +170,9 @@ struct WaitSlot { TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { WaitSlot wait; { - auto parent_task = MyBaseTask{"parent task"}; + auto parent_task = MyTask{"parent task"}; std::ignore = [&parent_task, &wait]() -> async { - auto child_task = 
MyChildTask{"child task", parent_task}; + auto child_task = MyTask{"child task", parent_task}; EXPECT_EQ( get_all_tasks(), (std::vector{ @@ -209,10 +206,10 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { WaitSlot wait; { - auto parent_task = MyBaseTask{"parent task"}; + auto parent_task = MyTask{"parent task"}; std::ignore = [&parent_task, &wait]() -> async { - auto child_task = MyChildTask{"child task", parent_task}; + auto child_task = MyTask{"child task", parent_task}; EXPECT_EQ( get_all_tasks(), (std::vector{ @@ -231,8 +228,7 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { co_await [&parent_task, &child_task, &wait, first_child_task = get_all_tasks()[0]]() -> async { - auto child_of_child_task = - MyChildTask{"child of child task", child_task}; + auto child_of_child_task = MyTask{"child of child task", child_task}; EXPECT_EQ(get_all_tasks(), (std::vector{ (TaskSnapshot{.name = "child of child task", @@ -274,9 +270,9 @@ TEST_F(TaskRegistryTest, WaitSlot second_wait; TaskSnapshot first_child_task; { - auto parent_task = MyBaseTask{"parent task"}; + auto parent_task = MyTask{"parent task"}; auto a = [&parent_task, &first_wait, &first_child_task]() -> async { - auto child_task = MyChildTask{"first child task", parent_task}; + auto child_task = MyTask{"first child task", parent_task}; EXPECT_EQ( get_all_tasks(), (std::vector{ @@ -297,7 +293,7 @@ TEST_F(TaskRegistryTest, co_return; }(); auto b = [&parent_task, &second_wait, &first_child_task]() -> async { - auto child_task = MyChildTask{"second child task", parent_task}; + auto child_task = MyTask{"second child task", parent_task}; EXPECT_EQ( get_all_tasks(), (std::vector{ From 1478d32d64797206279116fc094562528c97d675 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 5 May 2025 12:04:34 +0200 Subject: [PATCH 16/36] Use an enum instead of a string for the state --- .../include/TaskMonitoring/task.h | 20 +++++++---- 
lib/TaskMonitoring/task.cpp | 3 +- tests/TaskMonitoring/TaskRegistryTest.cpp | 34 +++++++++---------- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 2b58f3886fa2..930518421b5c 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -62,9 +62,17 @@ auto inspect(Inspector& f, ParentTaskSnapshot& x) { inspection::inlineType()); } +enum class State { Created = 0, Running, Finished, Deleted }; +template +auto inspect(Inspector& f, State& x) { + return f.enumeration(x).values(State::Created, "Created", State::Running, + "Running", State::Finished, "Finished", + State::Deleted, "Deleted"); +} + struct TaskSnapshot { std::string name; - std::string state; + State state; void* id; ParentTaskSnapshot parent; std::optional thread; @@ -95,12 +103,12 @@ struct TaskInRegistry { auto id() -> void* { return this; } auto snapshot() -> TaskSnapshot; auto set_to_deleted() -> void { - deleted.store(true, std::memory_order_release); + state.store(State::Deleted, std::memory_order_release); } static auto root(std::string name, std::source_location loc) -> TaskInRegistry { return TaskInRegistry{.name = std::move(name), - .state = "running", + .state = State::Running, .parent = ParentTask{RootTask{}}, .running_thread = basics::ThreadId::current(), .source_location = std::move(loc)}; @@ -108,16 +116,14 @@ struct TaskInRegistry { static auto child(std::string name, NodeReference parent, std::source_location loc) -> TaskInRegistry { return TaskInRegistry{.name = std::move(name), - .state = "running", + .state = State::Running, .parent = ParentTask{parent}, .running_thread = basics::ThreadId::current(), .source_location = std::move(loc)}; } std::string const name; - std::string state; // has to probably be atomic (for reading and writing - // concurrently on different threads), but is string... 
- std::atomic deleted = false; + std::atomic state; ParentTask parent; std::optional running_thread; // proably has to also be atomic because diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index b7b889a845a0..6cad341fbaad 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -86,7 +86,8 @@ auto deletable_nodes_dependent_on_node(Node* node) reinterpret_cast::Node*>( current_node); // make sure that we don't mark a node twice for deletion - if (specific_node->data.deleted.load(std::memory_order_acquire)) { + if (specific_node->data.state.load(std::memory_order_acquire) == + State::Deleted) { break; } stack.push_back(specific_node); diff --git a/tests/TaskMonitoring/TaskRegistryTest.cpp b/tests/TaskMonitoring/TaskRegistryTest.cpp index 4afb5711c5d1..1f41a911d58b 100644 --- a/tests/TaskMonitoring/TaskRegistryTest.cpp +++ b/tests/TaskMonitoring/TaskRegistryTest.cpp @@ -84,7 +84,7 @@ TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { EXPECT_EQ(get_all_tasks(), (std::vector{(TaskSnapshot{ .name = "test task", - .state = "running", + .state = State::Running, .id = task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -99,13 +99,13 @@ TEST_F(TaskRegistryTest, creates_a_child_task) { get_all_tasks(), (std::vector{ (TaskSnapshot{.name = "child task", - .state = "running", + .state = State::Running, .id = child_task.id(), .parent = {TaskIdWrapper{parent_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_task.source_location}), (TaskSnapshot{.name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -124,26 +124,26 @@ TEST_F(TaskRegistryTest, creates_a_child_task_hierarchy) { (std::vector{ (TaskSnapshot{ .name = "child of child of child task", - .state = "running", + .state = State::Running, .id = child_of_child_of_child_task.id(), .parent = 
{TaskIdWrapper{child_of_child_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_of_child_of_child_task.source_location}), (TaskSnapshot{ .name = "child of child task", - .state = "running", + .state = State::Running, .id = child_of_child_task.id(), .parent = {TaskIdWrapper{child_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_of_child_task.source_location}), (TaskSnapshot{.name = "child task", - .state = "running", + .state = State::Running, .id = child_task.id(), .parent = {TaskIdWrapper{parent_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_task.source_location}), (TaskSnapshot{.name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -177,13 +177,13 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { get_all_tasks(), (std::vector{ (TaskSnapshot{.name = "child task", - .state = "running", + .state = State::Running, .id = child_task.id(), .parent = {TaskIdWrapper{parent_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_task.source_location}), (TaskSnapshot{.name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -214,13 +214,13 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { get_all_tasks(), (std::vector{ (TaskSnapshot{.name = "child task", - .state = "running", + .state = State::Running, .id = child_task.id(), .parent = {TaskIdWrapper{parent_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_task.source_location}), (TaskSnapshot{.name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -232,7 +232,7 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { EXPECT_EQ(get_all_tasks(), 
(std::vector{ (TaskSnapshot{.name = "child of child task", - .state = "running", + .state = State::Running, .id = child_of_child_task.id(), .parent = {TaskIdWrapper{child_task.id()}}, .thread = basics::ThreadId::current(), @@ -241,7 +241,7 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { first_child_task, (TaskSnapshot{ .name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -277,13 +277,13 @@ TEST_F(TaskRegistryTest, get_all_tasks(), (std::vector{ (TaskSnapshot{.name = "first child task", - .state = "running", + .state = State::Running, .id = child_task.id(), .parent = {TaskIdWrapper{parent_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_task.source_location}), (TaskSnapshot{.name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), @@ -298,14 +298,14 @@ TEST_F(TaskRegistryTest, get_all_tasks(), (std::vector{ (TaskSnapshot{.name = "second child task", - .state = "running", + .state = State::Running, .id = child_task.id(), .parent = {TaskIdWrapper{parent_task.id()}}, .thread = basics::ThreadId::current(), .source_location = child_task.source_location}), first_child_task, (TaskSnapshot{.name = "parent task", - .state = "running", + .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), From 0ff598289e7098c985cd1baa8e22fab723d3ba72 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 5 May 2025 13:12:49 +0200 Subject: [PATCH 17/36] Set state to Finished when task goes out of scope --- .../include/TaskMonitoring/task.h | 5 + lib/TaskMonitoring/task.cpp | 4 + tests/TaskMonitoring/TaskRegistryTest.cpp | 209 +++++++++++------- 3 files changed, 137 insertions(+), 81 deletions(-) diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h 
b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 930518421b5c..1a0c1b7bdbb8 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -78,6 +78,10 @@ struct TaskSnapshot { std::optional thread; basics::SourceLocationSnapshot source_location; bool operator==(TaskSnapshot const&) const = default; + auto update_state(State new_state) -> TaskSnapshot& { + state = new_state; + return *this; + } }; template auto inspect(Inspector& f, TaskSnapshot& x) { @@ -154,6 +158,7 @@ struct Task { std::source_location loc = std::source_location::current()); Task(std::string name, Task& parent, std::source_location loc = std::source_location::current()); + ~Task(); auto id() -> void*; diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index 6cad341fbaad..afc8da4dd10e 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -129,6 +129,10 @@ Task::Task(std::string name, Task& parent, std::source_location loc) std::move(name), parent._node_in_registry, std::move(loc)); })), mark_finished_nodes_for_deletion)} {} +Task::~Task() { + _node_in_registry->data.state.store(State::Finished, + std::memory_order_relaxed); +} auto Task::id() -> void* { return _node_in_registry->data.id(); } diff --git a/tests/TaskMonitoring/TaskRegistryTest.cpp b/tests/TaskMonitoring/TaskRegistryTest.cpp index 1f41a911d58b..c6e1e6afc15b 100644 --- a/tests/TaskMonitoring/TaskRegistryTest.cpp +++ b/tests/TaskMonitoring/TaskRegistryTest.cpp @@ -23,6 +23,7 @@ #include "Async/async.h" #include "Containers/Concurrent/thread.h" +#include "TaskMonitoring/task.h" #include "TaskMonitoring/task_registry_variable.h" #include "Inspection/JsonPrintInspector.h" #include @@ -52,6 +53,7 @@ struct MyTask : public Task { : Task{std::move(name), task, loc}, source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} }; + } // namespace struct TaskRegistryTest : ::testing::Test { @@ -169,25 +171,37 @@ struct 
WaitSlot { TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { WaitSlot wait; + TaskSnapshot parent_task_snapshot; + TaskSnapshot child_task_snapshot; { auto parent_task = MyTask{"parent task"}; - std::ignore = [&parent_task, &wait]() -> async { - auto child_task = MyTask{"child task", parent_task}; - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location}), + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 1); + EXPECT_EQ(tasks_in_registry[0], (TaskSnapshot{.name = "parent task", .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); + .source_location = parent_task.source_location})); + parent_task_snapshot = tasks_in_registry[0]; + + std::ignore = [&parent_task, &wait, parent_task_snapshot, + &child_task_snapshot]() -> async { + auto child_task = MyTask{"child task", parent_task}; + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 2); + EXPECT_EQ(tasks_in_registry[0], + (TaskSnapshot{.name = "child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); + child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], parent_task_snapshot); co_await wait; co_return; }(); @@ -195,8 +209,12 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { // both task-in-registries still exist: // child lives in suspended coroutine and references parent + // although parent scope is deleted get_thread_registry().garbage_collect(); // does not do anything - EXPECT_EQ(get_all_tasks().size(), 2); + 
EXPECT_EQ(get_all_tasks(), + (std::vector{ + child_task_snapshot, + parent_task_snapshot.update_state(State::Finished)})); // resume coroutine, mark child for deletion at end of coroutine and mark // parent for deletion at end of scope @@ -205,47 +223,57 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { WaitSlot wait; + TaskSnapshot parent_task_snapshot; + TaskSnapshot child_task_snapshot; + TaskSnapshot child_of_child_task_snapshot; { auto parent_task = MyTask{"parent task"}; - - std::ignore = [&parent_task, &wait]() -> async { - auto child_task = MyTask{"child task", parent_task}; - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location}), + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 1); + EXPECT_EQ(tasks_in_registry[0], (TaskSnapshot{.name = "parent task", .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); + .source_location = parent_task.source_location})); + parent_task_snapshot = tasks_in_registry[0]; + + std::ignore = [&parent_task, &wait, parent_task_snapshot, + &child_task_snapshot, + &child_of_child_task_snapshot]() -> async { + auto child_task = MyTask{"child task", parent_task}; + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 2); + EXPECT_EQ(tasks_in_registry[0], + (TaskSnapshot{.name = "child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); + child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], 
parent_task_snapshot); - co_await [&parent_task, &child_task, &wait, - first_child_task = get_all_tasks()[0]]() -> async { + co_await [&child_task, &wait, parent_task_snapshot, child_task_snapshot, + &child_of_child_task_snapshot]() -> async { auto child_of_child_task = MyTask{"child of child task", child_task}; - EXPECT_EQ(get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "child of child task", - .state = State::Running, - .id = child_of_child_task.id(), - .parent = {TaskIdWrapper{child_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = - child_of_child_task.source_location}), - first_child_task, - (TaskSnapshot{ - .name = "parent task", - .state = State::Running, - .id = parent_task.id(), - .parent = {RootTask{}}, - .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 3); + EXPECT_EQ(tasks_in_registry[0], + (TaskSnapshot{ + .name = "child of child task", + .state = State::Running, + .id = child_of_child_task.id(), + .parent = {TaskIdWrapper{child_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_of_child_task.source_location})); + child_of_child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], child_task_snapshot); + EXPECT_EQ(tasks_in_registry[2], parent_task_snapshot); + co_await wait; co_return; }(); @@ -257,7 +285,10 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { // both task-in-registries still exist: // child lives in suspended coroutine and references parent get_thread_registry().garbage_collect(); // does not do anything - EXPECT_EQ(get_all_tasks().size(), 3); + EXPECT_EQ(get_all_tasks(), + (std::vector{ + child_of_child_task_snapshot, child_task_snapshot, + parent_task_snapshot.update_state(State::Finished)})); // resume coroutine, mark child of child for deletion, child and parent are // marked for deletion when child of child is 
destroyed @@ -268,48 +299,60 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_longest_living_child) { WaitSlot first_wait; WaitSlot second_wait; - TaskSnapshot first_child_task; + TaskSnapshot parent_task_snapshot; + TaskSnapshot first_child_task_snapshot; + TaskSnapshot second_child_task_snapshot; { auto parent_task = MyTask{"parent task"}; - auto a = [&parent_task, &first_wait, &first_child_task]() -> async { - auto child_task = MyTask{"first child task", parent_task}; - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "first child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location}), + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 1); + EXPECT_EQ(tasks_in_registry[0], (TaskSnapshot{.name = "parent task", .state = State::Running, .id = parent_task.id(), .parent = {RootTask{}}, .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); - first_child_task = get_all_tasks()[0]; + .source_location = parent_task.source_location})); + parent_task_snapshot = tasks_in_registry[0]; + + std::ignore = [&parent_task, &first_wait, parent_task_snapshot, + &first_child_task_snapshot]() -> async { + auto child_task = MyTask{"first child task", parent_task}; + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 2); + EXPECT_EQ(tasks_in_registry[0], + (TaskSnapshot{.name = "first child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); + first_child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], parent_task_snapshot); + co_await first_wait; co_return; }(); - auto b = [&parent_task, &second_wait, &first_child_task]() -> async { + + 
std::ignore = [&parent_task, &second_wait, parent_task_snapshot, + first_child_task_snapshot, + &second_child_task_snapshot]() -> async { auto child_task = MyTask{"second child task", parent_task}; - EXPECT_EQ( - get_all_tasks(), - (std::vector{ - (TaskSnapshot{.name = "second child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location}), - first_child_task, - (TaskSnapshot{.name = "parent task", - .state = State::Running, - .id = parent_task.id(), - .parent = {RootTask{}}, - .thread = basics::ThreadId::current(), - .source_location = parent_task.source_location})})); + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 3); + EXPECT_EQ(tasks_in_registry[0], + (TaskSnapshot{.name = "second child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); + second_child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], first_child_task_snapshot); + EXPECT_EQ(tasks_in_registry[2], parent_task_snapshot); + co_await second_wait; co_return; }(); @@ -318,14 +361,18 @@ TEST_F(TaskRegistryTest, // all three task-in-registries still exist: // childs live in suspended coroutines and reference parent get_thread_registry().garbage_collect(); // does not do anything - EXPECT_EQ(get_all_tasks().size(), 3); + EXPECT_EQ(get_all_tasks(), + (std::vector{ + second_child_task_snapshot, first_child_task_snapshot, + parent_task_snapshot.update_state(State::Finished)})); // marks second child for deletion, parent is still in scope second_wait.resume(); get_thread_registry().garbage_collect(); - EXPECT_EQ(get_all_tasks().size(), 2); - EXPECT_EQ(get_all_tasks()[0].name, "first child task"); - EXPECT_EQ(get_all_tasks()[1].name, "parent task"); + 
EXPECT_EQ(get_all_tasks(), + (std::vector{ + first_child_task_snapshot, + parent_task_snapshot.update_state(State::Finished)})); // marks first child for deletion, parent is still referenced by both first // and second task-in-registry and will only be marked for deleted when both From 45bd9949499c64e80e24f3c825bcc978816e3cf5 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 5 May 2025 14:43:30 +0200 Subject: [PATCH 18/36] Make really sure that node is not marked for deletion twice --- lib/Containers/Concurrent/ThreadOwnedList.h | 2 ++ .../include/TaskMonitoring/task.h | 7 +++-- .../TaskMonitoring/task_registry_variable.h | 1 - lib/TaskMonitoring/task.cpp | 31 +++++-------------- 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/lib/Containers/Concurrent/ThreadOwnedList.h b/lib/Containers/Concurrent/ThreadOwnedList.h index 7c9721e8e8af..95df13e18b05 100644 --- a/lib/Containers/Concurrent/ThreadOwnedList.h +++ b/lib/Containers/Concurrent/ThreadOwnedList.h @@ -165,6 +165,8 @@ struct ThreadOwnedList Can be called from any thread. The node needs to be part of the list, crashes otherwise. + Caller needs to make sure that this is not called twice: otherwise there + will be a double free. 
*/ auto mark_for_deletion(Node* node) noexcept -> void { // makes sure that node is really in this list diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 1a0c1b7bdbb8..8c9cbaebdbc9 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -128,6 +128,7 @@ struct TaskInRegistry { std::string const name; std::atomic state; + std::atomic isDeleted = false; ParentTask parent; std::optional running_thread; // proably has to also be atomic because @@ -144,8 +145,10 @@ struct Node : public containers::ThreadOwnedList::Node {}; struct ChildTask; /** - This task adds an entry to the task registry on construction and mark the - entry for deletion on destruction. + This is a scope for an active task. + + It adds an entry to the task registry on construction and sets its + state to finished on destruction. */ struct Task { friend ChildTask; diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h index 94528374fdbf..8484307be7b6 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h @@ -31,7 +31,6 @@ namespace arangodb::task_monitoring { using ThreadRegistry = containers::ThreadOwnedList; struct Registry : public containers::ListOfNonOwnedLists {}; - /** Global variable that holds all active tasks. 
diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index afc8da4dd10e..cc20513e7153 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -68,48 +68,33 @@ auto TaskInRegistry::snapshot() -> TaskSnapshot { } namespace { -/** - Gives a stack of nodes that can be marked for deletion - - We first have to go up in the hierarchy and collect all nodes that can be - marked for deletion: Otherwise a garbage collection could run in between and - destroy an already marked for deletion node while we are working on its - parent ptr. - */ -auto deletable_nodes_dependent_on_node(Node* node) - -> std::vector::Node*> { - auto stack = - std::vector::Node*>{}; +auto mark_finished_nodes_for_deletion(Node* node) { auto current_node = node; while (true) { auto specific_node = reinterpret_cast::Node*>( current_node); + // make sure that we don't mark a node twice for deletion - if (specific_node->data.state.load(std::memory_order_acquire) == - State::Deleted) { + auto expected = false; + if (not specific_node->data.isDeleted.compare_exchange_strong( + expected, true, std::memory_order_acq_rel)) { break; } - stack.push_back(specific_node); auto& parent = specific_node->data.parent; if (not std::holds_alternative(parent)) { + specific_node->list->mark_for_deletion(specific_node); break; } auto& parent_ref = std::get(parent); if (parent_ref.ref_count() != 1) { + specific_node->list->mark_for_deletion(specific_node); break; } // node is last reference to parent, therefore it can be marked for deletion current_node = parent_ref.get(); - } - return stack; -} -auto mark_finished_nodes_for_deletion(Node* node) { - auto stack = deletable_nodes_dependent_on_node(node); - while (!stack.empty()) { - auto specific_node = stack.back(); - stack.pop_back(); + specific_node->list->mark_for_deletion(specific_node); } } From 38ddf2c21d09600189ccb2a95ec426fe60a9901b Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 5 May 2025 18:53:30 +0200 Subject: [PATCH 
19/36] Thread knows its current task --- lib/Async/CMakeLists.txt | 17 +- lib/Async/include/Async/async.h | 1 - lib/Async/include/Async/context.h | 6 +- lib/Async/include/CMakeLists.txt | 2 - .../include/TaskMonitoring/shared_reference.h | 4 +- .../include/TaskMonitoring/task.h | 11 +- lib/TaskMonitoring/task.cpp | 32 ++- tests/TaskMonitoring/TaskRegistryTest.cpp | 251 +++++++++++++----- 8 files changed, 234 insertions(+), 90 deletions(-) delete mode 100644 lib/Async/include/CMakeLists.txt diff --git a/lib/Async/CMakeLists.txt b/lib/Async/CMakeLists.txt index 865fc6bbb965..47b325d4e202 100644 --- a/lib/Async/CMakeLists.txt +++ b/lib/Async/CMakeLists.txt @@ -1,15 +1,18 @@ +add_library(arango_async_interface INTERFACE) +target_include_directories(arango_async_interface INTERFACE + ${PROJECT_SOURCE_DIR}/lib + include) +target_link_libraries(arango_async_interface INTERFACE + arango_task_registry) + add_library(arango_async INTERFACE) -target_include_directories(arango_async - INTERFACE - include -) +target_include_directories(arango_async INTERFACE + include) -target_link_libraries(arango_async - INTERFACE +target_link_libraries(arango_async INTERFACE arango_async_registry arango_async_interface arango_basic_utils ) -add_subdirectory(include) add_subdirectory(Registry) diff --git a/lib/Async/include/Async/async.h b/lib/Async/include/Async/async.h index 2ab04cdb5f4e..bebb084dd1a9 100644 --- a/lib/Async/include/Async/async.h +++ b/lib/Async/include/Async/async.h @@ -5,7 +5,6 @@ #include "Async/expected.h" #include "Async/Registry/promise.h" #include "Async/Registry/registry_variable.h" -#include "Utils/ExecContext.h" #include "Inspection/Format.h" #include diff --git a/lib/Async/include/Async/context.h b/lib/Async/include/Async/context.h index ff2d35b74408..a34219c87f63 100644 --- a/lib/Async/include/Async/context.h +++ b/lib/Async/include/Async/context.h @@ -23,6 +23,7 @@ #pragma once #include "Async/Registry/promise.h" +#include "TaskMonitoring/task.h" #include 
"Utils/ExecContext.h" namespace arangodb { @@ -30,14 +31,17 @@ namespace arangodb { struct Context { std::shared_ptr _execContext; async_registry::Requester _requester; + task_monitoring::Task* _task; Context() : _execContext{ExecContext::currentAsShared()}, - _requester{*async_registry::get_current_coroutine()} {} + _requester{*async_registry::get_current_coroutine()}, + _task{*task_monitoring::get_current_task()} {} auto set() -> void { ExecContext::set(_execContext); *async_registry::get_current_coroutine() = _requester; + *task_monitoring::get_current_task() = _task; } }; diff --git a/lib/Async/include/CMakeLists.txt b/lib/Async/include/CMakeLists.txt deleted file mode 100644 index 04d3d52d4838..000000000000 --- a/lib/Async/include/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_library(arango_async_interface INTERFACE) -target_include_directories(arango_async_interface INTERFACE .) diff --git a/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h b/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h index 2903dc221081..623de77329ee 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/shared_reference.h @@ -74,13 +74,15 @@ struct SharedReference { auto operator=(SharedReference const& other) -> SharedReference { _shared_node = other._shared_node; _shared_node->increment(); + return *this; } SharedReference(SharedReference&& other) : _shared_node{other._shared_node} { other._shared_node = nullptr; } - auto operator=(SharedReference&& other) -> SharedReference { + auto operator=(SharedReference&& other) -> SharedReference& { _shared_node = other._shared_node; other._shared_node = nullptr; + return *this; } ~SharedReference() { if (_shared_node) { diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task.h b/lib/TaskMonitoring/include/TaskMonitoring/task.h index 8c9cbaebdbc9..7c642f07a616 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task.h +++ 
b/lib/TaskMonitoring/include/TaskMonitoring/task.h @@ -121,7 +121,7 @@ struct TaskInRegistry { std::source_location loc) -> TaskInRegistry { return TaskInRegistry{.name = std::move(name), .state = State::Running, - .parent = ParentTask{parent}, + .parent = ParentTask{std::move(parent)}, .running_thread = basics::ThreadId::current(), .source_location = std::move(loc)}; } @@ -152,21 +152,22 @@ struct ChildTask; */ struct Task { friend ChildTask; - Task(Task&& other) = default; - Task& operator=(Task&& other) = default; + Task(Task&& other) = delete; + Task& operator=(Task&& other) = delete; Task(Task const&) = delete; Task& operator=(Task const&) = delete; Task(std::string name, std::source_location loc = std::source_location::current()); - Task(std::string name, Task& parent, - std::source_location loc = std::source_location::current()); ~Task(); auto id() -> void*; private: + Task* parent; NodeReference _node_in_registry; }; +auto get_current_task() -> Task**; + } // namespace arangodb::task_monitoring diff --git a/lib/TaskMonitoring/task.cpp b/lib/TaskMonitoring/task.cpp index cc20513e7153..41de984de286 100644 --- a/lib/TaskMonitoring/task.cpp +++ b/lib/TaskMonitoring/task.cpp @@ -31,6 +31,7 @@ #include #include #include +#include // helper type for the visitor namespace { @@ -42,10 +43,13 @@ template overloaded(Ts...) 
-> overloaded; } // namespace -namespace arangodb::task_monitoring { +using namespace arangodb; +using namespace arangodb::task_monitoring; -void PrintTo(const TaskSnapshot& task, std::ostream* os) { - *os << inspection::json(task); +void arangodb::task_monitoring::PrintTo(const TaskSnapshot& task, + std::ostream* os) { + *os << task.id << "| " << task.name << " - " << inspection::json(task.parent); + // inspection::json(task); } auto TaskInRegistry::snapshot() -> TaskSnapshot { @@ -103,22 +107,26 @@ auto mark_finished_nodes_for_deletion(Node* node) { Task::Task(std::string name, std::source_location loc) : _node_in_registry{NodeReference::create( reinterpret_cast(get_thread_registry().add([&]() { + if (auto current = *get_current_task(); current != nullptr) { + return TaskInRegistry::child( + std::move(name), current->_node_in_registry, std::move(loc)); + } return TaskInRegistry::root(std::move(name), std::move(loc)); })), - mark_finished_nodes_for_deletion)} {} + mark_finished_nodes_for_deletion)} { + parent = *get_current_task(); + *get_current_task() = this; +} -Task::Task(std::string name, Task& parent, std::source_location loc) - : _node_in_registry{NodeReference::create( - reinterpret_cast(get_thread_registry().add([&]() { - return TaskInRegistry::child( - std::move(name), parent._node_in_registry, std::move(loc)); - })), - mark_finished_nodes_for_deletion)} {} Task::~Task() { _node_in_registry->data.state.store(State::Finished, std::memory_order_relaxed); + *get_current_task() = parent; } auto Task::id() -> void* { return _node_in_registry->data.id(); } -} // namespace arangodb::task_monitoring +auto arangodb::task_monitoring::get_current_task() -> Task** { + static thread_local Task* current = nullptr; + return ¤t; +} diff --git a/tests/TaskMonitoring/TaskRegistryTest.cpp b/tests/TaskMonitoring/TaskRegistryTest.cpp index c6e1e6afc15b..3897ecc3622a 100644 --- a/tests/TaskMonitoring/TaskRegistryTest.cpp +++ b/tests/TaskMonitoring/TaskRegistryTest.cpp @@ 
-48,10 +48,6 @@ struct MyTask : public Task { std::source_location loc = std::source_location::current()) : Task{std::move(name), loc}, source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} - MyTask(std::string name, Task& task, - std::source_location loc = std::source_location::current()) - : Task{std::move(name), task, loc}, - source_location{basics::SourceLocationSnapshot::from(std::move(loc))} {} }; } // namespace @@ -95,7 +91,7 @@ TEST_F(TaskRegistryTest, a_base_task_creates_a_root_task) { TEST_F(TaskRegistryTest, creates_a_child_task) { auto parent_task = MyTask{"parent task"}; - auto child_task = MyTask{"child task", parent_task}; + auto child_task = MyTask{"child task"}; EXPECT_EQ( get_all_tasks(), @@ -116,10 +112,9 @@ TEST_F(TaskRegistryTest, creates_a_child_task) { TEST_F(TaskRegistryTest, creates_a_child_task_hierarchy) { auto parent_task = MyTask{"parent task"}; - auto child_task = MyTask{"child task", parent_task}; - auto child_of_child_task = MyTask{"child of child task", child_task}; - auto child_of_child_of_child_task = - MyTask{"child of child of child task", child_of_child_task}; + auto child_task = MyTask{"child task"}; + auto child_of_child_task = MyTask{"child of child task"}; + auto child_of_child_of_child_task = MyTask{"child of child of child task"}; EXPECT_EQ( get_all_tasks(), @@ -152,6 +147,48 @@ TEST_F(TaskRegistryTest, creates_a_child_task_hierarchy) { .source_location = parent_task.source_location})})); } +TEST_F(TaskRegistryTest, uses_correct_parent_task) { + auto parent_task = MyTask{"parent task"}; + { + auto first_child_task = MyTask{"first child task"}; + + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "first child task", + .state = State::Running, + .id = first_child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = first_child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = State::Running, + 
.id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); + } + get_thread_registry().garbage_collect(); + + auto second_child_task = MyTask{"second child task"}; + + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "second child task", + .state = State::Running, + .id = second_child_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = second_child_task.source_location}), + (TaskSnapshot{.name = "parent task", + .state = State::Running, + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})})); +} + struct WaitSlot { void resume() { ready = true; @@ -187,19 +224,20 @@ TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { .source_location = parent_task.source_location})); parent_task_snapshot = tasks_in_registry[0]; - std::ignore = [&parent_task, &wait, parent_task_snapshot, + std::ignore = [&wait, parent_task_snapshot, &child_task_snapshot]() -> async { - auto child_task = MyTask{"child task", parent_task}; + auto child_task = MyTask{"child task"}; auto tasks_in_registry = get_all_tasks(); EXPECT_EQ(tasks_in_registry.size(), 2); - EXPECT_EQ(tasks_in_registry[0], - (TaskSnapshot{.name = "child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location})); + EXPECT_EQ( + tasks_in_registry[0], + (TaskSnapshot{.name = "child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task_snapshot.id}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); child_task_snapshot = tasks_in_registry[0]; EXPECT_EQ(tasks_in_registry[1], parent_task_snapshot); co_await wait; @@ -221,6 +259,77 @@ 
TEST_F(TaskRegistryTest, a_base_task_lives_as_long_as_its_child) { wait.resume(); } +TEST_F(TaskRegistryTest, create_another_task_after_child_suspended) { + WaitSlot wait; + TaskSnapshot parent_task_snapshot; + TaskSnapshot child_task_snapshot; + { + auto parent_task = MyTask{"parent task"}; + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 1); + EXPECT_EQ(tasks_in_registry[0], + (TaskSnapshot{.name = "parent task", + .state = State::Running, + .id = parent_task.id(), + .parent = {RootTask{}}, + .thread = basics::ThreadId::current(), + .source_location = parent_task.source_location})); + parent_task_snapshot = tasks_in_registry[0]; + + std::ignore = [&wait, parent_task_snapshot, + &child_task_snapshot]() -> async { + auto child_task = MyTask{"child task"}; + + auto tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 2); + EXPECT_EQ( + tasks_in_registry[0], + (TaskSnapshot{.name = "child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task_snapshot.id}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); + child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], parent_task_snapshot); + co_await wait; + co_return; + }(); + + auto some_other_task = MyTask{"some other task"}; + + EXPECT_EQ( + get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "some other task", + .state = State::Running, + .id = some_other_task.id(), + .parent = {TaskIdWrapper{parent_task.id()}}, + .thread = basics::ThreadId::current(), + .source_location = some_other_task.source_location}), + child_task_snapshot, parent_task_snapshot})); + } + + auto another_task = MyTask{"another task"}; + + get_thread_registry().garbage_collect(); // deletes some_other_task + EXPECT_EQ(get_all_tasks(), + (std::vector{ + (TaskSnapshot{.name = "another task", + .state = State::Running, + .id = another_task.id(), + .parent = {RootTask{}}, + .thread = 
basics::ThreadId::current(), + .source_location = another_task.source_location}), + child_task_snapshot, + parent_task_snapshot.update_state(State::Finished)})); + + // resume coroutine, mark child for deletion at end of coroutine and mark + // parent for deletion at end of scope + wait.resume(); +} + TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { WaitSlot wait; TaskSnapshot parent_task_snapshot; @@ -239,26 +348,26 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { .source_location = parent_task.source_location})); parent_task_snapshot = tasks_in_registry[0]; - std::ignore = [&parent_task, &wait, parent_task_snapshot, - &child_task_snapshot, + std::ignore = [&wait, parent_task_snapshot, &child_task_snapshot, &child_of_child_task_snapshot]() -> async { - auto child_task = MyTask{"child task", parent_task}; + auto child_task = MyTask{"child task"}; auto tasks_in_registry = get_all_tasks(); EXPECT_EQ(tasks_in_registry.size(), 2); - EXPECT_EQ(tasks_in_registry[0], - (TaskSnapshot{.name = "child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location})); + EXPECT_EQ( + tasks_in_registry[0], + (TaskSnapshot{.name = "child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task_snapshot.id}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); child_task_snapshot = tasks_in_registry[0]; EXPECT_EQ(tasks_in_registry[1], parent_task_snapshot); - co_await [&child_task, &wait, parent_task_snapshot, child_task_snapshot, + co_await [&wait, parent_task_snapshot, child_task_snapshot, &child_of_child_task_snapshot]() -> async { - auto child_of_child_task = MyTask{"child of child task", child_task}; + auto child_of_child_task = MyTask{"child of child task"}; auto tasks_in_registry = get_all_tasks(); EXPECT_EQ(tasks_in_registry.size(), 3); 
@@ -267,7 +376,7 @@ TEST_F(TaskRegistryTest, hierarchy_with_different_scopes) { .name = "child of child task", .state = State::Running, .id = child_of_child_task.id(), - .parent = {TaskIdWrapper{child_task.id()}}, + .parent = {TaskIdWrapper{child_task_snapshot.id}}, .thread = basics::ThreadId::current(), .source_location = child_of_child_task.source_location})); child_of_child_task_snapshot = tasks_in_registry[0]; @@ -302,6 +411,7 @@ TEST_F(TaskRegistryTest, TaskSnapshot parent_task_snapshot; TaskSnapshot first_child_task_snapshot; TaskSnapshot second_child_task_snapshot; + TaskSnapshot child_of_second_child_task_snapshot; { auto parent_task = MyTask{"parent task"}; auto tasks_in_registry = get_all_tasks(); @@ -315,19 +425,20 @@ TEST_F(TaskRegistryTest, .source_location = parent_task.source_location})); parent_task_snapshot = tasks_in_registry[0]; - std::ignore = [&parent_task, &first_wait, parent_task_snapshot, + std::ignore = [&first_wait, parent_task_snapshot, &first_child_task_snapshot]() -> async { - auto child_task = MyTask{"first child task", parent_task}; + auto child_task = MyTask{"first child task"}; auto tasks_in_registry = get_all_tasks(); EXPECT_EQ(tasks_in_registry.size(), 2); - EXPECT_EQ(tasks_in_registry[0], - (TaskSnapshot{.name = "first child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location})); + EXPECT_EQ( + tasks_in_registry[0], + (TaskSnapshot{.name = "first child task", + .state = State::Running, + .id = child_task.id(), + .parent = {TaskIdWrapper{parent_task_snapshot.id}}, + .thread = basics::ThreadId::current(), + .source_location = child_task.source_location})); first_child_task_snapshot = tasks_in_registry[0]; EXPECT_EQ(tasks_in_registry[1], parent_task_snapshot); @@ -335,38 +446,58 @@ TEST_F(TaskRegistryTest, co_return; }(); - std::ignore = [&parent_task, &second_wait, parent_task_snapshot, - 
first_child_task_snapshot, - &second_child_task_snapshot]() -> async { - auto child_task = MyTask{"second child task", parent_task}; + auto second_child_task = MyTask{"second child task"}; + tasks_in_registry = get_all_tasks(); + EXPECT_EQ(tasks_in_registry.size(), 3); + EXPECT_EQ( + tasks_in_registry[0], + (TaskSnapshot{.name = "second child task", + .state = State::Running, + .id = second_child_task.id(), + .parent = {TaskIdWrapper{parent_task_snapshot.id}}, + .thread = basics::ThreadId::current(), + .source_location = second_child_task.source_location})); + EXPECT_EQ(tasks_in_registry[1], first_child_task_snapshot); + EXPECT_EQ(tasks_in_registry[2], parent_task_snapshot); + second_child_task_snapshot = tasks_in_registry[0]; + + std::ignore = [&second_wait, parent_task_snapshot, + first_child_task_snapshot, second_child_task_snapshot, + &child_of_second_child_task_snapshot]() -> async { + auto child_of_child_task = MyTask{"child of second child task"}; auto tasks_in_registry = get_all_tasks(); - EXPECT_EQ(tasks_in_registry.size(), 3); + EXPECT_EQ(tasks_in_registry.size(), 4); EXPECT_EQ(tasks_in_registry[0], - (TaskSnapshot{.name = "second child task", - .state = State::Running, - .id = child_task.id(), - .parent = {TaskIdWrapper{parent_task.id()}}, - .thread = basics::ThreadId::current(), - .source_location = child_task.source_location})); - second_child_task_snapshot = tasks_in_registry[0]; - EXPECT_EQ(tasks_in_registry[1], first_child_task_snapshot); - EXPECT_EQ(tasks_in_registry[2], parent_task_snapshot); + (TaskSnapshot{ + .name = "child of second child task", + .state = State::Running, + .id = child_of_child_task.id(), + .parent = {TaskIdWrapper{second_child_task_snapshot.id}}, + .thread = basics::ThreadId::current(), + .source_location = child_of_child_task.source_location})); + child_of_second_child_task_snapshot = tasks_in_registry[0]; + EXPECT_EQ(tasks_in_registry[1], second_child_task_snapshot); + EXPECT_EQ(tasks_in_registry[2], 
first_child_task_snapshot); + EXPECT_EQ(tasks_in_registry[3], parent_task_snapshot); co_await second_wait; co_return; }(); } - // all three task-in-registries still exist: - // childs live in suspended coroutines and reference parent + // all four task-in-registries still exist: + // childs live in suspended coroutines and reference its parents get_thread_registry().garbage_collect(); // does not do anything EXPECT_EQ(get_all_tasks(), (std::vector{ - second_child_task_snapshot, first_child_task_snapshot, + child_of_second_child_task_snapshot, + second_child_task_snapshot.update_state(State::Finished), + first_child_task_snapshot, parent_task_snapshot.update_state(State::Finished)})); - // marks second child for deletion, parent is still in scope + // marks child of second child and second child for deletion, parent is still + // in scope second_wait.resume(); get_thread_registry().garbage_collect(); EXPECT_EQ(get_all_tasks(), @@ -374,9 +505,7 @@ TEST_F(TaskRegistryTest, first_child_task_snapshot, parent_task_snapshot.update_state(State::Finished)})); - // marks first child for deletion, parent is still referenced by both first - // and second task-in-registry and will only be marked for deleted when both - // are deleted + // marks first child and parent for deletion first_wait.resume(); get_thread_registry().garbage_collect(); EXPECT_EQ(get_all_tasks().size(), 0); From ef6b4fe85a8516b902a9c01d4758c53057932764 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 29 Apr 2025 12:15:33 +0200 Subject: [PATCH 20/36] Rename AsyncRegistryServer/ to SystemMonitor/AsyncRegistry --- arangod/AsyncRegistryServer/PrettyPrinter/.gdbinit | 8 -------- arangod/CMakeLists.txt | 2 +- arangod/GeneralServer/GeneralServerFeature.cpp | 2 +- arangod/RestServer/arangod_includes.h | 2 +- .../AsyncRegistry}/CMakeLists.txt | 0 .../AsyncRegistry}/Feature.cpp | 0 .../AsyncRegistry}/Feature.h | 2 +- .../AsyncRegistry}/Metrics.cpp | 0 .../AsyncRegistry}/Metrics.h | 0 
.../SystemMonitor/AsyncRegistry/PrettyPrinter/.gdbinit | 8 ++++++++ .../AsyncRegistry}/PrettyPrinter/README.md | 0 .../AsyncRegistry}/PrettyPrinter/__init__.py | 0 .../PrettyPrinter/src/asyncregistry/__init__.py | 0 .../PrettyPrinter/src/asyncregistry/gdb_data.py | 0 .../PrettyPrinter/src/asyncregistry/gdb_forest.py | 0 .../PrettyPrinter/src/asyncregistry/gdb_printer.py | 0 .../PrettyPrinter/src/asyncregistry/stacktrace.py | 0 .../AsyncRegistry}/PrettyPrinter/src/pretty-printer.py | 0 .../AsyncRegistry}/PrettyPrinter/src/tests/__init__.py | 0 .../AsyncRegistry}/PrettyPrinter/src/tests/test_forest.py | 0 .../AsyncRegistry}/RestHandler.cpp | 4 ++-- .../AsyncRegistry}/RestHandler.h | 2 +- .../AsyncRegistry}/Stacktrace/CMakeLists.txt | 0 .../AsyncRegistry}/Stacktrace/depth_first.h | 0 .../AsyncRegistry}/Stacktrace/forest.h | 0 arangod/SystemMonitor/CMakeLists.txt | 1 + 26 files changed, 16 insertions(+), 15 deletions(-) delete mode 100644 arangod/AsyncRegistryServer/PrettyPrinter/.gdbinit rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/CMakeLists.txt (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Feature.cpp (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Feature.h (98%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Metrics.cpp (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Metrics.h (100%) create mode 100644 arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/.gdbinit rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/README.md (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/__init__.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/asyncregistry/__init__.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/asyncregistry/gdb_data.py (100%) rename arangod/{AsyncRegistryServer => 
SystemMonitor/AsyncRegistry}/PrettyPrinter/src/asyncregistry/gdb_forest.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/asyncregistry/gdb_printer.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/asyncregistry/stacktrace.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/pretty-printer.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/tests/__init__.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/PrettyPrinter/src/tests/test_forest.py (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/RestHandler.cpp (98%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/RestHandler.h (96%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Stacktrace/CMakeLists.txt (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Stacktrace/depth_first.h (100%) rename arangod/{AsyncRegistryServer => SystemMonitor/AsyncRegistry}/Stacktrace/forest.h (100%) create mode 100644 arangod/SystemMonitor/CMakeLists.txt diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/.gdbinit b/arangod/AsyncRegistryServer/PrettyPrinter/.gdbinit deleted file mode 100644 index 87b4c3f4befd..000000000000 --- a/arangod/AsyncRegistryServer/PrettyPrinter/.gdbinit +++ /dev/null @@ -1,8 +0,0 @@ -python -import sys -sys.path.insert(0, './arangod/AsyncRegistryServer/PrettyPrinter/src/') -end - -source ./arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_printer.py - -echo "asyncregistry pretty-printer loaded\n" diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 5c79ea9b5cf5..9bf5a4fac4ed 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -103,7 +103,6 @@ get_target_property(IRESEARCH_INCLUDE include(arangoserver.cmake) add_subdirectory(Agency) add_subdirectory(Aql) 
-add_subdirectory(AsyncRegistryServer) add_subdirectory(Cache) add_subdirectory(Cluster) add_subdirectory(ClusterEngine) @@ -118,6 +117,7 @@ add_subdirectory(RestHandler) add_subdirectory(RestServer) add_subdirectory(RocksDBEngine) add_subdirectory(StorageEngine) +add_subdirectory(SystemMonitor) add_subdirectory(Utils) if (USE_V8) add_subdirectory(V8Server) diff --git a/arangod/GeneralServer/GeneralServerFeature.cpp b/arangod/GeneralServer/GeneralServerFeature.cpp index c73c02125d55..cffaa1a806ae 100644 --- a/arangod/GeneralServer/GeneralServerFeature.cpp +++ b/arangod/GeneralServer/GeneralServerFeature.cpp @@ -30,7 +30,7 @@ #include "Agency/RestAgencyPrivHandler.h" #include "ApplicationFeatures/HttpEndpointProvider.h" #include "Aql/RestAqlHandler.h" -#include "AsyncRegistryServer/RestHandler.h" +#include "SystemMonitor/AsyncRegistry/RestHandler.h" #include "Basics/StringUtils.h" #include "Basics/application-exit.h" #include "Basics/debugging.h" diff --git a/arangod/RestServer/arangod_includes.h b/arangod/RestServer/arangod_includes.h index 17d6fe636713..43234c3c0315 100644 --- a/arangod/RestServer/arangod_includes.h +++ b/arangod/RestServer/arangod_includes.h @@ -51,7 +51,7 @@ #include "Aql/AqlFunctionFeature.h" #include "Aql/OptimizerRulesFeature.h" #include "Aql/QueryInfoLoggerFeature.h" -#include "AsyncRegistryServer/Feature.h" +#include "SystemMonitor/AsyncRegistry/Feature.h" #include "Basics/ArangoGlobalContext.h" #include "Basics/FileUtils.h" #include "Basics/directories.h" diff --git a/arangod/AsyncRegistryServer/CMakeLists.txt b/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt similarity index 100% rename from arangod/AsyncRegistryServer/CMakeLists.txt rename to arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt diff --git a/arangod/AsyncRegistryServer/Feature.cpp b/arangod/SystemMonitor/AsyncRegistry/Feature.cpp similarity index 100% rename from arangod/AsyncRegistryServer/Feature.cpp rename to arangod/SystemMonitor/AsyncRegistry/Feature.cpp diff --git 
a/arangod/AsyncRegistryServer/Feature.h b/arangod/SystemMonitor/AsyncRegistry/Feature.h similarity index 98% rename from arangod/AsyncRegistryServer/Feature.h rename to arangod/SystemMonitor/AsyncRegistry/Feature.h index 6b055729239a..09ef595cbb3f 100644 --- a/arangod/AsyncRegistryServer/Feature.h +++ b/arangod/SystemMonitor/AsyncRegistry/Feature.h @@ -23,7 +23,7 @@ #pragma once #include "Async/Registry/registry_variable.h" -#include "AsyncRegistryServer/Metrics.h" +#include "SystemMonitor/AsyncRegistry/Metrics.h" #include "Basics/FutureSharedLock.h" #include "RestServer/arangod.h" #include "Scheduler/SchedulerFeature.h" diff --git a/arangod/AsyncRegistryServer/Metrics.cpp b/arangod/SystemMonitor/AsyncRegistry/Metrics.cpp similarity index 100% rename from arangod/AsyncRegistryServer/Metrics.cpp rename to arangod/SystemMonitor/AsyncRegistry/Metrics.cpp diff --git a/arangod/AsyncRegistryServer/Metrics.h b/arangod/SystemMonitor/AsyncRegistry/Metrics.h similarity index 100% rename from arangod/AsyncRegistryServer/Metrics.h rename to arangod/SystemMonitor/AsyncRegistry/Metrics.h diff --git a/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/.gdbinit b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/.gdbinit new file mode 100644 index 000000000000..ec45b6912fda --- /dev/null +++ b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/.gdbinit @@ -0,0 +1,8 @@ +python +import sys +sys.path.insert(0, './arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/') +end + +source ./arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_printer.py + +echo "asyncregistry pretty-printer loaded\n" diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/README.md b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/README.md similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/README.md rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/README.md diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/__init__.py 
b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/__init__.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/__init__.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/__init__.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/__init__.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/__init__.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/__init__.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/__init__.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_data.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_data.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_data.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_data.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_forest.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_forest.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_forest.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_forest.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_printer.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_printer.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/gdb_printer.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/gdb_printer.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/stacktrace.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/stacktrace.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/asyncregistry/stacktrace.py rename to 
arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/asyncregistry/stacktrace.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/pretty-printer.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/pretty-printer.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/pretty-printer.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/pretty-printer.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/tests/__init__.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/tests/__init__.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/tests/__init__.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/tests/__init__.py diff --git a/arangod/AsyncRegistryServer/PrettyPrinter/src/tests/test_forest.py b/arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/tests/test_forest.py similarity index 100% rename from arangod/AsyncRegistryServer/PrettyPrinter/src/tests/test_forest.py rename to arangod/SystemMonitor/AsyncRegistry/PrettyPrinter/src/tests/test_forest.py diff --git a/arangod/AsyncRegistryServer/RestHandler.cpp b/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp similarity index 98% rename from arangod/AsyncRegistryServer/RestHandler.cpp rename to arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp index ab40713a3959..ec7278c5c78b 100644 --- a/arangod/AsyncRegistryServer/RestHandler.cpp +++ b/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp @@ -25,8 +25,8 @@ #include #include "Async/Registry/promise.h" -#include "AsyncRegistryServer/Stacktrace/depth_first.h" -#include "AsyncRegistryServer/Stacktrace/forest.h" +#include "SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h" +#include "SystemMonitor/AsyncRegistry/Stacktrace/forest.h" #include "ApplicationFeatures/ApplicationServer.h" #include "Async/Registry/promise.h" #include "Async/Registry/registry_variable.h" diff --git a/arangod/AsyncRegistryServer/RestHandler.h 
b/arangod/SystemMonitor/AsyncRegistry/RestHandler.h similarity index 96% rename from arangod/AsyncRegistryServer/RestHandler.h rename to arangod/SystemMonitor/AsyncRegistry/RestHandler.h index c16e2e44f2a5..e6759316bc3d 100644 --- a/arangod/AsyncRegistryServer/RestHandler.h +++ b/arangod/SystemMonitor/AsyncRegistry/RestHandler.h @@ -22,7 +22,7 @@ //////////////////////////////////////////////////////////////////////////////// #pragma once -#include "AsyncRegistryServer/Feature.h" +#include "SystemMonitor/AsyncRegistry/Feature.h" #include "RestHandler/RestVocbaseBaseHandler.h" namespace arangodb::async_registry { diff --git a/arangod/AsyncRegistryServer/Stacktrace/CMakeLists.txt b/arangod/SystemMonitor/AsyncRegistry/Stacktrace/CMakeLists.txt similarity index 100% rename from arangod/AsyncRegistryServer/Stacktrace/CMakeLists.txt rename to arangod/SystemMonitor/AsyncRegistry/Stacktrace/CMakeLists.txt diff --git a/arangod/AsyncRegistryServer/Stacktrace/depth_first.h b/arangod/SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h similarity index 100% rename from arangod/AsyncRegistryServer/Stacktrace/depth_first.h rename to arangod/SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h diff --git a/arangod/AsyncRegistryServer/Stacktrace/forest.h b/arangod/SystemMonitor/AsyncRegistry/Stacktrace/forest.h similarity index 100% rename from arangod/AsyncRegistryServer/Stacktrace/forest.h rename to arangod/SystemMonitor/AsyncRegistry/Stacktrace/forest.h diff --git a/arangod/SystemMonitor/CMakeLists.txt b/arangod/SystemMonitor/CMakeLists.txt new file mode 100644 index 000000000000..284b442d5893 --- /dev/null +++ b/arangod/SystemMonitor/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(AsyncRegistry) From f6c0739868483b12c1f6d38405f40a2a93cb53db Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 29 Apr 2025 14:30:31 +0200 Subject: [PATCH 21/36] Add task registry feature with metrics --- arangod/RestServer/arangod.cpp | 3 + arangod/RestServer/arangod.h | 6 + 
arangod/RestServer/arangod_includes.h | 1 + .../SystemMonitor/AsyncRegistry/Metrics.cpp | 2 + arangod/SystemMonitor/AsyncRegistry/Metrics.h | 4 + arangod/SystemMonitor/CMakeLists.txt | 1 + .../TaskMonitoring/CMakeLists.txt | 3 + .../SystemMonitor/TaskMonitoring/Feature.cpp | 114 ++++++++++++++++++ .../SystemMonitor/TaskMonitoring/Feature.h | 80 ++++++++++++ .../SystemMonitor/TaskMonitoring/Metrics.cpp | 51 ++++++++ .../SystemMonitor/TaskMonitoring/Metrics.h | 65 ++++++++++ .../TaskMonitoring/task_registry_variable.h | 15 ++- lib/TaskMonitoring/task_registry_variable.cpp | 2 +- 13 files changed, 345 insertions(+), 2 deletions(-) create mode 100644 arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt create mode 100644 arangod/SystemMonitor/TaskMonitoring/Feature.cpp create mode 100644 arangod/SystemMonitor/TaskMonitoring/Feature.h create mode 100644 arangod/SystemMonitor/TaskMonitoring/Metrics.cpp create mode 100644 arangod/SystemMonitor/TaskMonitoring/Metrics.h diff --git a/arangod/RestServer/arangod.cpp b/arangod/RestServer/arangod.cpp index 334c1ddbc7b0..2d8a81f3f697 100644 --- a/arangod/RestServer/arangod.cpp +++ b/arangod/RestServer/arangod.cpp @@ -84,6 +84,9 @@ static int runServer(int argc, char** argv, ArangoGlobalContext& context) { [](auto& server, TypeTag) { return std::make_unique(server); }, + [](auto& server, TypeTag) { + return std::make_unique(server); + }, #ifdef TRI_HAVE_GETRLIMIT [](auto& server, TypeTag) { return std::make_unique( diff --git a/arangod/RestServer/arangod.h b/arangod/RestServer/arangod.h index 7d7c88f0ade6..7b11b01af6b8 100644 --- a/arangod/RestServer/arangod.h +++ b/arangod/RestServer/arangod.h @@ -61,6 +61,11 @@ namespace async_registry { class Feature; +} +namespace task_monitoring { + +class Feature; + } class BootstrapFeature; class BumpFileDescriptorsFeature; @@ -206,6 +211,7 @@ using ArangodFeaturesList = TypeList< ApiRecordingFeature, AqlFeature, async_registry::Feature, + task_monitoring::Feature, AuthenticationFeature, 
BootstrapFeature, #ifdef TRI_HAVE_GETRLIMIT diff --git a/arangod/RestServer/arangod_includes.h b/arangod/RestServer/arangod_includes.h index 43234c3c0315..5435143d15a5 100644 --- a/arangod/RestServer/arangod_includes.h +++ b/arangod/RestServer/arangod_includes.h @@ -52,6 +52,7 @@ #include "Aql/OptimizerRulesFeature.h" #include "Aql/QueryInfoLoggerFeature.h" #include "SystemMonitor/AsyncRegistry/Feature.h" +#include "SystemMonitor/TaskMonitoring/Feature.h" #include "Basics/ArangoGlobalContext.h" #include "Basics/FileUtils.h" #include "Basics/directories.h" diff --git a/arangod/SystemMonitor/AsyncRegistry/Metrics.cpp b/arangod/SystemMonitor/AsyncRegistry/Metrics.cpp index d31bc040e007..266b2273e2c0 100644 --- a/arangod/SystemMonitor/AsyncRegistry/Metrics.cpp +++ b/arangod/SystemMonitor/AsyncRegistry/Metrics.cpp @@ -25,6 +25,8 @@ #include "Metrics/Counter.h" #include "Metrics/Gauge.h" +using namespace arangodb::async_registry; + auto RegistryMetrics::increment_total_nodes() -> void { promises_total->count(); } diff --git a/arangod/SystemMonitor/AsyncRegistry/Metrics.h b/arangod/SystemMonitor/AsyncRegistry/Metrics.h index c434bb1635fe..6aeafedc7fba 100644 --- a/arangod/SystemMonitor/AsyncRegistry/Metrics.h +++ b/arangod/SystemMonitor/AsyncRegistry/Metrics.h @@ -25,6 +25,8 @@ #include "Containers/Concurrent/metrics.h" #include "Metrics/Fwd.h" +namespace arangodb::async_registry { + struct RegistryMetrics : arangodb::containers::Metrics { RegistryMetrics( std::shared_ptr promises_total, @@ -60,3 +62,5 @@ struct RegistryMetrics : arangodb::containers::Metrics { std::shared_ptr> existing_thread_registries = nullptr; }; + +} // namespace arangodb::async_registry diff --git a/arangod/SystemMonitor/CMakeLists.txt b/arangod/SystemMonitor/CMakeLists.txt index 284b442d5893..1a0517cee88a 100644 --- a/arangod/SystemMonitor/CMakeLists.txt +++ b/arangod/SystemMonitor/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(AsyncRegistry) +add_subdirectory(TaskMonitoring) diff --git 
a/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt b/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt new file mode 100644 index 000000000000..8112210a172e --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt @@ -0,0 +1,3 @@ +target_sources(arangoserver PRIVATE + Feature.cpp + Metrics.cpp) diff --git a/arangod/SystemMonitor/TaskMonitoring/Feature.cpp b/arangod/SystemMonitor/TaskMonitoring/Feature.cpp new file mode 100644 index 000000000000..c6530e0c568f --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/Feature.cpp @@ -0,0 +1,114 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#include "Feature.h" + +#include "Basics/FutureSharedLock.h" +#include "Metrics/CounterBuilder.h" +#include "Metrics/GaugeBuilder.h" +#include "Metrics/MetricsFeature.h" +#include "ProgramOptions/Parameters.h" + +using namespace arangodb::task_monitoring; + +DECLARE_COUNTER(arangodb_tasks_total, + "Total number of created tasks since database creation"); + +DECLARE_GAUGE(arangodb_tasks_existing, std::uint64_t, + "Number of currently existing tasks"); + +DECLARE_GAUGE(arangodb_tasks_ready_for_deletion, std::uint64_t, + "Number of currently existing tasks that wait " + "for their garbage collection"); + +DECLARE_COUNTER(arangodb_tasks_thread_registries_total, + "Total number of threads that started tasks " + "since database creation"); + +DECLARE_GAUGE(arangodb_tasks_existing_thread_registries, std::uint64_t, + "Number of threads that started currently existing tasks"); + +Feature::Feature(Server& server) + : ArangodFeature{server, *this}, _async_mutex{_schedulerWrapper} { + startsAfter(); + startsAfter(); +} + +auto Feature::create_metrics(arangodb::metrics::MetricsFeature& metrics_feature) + -> std::shared_ptr { + return std::make_shared( + metrics_feature.addShared(arangodb_tasks_total{}), + metrics_feature.addShared(arangodb_tasks_existing{}), + metrics_feature.addShared(arangodb_tasks_ready_for_deletion{}), + metrics_feature.addShared(arangodb_tasks_thread_registries_total{}), + metrics_feature.addShared(arangodb_tasks_existing_thread_registries{})); +} +auto Feature::asyncLock() + -> futures::Future::LockGuard> { + return _async_mutex.asyncLockExclusive(); +} + +struct Feature::CleanupThread { + CleanupThread(size_t gc_timeout) + : _thread([gc_timeout, this](std::stop_token stoken) { + while (not stoken.stop_requested()) { + std::unique_lock guard(_mutex); + auto status = 
_cv.wait_for(guard, std::chrono::seconds{gc_timeout}); + if (status == std::cv_status::timeout) { + registry.run_external_cleanup(); + } + } + }) {} + + ~CleanupThread() { + _thread.request_stop(); + _cv.notify_one(); + } + + std::mutex _mutex; + std::condition_variable _cv; + std::jthread _thread; +}; + +void Feature::start() { + metrics = create_metrics( + server().template getFeature()); + registry.set_metrics(metrics); + _cleanupThread = std::make_shared(_options.gc_timeout); +} + +void Feature::stop() { _cleanupThread.reset(); } + +void Feature::collectOptions(std::shared_ptr options) { + options->addSection("task-registry", "Options for the task-registry"); + + options + ->addOption("--task-registry.cleanup-timeout", + "Timeout in seconds between task-registry garbage collection " + "sweeps.", + new options::SizeTParameter(&_options.gc_timeout, /*base*/ 1, + /*minValue*/ 1)) + .setLongDescription( + R"(Each thread that is involved in the task-registry needs to garbage collect its finished tasks regularly. This option controls how often this is done in seconds. This can possibly be performance relevant because each involved thread acquires a lock.)"); +} + +Feature::~Feature() { registry.set_metrics(nullptr); } diff --git a/arangod/SystemMonitor/TaskMonitoring/Feature.h b/arangod/SystemMonitor/TaskMonitoring/Feature.h new file mode 100644 index 000000000000..6f37f9d2151b --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/Feature.h @@ -0,0 +1,80 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "TaskMonitoring/task_registry_variable.h" +#include "SystemMonitor/TaskMonitoring/Metrics.h" +#include "Basics/FutureSharedLock.h" +#include "RestServer/arangod.h" +#include "Scheduler/SchedulerFeature.h" + +namespace arangodb::task_monitoring { + +class Feature final : public ArangodFeature { + private: + static auto create_metrics(arangodb::metrics::MetricsFeature& metrics_feature) + -> std::shared_ptr; + struct SchedulerWrapper { + using WorkHandle = Scheduler::WorkHandle; + template + void queue(F&& fn) { + SchedulerFeature::SCHEDULER->queue(RequestLane::CLUSTER_INTERNAL, + std::forward(fn)); + } + template + WorkHandle queueDelayed(F&& fn, std::chrono::milliseconds timeout) { + return SchedulerFeature::SCHEDULER->queueDelayed( + "rocksdb-meta-collection-lock-timeout", RequestLane::CLUSTER_INTERNAL, + timeout, std::forward(fn)); + } + }; + + public: + static constexpr std::string_view name() { return "TaskMonitoring"; } + auto asyncLock() -> futures::Future< + futures::FutureSharedLock::LockGuard>; + + Feature(Server& server); + + ~Feature(); + + void start() override final; + void stop() override final; + void collectOptions(std::shared_ptr) override final; + + private: + struct Options { + size_t gc_timeout{1}; + }; + Options _options; + + std::shared_ptr metrics; + + struct CleanupThread; + std::shared_ptr _cleanupThread; + +
SchedulerWrapper _schedulerWrapper; + futures::FutureSharedLock _async_mutex; +}; + +} // namespace arangodb::task_monitoring diff --git a/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp b/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp new file mode 100644 index 000000000000..b38ca9850643 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp @@ -0,0 +1,51 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#include "Metrics.h" + +#include "Metrics/Counter.h" +#include "Metrics/Gauge.h" + +using namespace arangodb::task_monitoring; + +auto RegistryMetrics::increment_total_nodes() -> void { tasks_total->count(); } +auto RegistryMetrics::increment_registered_nodes() -> void { + existing_tasks->fetch_add(1); +} +auto RegistryMetrics::decrement_registered_nodes() -> void { + existing_tasks->fetch_sub(1); +} +auto RegistryMetrics::increment_ready_for_deletion_nodes() -> void { + ready_for_deletion_tasks->fetch_add(1); +} +auto RegistryMetrics::decrement_ready_for_deletion_nodes() -> void { + ready_for_deletion_tasks->fetch_sub(1); +} +auto RegistryMetrics::increment_total_lists() -> void { + thread_registries_total->count(); +} +auto RegistryMetrics::increment_existing_lists() -> void { + existing_thread_registries->fetch_add(1); +} +auto RegistryMetrics::decrement_existing_lists() -> void { + existing_thread_registries->fetch_sub(1); +} diff --git a/arangod/SystemMonitor/TaskMonitoring/Metrics.h b/arangod/SystemMonitor/TaskMonitoring/Metrics.h new file mode 100644 index 000000000000..35086ed33b74 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/Metrics.h @@ -0,0 +1,65 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "Containers/Concurrent/metrics.h" +#include "Metrics/Fwd.h" + +namespace arangodb::task_monitoring { + +struct RegistryMetrics : arangodb::containers::Metrics { + RegistryMetrics( + std::shared_ptr tasks_total, + std::shared_ptr> existing_tasks, + std::shared_ptr> + ready_for_deletion_tasks, + std::shared_ptr thread_registries_total, + std::shared_ptr> + existing_thread_registries) + : tasks_total{tasks_total}, + existing_tasks{existing_tasks}, + ready_for_deletion_tasks{ready_for_deletion_tasks}, + thread_registries_total{thread_registries_total}, + existing_thread_registries{existing_thread_registries} {} + ~RegistryMetrics() = default; + auto increment_total_nodes() -> void override; + auto increment_registered_nodes() -> void override; + auto decrement_registered_nodes() -> void override; + auto increment_ready_for_deletion_nodes() -> void override; + auto decrement_ready_for_deletion_nodes() -> void override; + auto increment_total_lists() -> void override; + auto increment_existing_lists() -> void override; + auto decrement_existing_lists() -> void override; + + private: + std::shared_ptr tasks_total = nullptr; + std::shared_ptr> existing_tasks = + nullptr; + std::shared_ptr> + ready_for_deletion_tasks = nullptr; + std::shared_ptr thread_registries_total = nullptr; + std::shared_ptr> + existing_thread_registries = nullptr; 
+}; + +} // namespace arangodb::task_monitoring diff --git a/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h index 8484307be7b6..7f252726d7f3 100644 --- a/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h +++ b/lib/TaskMonitoring/include/TaskMonitoring/task_registry_variable.h @@ -23,13 +23,26 @@ #pragma once #include "Containers/Concurrent/ListOfNonOwnedLists.h" +#include "Containers/Concurrent/metrics.h" #include "Containers/Concurrent/ThreadOwnedList.h" #include "TaskMonitoring/task.h" namespace arangodb::task_monitoring { using ThreadRegistry = containers::ThreadOwnedList; -struct Registry : public containers::ListOfNonOwnedLists {}; +struct Registry : public containers::ListOfNonOwnedLists { + // all thread registries that are added to this registry will use these + // metrics + std::shared_ptr metrics; + // metrics-feature is only available after startup, therefore we need to + // update the metrics after construction + // thread registries that are added to the registry before setting the metrics + // properly are not accounted for in the metrics + auto set_metrics(std::shared_ptr new_metrics) -> void { + auto guard = std::lock_guard(_mutex); + metrics = new_metrics; + } +}; /** Global variable that holds all active tasks. 
diff --git a/lib/TaskMonitoring/task_registry_variable.cpp b/lib/TaskMonitoring/task_registry_variable.cpp index 03fc02aaec6b..8b1e02fe2031 100644 --- a/lib/TaskMonitoring/task_registry_variable.cpp +++ b/lib/TaskMonitoring/task_registry_variable.cpp @@ -28,7 +28,7 @@ Registry registry; auto get_thread_registry() noexcept -> ThreadRegistry& { struct ThreadRegistryGuard { - ThreadRegistryGuard() : _registry{ThreadRegistry::make()} { + ThreadRegistryGuard() : _registry{ThreadRegistry::make(registry.metrics)} { registry.add(_registry); } From 630053f1c8203c7d732a4a97fb8c83a7f2889b02 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Wed, 30 Apr 2025 08:21:06 +0200 Subject: [PATCH 22/36] Move forest to lib --- arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt | 2 -- arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp | 5 +++-- lib/Containers/CMakeLists.txt | 1 + .../Stacktrace => lib/Containers/Forest}/CMakeLists.txt | 2 +- .../Stacktrace => lib/Containers/Forest}/depth_first.h | 4 ++-- .../Stacktrace => lib/Containers/Forest}/forest.h | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) rename {arangod/SystemMonitor/AsyncRegistry/Stacktrace => lib/Containers/Forest}/CMakeLists.txt (82%) rename {arangod/SystemMonitor/AsyncRegistry/Stacktrace => lib/Containers/Forest}/depth_first.h (96%) rename {arangod/SystemMonitor/AsyncRegistry/Stacktrace => lib/Containers/Forest}/forest.h (98%) diff --git a/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt b/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt index acccd6c5e2b1..0495a949b8f0 100644 --- a/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt +++ b/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt @@ -4,5 +4,3 @@ target_sources(arangoserver PRIVATE RestHandler.cpp) target_link_libraries(arangoserver arango_async_registry_stacktrace) - -add_subdirectory(Stacktrace) diff --git a/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp b/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp index ec7278c5c78b..0b4f2f096a8f 
100644 --- a/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp +++ b/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp @@ -25,8 +25,8 @@ #include #include "Async/Registry/promise.h" -#include "SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h" -#include "SystemMonitor/AsyncRegistry/Stacktrace/forest.h" +#include "Containers/Forest/depth_first.h" +#include "Containers/Forest/forest.h" #include "ApplicationFeatures/ApplicationServer.h" #include "Async/Registry/promise.h" #include "Async/Registry/registry_variable.h" @@ -42,6 +42,7 @@ using namespace arangodb; using namespace arangodb::async_registry; +using namespace arangodb::containers; struct Entry { TreeHierarchy hierarchy; diff --git a/lib/Containers/CMakeLists.txt b/lib/Containers/CMakeLists.txt index 3f37c693cf96..b6eefb6434f8 100644 --- a/lib/Containers/CMakeLists.txt +++ b/lib/Containers/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(Concurrent) +add_subdirectory(Forest) diff --git a/arangod/SystemMonitor/AsyncRegistry/Stacktrace/CMakeLists.txt b/lib/Containers/Forest/CMakeLists.txt similarity index 82% rename from arangod/SystemMonitor/AsyncRegistry/Stacktrace/CMakeLists.txt rename to lib/Containers/Forest/CMakeLists.txt index 485f0388e42f..7bf70681b9aa 100644 --- a/arangod/SystemMonitor/AsyncRegistry/Stacktrace/CMakeLists.txt +++ b/lib/Containers/Forest/CMakeLists.txt @@ -3,4 +3,4 @@ add_library(arango_async_registry_stacktrace INTERFACE forest.h) target_include_directories(arango_async_registry_stacktrace INTERFACE - ${PROJECT_SOURCE_DIR}/arangod) + ${PROJECT_SOURCE_DIR}/lib) diff --git a/arangod/SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h b/lib/Containers/Forest/depth_first.h similarity index 96% rename from arangod/SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h rename to lib/Containers/Forest/depth_first.h index b333fa69d46e..69891bd60ee3 100644 --- a/arangod/SystemMonitor/AsyncRegistry/Stacktrace/depth_first.h +++ b/lib/Containers/Forest/depth_first.h @@ -27,7 +27,7 @@ #include 
#include -namespace arangodb::async_registry { +namespace arangodb::containers { using Id = void*; using TreeHierarchy = size_t; @@ -73,4 +73,4 @@ struct DFS_PostOrder { std::stack> _stack; }; -} // namespace arangodb::async_registry +} // namespace arangodb::containers diff --git a/arangod/SystemMonitor/AsyncRegistry/Stacktrace/forest.h b/lib/Containers/Forest/forest.h similarity index 98% rename from arangod/SystemMonitor/AsyncRegistry/Stacktrace/forest.h rename to lib/Containers/Forest/forest.h index 2293379f633d..b082fc0ed8f3 100644 --- a/arangod/SystemMonitor/AsyncRegistry/Stacktrace/forest.h +++ b/lib/Containers/Forest/forest.h @@ -26,7 +26,7 @@ #include #include -namespace arangodb::async_registry { +namespace arangodb::containers { using Id = void*; @@ -119,4 +119,4 @@ struct IndexedForestWithRoots : IndexedForest { std::vector _roots; }; -} // namespace arangodb::async_registry +} // namespace arangodb::containers From 0aff5136f5a59a30c32f4855bddb8a99835f4724 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Wed, 30 Apr 2025 08:27:42 +0200 Subject: [PATCH 23/36] Add REST handler --- .../GeneralServer/GeneralServerFeature.cpp | 7 +- .../TaskMonitoring/CMakeLists.txt | 3 +- .../TaskMonitoring/RestHandler.cpp | 196 ++++++++++++++++++ .../TaskMonitoring/RestHandler.h | 42 ++++ 4 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp create mode 100644 arangod/SystemMonitor/TaskMonitoring/RestHandler.h diff --git a/arangod/GeneralServer/GeneralServerFeature.cpp b/arangod/GeneralServer/GeneralServerFeature.cpp index cffaa1a806ae..7dcd2e3fac30 100644 --- a/arangod/GeneralServer/GeneralServerFeature.cpp +++ b/arangod/GeneralServer/GeneralServerFeature.cpp @@ -30,7 +30,6 @@ #include "Agency/RestAgencyPrivHandler.h" #include "ApplicationFeatures/HttpEndpointProvider.h" #include "Aql/RestAqlHandler.h" -#include "SystemMonitor/AsyncRegistry/RestHandler.h" #include "Basics/StringUtils.h" #include 
"Basics/application-exit.h" #include "Basics/debugging.h" @@ -129,6 +128,8 @@ #include "Scheduler/SchedulerFeature.h" #include "StorageEngine/EngineSelectorFeature.h" #include "StorageEngine/StorageEngine.h" +#include "SystemMonitor/AsyncRegistry/RestHandler.h" +#include "SystemMonitor/TaskMonitoring/RestHandler.h" #ifdef USE_V8 #include "V8Server/V8DealerFeature.h" #endif @@ -843,6 +844,10 @@ void GeneralServerFeature::defineRemainingHandlers( "/_admin/async-registry", RestHandlerCreator::createNoData); + f.addPrefixHandler( + "/_admin/task-monitoring", + RestHandlerCreator::createNoData); + f.addPrefixHandler( "/_admin/cluster", RestHandlerCreator::createNoData); diff --git a/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt b/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt index 8112210a172e..1e244be9a727 100644 --- a/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt +++ b/arangod/SystemMonitor/TaskMonitoring/CMakeLists.txt @@ -1,3 +1,4 @@ target_sources(arangoserver PRIVATE Feature.cpp - Metrics.cpp) + Metrics.cpp + RestHandler.cpp) diff --git a/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp b/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp new file mode 100644 index 000000000000..4a9fd255cd46 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp @@ -0,0 +1,196 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#include "RestHandler.h" +#include +#include + +#include "Containers/Forest/depth_first.h" +#include "Containers/Forest/forest.h" +#include "ApplicationFeatures/ApplicationServer.h" +#include "TaskMonitoring/task.h" +#include "TaskMonitoring/task_registry_variable.h" +#include "Cluster/ClusterFeature.h" +#include "Cluster/ClusterInfo.h" +#include "Cluster/ServerState.h" +#include "Inspection/VPack.h" +#include "Network/ConnectionPool.h" +#include "Network/Methods.h" +#include "Network/NetworkFeature.h" +#include "Network/RequestOptions.h" +#include "Rest/CommonDefines.h" + +using namespace arangodb; +using namespace arangodb::task_monitoring; +using namespace arangodb::containers; + +struct Entry { + TreeHierarchy hierarchy; + TaskSnapshot data; +}; +template +auto inspect(Inspector& f, Entry& x) { + return f.object(x).fields(f.field("hierarchy", x.hierarchy), + f.field("data", x.data)); +} + +RestHandler::RestHandler(ArangodServer& server, GeneralRequest* request, + GeneralResponse* response) + : RestVocbaseBaseHandler(server, request, response), + _feature(server.getFeature()) {} + +namespace { +/** + Creates a forest of all current tasks + + An edge between two tasks means that the lower hierarchy tasks started the + larger hierarchy task. 
+ **/ +auto all_undeleted_promises() -> ForestWithRoots { + Forest forest; + std::vector roots; + registry.for_node([&](TaskSnapshot task) { + // if (promise.state != State::Deleted) { + std::visit(overloaded{ + [&](TaskIdWrapper task_id) { + forest.insert(task.id, task_id.id, task); + }, + [&](RootTask root) { + forest.insert(task.id, nullptr, task); + roots.emplace_back(task.id); + }, + }, + task.parent); + // } + }); + return ForestWithRoots{forest, roots}; +} + +/** + Converts a forest of tasks into a list of stacktraces inside a + velocypack. + + The list of stacktraces includes one stacktrace per tree in the forest. To + create one stacktrace, it uses a depth first search to traverse the forest in + post order, such that tasks with the highest hierarchy in a tree are given + first and the root task is given last. + **/ +auto getStacktraceData(IndexedForestWithRoots const& promises) + -> VPackBuilder { + VPackBuilder builder; + builder.openObject(); + builder.add(VPackValue("task_stacktraces")); + builder.openArray(); + for (auto const& root : promises.roots()) { + builder.openArray(); + auto dfs = DFS_PostOrder{promises, root}; + do { + auto next = dfs.next(); + if (next == std::nullopt) { + break; + } + auto [id, hierarchy] = next.value(); + auto data = promises.node(id); + if (data != std::nullopt) { + auto entry = Entry{.hierarchy = hierarchy, .data = data.value()}; + velocypack::serialize(builder, entry); + } + } while (true); + builder.close(); + } + builder.close(); + builder.close(); + return builder; +} +} // namespace + +auto RestHandler::executeAsync() -> futures::Future { + if (!ExecContext::current().isSuperuser()) { + generateError(rest::ResponseCode::FORBIDDEN, TRI_ERROR_HTTP_FORBIDDEN, + "you need super user rights for log operations"); + } + + if (_request->requestType() != rest::RequestType::GET) { + generateError(rest::ResponseCode::METHOD_NOT_ALLOWED, + TRI_ERROR_HTTP_METHOD_NOT_ALLOWED); + co_return; + } + + // forwarding + bool
foundServerIdParameter; + std::string const& serverId = + _request->value("serverId", foundServerIdParameter); + + if (ServerState::instance()->isCoordinator() && foundServerIdParameter) { + if (serverId != ServerState::instance()->getId()) { + // not ourselves! - need to pass through the request + auto& ci = server().getFeature().clusterInfo(); + + bool found = false; + for (auto const& srv : ci.getServers()) { + // validate if server id exists + if (srv.first == serverId) { + found = true; + break; + } + } + + if (!found) { + generateError(rest::ResponseCode::NOT_FOUND, + TRI_ERROR_HTTP_BAD_PARAMETER, + "unknown serverId supplied."); + co_return; + } + + NetworkFeature const& nf = server().getFeature(); + network::ConnectionPool* pool = nf.pool(); + if (pool == nullptr) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_SHUTTING_DOWN); + } + network::RequestOptions options; + options.timeout = network::Timeout(30.0); + options.database = _request->databaseName(); + options.parameters = _request->parameters(); + + auto f = network::sendRequestRetry( + pool, "server:" + serverId, fuerte::RestVerb::Get, + _request->requestPath(), VPackBuffer{}, options); + co_await std::move(f).thenValue( + [self = std::dynamic_pointer_cast(shared_from_this())]( + network::Response const& r) { + if (r.fail()) { + self->generateError(r.combinedResult()); + } else { + self->generateResult(rest::ResponseCode::OK, r.slice()); + } + }); + co_return; + } + } + + auto lock_guard = co_await _feature.asyncLock(); + + // do actual work + auto promises = all_undeleted_promises().index_by_awaitee(); + generateResult(rest::ResponseCode::OK, getStacktraceData(promises).slice()); + co_return; +} diff --git a/arangod/SystemMonitor/TaskMonitoring/RestHandler.h b/arangod/SystemMonitor/TaskMonitoring/RestHandler.h new file mode 100644 index 000000000000..bc3a4c298090 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/RestHandler.h @@ -0,0 +1,42 @@ 
+//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2024 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Business Source License 1.1 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// https://github.com/arangodb/arangodb/blob/devel/LICENSE +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Julia Volmer +//////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include "SystemMonitor/TaskMonitoring/Feature.h" +#include "RestHandler/RestVocbaseBaseHandler.h" + +namespace arangodb::task_monitoring { + +class RestHandler : public arangodb::RestVocbaseBaseHandler { + public: + RestHandler(ArangodServer&, GeneralRequest*, GeneralResponse*); + + public: + char const* name() const override final { return "TaskRegistryRestHandler"; } + RequestLane lane() const override final { return RequestLane::CLUSTER_ADMIN; } + futures::Future executeAsync() override; + + Feature& _feature; +}; + +} // namespace arangodb::task_monitoring From 5b1fe64f0761ce538b8fe2480a1be263cdb9cf72 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 6 May 2025 23:00:35 +0200 Subject: [PATCH 24/36] Add metrics documentation --- .../arangodb_monitoring_tasks_existing.yaml | 14 ++++++++ ...ring_tasks_existing_thread_registries.yaml | 14 ++++++++ ...b_monitoring_tasks_ready_for_deletion.yaml | 14 ++++++++ ...itoring_tasks_thread_registries_total.yaml | 14 ++++++++ 
.../arangodb_monitoring_tasks_total.yaml | 14 ++++++++ .../SystemMonitor/TaskMonitoring/Feature.cpp | 34 +++++++++++-------- .../SystemMonitor/TaskMonitoring/Metrics.cpp | 4 +-- 7 files changed, 91 insertions(+), 17 deletions(-) create mode 100644 Documentation/Metrics/arangodb_monitoring_tasks_existing.yaml create mode 100644 Documentation/Metrics/arangodb_monitoring_tasks_existing_thread_registries.yaml create mode 100644 Documentation/Metrics/arangodb_monitoring_tasks_ready_for_deletion.yaml create mode 100644 Documentation/Metrics/arangodb_monitoring_tasks_thread_registries_total.yaml create mode 100644 Documentation/Metrics/arangodb_monitoring_tasks_total.yaml diff --git a/Documentation/Metrics/arangodb_monitoring_tasks_existing.yaml b/Documentation/Metrics/arangodb_monitoring_tasks_existing.yaml new file mode 100644 index 000000000000..9ac8a8c79336 --- /dev/null +++ b/Documentation/Metrics/arangodb_monitoring_tasks_existing.yaml @@ -0,0 +1,14 @@ +name: arangodb_monitoring_tasks_existing +introducedIn: "3.12.5" +help: | + Number of currently existing monitoring tasks. +unit: number +type: gauge +category: Statistics +complexity: advanced +exposedBy: + - dbserver + - coordinator + - agent +description: | + Number of currently existing monitoring tasks. diff --git a/Documentation/Metrics/arangodb_monitoring_tasks_existing_thread_registries.yaml b/Documentation/Metrics/arangodb_monitoring_tasks_existing_thread_registries.yaml new file mode 100644 index 000000000000..4dd8fbbbe37c --- /dev/null +++ b/Documentation/Metrics/arangodb_monitoring_tasks_existing_thread_registries.yaml @@ -0,0 +1,14 @@ +name: arangodb_monitoring_tasks_existing_thread_registries +introducedIn: "3.12.5" +help: | + Number of threads that started currently existing monitoring tasks. +unit: number +type: gauge +category: Statistics +complexity: advanced +exposedBy: + - dbserver + - coordinator + - agent +description: | + Number of threads that started currently existing monitoring tasks. 
The thread itself does not need to exist any more. This number also includes still running threads that have started monitoring tasks that do not exist any more. diff --git a/Documentation/Metrics/arangodb_monitoring_tasks_ready_for_deletion.yaml b/Documentation/Metrics/arangodb_monitoring_tasks_ready_for_deletion.yaml new file mode 100644 index 000000000000..cb237dd4b322 --- /dev/null +++ b/Documentation/Metrics/arangodb_monitoring_tasks_ready_for_deletion.yaml @@ -0,0 +1,14 @@ +name: arangodb_monitoring_tasks_ready_for_deletion +introducedIn: "3.12.5" +help: | + Number of currently existing monitoring tasks that wait for their garbage collection. +unit: number +type: gauge +category: Statistics +complexity: advanced +exposedBy: + - dbserver + - coordinator + - agent +description: | + Number of currently existing monitoring tasks that wait for their garbage collection. diff --git a/Documentation/Metrics/arangodb_monitoring_tasks_thread_registries_total.yaml b/Documentation/Metrics/arangodb_monitoring_tasks_thread_registries_total.yaml new file mode 100644 index 000000000000..f2adb899b7d2 --- /dev/null +++ b/Documentation/Metrics/arangodb_monitoring_tasks_thread_registries_total.yaml @@ -0,0 +1,14 @@ +name: arangodb_monitoring_tasks_thread_registries_total +introducedIn: "3.12.5" +help: | + Total number of threads that started monitoring tasks since database creation. +unit: number +type: counter +category: Statistics +complexity: advanced +exposedBy: + - dbserver + - coordinator + - agent +description: | + Total number of threads that started monitoring tasks since database creation. 
diff --git a/Documentation/Metrics/arangodb_monitoring_tasks_total.yaml b/Documentation/Metrics/arangodb_monitoring_tasks_total.yaml new file mode 100644 index 000000000000..ea94ba8e2288 --- /dev/null +++ b/Documentation/Metrics/arangodb_monitoring_tasks_total.yaml @@ -0,0 +1,14 @@ +name: arangodb_monitoring_tasks_total +introducedIn: "3.12.5" +help: | + Total number of created monitoring tasks since database creation. +unit: number +type: counter +category: Statistics +complexity: advanced +exposedBy: + - dbserver + - coordinator + - agent +description: | + Total number of created monitoring tasks since database creation. diff --git a/arangod/SystemMonitor/TaskMonitoring/Feature.cpp b/arangod/SystemMonitor/TaskMonitoring/Feature.cpp index c6530e0c568f..a2b461325fa1 100644 --- a/arangod/SystemMonitor/TaskMonitoring/Feature.cpp +++ b/arangod/SystemMonitor/TaskMonitoring/Feature.cpp @@ -30,22 +30,24 @@ using namespace arangodb::task_monitoring; -DECLARE_COUNTER(arangodb_tasks_total, - "Total number of created tasks since database creation"); +DECLARE_COUNTER( + arangodb_monitoring_tasks_total, + "Total number of created monitoring tasks since database creation"); -DECLARE_GAUGE(arangodb_tasks_existing, std::uint64_t, - "Number of currently existing tasks"); +DECLARE_GAUGE(arangodb_monitoring_tasks_existing, std::uint64_t, + "Number of currently existing monitoring tasks"); -DECLARE_GAUGE(arangodb_tasks_ready_for_deletion, std::uint64_t, - "Number of currently existing tasks that wait " +DECLARE_GAUGE(arangodb_monitoring_tasks_ready_for_deletion, std::uint64_t, + "Number of currently existing monitoring tasks that wait " "for their garbage collection"); -DECLARE_COUNTER(arangodb_tasks_thread_registries_total, - "Total number of threads that started tasks " +DECLARE_COUNTER(arangodb_monitoring_tasks_thread_registries_total, + "Total number of threads that started monitoring tasks " "since database creation"); -DECLARE_GAUGE(arangodb_tasks_existing_thread_registries, 
std::uint64_t, - "Number of threads that started currently existing tasks"); +DECLARE_GAUGE( + arangodb_monitoring_tasks_existing_thread_registries, std::uint64_t, + "Number of threads that started currently existing monitoring tasks"); Feature::Feature(Server& server) : ArangodFeature{server, *this}, _async_mutex{_schedulerWrapper} { @@ -56,11 +58,13 @@ Feature::Feature(Server& server) auto Feature::create_metrics(arangodb::metrics::MetricsFeature& metrics_feature) -> std::shared_ptr { return std::make_shared( - metrics_feature.addShared(arangodb_tasks_total{}), - metrics_feature.addShared(arangodb_tasks_existing{}), - metrics_feature.addShared(arangodb_tasks_ready_for_deletion{}), - metrics_feature.addShared(arangodb_tasks_thread_registries_total{}), - metrics_feature.addShared(arangodb_tasks_existing_thread_registries{})); + metrics_feature.addShared(arangodb_monitoring_tasks_total{}), + metrics_feature.addShared(arangodb_monitoring_tasks_existing{}), + metrics_feature.addShared(arangodb_monitoring_tasks_ready_for_deletion{}), + metrics_feature.addShared( + arangodb_monitoring_tasks_thread_registries_total{}), + metrics_feature.addShared( + arangodb_monitoring_tasks_existing_thread_registries{})); } auto Feature::asyncLock() -> futures::Future::LockGuard> { diff --git a/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp b/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp index b38ca9850643..49ee9ee883ea 100644 --- a/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp +++ b/arangod/SystemMonitor/TaskMonitoring/Metrics.cpp @@ -35,10 +35,10 @@ auto RegistryMetrics::decrement_registered_nodes() -> void { existing_tasks->fetch_sub(1); } auto RegistryMetrics::increment_ready_for_deletion_nodes() -> void { - existing_tasks->fetch_add(1); + ready_for_deletion_tasks->fetch_add(1); } auto RegistryMetrics::decrement_ready_for_deletion_nodes() -> void { - existing_tasks->fetch_sub(1); + ready_for_deletion_tasks->fetch_sub(1); } auto RegistryMetrics::increment_total_lists() -> 
void { thread_registries_total->count(); From 08db83b1c9a658ae7e9eb64df7db474921ef11ce Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Tue, 6 May 2025 23:17:04 +0200 Subject: [PATCH 25/36] Move forest test to container tests --- arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt | 2 +- lib/Containers/Forest/CMakeLists.txt | 5 ++--- tests/AsyncRegistryServer/CMakeLists.txt | 10 ---------- tests/CMakeLists.txt | 3 +-- tests/Containers/CMakeLists.txt | 11 +++++++++++ .../StacktraceTest.cpp => Containers/ForestTest.cpp} | 12 ++++++------ 6 files changed, 21 insertions(+), 22 deletions(-) delete mode 100644 tests/AsyncRegistryServer/CMakeLists.txt rename tests/{AsyncRegistryServer/StacktraceTest.cpp => Containers/ForestTest.cpp} (92%) diff --git a/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt b/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt index 0495a949b8f0..62b9e163798d 100644 --- a/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt +++ b/arangod/SystemMonitor/AsyncRegistry/CMakeLists.txt @@ -3,4 +3,4 @@ target_sources(arangoserver PRIVATE Metrics.cpp RestHandler.cpp) target_link_libraries(arangoserver - arango_async_registry_stacktrace) + arango_forest) diff --git a/lib/Containers/Forest/CMakeLists.txt b/lib/Containers/Forest/CMakeLists.txt index 7bf70681b9aa..a973816911a7 100644 --- a/lib/Containers/Forest/CMakeLists.txt +++ b/lib/Containers/Forest/CMakeLists.txt @@ -1,6 +1,5 @@ -add_library(arango_async_registry_stacktrace INTERFACE +add_library(arango_forest INTERFACE depth_first.h forest.h) -target_include_directories(arango_async_registry_stacktrace - INTERFACE +target_include_directories(arango_forest INTERFACE ${PROJECT_SOURCE_DIR}/lib) diff --git a/tests/AsyncRegistryServer/CMakeLists.txt b/tests/AsyncRegistryServer/CMakeLists.txt deleted file mode 100644 index f84891c944f2..000000000000 --- a/tests/AsyncRegistryServer/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_library(arango_tests_async_registry_server OBJECT - StacktraceTest.cpp) 
-target_link_libraries(arango_tests_async_registry_server PRIVATE - arango_async_registry_stacktrace - gtest) - -add_executable(arangodbtests_async_registry_server EXCLUDE_FROM_ALL) -target_link_libraries(arangodbtests_async_registry_server - arango_tests_async_registry_server - gtest_main) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ed47f83aa162..2877eeacf193 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -327,7 +327,7 @@ endif() target_link_libraries(arangodbtests arango - arango_tests_async_registry_server + arango_tests_forest arango_tests_basics arango_tests_replication2 arango_tests_replication2_pure @@ -420,7 +420,6 @@ endforeach() add_subdirectory(Actor) add_subdirectory(Async) -add_subdirectory(AsyncRegistryServer) add_subdirectory(Containers) add_subdirectory(sepp) add_subdirectory(VocBase/Properties) diff --git a/tests/Containers/CMakeLists.txt b/tests/Containers/CMakeLists.txt index 3f37c693cf96..2e209eb443d0 100644 --- a/tests/Containers/CMakeLists.txt +++ b/tests/Containers/CMakeLists.txt @@ -1 +1,12 @@ add_subdirectory(Concurrent) + +add_library(arango_tests_forest OBJECT + ForestTest.cpp) +target_link_libraries(arango_tests_forest PRIVATE + arango_forest + gtest) + +add_executable(arangodbtests_forest EXCLUDE_FROM_ALL) +target_link_libraries(arangodbtests_forest + arango_tests_forest + gtest_main) diff --git a/tests/AsyncRegistryServer/StacktraceTest.cpp b/tests/Containers/ForestTest.cpp similarity index 92% rename from tests/AsyncRegistryServer/StacktraceTest.cpp rename to tests/Containers/ForestTest.cpp index 6b324db89b7c..d1d1043fa23f 100644 --- a/tests/AsyncRegistryServer/StacktraceTest.cpp +++ b/tests/Containers/ForestTest.cpp @@ -20,14 +20,14 @@ /// /// @author Julia Volmer //////////////////////////////////////////////////////////////////////////////// -#include "AsyncRegistryServer/Stacktrace/forest.h" -#include "AsyncRegistryServer/Stacktrace/depth_first.h" +#include "Containers/Forest/forest.h" +#include 
"Containers/Forest/depth_first.h" #include -using namespace arangodb::async_registry; +using namespace arangodb::containers; -TEST(AsyncRegistryStacktraceTest, insert_nodes_into_forest) { +TEST(ForestTest, insert_nodes_into_forest) { Forest forest; forest.insert((void*)32, (void*)1, "first"); @@ -45,7 +45,7 @@ TEST(AsyncRegistryStacktraceTest, insert_nodes_into_forest) { ASSERT_EQ(forest.node((void*)1), std::nullopt); } -TEST(AsyncRegistryStacktraceTest, index_forest) { +TEST(ForestTest, index_forest) { Forest forest; forest.insert((void*)1, (void*)2, "first"); forest.insert((void*)2, (void*)4, "second"); @@ -66,7 +66,7 @@ TEST(AsyncRegistryStacktraceTest, index_forest) { ASSERT_EQ(forest, (Forest{{}, {}, {}})); } -TEST(AsyncRegistryStacktraceTest, executes_post_ordered_depth_first) { +TEST(ForestTest, executes_post_ordered_depth_first) { Forest forest; forest.insert((void*)1, (void*)0, "root"); forest.insert((void*)2, (void*)1, "node"); From ee7deacae359706c8c0fc788be782e580d926c89 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Thu, 8 May 2025 12:13:52 +0200 Subject: [PATCH 26/36] Workaround: Print bare task registry in REST handler --- arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp b/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp index 4a9fd255cd46..b1b32b5137ae 100644 --- a/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp +++ b/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp @@ -190,7 +190,11 @@ auto RestHandler::executeAsync() -> futures::Future { auto lock_guard = co_await _feature.asyncLock(); // do actual work - auto promises = all_undeleted_promises().index_by_awaitee(); - generateResult(rest::ResponseCode::OK, getStacktraceData(promises).slice()); + VPackBuilder builder; + builder.openArray(); + registry.for_node( + [&](TaskSnapshot task) { velocypack::serialize(builder, task); }); + builder.close(); + 
generateResult(rest::ResponseCode::OK, builder.slice()); co_return; } From f906ef66b283e4be65034198f5e11f3f6fa88e93 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Thu, 8 May 2025 23:24:01 +0200 Subject: [PATCH 27/36] Fix bad alloc by moving entry struct into namespace --- .../AsyncRegistry/RestHandler.cpp | 13 +++++---- .../TaskMonitoring/RestHandler.cpp | 27 ++++++++----------- lib/Containers/Forest/depth_first.h | 6 ++--- lib/Containers/Forest/forest.h | 10 +++---- 4 files changed, 25 insertions(+), 31 deletions(-) diff --git a/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp b/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp index 0b4f2f096a8f..2f9f6bffe034 100644 --- a/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp +++ b/arangod/SystemMonitor/AsyncRegistry/RestHandler.cpp @@ -44,6 +44,12 @@ using namespace arangodb; using namespace arangodb::async_registry; using namespace arangodb::containers; +RestHandler::RestHandler(ArangodServer& server, GeneralRequest* request, + GeneralResponse* response) + : RestVocbaseBaseHandler(server, request, response), + _feature(server.getFeature()) {} + +namespace { struct Entry { TreeHierarchy hierarchy; PromiseSnapshot data; @@ -53,13 +59,6 @@ auto inspect(Inspector& f, Entry& x) { return f.object(x).fields(f.field("hierarchy", x.hierarchy), f.field("data", x.data)); } - -RestHandler::RestHandler(ArangodServer& server, GeneralRequest* request, - GeneralResponse* response) - : RestVocbaseBaseHandler(server, request, response), - _feature(server.getFeature()) {} - -namespace { /** Creates a forest of all promises in the async registry diff --git a/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp b/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp index b1b32b5137ae..8715f24b09a2 100644 --- a/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp +++ b/arangod/SystemMonitor/TaskMonitoring/RestHandler.cpp @@ -43,6 +43,12 @@ using namespace arangodb; using namespace arangodb::task_monitoring; using namespace 
arangodb::containers; +RestHandler::RestHandler(ArangodServer& server, GeneralRequest* request, + GeneralResponse* response) + : RestVocbaseBaseHandler(server, request, response), + _feature(server.getFeature()) {} + +namespace { struct Entry { TreeHierarchy hierarchy; TaskSnapshot data; @@ -52,13 +58,6 @@ auto inspect(Inspector& f, Entry& x) { return f.object(x).fields(f.field("hierarchy", x.hierarchy), f.field("data", x.data)); } - -RestHandler::RestHandler(ArangodServer& server, GeneralRequest* request, - GeneralResponse* response) - : RestVocbaseBaseHandler(server, request, response), - _feature(server.getFeature()) {} - -namespace { /** Creates a forest of all current tasks @@ -66,13 +65,13 @@ namespace { larger hierarchy task. **/ auto all_undeleted_promises() -> ForestWithRoots { - Forest forest; + auto forest = Forest{}; std::vector roots; registry.for_node([&](TaskSnapshot task) { // if (promise.state != State::Deleted) { std::visit(overloaded{ - [&](TaskIdWrapper task_id) { - forest.insert(task.id, task_id.id, task); + [&](TaskIdWrapper parent) { + forest.insert(task.id, parent.id, task); }, [&](RootTask root) { forest.insert(task.id, nullptr, task); @@ -190,11 +189,7 @@ auto RestHandler::executeAsync() -> futures::Future { auto lock_guard = co_await _feature.asyncLock(); // do actual work - VPackBuilder builder; - builder.openArray(); - registry.for_node( - [&](TaskSnapshot task) { velocypack::serialize(builder, task); }); - builder.close(); - generateResult(rest::ResponseCode::OK, builder.slice()); + auto promises = all_undeleted_promises().index_by_awaitee(); + generateResult(rest::ResponseCode::OK, getStacktraceData(promises).slice()); co_return; } diff --git a/lib/Containers/Forest/depth_first.h b/lib/Containers/Forest/depth_first.h index 69891bd60ee3..3aca3b71aa62 100644 --- a/lib/Containers/Forest/depth_first.h +++ b/lib/Containers/Forest/depth_first.h @@ -68,9 +68,9 @@ struct DFS_PostOrder { return next(); } - Forest const& _forest; - const Id 
_start; - std::stack> _stack; + Forest const& _forest = {}; + const Id _start = nullptr; + std::stack> _stack = {}; }; } // namespace arangodb::containers diff --git a/lib/Containers/Forest/forest.h b/lib/Containers/Forest/forest.h index b082fc0ed8f3..04830d501226 100644 --- a/lib/Containers/Forest/forest.h +++ b/lib/Containers/Forest/forest.h @@ -68,11 +68,11 @@ struct Forest { bool operator==(Forest const&) const = default; - std::vector _parent; // has one entry for each node - std::vector _node; // has one entry for each node - std::unordered_map - _position; // at which position of the vectors _waiter and _data to find - // entries for Id + std::vector _parent = {}; // has one entry for each node + std::vector _node = {}; // has one entry for each node + // at which position of the vectors _parent and _node to find + // entries for Id + std::unordered_map _position = {}; }; /** From fd1de2ffe407bfbd5e82c6e2bebfc70ee22fa944 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 9 May 2025 09:10:52 +0200 Subject: [PATCH 28/36] Instrumented Database Creation and UpgradeTasks --- arangod/RestHandler/RestDatabaseHandler.cpp | 4 ++++ arangod/VocBase/Methods/Databases.cpp | 8 ++++++++ arangod/VocBase/Methods/UpgradeTasks.cpp | 8 ++++++++ 3 files changed, 20 insertions(+) diff --git a/arangod/RestHandler/RestDatabaseHandler.cpp b/arangod/RestHandler/RestDatabaseHandler.cpp index e478ee42ed1e..d812baa374a4 100644 --- a/arangod/RestHandler/RestDatabaseHandler.cpp +++ b/arangod/RestHandler/RestDatabaseHandler.cpp @@ -29,6 +29,7 @@ #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" #include "Cluster/ServerState.h" +#include "TaskMonitoring/task.h" #include "Utils/Events.h" #include "VocBase/Methods/Databases.h" @@ -48,10 +49,13 @@ RestStatus RestDatabaseHandler::execute() { // extract the request type rest::RequestType const type = _request->requestType(); if (type == rest::RequestType::GET) { + auto task = task_monitoring::Task{"Request: List
Databases"}; return getDatabases(); } else if (type == rest::RequestType::POST) { + auto task = task_monitoring::Task{"Request: Create Database"}; return createDatabase(); } else if (type == rest::RequestType::DELETE_REQ) { + auto task = task_monitoring::Task{"Request: Delete Database"}; return deleteDatabase(); } else { generateError(rest::ResponseCode::METHOD_NOT_ALLOWED, diff --git a/arangod/VocBase/Methods/Databases.cpp b/arangod/VocBase/Methods/Databases.cpp index 43661bef56a4..2f1abd4bdd8a 100644 --- a/arangod/VocBase/Methods/Databases.cpp +++ b/arangod/VocBase/Methods/Databases.cpp @@ -69,12 +69,15 @@ #include #include +#include "TaskMonitoring/task.h" + using namespace arangodb; using namespace arangodb::methods; using namespace arangodb::velocypack; std::vector Databases::list(ArangodServer& server, std::string const& user) { + auto task = task_monitoring::Task{"Collect List of Databases"}; if (!server.hasFeature()) { return std::vector(); } @@ -95,6 +98,7 @@ std::vector Databases::list(ArangodServer& server, } Result Databases::info(TRI_vocbase_t* vocbase, velocypack::Builder& result) { + auto task = task_monitoring::Task{"Collect Database information for " + vocbase->name()}; if (ServerState::instance()->isCoordinator()) { auto& cache = vocbase->server().getFeature().agencyCache(); auto [acb, idx] = cache.read(std::vector{ @@ -186,6 +190,8 @@ Result Databases::grantCurrentUser(CreateDatabaseInfo const& info, // Create database on cluster; Result Databases::createCoordinator(CreateDatabaseInfo const& info) { + auto task = task_monitoring::Task{"Create Database " + info.getName() + " on Coordinator"}; + // TODO: Add status strings to task for phases. 
TRI_ASSERT(ServerState::instance()->isCoordinator()); DatabaseFeature& databaseFeature = @@ -361,6 +367,7 @@ Result Databases::createOther(CreateDatabaseInfo const& info) { Result Databases::create(ArangodServer& server, ExecContext const& exec, std::string const& dbName, velocypack::Slice users, velocypack::Slice options) { + auto task = task_monitoring::Task{"Create Database: " + dbName}; Result res = basics::catchToResult([&]() { Result res; @@ -506,6 +513,7 @@ ErrorCode dropDBCoordinator(DatabaseFeature& df, std::string const& dbName) { Result Databases::drop(ExecContext const& exec, TRI_vocbase_t* systemVocbase, std::string const& dbName) { + auto task = task_monitoring::Task{"Drop Database: " + dbName}; TRI_ASSERT(systemVocbase->isSystem()); if (exec.systemAuthLevel() != auth::Level::RW) { events::DropDatabase(dbName, Result(TRI_ERROR_FORBIDDEN), exec); diff --git a/arangod/VocBase/Methods/UpgradeTasks.cpp b/arangod/VocBase/Methods/UpgradeTasks.cpp index 1c0fb2e47f7c..9e6df6f08484 100644 --- a/arangod/VocBase/Methods/UpgradeTasks.cpp +++ b/arangod/VocBase/Methods/UpgradeTasks.cpp @@ -55,6 +55,7 @@ #include "VocBase/vocbase.h" #include +#include "TaskMonitoring/include/TaskMonitoring/task.h" using namespace arangodb; using namespace arangodb::methods; @@ -391,6 +392,7 @@ Result createSystemStatisticsIndices( Result createSystemCollectionsIndices( TRI_vocbase_t& vocbase, std::vector>& collections) { + auto task = task_monitoring::Task{"Create System Collections Indices for " + vocbase.name()}; Result res; if (vocbase.isSystem()) { res = ::createIndex(StaticStrings::UsersCollection, @@ -439,6 +441,7 @@ Result createSystemCollectionsIndices( Result UpgradeTasks::createSystemCollectionsAndIndices( TRI_vocbase_t& vocbase, velocypack::Slice slice) { + auto task = task_monitoring::Task{"Create System Collections for " + vocbase.name()}; // after the call to ::createSystemCollections this vector should contain // a LogicalCollection for *every* (required) system 
collection. std::vector> presentSystemCollections; @@ -478,6 +481,7 @@ Result UpgradeTasks::createSystemCollectionsAndIndices( Result UpgradeTasks::createStatisticsCollectionsAndIndices( TRI_vocbase_t& vocbase, velocypack::Slice slice) { + auto task = task_monitoring::Task{"Create Statistics Collections for " + vocbase.name()}; // This vector should after the call to ::createSystemCollections contain // a LogicalCollection for *every* (required) system collection. std::vector> presentSystemCollections; @@ -507,6 +511,7 @@ Result UpgradeTasks::createStatisticsCollectionsAndIndices( //////////////////////////////////////////////////////////////////////////////// Result UpgradeTasks::dropLegacyAnalyzersCollection( TRI_vocbase_t& vocbase, velocypack::Slice /*upgradeParams*/) { + auto task = task_monitoring::Task{"Drop Legacy Analyzers for " + vocbase.name()}; // drop legacy collection if upgrading the system vocbase and collection found #ifdef ARANGODB_ENABLE_MAINTAINER_MODE if (!vocbase.server().hasFeature()) { @@ -541,6 +546,7 @@ Result UpgradeTasks::dropLegacyAnalyzersCollection( Result UpgradeTasks::addDefaultUserOther(TRI_vocbase_t& vocbase, velocypack::Slice params) { + auto task = task_monitoring::Task{"Add Default User for " + vocbase.name()}; TRI_ASSERT(!vocbase.isSystem()); TRI_ASSERT(params.isObject()); @@ -598,6 +604,7 @@ Result UpgradeTasks::addDefaultUserOther(TRI_vocbase_t& vocbase, Result UpgradeTasks::renameReplicationApplierStateFiles( TRI_vocbase_t& vocbase, velocypack::Slice slice) { + auto task = task_monitoring::Task{"Rename Replication Applier Files " + vocbase.name()}; std::string const path = vocbase.engine().databasePath(); std::string const source = arangodb::basics::FileUtils::buildFilename( @@ -634,6 +641,7 @@ Result UpgradeTasks::renameReplicationApplierStateFiles( Result UpgradeTasks::dropPregelQueriesCollection( TRI_vocbase_t& vocbase, velocypack::Slice /*upgradeParams*/) { + auto task = task_monitoring::Task{"Drop Pregel Queries 
Collection for " + vocbase.name()}; std::shared_ptr col; auto res = arangodb::methods::Collections::lookup(vocbase, "_pregel_queries", col); From 3e01a6e46085c78dee2e01e0bd69987ed11a2405 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Fri, 9 May 2025 10:07:39 +0200 Subject: [PATCH 29/36] Refactored MaintenanceActions to use a separate folder. --- arangod/Cluster/Maintenance.cpp | 2 +- .../{ => MaintenanceActions}/Action.cpp | 22 +++++++-------- .../Cluster/{ => MaintenanceActions}/Action.h | 0 .../{ => MaintenanceActions}/ActionBase.cpp | 2 +- .../{ => MaintenanceActions}/ActionBase.h | 0 .../ActionDescription.cpp | 0 .../ActionDescription.h | 0 .../CreateCollection.cpp | 0 .../CreateCollection.h | 0 .../CreateDatabase.cpp | 0 .../{ => MaintenanceActions}/CreateDatabase.h | 0 .../DropCollection.cpp | 0 .../{ => MaintenanceActions}/DropCollection.h | 0 .../{ => MaintenanceActions}/DropDatabase.cpp | 0 .../{ => MaintenanceActions}/DropDatabase.h | 0 .../{ => MaintenanceActions}/DropIndex.cpp | 0 .../{ => MaintenanceActions}/DropIndex.h | 0 .../{ => MaintenanceActions}/EnsureIndex.cpp | 0 .../{ => MaintenanceActions}/EnsureIndex.h | 0 .../ResignShardLeadership.cpp | 0 .../ResignShardLeadership.h | 0 .../SynchronizeShard.cpp | 4 +-- .../SynchronizeShard.h | 4 +-- .../TakeoverShardLeadership.cpp | 0 .../TakeoverShardLeadership.h | 0 .../UpdateCollection.cpp | 0 .../UpdateCollection.h | 0 .../UpdateReplicatedLogAction.cpp | 0 .../UpdateReplicatedLogAction.h | 2 +- arangod/Cluster/MaintenanceFeature.cpp | 6 ++-- arangod/Cluster/MaintenanceFeature.h | 2 +- arangod/Cluster/MaintenanceRestHandler.h | 2 +- arangod/Cluster/MaintenanceWorker.h | 2 +- .../Document/MaintenanceActionExecutor.cpp | 6 ++-- arangod/RestHandler/RestCollectionHandler.cpp | 2 +- .../RestHandler/RestReplicationHandler.cpp | 2 +- arangod/arangoserver.cmake | 28 +++++++++---------- tests/Maintenance/MaintenanceFeatureTest.cpp | 2 +- tests/Maintenance/MaintenanceTest.cpp | 2 +-
tests/Mocks/Servers.cpp | 8 +++--- 40 files changed, 49 insertions(+), 49 deletions(-) rename arangod/Cluster/{ => MaintenanceActions}/Action.cpp (91%) rename arangod/Cluster/{ => MaintenanceActions}/Action.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/ActionBase.cpp (99%) rename arangod/Cluster/{ => MaintenanceActions}/ActionBase.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/ActionDescription.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/ActionDescription.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/CreateCollection.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/CreateCollection.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/CreateDatabase.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/CreateDatabase.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/DropCollection.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/DropCollection.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/DropDatabase.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/DropDatabase.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/DropIndex.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/DropIndex.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/EnsureIndex.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/EnsureIndex.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/ResignShardLeadership.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/ResignShardLeadership.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/SynchronizeShard.cpp (99%) rename arangod/Cluster/{ => MaintenanceActions}/SynchronizeShard.h (97%) rename arangod/Cluster/{ => MaintenanceActions}/TakeoverShardLeadership.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/TakeoverShardLeadership.h (100%) rename arangod/Cluster/{ => MaintenanceActions}/UpdateCollection.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/UpdateCollection.h (100%) rename 
arangod/Cluster/{ => MaintenanceActions}/UpdateReplicatedLogAction.cpp (100%) rename arangod/Cluster/{ => MaintenanceActions}/UpdateReplicatedLogAction.h (95%) diff --git a/arangod/Cluster/Maintenance.cpp b/arangod/Cluster/Maintenance.cpp index f03a4b60959a..68378610ed64 100644 --- a/arangod/Cluster/Maintenance.cpp +++ b/arangod/Cluster/Maintenance.cpp @@ -34,7 +34,7 @@ #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" #include "Cluster/FollowerInfo.h" -#include "Cluster/ResignShardLeadership.h" +#include "Cluster/MaintenanceActions/ResignShardLeadership.h" #include "Indexes/Index.h" #include "Inspection/VPack.h" #include "IResearch/IResearchCommon.h" diff --git a/arangod/Cluster/Action.cpp b/arangod/Cluster/MaintenanceActions/Action.cpp similarity index 91% rename from arangod/Cluster/Action.cpp rename to arangod/Cluster/MaintenanceActions/Action.cpp index b274a622592a..b82024a417e7 100644 --- a/arangod/Cluster/Action.cpp +++ b/arangod/Cluster/MaintenanceActions/Action.cpp @@ -25,18 +25,18 @@ #include "Action.h" #include "Basics/Exceptions.h" -#include "Cluster/CreateCollection.h" -#include "Cluster/CreateDatabase.h" -#include "Cluster/DropCollection.h" -#include "Cluster/DropDatabase.h" -#include "Cluster/DropIndex.h" -#include "Cluster/EnsureIndex.h" +#include "Cluster/MaintenanceActions/CreateCollection.h" +#include "Cluster/MaintenanceActions/CreateDatabase.h" +#include "Cluster/MaintenanceActions/DropCollection.h" +#include "Cluster/MaintenanceActions/DropDatabase.h" +#include "Cluster/MaintenanceActions/DropIndex.h" +#include "Cluster/MaintenanceActions/EnsureIndex.h" #include "Cluster/MaintenanceStrings.h" -#include "Cluster/ResignShardLeadership.h" -#include "Cluster/SynchronizeShard.h" -#include "Cluster/TakeoverShardLeadership.h" -#include "Cluster/UpdateCollection.h" -#include "Cluster/UpdateReplicatedLogAction.h" +#include "Cluster/MaintenanceActions/ResignShardLeadership.h" +#include "Cluster/MaintenanceActions/SynchronizeShard.h" 
+#include "Cluster/MaintenanceActions/TakeoverShardLeadership.h" +#include "Cluster/MaintenanceActions/UpdateCollection.h" +#include "Cluster/MaintenanceActions/UpdateReplicatedLogAction.h" using namespace arangodb; using namespace arangodb::maintenance; diff --git a/arangod/Cluster/Action.h b/arangod/Cluster/MaintenanceActions/Action.h similarity index 100% rename from arangod/Cluster/Action.h rename to arangod/Cluster/MaintenanceActions/Action.h diff --git a/arangod/Cluster/ActionBase.cpp b/arangod/Cluster/MaintenanceActions/ActionBase.cpp similarity index 99% rename from arangod/Cluster/ActionBase.cpp rename to arangod/Cluster/MaintenanceActions/ActionBase.cpp index ce8018d1e995..11ce40b8b8eb 100644 --- a/arangod/Cluster/ActionBase.cpp +++ b/arangod/Cluster/MaintenanceActions/ActionBase.cpp @@ -22,7 +22,7 @@ /// @author Matthew Von-Maszewski //////////////////////////////////////////////////////////////////////////////// -#include "Cluster/ActionBase.h" +#include "Cluster/MaintenanceActions/ActionBase.h" #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/TimeString.h" diff --git a/arangod/Cluster/ActionBase.h b/arangod/Cluster/MaintenanceActions/ActionBase.h similarity index 100% rename from arangod/Cluster/ActionBase.h rename to arangod/Cluster/MaintenanceActions/ActionBase.h diff --git a/arangod/Cluster/ActionDescription.cpp b/arangod/Cluster/MaintenanceActions/ActionDescription.cpp similarity index 100% rename from arangod/Cluster/ActionDescription.cpp rename to arangod/Cluster/MaintenanceActions/ActionDescription.cpp diff --git a/arangod/Cluster/ActionDescription.h b/arangod/Cluster/MaintenanceActions/ActionDescription.h similarity index 100% rename from arangod/Cluster/ActionDescription.h rename to arangod/Cluster/MaintenanceActions/ActionDescription.h diff --git a/arangod/Cluster/CreateCollection.cpp b/arangod/Cluster/MaintenanceActions/CreateCollection.cpp similarity index 100% rename from arangod/Cluster/CreateCollection.cpp rename to 
arangod/Cluster/MaintenanceActions/CreateCollection.cpp diff --git a/arangod/Cluster/CreateCollection.h b/arangod/Cluster/MaintenanceActions/CreateCollection.h similarity index 100% rename from arangod/Cluster/CreateCollection.h rename to arangod/Cluster/MaintenanceActions/CreateCollection.h diff --git a/arangod/Cluster/CreateDatabase.cpp b/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp similarity index 100% rename from arangod/Cluster/CreateDatabase.cpp rename to arangod/Cluster/MaintenanceActions/CreateDatabase.cpp diff --git a/arangod/Cluster/CreateDatabase.h b/arangod/Cluster/MaintenanceActions/CreateDatabase.h similarity index 100% rename from arangod/Cluster/CreateDatabase.h rename to arangod/Cluster/MaintenanceActions/CreateDatabase.h diff --git a/arangod/Cluster/DropCollection.cpp b/arangod/Cluster/MaintenanceActions/DropCollection.cpp similarity index 100% rename from arangod/Cluster/DropCollection.cpp rename to arangod/Cluster/MaintenanceActions/DropCollection.cpp diff --git a/arangod/Cluster/DropCollection.h b/arangod/Cluster/MaintenanceActions/DropCollection.h similarity index 100% rename from arangod/Cluster/DropCollection.h rename to arangod/Cluster/MaintenanceActions/DropCollection.h diff --git a/arangod/Cluster/DropDatabase.cpp b/arangod/Cluster/MaintenanceActions/DropDatabase.cpp similarity index 100% rename from arangod/Cluster/DropDatabase.cpp rename to arangod/Cluster/MaintenanceActions/DropDatabase.cpp diff --git a/arangod/Cluster/DropDatabase.h b/arangod/Cluster/MaintenanceActions/DropDatabase.h similarity index 100% rename from arangod/Cluster/DropDatabase.h rename to arangod/Cluster/MaintenanceActions/DropDatabase.h diff --git a/arangod/Cluster/DropIndex.cpp b/arangod/Cluster/MaintenanceActions/DropIndex.cpp similarity index 100% rename from arangod/Cluster/DropIndex.cpp rename to arangod/Cluster/MaintenanceActions/DropIndex.cpp diff --git a/arangod/Cluster/DropIndex.h b/arangod/Cluster/MaintenanceActions/DropIndex.h similarity index 
100% rename from arangod/Cluster/DropIndex.h rename to arangod/Cluster/MaintenanceActions/DropIndex.h diff --git a/arangod/Cluster/EnsureIndex.cpp b/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp similarity index 100% rename from arangod/Cluster/EnsureIndex.cpp rename to arangod/Cluster/MaintenanceActions/EnsureIndex.cpp diff --git a/arangod/Cluster/EnsureIndex.h b/arangod/Cluster/MaintenanceActions/EnsureIndex.h similarity index 100% rename from arangod/Cluster/EnsureIndex.h rename to arangod/Cluster/MaintenanceActions/EnsureIndex.h diff --git a/arangod/Cluster/ResignShardLeadership.cpp b/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp similarity index 100% rename from arangod/Cluster/ResignShardLeadership.cpp rename to arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp diff --git a/arangod/Cluster/ResignShardLeadership.h b/arangod/Cluster/MaintenanceActions/ResignShardLeadership.h similarity index 100% rename from arangod/Cluster/ResignShardLeadership.h rename to arangod/Cluster/MaintenanceActions/ResignShardLeadership.h diff --git a/arangod/Cluster/SynchronizeShard.cpp b/arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp similarity index 99% rename from arangod/Cluster/SynchronizeShard.cpp rename to arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp index 51641892c66c..b3e99cde21cd 100644 --- a/arangod/Cluster/SynchronizeShard.cpp +++ b/arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp @@ -33,7 +33,7 @@ #include "Basics/TimeString.h" #include "Basics/VelocyPackHelper.h" #include "Basics/debugging.h" -#include "Cluster/ActionDescription.h" +#include "Cluster/MaintenanceActions/ActionDescription.h" #include "Cluster/AgencyCache.h" #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" @@ -41,7 +41,7 @@ #include "Cluster/FollowerInfo.h" #include "Cluster/Maintenance.h" #include "Cluster/MaintenanceFeature.h" -#include "Cluster/ResignShardLeadership.h" +#include 
"Cluster/MaintenanceActions/ResignShardLeadership.h" #include "Cluster/ReplicationTimeoutFeature.h" #include "Cluster/ServerState.h" #include "GeneralServer/AuthenticationFeature.h" diff --git a/arangod/Cluster/SynchronizeShard.h b/arangod/Cluster/MaintenanceActions/SynchronizeShard.h similarity index 97% rename from arangod/Cluster/SynchronizeShard.h rename to arangod/Cluster/MaintenanceActions/SynchronizeShard.h index b430981427c6..8cd2ba0ba267 100644 --- a/arangod/Cluster/SynchronizeShard.h +++ b/arangod/Cluster/MaintenanceActions/SynchronizeShard.h @@ -25,8 +25,8 @@ #pragma once #include "Basics/ResultT.h" -#include "Cluster/ActionBase.h" -#include "Cluster/ActionDescription.h" +#include "Cluster/MaintenanceActions/ActionBase.h" +#include "Cluster/MaintenanceActions/ActionDescription.h" #include "Replication/utilities.h" #include "VocBase/voc-types.h" diff --git a/arangod/Cluster/TakeoverShardLeadership.cpp b/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp similarity index 100% rename from arangod/Cluster/TakeoverShardLeadership.cpp rename to arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp diff --git a/arangod/Cluster/TakeoverShardLeadership.h b/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.h similarity index 100% rename from arangod/Cluster/TakeoverShardLeadership.h rename to arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.h diff --git a/arangod/Cluster/UpdateCollection.cpp b/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp similarity index 100% rename from arangod/Cluster/UpdateCollection.cpp rename to arangod/Cluster/MaintenanceActions/UpdateCollection.cpp diff --git a/arangod/Cluster/UpdateCollection.h b/arangod/Cluster/MaintenanceActions/UpdateCollection.h similarity index 100% rename from arangod/Cluster/UpdateCollection.h rename to arangod/Cluster/MaintenanceActions/UpdateCollection.h diff --git a/arangod/Cluster/UpdateReplicatedLogAction.cpp 
b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp similarity index 100% rename from arangod/Cluster/UpdateReplicatedLogAction.cpp rename to arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp diff --git a/arangod/Cluster/UpdateReplicatedLogAction.h b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.h similarity index 95% rename from arangod/Cluster/UpdateReplicatedLogAction.h rename to arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.h index 035504c83054..1f9346a65d9e 100644 --- a/arangod/Cluster/UpdateReplicatedLogAction.h +++ b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.h @@ -23,7 +23,7 @@ #pragma once -#include "Cluster/ActionBase.h" +#include "Cluster/MaintenanceActions/ActionBase.h" namespace arangodb::maintenance { diff --git a/arangod/Cluster/MaintenanceFeature.cpp b/arangod/Cluster/MaintenanceFeature.cpp index 0597462db89a..c43759f2d905 100644 --- a/arangod/Cluster/MaintenanceFeature.cpp +++ b/arangod/Cluster/MaintenanceFeature.cpp @@ -42,12 +42,12 @@ #include "Basics/TimeString.h" #include "Basics/WriteLocker.h" #include "Basics/system-functions.h" -#include "Cluster/Action.h" -#include "Cluster/ActionDescription.h" +#include "Cluster/MaintenanceActions/Action.h" +#include "Cluster/MaintenanceActions/ActionDescription.h" #include "Cluster/AgencyCache.h" #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" -#include "Cluster/CreateDatabase.h" +#include "Cluster/MaintenanceActions/CreateDatabase.h" #include "Cluster/MaintenanceWorker.h" #include "Cluster/ServerState.h" #include "Logger/LogMacros.h" diff --git a/arangod/Cluster/MaintenanceFeature.h b/arangod/Cluster/MaintenanceFeature.h index a8ba7ce0e23e..51a4a214bed8 100644 --- a/arangod/Cluster/MaintenanceFeature.h +++ b/arangod/Cluster/MaintenanceFeature.h @@ -26,7 +26,7 @@ #include "Basics/ConditionVariable.h" #include "Basics/Result.h" -#include "Cluster/Action.h" +#include "Cluster/MaintenanceActions/Action.h" 
#include "Cluster/MaintenanceWorker.h" #include "Cluster/Utils/ShardID.h" #include "ProgramOptions/ProgramOptions.h" diff --git a/arangod/Cluster/MaintenanceRestHandler.h b/arangod/Cluster/MaintenanceRestHandler.h index 5d13893f068c..b622c93e5edf 100644 --- a/arangod/Cluster/MaintenanceRestHandler.h +++ b/arangod/Cluster/MaintenanceRestHandler.h @@ -24,7 +24,7 @@ #pragma once -#include "Cluster/Action.h" +#include "Cluster/MaintenanceActions/Action.h" #include "RestHandler/RestBaseHandler.h" namespace arangodb { diff --git a/arangod/Cluster/MaintenanceWorker.h b/arangod/Cluster/MaintenanceWorker.h index 1a6bb3e197c1..ffb84a2f831d 100644 --- a/arangod/Cluster/MaintenanceWorker.h +++ b/arangod/Cluster/MaintenanceWorker.h @@ -25,7 +25,7 @@ #pragma once #include "Basics/Thread.h" -#include "Cluster/Action.h" +#include "Cluster/MaintenanceActions/Action.h" namespace arangodb { diff --git a/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp b/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp index 0ef784a30f93..a22ff21e52a5 100644 --- a/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp +++ b/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp @@ -23,9 +23,9 @@ #include "Replication2/StateMachines/Document/MaintenanceActionExecutor.h" -#include "Cluster/ActionDescription.h" -#include "Cluster/CreateCollection.h" -#include "Cluster/EnsureIndex.h" +#include "Cluster/MaintenanceActions/ActionDescription.h" +#include "Cluster/MaintenanceActions/CreateCollection.h" +#include "Cluster/MaintenanceActions/EnsureIndex.h" #include "Cluster/Maintenance.h" #include "Logger/LogMacros.h" #include "VocBase/Methods/Collections.h" diff --git a/arangod/RestHandler/RestCollectionHandler.cpp b/arangod/RestHandler/RestCollectionHandler.cpp index 704ab3c89bb3..26efab253491 100644 --- a/arangod/RestHandler/RestCollectionHandler.cpp +++ b/arangod/RestHandler/RestCollectionHandler.cpp @@ -25,7 +25,7 @@ 
#include "Async/async.h" #include "ApplicationFeatures/ApplicationServer.h" -#include "Cluster/ActionDescription.h" +#include "Cluster/MaintenanceActions/ActionDescription.h" #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" #include "Cluster/ClusterMethods.h" diff --git a/arangod/RestHandler/RestReplicationHandler.cpp b/arangod/RestHandler/RestReplicationHandler.cpp index 95d6777bec81..67da2043d7b8 100644 --- a/arangod/RestHandler/RestReplicationHandler.cpp +++ b/arangod/RestHandler/RestReplicationHandler.cpp @@ -42,7 +42,7 @@ #include "Cluster/CollectionInfoCurrent.h" #include "Cluster/FollowerInfo.h" #include "Cluster/RebootTracker.h" -#include "Cluster/ResignShardLeadership.h" +#include "Cluster/MaintenanceActions/ResignShardLeadership.h" #include "Cluster/ServerState.h" #include "Containers/HashSet.h" #include "Containers/MerkleTree.h" diff --git a/arangod/arangoserver.cmake b/arangod/arangoserver.cmake index 56254ef10bc7..a6256ddd037f 100644 --- a/arangod/arangoserver.cmake +++ b/arangod/arangoserver.cmake @@ -6,9 +6,9 @@ add_library(arangoserver STATIC Auth/TokenCache.cpp Auth/User.cpp Auth/UserManager.cpp - Cluster/Action.cpp - Cluster/ActionBase.cpp - Cluster/ActionDescription.cpp + Cluster/MaintenanceActions/Action.cpp + Cluster/MaintenanceActions/ActionBase.cpp + Cluster/MaintenanceActions/ActionDescription.cpp Cluster/AgencyCache.cpp Cluster/AgencyCallback.cpp Cluster/AgencyCallbackRegistry.cpp @@ -21,13 +21,13 @@ add_library(arangoserver STATIC Cluster/ClusterTypes.cpp Cluster/ClusterUpgradeFeature.cpp Cluster/CollectionInfoCurrent.cpp - Cluster/CreateCollection.cpp - Cluster/CreateDatabase.cpp + Cluster/MaintenanceActions/CreateCollection.cpp + Cluster/MaintenanceActions/CreateDatabase.cpp Cluster/DBServerAgencySync.cpp - Cluster/DropCollection.cpp - Cluster/DropDatabase.cpp - Cluster/DropIndex.cpp - Cluster/EnsureIndex.cpp + Cluster/MaintenanceActions/DropCollection.cpp + Cluster/MaintenanceActions/DropDatabase.cpp + 
Cluster/MaintenanceActions/DropIndex.cpp + Cluster/MaintenanceActions/EnsureIndex.cpp Cluster/FollowerInfo.cpp Cluster/HeartbeatThread.cpp Cluster/Maintenance.cpp @@ -36,15 +36,15 @@ add_library(arangoserver STATIC Cluster/MaintenanceWorker.cpp Cluster/RebootTracker.cpp Cluster/ReplicationTimeoutFeature.cpp - Cluster/ResignShardLeadership.cpp + Cluster/MaintenanceActions/ResignShardLeadership.cpp Cluster/RestAgencyCallbacksHandler.cpp Cluster/RestClusterHandler.cpp Cluster/ServerDefaults.cpp Cluster/ServerState.cpp - Cluster/SynchronizeShard.cpp - Cluster/TakeoverShardLeadership.cpp - Cluster/UpdateCollection.cpp - Cluster/UpdateReplicatedLogAction.cpp + Cluster/MaintenanceActions/SynchronizeShard.cpp + Cluster/MaintenanceActions/TakeoverShardLeadership.cpp + Cluster/MaintenanceActions/UpdateCollection.cpp + Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp FeaturePhases/AgencyFeaturePhase.cpp FeaturePhases/AqlFeaturePhase.cpp FeaturePhases/BasicFeaturePhaseServer.cpp diff --git a/tests/Maintenance/MaintenanceFeatureTest.cpp b/tests/Maintenance/MaintenanceFeatureTest.cpp index 1b258c9fa3d6..95241ee1e700 100644 --- a/tests/Maintenance/MaintenanceFeatureTest.cpp +++ b/tests/Maintenance/MaintenanceFeatureTest.cpp @@ -31,7 +31,7 @@ #include "ApplicationFeatures/GreetingsFeaturePhase.h" #include "Basics/Result.h" #include "Basics/ScopeGuard.h" -#include "Cluster/Action.h" +#include "Cluster/MaintenanceActions/Action.h" #include "Cluster/ClusterFeature.h" #include "Cluster/Maintenance.h" #include "Cluster/MaintenanceFeature.h" diff --git a/tests/Maintenance/MaintenanceTest.cpp b/tests/Maintenance/MaintenanceTest.cpp index 796f1271e1a6..48c62e0c9dda 100644 --- a/tests/Maintenance/MaintenanceTest.cpp +++ b/tests/Maintenance/MaintenanceTest.cpp @@ -35,7 +35,7 @@ #include "Basics/StaticStrings.h" #include "Cluster/Maintenance.h" #include "Cluster/MaintenanceFeature.h" -#include "Cluster/ResignShardLeadership.h" +#include 
"Cluster/MaintenanceActions/ResignShardLeadership.h" #include "Metrics/MetricsFeature.h" #include "Mocks/Servers.h" #include "Mocks/StorageEngineMock.h" diff --git a/tests/Mocks/Servers.cpp b/tests/Mocks/Servers.cpp index 1537ddf02223..2aec90020011 100644 --- a/tests/Mocks/Servers.cpp +++ b/tests/Mocks/Servers.cpp @@ -40,13 +40,13 @@ #include "Basics/StringUtils.h" #include "Basics/TimeString.h" #include "Basics/files.h" -#include "Cluster/ActionDescription.h" +#include "Cluster/MaintenanceActions/ActionDescription.h" #include "Cluster/AgencyCache.h" #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterInfo.h" -#include "Cluster/CreateCollection.h" -#include "Cluster/CreateDatabase.h" -#include "Cluster/DropDatabase.h" +#include "Cluster/MaintenanceActions/CreateCollection.h" +#include "Cluster/MaintenanceActions/CreateDatabase.h" +#include "Cluster/MaintenanceActions/DropDatabase.h" #include "Cluster/Maintenance.h" #include "ClusterEngine/ClusterEngine.h" #include "FeaturePhases/AqlFeaturePhase.h" From 7cb0c93bed4f3644541e639ff272d80b4162594f Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 12 May 2025 10:34:08 +0200 Subject: [PATCH 30/36] Instrumented Maintenance Actions --- arangod/Cluster/MaintenanceActions/CreateCollection.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/CreateDatabase.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/DropCollection.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/DropDatabase.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/DropIndex.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/EnsureIndex.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp | 1 + .../Cluster/MaintenanceActions/TakeoverShardLeadership.cpp | 4 ++++ arangod/Cluster/MaintenanceActions/UpdateCollection.cpp | 4 ++++ .../Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp | 4 ++++ .../StateMachines/Document/MaintenanceActionExecutor.cpp | 2 +- 12 files 
changed, 42 insertions(+), 1 deletion(-) diff --git a/arangod/Cluster/MaintenanceActions/CreateCollection.cpp b/arangod/Cluster/MaintenanceActions/CreateCollection.cpp index 0fd7fcde263a..0d686ba93155 100644 --- a/arangod/Cluster/MaintenanceActions/CreateCollection.cpp +++ b/arangod/Cluster/MaintenanceActions/CreateCollection.cpp @@ -44,6 +44,7 @@ #include "Replication2/ReplicatedState/ReplicatedState.h" #include "Replication2/StateMachines/Document/DocumentFollowerState.h" #include "Replication2/StateMachines/Document/DocumentLeaderState.h" +#include "TaskMonitoring/task.h" #include #include @@ -112,6 +113,9 @@ bool CreateCollection::first() { auto const& leader = _description.get(THE_LEADER); auto const& props = properties(); + // Add task monitoring + auto task = task_monitoring::Task{"CreateCollection for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + std::string from; _description.get("from", from); diff --git a/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp b/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp index 8a944aef491c..7ba54404d325 100644 --- a/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp +++ b/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp @@ -37,6 +37,7 @@ #include "Utils/DatabaseGuard.h" #include "Utils/OperationOptions.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" using namespace arangodb; using namespace arangodb::application_features; @@ -69,6 +70,9 @@ bool CreateDatabase::first() { VPackSlice users; auto database = _description.get(DATABASE); + // Add task monitoring + auto task = task_monitoring::Task{"CreateDatabase for DB: '" + database + "'"}; + LOG_TOPIC("953b1", DEBUG, Logger::MAINTENANCE) << "CreateDatabase: creating database " << database; diff --git a/arangod/Cluster/MaintenanceActions/DropCollection.cpp b/arangod/Cluster/MaintenanceActions/DropCollection.cpp index aa71020576b4..9578ab093716 100644 --- 
a/arangod/Cluster/MaintenanceActions/DropCollection.cpp +++ b/arangod/Cluster/MaintenanceActions/DropCollection.cpp @@ -40,6 +40,7 @@ #include "VocBase/Methods/Collections.h" #include "VocBase/Methods/Databases.h" #include "VocBase/vocbase.h" +#include "TaskMonitoring/task.h" using namespace arangodb; using namespace arangodb::application_features; @@ -69,6 +70,9 @@ bool DropCollection::first() { auto const& database = getDatabase(); auto const& shard = getShard(); + // Add task monitoring + auto task = task_monitoring::Task{"DropCollection for DB: '" + database + "', Shard: '" + shard + "'"}; + LOG_TOPIC("a2961", DEBUG, Logger::MAINTENANCE) << "DropCollection: dropping local shard '" << database << "/" << shard; diff --git a/arangod/Cluster/MaintenanceActions/DropDatabase.cpp b/arangod/Cluster/MaintenanceActions/DropDatabase.cpp index ce2ff1ba7131..ee69075fef4a 100644 --- a/arangod/Cluster/MaintenanceActions/DropDatabase.cpp +++ b/arangod/Cluster/MaintenanceActions/DropDatabase.cpp @@ -36,6 +36,7 @@ #include "Utils/ExecContext.h" #include "Utils/OperationOptions.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" using namespace arangodb::application_features; using namespace arangodb::methods; @@ -69,6 +70,9 @@ bool DropDatabase::first() { LOG_TOPIC("22779", DEBUG, Logger::MAINTENANCE) << "DropDatabase: dropping " << database; + // Add task monitoring + auto task = task_monitoring::Task{"DropDatabase for DB: '" + database + "'"}; + try { auto& df = _feature.server().getFeature(); DatabaseGuard guard(df, StaticStrings::SystemDatabase); diff --git a/arangod/Cluster/MaintenanceActions/DropIndex.cpp b/arangod/Cluster/MaintenanceActions/DropIndex.cpp index a70ac036ca95..05313fcc38e8 100644 --- a/arangod/Cluster/MaintenanceActions/DropIndex.cpp +++ b/arangod/Cluster/MaintenanceActions/DropIndex.cpp @@ -39,6 +39,7 @@ #include "VocBase/Methods/Collections.h" #include "VocBase/Methods/Databases.h" #include "VocBase/Methods/Indexes.h" +#include 
"TaskMonitoring/task.h" using namespace arangodb::application_features; using namespace arangodb::maintenance; @@ -90,6 +91,9 @@ bool DropIndex::first() { auto const& shard = _description.get(SHARD); auto const& id = _description.get(INDEX); + // Add task monitoring + auto task = task_monitoring::Task{"DropIndex for DB: '" + database + "', Shard: '" + shard + "', Index: '" + id + "'"}; + VPackBuilder index; index.add(VPackValue(_description.get(INDEX))); diff --git a/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp b/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp index 43007082df6d..226a014cfe37 100644 --- a/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp +++ b/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp @@ -40,6 +40,7 @@ #include "Utils/DatabaseGuard.h" #include "VocBase/LogicalCollection.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" using namespace arangodb; using namespace arangodb::application_features; @@ -114,6 +115,9 @@ bool EnsureIndex::first() { auto const& shard = _description.get(SHARD); auto const& id = properties().get(ID).copyString(); + // Add task monitoring + auto task = task_monitoring::Task{"EnsureIndex for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + try { // now try to guard the database auto& df = _feature.server().getFeature(); DatabaseGuard guard(df, database); diff --git a/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp b/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp index c390f071167c..78ed7217f166 100644 --- a/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp +++ b/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp @@ -41,6 +41,7 @@ #include "VocBase/LogicalCollection.h" #include "VocBase/Methods/Collections.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" #include #include @@ -80,6 +81,9 @@ bool ResignShardLeadership::first() { std::string const& database = getDatabase(); 
std::string const& collection = getShard(); + // Add task monitoring + auto task = task_monitoring::Task{"ResignShardLeadership for DB: '" + database + "', Shard: '" + collection + "'"}; + LOG_TOPIC("14f43", DEBUG, Logger::MAINTENANCE) << "trying to withdraw as leader of shard '" << database << "/" << collection; diff --git a/arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp b/arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp index b3e99cde21cd..0ba01f8a55da 100644 --- a/arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp +++ b/arangod/Cluster/MaintenanceActions/SynchronizeShard.cpp @@ -66,6 +66,7 @@ #include "VocBase/LogicalCollection.h" #include "VocBase/Methods/Collections.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" #include #include diff --git a/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp b/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp index 183a117ec6c0..35307d64a428 100644 --- a/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp +++ b/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp @@ -51,6 +51,7 @@ #include "VocBase/LogicalCollection.h" #include "VocBase/Methods/Collections.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" #include #include @@ -267,6 +268,9 @@ bool TakeoverShardLeadership::first() { uint64_t planIndex = basics::StringUtils::uint64(planRaftIndex); Result res; + // Add task monitoring + auto task = task_monitoring::Task{"TakeoverShardLeadership for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + try { auto& df = _feature.server().getFeature(); DatabaseGuard guard(df, database); diff --git a/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp b/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp index 9e22e43d8dbd..6600effbc0e8 100644 --- a/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp +++ b/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp @@ -43,6 
+43,7 @@ #include "VocBase/LogicalCollection.h" #include "VocBase/Methods/Collections.h" #include "VocBase/Methods/Databases.h" +#include "TaskMonitoring/task.h" using namespace arangodb; using namespace arangodb::application_features; @@ -89,6 +90,9 @@ bool UpdateCollection::first() { auto const& props = properties(); Result res; + // Add task monitoring + auto task = task_monitoring::Task{"UpdateCollection for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + std::string from; _description.get("from", from); diff --git a/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp index 97cf44728e29..80d4ddf18c1d 100644 --- a/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp +++ b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp @@ -41,6 +41,7 @@ #include "UpdateReplicatedLogAction.h" #include "Utils/DatabaseGuard.h" #include "VocBase/vocbase.h" +#include "TaskMonitoring/task.h" using namespace arangodb; using namespace arangodb::basics; @@ -63,6 +64,9 @@ bool arangodb::maintenance::UpdateReplicatedLogAction::first() { auto const& database = _description.get(DATABASE); auto& df = _feature.server().getFeature(); + // Add task monitoring + auto task = task_monitoring::Task{"UpdateReplicatedLogAction for DB: '" + database + "', LogId: '" + std::to_string(logId.id()) + "'"}; + auto result = basics::catchToResult([&] { DatabaseGuard guard(df, database); diff --git a/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp b/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp index a22ff21e52a5..314e7535738c 100644 --- a/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp +++ b/arangod/Replication2/StateMachines/Document/MaintenanceActionExecutor.cpp @@ -172,4 +172,4 @@ auto MaintenanceActionExecutor::addDirty() noexcept -> Result { } return res; } -} // namespace 
arangodb::replication2::replicated_state::document \ No newline at end of file +} // namespace arangodb::replication2::replicated_state::document From 5fd52a0093d53f4708e5a7c208e6fcc0a19829b1 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 12 May 2025 12:30:37 +0200 Subject: [PATCH 31/36] Added a Pretty Printer for the RestEndpoint output --- .../TaskMonitoring/PrettyPrinter/README.md | 32 ++++++++ .../PrettyPrinter/src/pretty_printer.py | 22 ++++++ .../src/taskmonitoring/__init__.py | 1 + .../src/taskmonitoring/tasktree.py | 63 +++++++++++++++ .../PrettyPrinter/src/tests/__init__.py | 1 + .../PrettyPrinter/src/tests/test_tasktree.py | 79 +++++++++++++++++++ 6 files changed, 198 insertions(+) create mode 100644 arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md create mode 100755 arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py create mode 100644 arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/__init__.py create mode 100644 arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py create mode 100644 arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/__init__.py create mode 100644 arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md new file mode 100644 index 000000000000..9db5dafd43a0 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md @@ -0,0 +1,32 @@ +# Pretty Printer for ArangoDB's Task Monitoring Output + +This python-package provides a pretty-printer for the hierarchical task monitoring JSON output produced by ArangoDB. + +The pretty-printer groups tasks by their top-level (hierarchy 0, no parent) and by their state. The output is grouped and ordered as follows: +1. Running tasks +2. Finished tasks +3. 
Deleted tasks + +Each group displays the task hierarchy as an ASCII tree for improved readability. + +## Usage + +To pretty-print a monitoring output JSON file: + +```sh +cat <monitoring_output.json> | ./src/pretty_printer.py +``` + +## Run tests + +Inside the `src` folder, run the unit tests via: + +```sh +python3 -m unittest discover +``` + +## Project Structure + +- `src/pretty_printer.py`: Main script for pretty-printing the monitoring output. +- `src/taskmonitoring/`: Python package with core logic for parsing and formatting the task monitoring data. +- `src/tests/`: Unit tests for the pretty printer. \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py new file mode 100755 index 000000000000..76da0bc9c054 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +"""Task Monitoring Pretty Printer + +This script pretty-prints the hierarchical task monitoring JSON output from ArangoDB. +Groups by top-level task and state, and displays as ASCII trees.
+ +Usage: cat | ./pretty_printer.py +""" + +import sys +import json +from taskmonitoring.tasktree import TaskTree + +def main(): + string = sys.stdin.read() + data = json.loads(string)["task_stacktraces"] + tree = TaskTree.from_json(data) + tree.pretty_print() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/__init__.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/__init__.py new file mode 100644 index 000000000000..0519ecba6ea9 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py new file mode 100644 index 000000000000..13fedd7b6c83 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py @@ -0,0 +1,63 @@ +import collections +from typing import List, Dict, Any, Optional + +class TaskNode: + def __init__(self, data: dict, hierarchy: int): + self.id = data["id"] + self.name = data["name"] + self.state = data["state"] + self.parent_id = data["parent"].get("id") if data["parent"] else None + self.thread = data["thread"] + self.source_location = data["source_location"] + self.hierarchy = hierarchy + self.children: List['TaskNode'] = [] + + def add_child(self, child: 'TaskNode'): + self.children.append(child) + + def __str__(self): + return f"{self.name} [{self.state}] (thread: {self.thread['name']}:{self.thread['LWPID']}) @ {self.source_location['function_name']} ({self.source_location['file_name']}:{self.source_location['line']})" + +class TaskTree: + def __init__(self, roots: List[TaskNode]): + self.roots = roots + + @staticmethod + def from_json(task_stacktraces: List[List[Dict[str, Any]]]) -> 'TaskTree': + # Flatten all tasks 
and build id->node mapping + nodes = {} + all_nodes = [] + for stack in task_stacktraces: + for entry in stack: + node = TaskNode(entry["data"], entry["hierarchy"]) + nodes[node.id] = node + all_nodes.append(node) + # Build hierarchy + roots = [] + for node in all_nodes: + if node.parent_id and node.parent_id in nodes: + nodes[node.parent_id].add_child(node) + else: + roots.append(node) + return TaskTree(roots) + + def pretty_print(self): + # Group by state: Running, Finished, Deleted (in this order) + state_order = ["Running", "Finished", "Deleted"] + grouped = collections.defaultdict(list) + for node in self.roots: + grouped[node.state].append(node) + for state in state_order: + if grouped[state]: + print(f"=== {state} Tasks ===") + for node in grouped[state]: + self._print_node(node) + print() + + def _print_node(self, node: TaskNode, prefix: str = "", is_last: bool = True): + connector = "└─ " if is_last else "├─ " + print(prefix + connector + str(node)) + if node.children: + for i, child in enumerate(node.children): + last = (i == len(node.children) - 1) + self._print_node(child, prefix + (" " if is_last else "│ "), last) \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/__init__.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/__init__.py new file mode 100644 index 000000000000..0519ecba6ea9 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py new file mode 100644 index 000000000000..25eb7cb448f0 --- /dev/null +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py @@ -0,0 +1,79 @@ +import unittest +import io +import sys +import json +from taskmonitoring.tasktree import TaskTree + +SAMPLE_JSON = { + 
"task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + "name": "Top Task 1", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 1, + "data": { + "id": "child1", + "name": "Child Task 1", + "state": "Running", + "parent": {"id": "root1"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 20, "function_name": "funcB"} + } + } + ], + [ + { + "hierarchy": 0, + "data": { + "id": "root2", + "name": "Top Task 2", + "state": "Deleted", + "parent": {}, + "thread": {"LWPID": 3, "name": "main"}, + "source_location": {"file_name": "file2.cpp", "line": 30, "function_name": "funcC"} + } + } + ] + ] +} + +class TestTaskTree(unittest.TestCase): + def test_hierarchy_and_grouping(self): + tree = TaskTree.from_json(SAMPLE_JSON["task_stacktraces"]) + self.assertEqual(len(tree.roots), 2) + running = [n for n in tree.roots if n.state == "Running"] + deleted = [n for n in tree.roots if n.state == "Deleted"] + self.assertEqual(len(running), 1) + self.assertEqual(len(deleted), 1) + self.assertEqual(running[0].name, "Top Task 1") + self.assertEqual(deleted[0].name, "Top Task 2") + self.assertEqual(len(running[0].children), 1) + self.assertEqual(running[0].children[0].name, "Child Task 1") + + def test_pretty_print_output(self): + tree = TaskTree.from_json(SAMPLE_JSON["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + self.assertIn("=== Running Tasks ===", output) + self.assertIn("Top Task 1 [Running]", output) + self.assertIn("Child Task 1 [Running]", output) + self.assertIn("=== Deleted Tasks ===", output) + self.assertIn("Top Task 2 [Deleted]", output) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 
f5745a14e39e198b125189ffcc697d31d412e41e Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 12 May 2025 16:08:14 +0200 Subject: [PATCH 32/36] Next version on pretty printing --- .../src/taskmonitoring/tasktree.py | 49 ++++-- .../PrettyPrinter/src/tests/test_tasktree.py | 162 ++++++++++++++++++ 2 files changed, 201 insertions(+), 10 deletions(-) diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py index 13fedd7b6c83..cdd7eb91c7dd 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py @@ -1,5 +1,5 @@ import collections -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, Tuple class TaskNode: def __init__(self, data: dict, hierarchy: int): @@ -15,6 +15,18 @@ def __init__(self, data: dict, hierarchy: int): def add_child(self, child: 'TaskNode'): self.children.append(child) + def group_key(self) -> Tuple: + # Used for grouping: all fields except id, parent_id, and hierarchy + return ( + self.name, + self.state, + self.thread["name"], + self.thread["LWPID"], + self.source_location["file_name"], + self.source_location["line"], + self.source_location["function_name"] + ) + def __str__(self): return f"{self.name} [{self.state}] (thread: {self.thread['name']}:{self.thread['LWPID']}) @ {self.source_location['function_name']} ({self.source_location['file_name']}:{self.source_location['line']})" @@ -50,14 +62,31 @@ def pretty_print(self): for state in state_order: if grouped[state]: print(f"=== {state} Tasks ===") - for node in grouped[state]: - self._print_node(node) + self._print_grouped_nodes(grouped[state], top_level=True) print() - def _print_node(self, node: TaskNode, prefix: str = "", is_last: bool = True): - connector = "└─ " if is_last else "├─ " - print(prefix + 
connector + str(node)) - if node.children: - for i, child in enumerate(node.children): - last = (i == len(node.children) - 1) - self._print_node(child, prefix + (" " if is_last else "│ "), last) \ No newline at end of file + def _print_grouped_nodes(self, nodes: List[TaskNode], prefix: str = "", is_last: bool = True, top_level: bool = False): + # Group nodes by their group_key + group_map = collections.defaultdict(list) + for node in nodes: + group_map[node.group_key()].append(node) + group_items = list(group_map.items()) + for idx, (key, group) in enumerate(group_items): + node = group[0] + count = len(group) + if top_level: + # Print counter at the start, right-aligned in 3 chars, then ' x' (e.g., ' 5 x') + count_str = f"{count:3d} x" if count < 1000 else f"{count} x" + print(f"{count_str} {str(node)}") + next_top_level = False + else: + connector = "└─ " if (is_last and idx == len(group_items) - 1) else "├─ " + count_str = f" [x{count}]" if count > 1 else "" + print(prefix + connector + str(node) + count_str) + next_top_level = False + # Collect all children from all grouped nodes + all_children = [] + for n in group: + all_children.extend(n.children) + if all_children: + self._print_grouped_nodes(all_children, prefix + (" " if (is_last and idx == len(group_items) - 1) else "│ "), True, top_level=next_top_level) \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py index 25eb7cb448f0..1bc49044bdfc 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py @@ -46,6 +46,126 @@ ] } +SAMPLE_GROUP_JSON = { + "task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + "name": "Top Task", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": 
{"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 0, + "data": { + "id": "root2", + "name": "Top Task", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 1, + "data": { + "id": "child1", + "name": "Child Task", + "state": "Running", + "parent": {"id": "root1"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 20, "function_name": "funcB"} + } + }, + { + "hierarchy": 1, + "data": { + "id": "child2", + "name": "Child Task", + "state": "Running", + "parent": {"id": "root2"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 20, "function_name": "funcB"} + } + } + ] + ] +} + +SAMPLE_GROUP_DIFF_STATE_JSON = { + "task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + "name": "Top Task", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 0, + "data": { + "id": "root2", + "name": "Top Task", + "state": "Deleted", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + } + ] + ] +} + +SAMPLE_GROUP_NESTED_JSON = { + "task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + "name": "Top Task", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 1, + "data": { + "id": "child1", + "name": "Child Task", + "state": "Running", + "parent": {"id": "root1"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 20, "function_name": "funcB"} + } + }, 
+ { + "hierarchy": 1, + "data": { + "id": "child2", + "name": "Child Task", + "state": "Running", + "parent": {"id": "root1"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 20, "function_name": "funcB"} + } + } + ] + ] +} + class TestTaskTree(unittest.TestCase): def test_hierarchy_and_grouping(self): tree = TaskTree.from_json(SAMPLE_JSON["task_stacktraces"]) @@ -75,5 +195,47 @@ def test_pretty_print_output(self): self.assertIn("=== Deleted Tasks ===", output) self.assertIn("Top Task 2 [Deleted]", output) + def test_grouping_identical_tasks(self): + tree = TaskTree.from_json(SAMPLE_GROUP_JSON["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Top Task should be grouped (x2), Child Task should be grouped (x2) + self.assertIn("Top Task [Running] (thread: main:1) @ funcA (file.cpp:10) [x2]", output) + self.assertIn("Child Task [Running] (thread: worker:2) @ funcB (file.cpp:20) [x2]", output) + + def test_grouping_different_states(self): + tree = TaskTree.from_json(SAMPLE_GROUP_DIFF_STATE_JSON["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Should not group across states + self.assertIn("Top Task [Running] (thread: main:1) @ funcA (file.cpp:10)", output) + self.assertIn("Top Task [Deleted] (thread: main:1) @ funcA (file.cpp:10)", output) + self.assertNotIn("[x2]", output) + + def test_grouping_nested(self): + tree = TaskTree.from_json(SAMPLE_GROUP_NESTED_JSON["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Child Task should be grouped (x2) under Top Task + self.assertIn("Child 
Task [Running] (thread: worker:2) @ funcB (file.cpp:20) [x2]", output) + if __name__ == '__main__': unittest.main() \ No newline at end of file From 635317b95d38a9c6539c5ee6564b9909f60145a5 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 12 May 2025 17:15:22 +0200 Subject: [PATCH 33/36] Updated the layout a bit. Running tasks are not Grouped --- .../src/taskmonitoring/tasktree.py | 35 +- .../PrettyPrinter/src/tests/test_tasktree.py | 331 +++++++++++++++++- 2 files changed, 353 insertions(+), 13 deletions(-) diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py index cdd7eb91c7dd..678be8db964f 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py @@ -16,19 +16,24 @@ def add_child(self, child: 'TaskNode'): self.children.append(child) def group_key(self) -> Tuple: - # Used for grouping: all fields except id, parent_id, and hierarchy + # For 'Running' tasks, do not group (return unique key) + if self.state == "Running": + return (id(self),) + # For non-'Running', group by name, state, thread name (not ID), and source location return ( self.name, self.state, self.thread["name"], - self.thread["LWPID"], self.source_location["file_name"], self.source_location["line"], self.source_location["function_name"] ) def __str__(self): - return f"{self.name} [{self.state}] (thread: {self.thread['name']}:{self.thread['LWPID']}) @ {self.source_location['function_name']} ({self.source_location['file_name']}:{self.source_location['line']})" + if self.state == "Running": + return f"{self.name} [{self.state}] (thread: {self.thread['name']}:{self.thread['LWPID']}) @ {self.source_location['function_name']} ({self.source_location['file_name']}:{self.source_location['line']})" + else: + return f"{self.name} [{self.state}] (thread: 
{self.thread['name']}) @ {self.source_location['function_name']} ({self.source_location['file_name']}:{self.source_location['line']})" class TaskTree: def __init__(self, roots: List[TaskNode]): @@ -62,10 +67,29 @@ def pretty_print(self): for state in state_order: if grouped[state]: print(f"=== {state} Tasks ===") - self._print_grouped_nodes(grouped[state], top_level=True) + if state == "Running": + for node in grouped[state]: + self._print_grouped_nodes([node], top_level=True, force_no_group=True) + else: + self._print_grouped_nodes(grouped[state], top_level=True) print() - def _print_grouped_nodes(self, nodes: List[TaskNode], prefix: str = "", is_last: bool = True, top_level: bool = False): + def _print_grouped_nodes(self, nodes: List[TaskNode], prefix: str = "", is_last: bool = True, top_level: bool = False, force_no_group: bool = False): + if force_no_group: + # Print all nodes individually, no grouping + for idx, node in enumerate(nodes): + count = 1 + if top_level: + count_str = f"{count:3d} x" + print(f"{count_str} {str(node)}") + next_top_level = False + else: + connector = "└─ " if (is_last and idx == len(nodes) - 1) else "├─ " + print(prefix + connector + str(node)) + next_top_level = False + if node.children: + self._print_grouped_nodes(node.children, prefix + (" " if (is_last and idx == len(nodes) - 1) else "│ "), True, top_level=next_top_level, force_no_group=force_no_group) + return # Group nodes by their group_key group_map = collections.defaultdict(list) for node in nodes: @@ -75,7 +99,6 @@ def _print_grouped_nodes(self, nodes: List[TaskNode], prefix: str = "", is_last: node = group[0] count = len(group) if top_level: - # Print counter at the start, right-aligned in 3 chars, then ' x' (e.g., ' 5 x') count_str = f"{count:3d} x" if count < 1000 else f"{count} x" print(f"{count_str} {str(node)}") next_top_level = False diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py 
b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py index 1bc49044bdfc..91c7366eeaa8 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py @@ -166,6 +166,224 @@ ] } +SAMPLE_REUSED_ID_SIBLINGS = { + "task_stacktraces": [ + [ + { # Parent + "hierarchy": 0, + "data": { + "id": "parent", + "name": "Parent", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 1, "function_name": "parentFunc"} + } + }, + { # Child1 (id reused) + "hierarchy": 1, + "data": { + "id": "child", + "name": "Child", + "state": "Running", + "parent": {"id": "parent"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 2, "function_name": "childFunc"} + } + }, + { # Child2 (id reused) + "hierarchy": 1, + "data": { + "id": "child", + "name": "Child", + "state": "Running", + "parent": {"id": "parent"}, + "thread": {"LWPID": 3, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 3, "function_name": "childFunc2"} + } + } + ] + ] +} + +SAMPLE_REUSED_ID_COUSINS = { + "task_stacktraces": [ + [ + { # Grandparent + "hierarchy": 0, + "data": { + "id": "grandparent", + "name": "Grandparent", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 1, "function_name": "grandparentFunc"} + } + }, + { # Parent1 + "hierarchy": 1, + "data": { + "id": "parent1", + "name": "Parent1", + "state": "Running", + "parent": {"id": "grandparent"}, + "thread": {"LWPID": 2, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 2, "function_name": "parentFunc1"} + } + }, + { # Parent2 + "hierarchy": 1, + "data": { + "id": "parent2", + "name": "Parent2", + "state": "Running", + "parent": {"id": "grandparent"}, + "thread": {"LWPID": 3, 
"name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 3, "function_name": "parentFunc2"} + } + }, + { # Cousin1 (id reused) + "hierarchy": 2, + "data": { + "id": "cousin", + "name": "Cousin", + "state": "Running", + "parent": {"id": "parent1"}, + "thread": {"LWPID": 4, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 4, "function_name": "cousinFunc1"} + } + }, + { # Cousin2 (id reused) + "hierarchy": 2, + "data": { + "id": "cousin", + "name": "Cousin", + "state": "Running", + "parent": {"id": "parent2"}, + "thread": {"LWPID": 5, "name": "worker"}, + "source_location": {"file_name": "file.cpp", "line": 5, "function_name": "cousinFunc2"} + } + } + ] + ] +} + +SAMPLE_REUSED_ID_SEPARATE_TREES = { + "task_stacktraces": [ + [ + { # Root1 + "hierarchy": 0, + "data": { + "id": "root", + "name": "Root1", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 1, "function_name": "rootFunc1"} + } + }, + { # Root2 (id reused) + "hierarchy": 0, + "data": { + "id": "root", + "name": "Root2", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 2, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 2, "function_name": "rootFunc2"} + } + } + ] + ] +} + +SAMPLE_RUNNING_NOT_GROUPED = { + "task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + "name": "Top Task", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 0, + "data": { + "id": "root2", + "name": "Top Task", + "state": "Running", + "parent": {}, + "thread": {"LWPID": 2, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + } + ] + ] +} + +SAMPLE_NON_RUNNING_GROUP_THREAD_NAME = { + "task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + 
"name": "Top Task", + "state": "Deleted", + "parent": {}, + "thread": {"LWPID": 1, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 0, + "data": { + "id": "root2", + "name": "Top Task", + "state": "Deleted", + "parent": {}, + "thread": {"LWPID": 2, "name": "main"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + } + ] + ] +} + +SAMPLE_NON_RUNNING_NOT_GROUP_THREAD_NAME = { + "task_stacktraces": [ + [ + { + "hierarchy": 0, + "data": { + "id": "root1", + "name": "Top Task", + "state": "Deleted", + "parent": {}, + "thread": {"LWPID": 1, "name": "main1"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + }, + { + "hierarchy": 0, + "data": { + "id": "root2", + "name": "Top Task", + "state": "Deleted", + "parent": {}, + "thread": {"LWPID": 2, "name": "main2"}, + "source_location": {"file_name": "file.cpp", "line": 10, "function_name": "funcA"} + } + } + ] + ] +} + class TestTaskTree(unittest.TestCase): def test_hierarchy_and_grouping(self): tree = TaskTree.from_json(SAMPLE_JSON["task_stacktraces"]) @@ -205,9 +423,10 @@ def test_grouping_identical_tasks(self): finally: sys.stdout = sys_stdout output = captured.getvalue() - # Top Task should be grouped (x2), Child Task should be grouped (x2) - self.assertIn("Top Task [Running] (thread: main:1) @ funcA (file.cpp:10) [x2]", output) - self.assertIn("Child Task [Running] (thread: worker:2) @ funcB (file.cpp:20) [x2]", output) + # Running tasks are not grouped, so expect two separate lines + self.assertEqual(output.count("Top Task [Running] (thread: main:1) @ funcA (file.cpp:10)"), 2) + self.assertNotIn("[x2]", output) + self.assertNotIn(" 2 x", output) def test_grouping_different_states(self): tree = TaskTree.from_json(SAMPLE_GROUP_DIFF_STATE_JSON["task_stacktraces"]) @@ -219,10 +438,12 @@ def test_grouping_different_states(self): finally: sys.stdout = sys_stdout 
output = captured.getvalue() - # Should not group across states + # Running task: thread ID is printed, not grouped self.assertIn("Top Task [Running] (thread: main:1) @ funcA (file.cpp:10)", output) - self.assertIn("Top Task [Deleted] (thread: main:1) @ funcA (file.cpp:10)", output) + # Deleted task: thread ID is NOT printed, not grouped + self.assertIn("Top Task [Deleted] (thread: main) @ funcA (file.cpp:10)", output) self.assertNotIn("[x2]", output) + self.assertNotIn(" 2 x", output) def test_grouping_nested(self): tree = TaskTree.from_json(SAMPLE_GROUP_NESTED_JSON["task_stacktraces"]) @@ -234,8 +455,104 @@ def test_grouping_nested(self): finally: sys.stdout = sys_stdout output = captured.getvalue() - # Child Task should be grouped (x2) under Top Task - self.assertIn("Child Task [Running] (thread: worker:2) @ funcB (file.cpp:20) [x2]", output) + # Running children are not grouped, so expect two lines + self.assertEqual(output.count("Child Task [Running] (thread: worker:2) @ funcB (file.cpp:20)"), 2) + self.assertNotIn("[x2]", output) + self.assertNotIn(" 2 x", output) + + def test_reused_id_siblings(self): + tree = TaskTree.from_json(SAMPLE_REUSED_ID_SIBLINGS["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Both children should appear under the parent, even though they have the same id + self.assertIn("Parent [Running]", output) + self.assertIn("Child [Running] (thread: worker:2)", output) + self.assertIn("Child [Running] (thread: worker:3)", output) + + def test_reused_id_cousins(self): + tree = TaskTree.from_json(SAMPLE_REUSED_ID_COUSINS["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Both cousins should appear under their respective parents, even though they have 
the same id + self.assertIn("Parent1 [Running]", output) + self.assertIn("Parent2 [Running]", output) + self.assertIn("Cousin [Running] (thread: worker:4)", output) + self.assertIn("Cousin [Running] (thread: worker:5)", output) + + def test_reused_id_separate_trees(self): + tree = TaskTree.from_json(SAMPLE_REUSED_ID_SEPARATE_TREES["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Both roots should appear, even though they have the same id + self.assertIn("Root1 [Running] (thread: main:1)", output) + self.assertIn("Root2 [Running] (thread: main:2)", output) + + def test_running_not_grouped(self): + tree = TaskTree.from_json(SAMPLE_RUNNING_NOT_GROUPED["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Both should appear, not grouped, and thread ID is printed + self.assertIn("Top Task [Running] (thread: main:1)", output) + self.assertIn("Top Task [Running] (thread: main:2)", output) + self.assertNotIn("[x2]", output) + self.assertNotIn(" 2 x", output) + + def test_non_running_group_by_thread_name(self): + tree = TaskTree.from_json(SAMPLE_NON_RUNNING_GROUP_THREAD_NAME["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Should be grouped, thread ID not printed + self.assertIn(" 2 x Top Task [Deleted] (thread: main) @ funcA (file.cpp:10)", output) + self.assertNotIn(":1)", output) + self.assertNotIn(":2)", output) + + def test_non_running_not_group_if_thread_name_differs(self): + tree = TaskTree.from_json(SAMPLE_NON_RUNNING_NOT_GROUP_THREAD_NAME["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = 
captured + try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Should not be grouped, thread ID not printed + self.assertIn("Top Task [Deleted] (thread: main1) @ funcA (file.cpp:10)", output) + self.assertIn("Top Task [Deleted] (thread: main2) @ funcA (file.cpp:10)", output) + self.assertNotIn("[x2]", output) + self.assertNotIn(" 2 x", output) + self.assertNotIn(":1)", output) + self.assertNotIn(":2)", output) if __name__ == '__main__': unittest.main() \ No newline at end of file From 92f5e1d9da5f4b12b4d3a35e5b8d7117ce3587f2 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 12 May 2025 18:15:06 +0200 Subject: [PATCH 34/36] Reversed order in Pretty printer --- .../src/taskmonitoring/tasktree.py | 32 ++++++++----------- .../PrettyPrinter/src/tests/test_tasktree.py | 31 ++++++++++++++++++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py index 678be8db964f..1a41772651b1 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py @@ -68,48 +68,44 @@ def pretty_print(self): if grouped[state]: print(f"=== {state} Tasks ===") if state == "Running": - for node in grouped[state]: + for node in reversed(grouped[state]): self._print_grouped_nodes([node], top_level=True, force_no_group=True) else: - self._print_grouped_nodes(grouped[state], top_level=True) + self._print_grouped_nodes(list(reversed(grouped[state])), top_level=True) print() def _print_grouped_nodes(self, nodes: List[TaskNode], prefix: str = "", is_last: bool = True, top_level: bool = False, force_no_group: bool = False): if force_no_group: - # Print all nodes individually, no grouping - for idx, node in enumerate(nodes): + # Post-order: print children first + 
for idx, node in enumerate(reversed(nodes)): count = 1 + if node.children: + self._print_grouped_nodes(list(reversed(node.children)), prefix + (" " if (is_last and idx == len(nodes) - 1) else "│ "), True, top_level=False, force_no_group=force_no_group) if top_level: count_str = f"{count:3d} x" print(f"{count_str} {str(node)}") - next_top_level = False else: connector = "└─ " if (is_last and idx == len(nodes) - 1) else "├─ " print(prefix + connector + str(node)) - next_top_level = False - if node.children: - self._print_grouped_nodes(node.children, prefix + (" " if (is_last and idx == len(nodes) - 1) else "│ "), True, top_level=next_top_level, force_no_group=force_no_group) return # Group nodes by their group_key group_map = collections.defaultdict(list) for node in nodes: group_map[node.group_key()].append(node) group_items = list(group_map.items()) - for idx, (key, group) in enumerate(group_items): + for idx, (key, group) in enumerate(reversed(group_items)): node = group[0] count = len(group) + # Post-order: print children first + all_children = [] + for n in group: + all_children.extend(n.children) + if all_children: + self._print_grouped_nodes(list(reversed(all_children)), prefix + (" " if (is_last and idx == len(group_items) - 1) else "│ "), True, top_level=False) if top_level: count_str = f"{count:3d} x" if count < 1000 else f"{count} x" print(f"{count_str} {str(node)}") - next_top_level = False else: connector = "└─ " if (is_last and idx == len(group_items) - 1) else "├─ " count_str = f" [x{count}]" if count > 1 else "" - print(prefix + connector + str(node) + count_str) - next_top_level = False - # Collect all children from all grouped nodes - all_children = [] - for n in group: - all_children.extend(n.children) - if all_children: - self._print_grouped_nodes(all_children, prefix + (" " if (is_last and idx == len(group_items) - 1) else "│ "), True, top_level=next_top_level) \ No newline at end of file + print(prefix + connector + str(node) + count_str) \ No 
newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py index 91c7366eeaa8..1c0d4e2912e7 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py @@ -459,6 +459,37 @@ def test_grouping_nested(self): self.assertEqual(output.count("Child Task [Running] (thread: worker:2) @ funcB (file.cpp:20)"), 2) self.assertNotIn("[x2]", output) self.assertNotIn(" 2 x", output) + # Check reverse order: deepest child first, then parent, then root + idx_child2 = output.find("Child Task [Running] (thread: worker:2) @ funcB (file.cpp:20)") + idx_root = output.find("Top Task [Running] (thread: main:1) @ funcA (file.cpp:10)") + if not (idx_child2 < idx_root): + print("\nDEBUG OUTPUT (test_grouping_nested):\n" + output) + self.assertTrue(idx_child2 < idx_root, "Deepest child should appear before root in output") + + def test_reverse_ordering_deep_stack(self): + # Simulate a deep stack + deep_stack = { + "task_stacktraces": [[ + {"hierarchy": 0, "data": {"id": "root", "name": "Root", "state": "Running", "parent": {}, "thread": {"LWPID": 1, "name": "main"}, "source_location": {"file_name": "file.cpp", "line": 1, "function_name": "rootFunc"}}}, + {"hierarchy": 1, "data": {"id": "mid", "name": "Mid", "state": "Running", "parent": {"id": "root"}, "thread": {"LWPID": 1, "name": "main"}, "source_location": {"file_name": "file.cpp", "line": 2, "function_name": "midFunc"}}}, + {"hierarchy": 2, "data": {"id": "leaf", "name": "Leaf", "state": "Running", "parent": {"id": "mid"}, "thread": {"LWPID": 1, "name": "main"}, "source_location": {"file_name": "file.cpp", "line": 3, "function_name": "leafFunc"}}} + ]] + } + tree = TaskTree.from_json(deep_stack["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + 
try: + tree.pretty_print() + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + idx_leaf = output.find("Leaf [Running] (thread: main:1) @ leafFunc (file.cpp:3)") + idx_mid = output.find("Mid [Running] (thread: main:1) @ midFunc (file.cpp:2)") + idx_root = output.find("Root [Running] (thread: main:1) @ rootFunc (file.cpp:1)") + if not (idx_leaf < idx_mid < idx_root): + print("\nDEBUG OUTPUT (test_reverse_ordering_deep_stack):\n" + output) + self.assertTrue(idx_leaf < idx_mid < idx_root, "Order should be leaf, then mid, then root") def test_reused_id_siblings(self): tree = TaskTree.from_json(SAMPLE_REUSED_ID_SIBLINGS["task_stacktraces"]) From 907b8b37837e016fdb56cfa1cd520cdf6cf3ab94 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Mon, 12 May 2025 18:20:42 +0200 Subject: [PATCH 35/36] Added a show-deleted flag --- .../TaskMonitoring/PrettyPrinter/README.md | 7 ++-- .../PrettyPrinter/src/pretty_printer.py | 9 +++-- .../src/taskmonitoring/tasktree.py | 5 +-- .../PrettyPrinter/src/tests/test_tasktree.py | 36 ++++++++++++++++--- 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md index 9db5dafd43a0..8fc397a4e2fc 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/README.md @@ -5,7 +5,7 @@ This python-package provides a pretty-printer for the hierarchical task monitori The pretty-printer groups tasks by their top-level (hierarchy 0, no parent) and by their state. The output is grouped and ordered as follows: 1. Running tasks 2. Finished tasks -3. Deleted tasks +3. Deleted tasks (optional, see below) Each group displays the task hierarchy as an ASCII tree for improved readability. 
@@ -14,9 +14,12 @@ Each group displays the task hierarchy as an ASCII tree for improved readability To pretty-print a monitoring output JSON file: ```sh -cat | ./src/pretty_printer.py +cat | ./src/pretty_printer.py [--show-deleted] ``` +- By default, **Deleted** tasks are hidden. +- Use the `--show-deleted` flag to include Deleted tasks in the output. + ## Run tests Inside the src-folder run unittests via: diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py index 76da0bc9c054..cc27c8f4376f 100755 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/pretty_printer.py @@ -5,18 +5,23 @@ This script pretty-prints the hierarchical task monitoring JSON output from ArangoDB. Groups by top-level task and state, and displays as ASCII trees. -Usage: cat | ./pretty_printer.py +Usage: cat | ./pretty_printer.py [--show-deleted] """ import sys import json +import argparse from taskmonitoring.tasktree import TaskTree def main(): + parser = argparse.ArgumentParser(description="Pretty print ArangoDB task monitoring output.") + parser.add_argument('--show-deleted', action='store_true', help='Show Deleted tasks (default: hide)') + args = parser.parse_args() + string = sys.stdin.read() data = json.loads(string)["task_stacktraces"] tree = TaskTree.from_json(data) - tree.pretty_print() + tree.pretty_print(show_deleted=args.show_deleted) if __name__ == '__main__': main() \ No newline at end of file diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py index 1a41772651b1..264b2b1bb7e8 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/taskmonitoring/tasktree.py @@ -58,13 
+58,14 @@ def from_json(task_stacktraces: List[List[Dict[str, Any]]]) -> 'TaskTree': roots.append(node) return TaskTree(roots) - def pretty_print(self): - # Group by state: Running, Finished, Deleted (in this order) + def pretty_print(self, show_deleted: bool = False): state_order = ["Running", "Finished", "Deleted"] grouped = collections.defaultdict(list) for node in self.roots: grouped[node.state].append(node) for state in state_order: + if state == "Deleted" and not show_deleted: + continue if grouped[state]: print(f"=== {state} Tasks ===") if state == "Running": diff --git a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py index 1c0d4e2912e7..80c1dfba96b5 100644 --- a/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py +++ b/arangod/SystemMonitor/TaskMonitoring/PrettyPrinter/src/tests/test_tasktree.py @@ -403,7 +403,7 @@ def test_pretty_print_output(self): sys_stdout = sys.stdout sys.stdout = captured try: - tree.pretty_print() + tree.pretty_print(show_deleted=True) finally: sys.stdout = sys_stdout output = captured.getvalue() @@ -434,7 +434,7 @@ def test_grouping_different_states(self): sys_stdout = sys.stdout sys.stdout = captured try: - tree.pretty_print() + tree.pretty_print(show_deleted=True) finally: sys.stdout = sys_stdout output = captured.getvalue() @@ -552,13 +552,41 @@ def test_running_not_grouped(self): self.assertNotIn("[x2]", output) self.assertNotIn(" 2 x", output) + def test_deleted_tasks_hidden_by_default(self): + tree = TaskTree.from_json(SAMPLE_NON_RUNNING_GROUP_THREAD_NAME["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print(show_deleted=False) + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Deleted tasks should not be printed + self.assertNotIn("Deleted Tasks", output) + self.assertNotIn("Top Task [Deleted]", 
output) + + def test_deleted_tasks_shown_with_flag(self): + tree = TaskTree.from_json(SAMPLE_NON_RUNNING_GROUP_THREAD_NAME["task_stacktraces"]) + captured = io.StringIO() + sys_stdout = sys.stdout + sys.stdout = captured + try: + tree.pretty_print(show_deleted=True) + finally: + sys.stdout = sys_stdout + output = captured.getvalue() + # Deleted tasks should be printed + self.assertIn("=== Deleted Tasks ===", output) + self.assertIn("Top Task [Deleted]", output) + def test_non_running_group_by_thread_name(self): tree = TaskTree.from_json(SAMPLE_NON_RUNNING_GROUP_THREAD_NAME["task_stacktraces"]) captured = io.StringIO() sys_stdout = sys.stdout sys.stdout = captured try: - tree.pretty_print() + tree.pretty_print(show_deleted=True) finally: sys.stdout = sys_stdout output = captured.getvalue() @@ -573,7 +601,7 @@ def test_non_running_not_group_if_thread_name_differs(self): sys_stdout = sys.stdout sys.stdout = captured try: - tree.pretty_print() + tree.pretty_print(show_deleted=True) finally: sys.stdout = sys_stdout output = captured.getvalue() From 6ca4f7ac0c82ecbe7be5dc29563f7109939f18fe Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 13 May 2025 09:31:03 +0200 Subject: [PATCH 36/36] Applied ClangFormat --- .../MaintenanceActions/CreateCollection.cpp | 4 +++- .../MaintenanceActions/CreateDatabase.cpp | 3 ++- .../MaintenanceActions/DropCollection.cpp | 3 ++- .../Cluster/MaintenanceActions/DropIndex.cpp | 4 +++- .../Cluster/MaintenanceActions/EnsureIndex.cpp | 4 +++- .../ResignShardLeadership.cpp | 4 +++- .../TakeoverShardLeadership.cpp | 4 +++- .../MaintenanceActions/UpdateCollection.cpp | 4 +++- .../UpdateReplicatedLogAction.cpp | 4 +++- arangod/VocBase/Methods/Databases.cpp | 6 ++++-- arangod/VocBase/Methods/UpgradeTasks.cpp | 18 ++++++++++++------ 11 files changed, 41 insertions(+), 17 deletions(-) diff --git a/arangod/Cluster/MaintenanceActions/CreateCollection.cpp b/arangod/Cluster/MaintenanceActions/CreateCollection.cpp index 0d686ba93155..e5a4b7fd6f66 
100644 --- a/arangod/Cluster/MaintenanceActions/CreateCollection.cpp +++ b/arangod/Cluster/MaintenanceActions/CreateCollection.cpp @@ -114,7 +114,9 @@ bool CreateCollection::first() { auto const& props = properties(); // Add task monitoring - auto task = task_monitoring::Task{"CreateCollection for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + auto task = task_monitoring::Task{"CreateCollection for DB: '" + database + + "', Collection: '" + collection + + "', Shard: '" + shard + "'"}; std::string from; _description.get("from", from); diff --git a/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp b/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp index 7ba54404d325..bc9a2599611a 100644 --- a/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp +++ b/arangod/Cluster/MaintenanceActions/CreateDatabase.cpp @@ -71,7 +71,8 @@ bool CreateDatabase::first() { auto database = _description.get(DATABASE); // Add task monitoring - auto task = task_monitoring::Task{"CreateDatabase for DB: '" + database + "'"}; + auto task = + task_monitoring::Task{"CreateDatabase for DB: '" + database + "'"}; LOG_TOPIC("953b1", DEBUG, Logger::MAINTENANCE) << "CreateDatabase: creating database " << database; diff --git a/arangod/Cluster/MaintenanceActions/DropCollection.cpp b/arangod/Cluster/MaintenanceActions/DropCollection.cpp index 9578ab093716..3c4578559629 100644 --- a/arangod/Cluster/MaintenanceActions/DropCollection.cpp +++ b/arangod/Cluster/MaintenanceActions/DropCollection.cpp @@ -71,7 +71,8 @@ bool DropCollection::first() { auto const& shard = getShard(); // Add task monitoring - auto task = task_monitoring::Task{"DropCollection for DB: '" + database + "', Shard: '" + shard + "'"}; + auto task = task_monitoring::Task{"DropCollection for DB: '" + database + + "', Shard: '" + shard + "'"}; LOG_TOPIC("a2961", DEBUG, Logger::MAINTENANCE) << "DropCollection: dropping local shard '" << database << "/" << shard; diff --git 
a/arangod/Cluster/MaintenanceActions/DropIndex.cpp b/arangod/Cluster/MaintenanceActions/DropIndex.cpp index 05313fcc38e8..32965875e84d 100644 --- a/arangod/Cluster/MaintenanceActions/DropIndex.cpp +++ b/arangod/Cluster/MaintenanceActions/DropIndex.cpp @@ -92,7 +92,9 @@ bool DropIndex::first() { auto const& id = _description.get(INDEX); // Add task monitoring - auto task = task_monitoring::Task{"DropIndex for DB: '" + database + "', Shard: '" + shard + "', Index: '" + id + "'"}; + auto task = + task_monitoring::Task{"DropIndex for DB: '" + database + "', Shard: '" + + shard + "', Index: '" + id + "'"}; VPackBuilder index; index.add(VPackValue(_description.get(INDEX))); diff --git a/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp b/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp index 226a014cfe37..2b6c55c0b6b6 100644 --- a/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp +++ b/arangod/Cluster/MaintenanceActions/EnsureIndex.cpp @@ -116,7 +116,9 @@ bool EnsureIndex::first() { auto const& id = properties().get(ID).copyString(); // Add task monitoring - auto task = task_monitoring::Task{"EnsureIndex for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + auto task = task_monitoring::Task{"EnsureIndex for DB: '" + database + + "', Collection: '" + collection + + "', Shard: '" + shard + "'"}; try { // now try to guard the database auto& df = _feature.server().getFeature(); diff --git a/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp b/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp index 78ed7217f166..df48fc6d9526 100644 --- a/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp +++ b/arangod/Cluster/MaintenanceActions/ResignShardLeadership.cpp @@ -82,7 +82,9 @@ bool ResignShardLeadership::first() { std::string const& collection = getShard(); // Add task monitoring - auto task = task_monitoring::Task{"ResignShardLeadership for DB: '" + database + "', Shard: '" + collection + "'"}; + auto task = + 
task_monitoring::Task{"ResignShardLeadership for DB: '" + database + + "', Shard: '" + collection + "'"}; LOG_TOPIC("14f43", DEBUG, Logger::MAINTENANCE) << "trying to withdraw as leader of shard '" << database << "/" diff --git a/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp b/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp index 35307d64a428..ccbb78e23d9e 100644 --- a/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp +++ b/arangod/Cluster/MaintenanceActions/TakeoverShardLeadership.cpp @@ -269,7 +269,9 @@ bool TakeoverShardLeadership::first() { Result res; // Add task monitoring - auto task = task_monitoring::Task{"TakeoverShardLeadership for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + auto task = task_monitoring::Task{"TakeoverShardLeadership for DB: '" + + database + "', Collection: '" + collection + + "', Shard: '" + shard + "'"}; try { auto& df = _feature.server().getFeature(); diff --git a/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp b/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp index 6600effbc0e8..6fb71deb4030 100644 --- a/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp +++ b/arangod/Cluster/MaintenanceActions/UpdateCollection.cpp @@ -91,7 +91,9 @@ bool UpdateCollection::first() { Result res; // Add task monitoring - auto task = task_monitoring::Task{"UpdateCollection for DB: '" + database + "', Collection: '" + collection + "', Shard: '" + shard + "'"}; + auto task = task_monitoring::Task{"UpdateCollection for DB: '" + database + + "', Collection: '" + collection + + "', Shard: '" + shard + "'"}; std::string from; _description.get("from", from); diff --git a/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp index 80d4ddf18c1d..c73622f54591 100644 --- a/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp +++ 
b/arangod/Cluster/MaintenanceActions/UpdateReplicatedLogAction.cpp @@ -65,7 +65,9 @@ bool arangodb::maintenance::UpdateReplicatedLogAction::first() { auto& df = _feature.server().getFeature(); // Add task monitoring - auto task = task_monitoring::Task{"UpdateReplicatedLogAction for DB: '" + database + "', LogId: '" + std::to_string(logId.id()) + "'"}; + auto task = + task_monitoring::Task{"UpdateReplicatedLogAction for DB: '" + database + + "', LogId: '" + std::to_string(logId.id()) + "'"}; auto result = basics::catchToResult([&] { DatabaseGuard guard(df, database); diff --git a/arangod/VocBase/Methods/Databases.cpp b/arangod/VocBase/Methods/Databases.cpp index 2f1abd4bdd8a..f62d9fb4bc89 100644 --- a/arangod/VocBase/Methods/Databases.cpp +++ b/arangod/VocBase/Methods/Databases.cpp @@ -98,7 +98,8 @@ std::vector Databases::list(ArangodServer& server, } Result Databases::info(TRI_vocbase_t* vocbase, velocypack::Builder& result) { - auto task = task_monitoring::Task{"Collect Database information for " + vocbase->name()}; + auto task = task_monitoring::Task{"Collect Database information for " + + vocbase->name()}; if (ServerState::instance()->isCoordinator()) { auto& cache = vocbase->server().getFeature().agencyCache(); auto [acb, idx] = cache.read(std::vector{ @@ -190,7 +191,8 @@ Result Databases::grantCurrentUser(CreateDatabaseInfo const& info, // Create database on cluster; Result Databases::createCoordinator(CreateDatabaseInfo const& info) { - auto task = task_monitoring::Task{"Create Database " + info.getName() + " on Coordinator"}; + auto task = task_monitoring::Task{"Create Database " + info.getName() + + " on Coordinator"}; // TODO: Add status strings to task for phases. 
TRI_ASSERT(ServerState::instance()->isCoordinator()); diff --git a/arangod/VocBase/Methods/UpgradeTasks.cpp b/arangod/VocBase/Methods/UpgradeTasks.cpp index 9e6df6f08484..65b52c9783b3 100644 --- a/arangod/VocBase/Methods/UpgradeTasks.cpp +++ b/arangod/VocBase/Methods/UpgradeTasks.cpp @@ -392,7 +392,8 @@ Result createSystemStatisticsIndices( Result createSystemCollectionsIndices( TRI_vocbase_t& vocbase, std::vector>& collections) { - auto task = task_monitoring::Task{"Create System Collections Indices for " + vocbase.name()}; + auto task = task_monitoring::Task{"Create System Collections Indices for " + + vocbase.name()}; Result res; if (vocbase.isSystem()) { res = ::createIndex(StaticStrings::UsersCollection, @@ -441,7 +442,8 @@ Result createSystemCollectionsIndices( Result UpgradeTasks::createSystemCollectionsAndIndices( TRI_vocbase_t& vocbase, velocypack::Slice slice) { - auto task = task_monitoring::Task{"Create System Collections for " + vocbase.name()}; + auto task = + task_monitoring::Task{"Create System Collections for " + vocbase.name()}; // after the call to ::createSystemCollections this vector should contain // a LogicalCollection for *every* (required) system collection. std::vector> presentSystemCollections; @@ -481,7 +483,8 @@ Result UpgradeTasks::createSystemCollectionsAndIndices( Result UpgradeTasks::createStatisticsCollectionsAndIndices( TRI_vocbase_t& vocbase, velocypack::Slice slice) { - auto task = task_monitoring::Task{"Create Statistics Collections for " + vocbase.name()}; + auto task = task_monitoring::Task{"Create Statistics Collections for " + + vocbase.name()}; // This vector should after the call to ::createSystemCollections contain // a LogicalCollection for *every* (required) system collection. 
std::vector> presentSystemCollections; @@ -511,7 +514,8 @@ Result UpgradeTasks::createStatisticsCollectionsAndIndices( //////////////////////////////////////////////////////////////////////////////// Result UpgradeTasks::dropLegacyAnalyzersCollection( TRI_vocbase_t& vocbase, velocypack::Slice /*upgradeParams*/) { - auto task = task_monitoring::Task{"Drop Legacy Analyzers for " + vocbase.name()}; + auto task = + task_monitoring::Task{"Drop Legacy Analyzers for " + vocbase.name()}; // drop legacy collection if upgrading the system vocbase and collection found #ifdef ARANGODB_ENABLE_MAINTAINER_MODE if (!vocbase.server().hasFeature()) { @@ -604,7 +608,8 @@ Result UpgradeTasks::addDefaultUserOther(TRI_vocbase_t& vocbase, Result UpgradeTasks::renameReplicationApplierStateFiles( TRI_vocbase_t& vocbase, velocypack::Slice slice) { - auto task = task_monitoring::Task{"Rename Replication Applier Files " + vocbase.name()}; + auto task = task_monitoring::Task{"Rename Replication Applier Files " + + vocbase.name()}; std::string const path = vocbase.engine().databasePath(); std::string const source = arangodb::basics::FileUtils::buildFilename( @@ -641,7 +646,8 @@ Result UpgradeTasks::renameReplicationApplierStateFiles( Result UpgradeTasks::dropPregelQueriesCollection( TRI_vocbase_t& vocbase, velocypack::Slice /*upgradeParams*/) { - auto task = task_monitoring::Task{"Drop Pregel Queries Collection for " + vocbase.name()}; + auto task = task_monitoring::Task{"Drop Pregel Queries Collection for " + + vocbase.name()}; std::shared_ptr col; auto res = arangodb::methods::Collections::lookup(vocbase, "_pregel_queries", col);