diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c5a200e32e..4800b9c25c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -229,6 +229,7 @@ jobs: run: cmake --build . --target pytest - name: Compiled tests + timeout-minutes: 3 run: cmake --build . --target cpptest - name: Interface test @@ -334,6 +335,7 @@ jobs: run: cmake --build --preset default --target pytest - name: C++ tests + timeout-minutes: 3 run: cmake --build --preset default --target cpptest - name: Visibility test @@ -393,6 +395,7 @@ jobs: run: cmake --build build --target pytest - name: C++ tests + timeout-minutes: 3 run: cmake --build build --target cpptest - name: Interface test @@ -516,6 +519,7 @@ jobs: run: cmake --build build --target pytest - name: C++ tests + timeout-minutes: 3 run: cmake --build build --target cpptest - name: Interface test @@ -570,6 +574,7 @@ jobs: run: cmake --build build --target pytest - name: C++ tests + timeout-minutes: 3 run: cmake --build build --target cpptest - name: Interface test @@ -652,6 +657,7 @@ jobs: cmake --build build-11 --target check - name: C++ tests C++11 + timeout-minutes: 3 run: | set +e; source /opt/intel/oneapi/setvars.sh; set -e cmake --build build-11 --target cpptest @@ -689,6 +695,7 @@ jobs: cmake --build build-17 --target check - name: C++ tests C++17 + timeout-minutes: 3 run: | set +e; source /opt/intel/oneapi/setvars.sh; set -e cmake --build build-17 --target cpptest @@ -760,6 +767,7 @@ jobs: run: cmake --build build --target pytest - name: C++ tests + timeout-minutes: 3 run: cmake --build build --target cpptest - name: Interface test @@ -1000,6 +1008,7 @@ jobs: run: cmake --build build --target pytest - name: C++20 tests + timeout-minutes: 3 run: cmake --build build --target cpptest -j 2 - name: Interface test C++20 @@ -1076,6 +1085,7 @@ jobs: run: cmake --build build --target pytest -j 2 - name: C++11 tests + timeout-minutes: 3 run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target cpptest -j 2 - name: Interface test C++11 @@ -1100,6 +1110,7 @@ jobs: run: cmake --build build2 --target pytest -j 2 - name: C++14 tests + timeout-minutes: 3 run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target cpptest -j 2 - name: Interface test C++14 @@ -1124,6 +1135,7 @@ jobs: run: cmake --build build3 --target pytest -j 2 - name: C++17 tests + timeout-minutes: 3 run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target cpptest -j 2 - name: Interface test C++17 @@ -1195,6 +1207,7 @@ jobs: run: cmake --build . --target pytest -j 2 - name: C++ tests + timeout-minutes: 3 run: cmake --build . --target cpptest -j 2 - name: Interface test @@ -1257,6 +1270,7 @@ jobs: run: cmake --build . --target pytest -j 2 - name: C++ tests + timeout-minutes: 3 run: cmake --build . --target cpptest -j 2 - name: Interface test @@ -1329,6 +1343,7 @@ jobs: run: cmake --build build --target pytest -j 2 - name: C++ tests + timeout-minutes: 3 run: PYTHONHOME=/clangarm64 PYTHONPATH=/clangarm64 cmake --build build --target cpptest -j 2 - name: Interface test diff --git a/.github/workflows/reusable-standard.yml b/.github/workflows/reusable-standard.yml index 96b14bdfba..56d92e2779 100644 --- a/.github/workflows/reusable-standard.yml +++ b/.github/workflows/reusable-standard.yml @@ -83,6 +83,7 @@ jobs: run: cmake --build build --target pytest - name: C++ tests + timeout-minutes: 3 run: cmake --build build --target cpptest - name: Interface test diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index 15ede7a856..890ae0b6fd 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -66,6 +66,7 @@ jobs: run: cmake --build build11 --target pytest -j 2 - name: C++11 tests + timeout-minutes: 3 run: cmake --build build11 --target cpptest -j 2 - name: Interface test C++11 @@ -87,6 +88,7 @@ jobs: run: cmake --build build17 --target pytest - name: C++17 tests + timeout-minutes: 3 run: cmake --build build17 --target cpptest # Third build - C++17 mode with unstable ABI diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 858de67525..5ccd4d18e5 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -660,6 +660,8 @@ class internals_pp_manager { char const *holder_id_ = nullptr; on_fetch_function *on_fetch_ = nullptr; + // Pointer-to-pointer to the singleton internals for the first seen interpreter (may not be the + // main interpreter) std::unique_ptr *internals_singleton_pp_; }; diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h index 2abd8fc326..a6450cf7d6 100644 --- a/include/pybind11/gil_safe_call_once.h +++ b/include/pybind11/gil_safe_call_once.h @@ -3,17 +3,30 @@ #pragma once #include "detail/common.h" +#include "detail/internals.h" #include "gil.h" #include #include -#ifdef Py_GIL_DISABLED +#if defined(Py_GIL_DISABLED) || defined(PYBIND11_HAS_SUBINTERPRETER_SUPPORT) # include #endif +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT +# include +# include +#endif PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +namespace detail { +#if defined(Py_GIL_DISABLED) || defined(PYBIND11_HAS_SUBINTERPRETER_SUPPORT) +using atomic_bool = std::atomic_bool; +#else +using atomic_bool = bool; +#endif +} // namespace detail + // Use the `gil_safe_call_once_and_store` class below instead of the naive // // static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE! @@ -48,12 +61,22 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) // functions, which is usually the case. // // For in-depth background, see docs/advanced/deadlock.md +#ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT +// Subinterpreter support is disabled. +// In this case, we can store the result globally, because there is only a single interpreter. +// +// The life span of the stored result is the entire process lifetime. It is leaked on process +// termination to avoid destructor calls after the Python interpreter was finalized. template class gil_safe_call_once_and_store { public: // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. + // Note: The second parameter (finalize callback) is intentionally unused when subinterpreter + // support is disabled. In that case, storage is process-global and intentionally leaked to + // avoid calling destructors after the Python interpreter has been finalized. template - gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) { + gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn, + void (*)(T &) /*unused*/ = nullptr) { if (!is_initialized_) { // This read is guarded by the GIL. // Multiple threads may enter here, because the GIL is released in the next line and // CPython API calls in the `fn()` call below may release and reacquire the GIL. @@ -74,29 +97,195 @@ class gil_safe_call_once_and_store { T &get_stored() { assert(is_initialized_); PYBIND11_WARNING_PUSH -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 +# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 // Needed for gcc 4.8.5 PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing") -#endif +# endif return *reinterpret_cast(storage_); PYBIND11_WARNING_POP } constexpr gil_safe_call_once_and_store() = default; + // The instance is a global static, so its destructor runs when the process + // is terminating. Therefore, do nothing here because the Python interpreter + // may have been finalized already. PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; private: + // Global static storage (per process) when subinterpreter support is disabled. alignas(T) char storage_[sizeof(T)] = {}; std::once_flag once_flag_; -#ifdef Py_GIL_DISABLED - std::atomic_bool -#else - bool -#endif - is_initialized_{false}; + // The `is_initialized_`-`storage_` pair is very similar to `std::optional`, // but the latter does not have the triviality properties of former, // therefore `std::optional` is not a viable alternative here. + detail::atomic_bool is_initialized_{false}; +}; +#else +// Subinterpreter support is enabled. +// In this case, we should store the result per-interpreter instead of globally, because each +// subinterpreter has its own separate state. The cached result may not shareable across +// interpreters (e.g., imported modules and their members). + +struct call_once_storage_base { + call_once_storage_base() = default; + virtual ~call_once_storage_base() = default; + call_once_storage_base(const call_once_storage_base &) = delete; + call_once_storage_base(call_once_storage_base &&) = delete; + call_once_storage_base &operator=(const call_once_storage_base &) = delete; + call_once_storage_base &operator=(call_once_storage_base &&) = delete; +}; + +template +struct call_once_storage : call_once_storage_base { + alignas(T) char storage[sizeof(T)] = {}; + std::once_flag once_flag; + void (*finalize)(T &) = nullptr; + std::atomic_bool is_initialized{false}; + + call_once_storage() = default; + ~call_once_storage() override { + if (is_initialized) { + if (finalize != nullptr) { + finalize(*reinterpret_cast(storage)); + } else { + reinterpret_cast(storage)->~T(); + } + } + } + call_once_storage(const call_once_storage &) = delete; + call_once_storage(call_once_storage &&) = delete; + call_once_storage &operator=(const call_once_storage &) = delete; + call_once_storage &operator=(call_once_storage &&) = delete; +}; + +/// Storage map for `gil_safe_call_once_and_store`. Stored in a capsule in the interpreter's state +/// dict with proper destructor to ensure cleanup when the interpreter is destroyed. +using call_once_storage_map_type = std::unordered_map; + +# define PYBIND11_CALL_ONCE_STORAGE_MAP_ID PYBIND11_INTERNALS_ID "_call_once_storage_map__" + +// The life span of the stored result is the entire interpreter lifetime. An additional +// `finalize_fn` can be provided to clean up the stored result when the interpreter is destroyed. +template +class gil_safe_call_once_and_store { +public: + // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. + template + gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn, + void (*finalize_fn)(T &) = nullptr) { + if (!is_last_storage_valid()) { + // Multiple threads may enter here, because the GIL is released in the next line and + // CPython API calls in the `fn()` call below may release and reacquire the GIL. + gil_scoped_release gil_rel; // Needed to establish lock ordering. + const void *const key = reinterpret_cast(this); + // There can be multiple threads going through here. + call_once_storage *value = nullptr; + { + gil_scoped_acquire gil_acq; + // Only one thread will enter here at a time. + auto &storage_map = *get_or_create_call_once_storage_map(); + const auto it = storage_map.find(key); + if (it != storage_map.end()) { + value = static_cast *>(it->second); + } else { + value = new call_once_storage{}; + storage_map.emplace(key, value); + } + } + assert(value != nullptr); + std::call_once(value->once_flag, [&] { + // Only one thread will ever enter here. + gil_scoped_acquire gil_acq; + // fn may release, but will reacquire, the GIL. + ::new (value->storage) T(fn()); + value->finalize = finalize_fn; + value->is_initialized = true; + last_storage_ptr_ = reinterpret_cast(value->storage); + is_initialized_by_atleast_one_interpreter_ = true; + }); + // All threads will observe `is_initialized_by_atleast_one_interpreter_` as true here. + } + // Intentionally not returning `T &` to ensure the calling code is self-documenting. + return *this; + } + + // This must only be called after `call_once_and_store_result()` was called. + T &get_stored() { + T *result = last_storage_ptr_; + if (!is_last_storage_valid()) { + gil_scoped_acquire gil_acq; + const void *const key = reinterpret_cast(this); + auto &storage_map = *get_or_create_call_once_storage_map(); + auto *value = static_cast *>(storage_map.at(key)); + result = last_storage_ptr_ = reinterpret_cast(value->storage); + } + assert(result != nullptr); + return *result; + } + + gil_safe_call_once_and_store() = default; + // The instance is a global static, so its destructor runs when the process + // is terminating. Therefore, do nothing here because the Python interpreter + // may have been finalized already. + PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; + +private: + bool is_last_storage_valid() const { + return is_initialized_by_atleast_one_interpreter_ + && detail::get_num_interpreters_seen() == 1; + } + + static call_once_storage_map_type *get_or_create_call_once_storage_map() { + // Preserve any existing Python error state. dict_getitemstringref may clear + // errors or set new ones when the key is not found; we restore the original + // error state when this scope exits. + error_scope err_scope; + dict state_dict = detail::get_python_state_dict(); + auto storage_map_obj = reinterpret_steal( + detail::dict_getitemstringref(state_dict.ptr(), PYBIND11_CALL_ONCE_STORAGE_MAP_ID)); + call_once_storage_map_type *storage_map = nullptr; + if (storage_map_obj) { + void *raw_ptr = PyCapsule_GetPointer(storage_map_obj.ptr(), /*name=*/nullptr); + if (!raw_ptr) { + raise_from(PyExc_SystemError, + "pybind11::gil_safe_call_once_and_store::" + "get_or_create_call_once_storage_map() FAILED"); + throw error_already_set(); + } + storage_map = reinterpret_cast(raw_ptr); + } else { + // Use unique_ptr for exception safety: if capsule creation throws, + // the map is automatically deleted. + auto storage_map_ptr + = std::unique_ptr(new call_once_storage_map_type()); + // Create capsule with destructor to clean up the storage map when the interpreter + // shuts down + state_dict[PYBIND11_CALL_ONCE_STORAGE_MAP_ID] + = capsule(storage_map_ptr.get(), [](void *ptr) noexcept { + auto *map = reinterpret_cast(ptr); + for (const auto &entry : *map) { + delete entry.second; + } + delete map; + }); + // Capsule now owns the storage map, release from unique_ptr + storage_map = storage_map_ptr.release(); + } + return storage_map; + } + + // No storage needed when subinterpreter support is enabled. + // The actual storage is stored in the per-interpreter state dict via + // `get_or_create_call_once_storage_map()`. + + // Fast local cache to avoid repeated lookups when there are no multiple interpreters. + // This is only valid if there is a single interpreter. Otherwise, it is not used. + T *last_storage_ptr_ = nullptr; + // This flag is true if the value has been initialized by any interpreter (may not be the + // current one). + detail::atomic_bool is_initialized_by_atleast_one_interpreter_{false}; }; +#endif PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/tests/test_with_catch/catch.cpp b/tests/test_with_catch/catch.cpp index 5bd8b3880e..5dbc01f677 100644 --- a/tests/test_with_catch/catch.cpp +++ b/tests/test_with_catch/catch.cpp @@ -13,10 +13,68 @@ PYBIND11_WARNING_DISABLE_MSVC(4996) #endif #define CATCH_CONFIG_RUNNER +#define CATCH_CONFIG_DEFAULT_REPORTER "progress" #include namespace py = pybind11; +// Simple progress reporter that prints a line per test case. +namespace { + +class ProgressReporter : public Catch::StreamingReporterBase { +public: + using StreamingReporterBase::StreamingReporterBase; + + static std::string getDescription() { return "Simple progress reporter (one line per test)"; } + + void testCaseStarting(Catch::TestCaseInfo const &testInfo) override { + print_python_version_once(); + auto &os = Catch::cout(); + os << "[ RUN ] " << testInfo.name << '\n'; + os.flush(); + } + + void testCaseEnded(Catch::TestCaseStats const &stats) override { + bool failed = stats.totals.assertions.failed > 0; + auto &os = Catch::cout(); + os << (failed ? "[ FAILED ] " : "[ OK ] ") << stats.testInfo.name << '\n'; + os.flush(); + } + + void noMatchingTestCases(std::string const &spec) override { + auto &os = Catch::cout(); + os << "[ NO TEST ] no matching test cases for spec: " << spec << '\n'; + os.flush(); + } + + void reportInvalidArguments(std::string const &arg) override { + auto &os = Catch::cout(); + os << "[ ERROR ] invalid Catch2 arguments: " << arg << '\n'; + os.flush(); + } + + void assertionStarting(Catch::AssertionInfo const &) override {} + + bool assertionEnded(Catch::AssertionStats const &) override { return false; } + +private: + void print_python_version_once() { + if (printed_) { + return; + } + printed_ = true; + auto &os = Catch::cout(); + os << "[ PYTHON ] " << Py_GetVersion() << '\n'; + os.flush(); + } + + bool printed_ = false; +}; + +} // namespace + +CATCH_REGISTER_REPORTER("progress", ProgressReporter) + int main(int argc, char *argv[]) { // Setup for TEST_CASE in test_interpreter.cpp, tagging on a large random number: std::string updated_pythonpath("pybind11_test_with_catch_PYTHONPATH_2099743835476552");