Skip to content

Commit 0bac82d

Browse files
committed
Improve thread-safety and add default finalizer
1 parent 5d1d678 commit 0bac82d

File tree

2 files changed

+46
-25
lines changed

2 files changed

+46
-25
lines changed

include/pybind11/detail/internals.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,14 +247,17 @@ template <typename T>
247247
struct call_once_storage : call_once_storage_base {
248248
void (*finalize)(T &) = nullptr;
249249
alignas(T) char storage[sizeof(T)] = {0};
250+
std::atomic_bool is_initialized{false};
250251

251252
call_once_storage() = default;
252253
~call_once_storage() override {
253-
if (finalize != nullptr) {
254-
finalize(*reinterpret_cast<T *>(storage));
254+
if (is_initialized) {
255+
if (finalize != nullptr) {
256+
finalize(*reinterpret_cast<T *>(storage));
257+
} else {
258+
reinterpret_cast<T *>(storage)->~T();
259+
}
255260
}
256-
memset(storage, 0, sizeof(T));
257-
finalize = nullptr;
258261
};
259262
call_once_storage(const call_once_storage &) = delete;
260263
call_once_storage(call_once_storage &&) = delete;

include/pybind11/gil_safe_call_once.h

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
5454
//
5555
// For in-depth background, see docs/advanced/deadlock.md
5656
#ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
57+
// Subinterpreter support is disabled.
58+
// In this case, we can store the result globally, because there is only a single interpreter.
59+
//
60+
// The life span of the stored result is the entire process lifetime. It is leaked on process
61+
// termination to avoid destructor calls after the Python interpreter was finalized.
5762
template <typename T>
5863
class gil_safe_call_once_and_store {
5964
public:
@@ -107,36 +112,45 @@ class gil_safe_call_once_and_store {
107112
};
108113
#else
109114
// Subinterpreter support is enabled.
110-
// In this case, we should store the result per-interpreter instead of globally, because
111-
// each subinterpreter has its own separate state. The cached object may not shareable
112-
// across interpreters (e.g., imported modules and their members).
115+
// In this case, we should store the result per-interpreter instead of globally, because each
116+
// subinterpreter has its own separate state. The cached result may not shareable across
117+
// interpreters (e.g., imported modules and their members).
118+
//
119+
// The life span of the stored result is the entire interpreter lifetime. An additional
120+
// `finalize_fn` can be provided to clean up the stored result when the interpreter is destroyed.
113121
template <typename T>
114122
class gil_safe_call_once_and_store {
115123
public:
116124
// PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called.
117125
template <typename Callable>
118126
gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn,
119127
void (*finalize_fn)(T &) = nullptr) {
120-
if (!is_initialized_by_atleast_one_interpreter_
121-
|| detail::get_num_interpreters_seen() > 1) {
122-
detail::with_internals([&](detail::internals &internals) {
123-
const void *k = reinterpret_cast<const void *>(this);
124-
auto &storage_map = internals.call_once_storage_map;
125-
auto it = storage_map.find(k);
126-
if (it == storage_map.end()) {
127-
gil_scoped_release gil_rel; // Needed to establish lock ordering.
128-
{
129-
// Only one thread will ever enter here.
130-
gil_scoped_acquire gil_acq;
128+
if (!is_last_storage_valid()) {
129+
// Multiple threads may enter here, because the GIL is released in the next line and
130+
// CPython API calls in the `fn()` call below may release and reacquire the GIL.
131+
gil_scoped_release gil_rel; // Needed to establish lock ordering.
132+
{
133+
gil_scoped_acquire gil_acq;
134+
detail::with_internals([&](detail::internals &internals) {
135+
// The concurrency control is done inside `detail::with_internals`.
136+
// At most one thread will enter here at a time.
137+
const void *k = reinterpret_cast<const void *>(this);
138+
auto &storage_map = internals.call_once_storage_map;
139+
// There can be multiple threads going through here, but only one each at a
140+
// time. So only one thread will create the storage. Other threads will find it
141+
// already created.
142+
auto it = storage_map.find(k);
143+
if (it == storage_map.end()) {
131144
auto v = new detail::call_once_storage<T>{};
132145
::new (v->storage) T(fn()); // fn may release, but will reacquire, the GIL.
133146
v->finalize = finalize_fn;
134147
last_storage_ = reinterpret_cast<T *>(v->storage);
148+
v->is_initialized = true;
135149
storage_map.emplace(k, v);
136-
};
137-
}
138-
is_initialized_by_atleast_one_interpreter_ = true;
139-
});
150+
}
151+
is_initialized_by_atleast_one_interpreter_ = true;
152+
});
153+
}
140154
// All threads will observe `is_initialized_by_atleast_one_interp_` as true here.
141155
}
142156
// Intentionally not returning `T &` to ensure the calling code is self-documenting.
@@ -146,8 +160,7 @@ class gil_safe_call_once_and_store {
146160
// This must only be called after `call_once_and_store_result()` was called.
147161
T &get_stored() {
148162
T *result = last_storage_;
149-
if (!is_initialized_by_atleast_one_interpreter_
150-
|| detail::get_num_interpreters_seen() > 1) {
163+
if (!is_last_storage_valid()) {
151164
detail::with_internals([&](detail::internals &internals) {
152165
const void *k = reinterpret_cast<const void *>(this);
153166
auto &storage_map = internals.call_once_storage_map;
@@ -159,13 +172,18 @@ class gil_safe_call_once_and_store {
159172
return *result;
160173
}
161174

162-
constexpr gil_safe_call_once_and_store() = default;
175+
gil_safe_call_once_and_store() = default;
163176
// The instance is a global static, so its destructor runs when the process
164177
// is terminating. Therefore, do nothing here because the Python interpreter
165178
// may have been finalized already.
166179
PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default;
167180

168181
private:
182+
bool is_last_storage_valid() const {
183+
return is_initialized_by_atleast_one_interpreter_
184+
&& detail::get_num_interpreters_seen() <= 1 && last_storage_ != nullptr;
185+
}
186+
169187
// No storage needed when subinterpreter support is enabled.
170188
// The actual storage is stored in the per-interpreter state dict in
171189
// `internals.call_once_storage_map`.

0 commit comments

Comments
 (0)