diff --git a/docs/advanced/classes.rst b/docs/advanced/classes.rst index faaba38b8d..bfbaea60ab 100644 --- a/docs/advanced/classes.rst +++ b/docs/advanced/classes.rst @@ -1381,11 +1381,22 @@ You can do that using ``py::custom_type_setup``: .. code-block:: cpp - struct OwnsPythonObjects { - py::object value = py::none(); + struct ContainerOwnsPythonObjects { + std::vector list; + + void append(const py::object &obj) { list.emplace_back(obj); } + py::object at(py::ssize_t index) const { + if (index >= size() || index < 0) { + throw py::index_error("Index out of range"); + } + return list.at(py::size_t(index)); + } + py::ssize_t size() const { return py::ssize_t_cast(list.size()); } + void clear() { list.clear(); } }; - py::class_ cls( - m, "OwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) { + + py::class_ cls( + m, "ContainerOwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) { auto *type = &heap_type->ht_type; type->tp_flags |= Py_TPFLAGS_HAVE_GC; type->tp_traverse = [](PyObject *self_base, visitproc visit, void *arg) { @@ -1394,20 +1405,28 @@ You can do that using ``py::custom_type_setup``: Py_VISIT(Py_TYPE(self_base)); #endif if (py::detail::is_holder_constructed(self_base)) { - auto &self = py::cast(py::handle(self_base)); - Py_VISIT(self.value.ptr()); + auto &self = py::cast(py::handle(self_base)); + for (auto &item : self.list) { + Py_VISIT(item.ptr()); + } } return 0; }; type->tp_clear = [](PyObject *self_base) { if (py::detail::is_holder_constructed(self_base)) { - auto &self = py::cast(py::handle(self_base)); - self.value = py::none(); + auto &self = py::cast(py::handle(self_base)); + for (auto &item : self.list) { + Py_CLEAR(item.ptr()); + } + self.list.clear(); } return 0; }; })); cls.def(py::init<>()); - cls.def_readwrite("value", &OwnsPythonObjects::value); + cls.def("append", &ContainerOwnsPythonObjects::append); + cls.def("at", &ContainerOwnsPythonObjects::at); + cls.def("size", &ContainerOwnsPythonObjects::size); + cls.def("clear", &ContainerOwnsPythonObjects::clear); .. versionadded:: 2.8 diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 9b3e69f4db..7cfa5da921 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -337,19 +337,7 @@ struct internals { internals(internals &&other) = delete; internals &operator=(const internals &other) = delete; internals &operator=(internals &&other) = delete; - ~internals() { - // Normally this destructor runs during interpreter finalization and it may DECREF things. - // In odd finalization scenarios it might end up running after the interpreter has - // completely shut down, In that case, we should not decref these objects because pymalloc - // is gone. This also applies across sub-interpreters, we should only DECREF when the - // original owning interpreter is active. - auto *cur_istate = get_interpreter_state_unchecked(); - if (cur_istate && cur_istate == istate) { - Py_CLEAR(instance_base); - Py_CLEAR(default_metaclass); - Py_CLEAR(static_property_type); - } - } + ~internals() = default; }; // the internals struct (above) is shared between all the modules. local_internals are only @@ -359,8 +347,6 @@ struct internals { // impact any other modules, because the only things accessing the local internals is the // module that contains them. struct local_internals { - local_internals() : istate(get_interpreter_state_unchecked()) {} - // It should be safe to use fast_type_map here because this entire // data structure is scoped to our single module, and thus a single // DSO and single instance of type_info for any particular type. @@ -368,19 +354,6 @@ struct local_internals { std::forward_list registered_exception_translators; PyTypeObject *function_record_py_type = nullptr; - PyInterpreterState *istate = nullptr; - - ~local_internals() { - // Normally this destructor runs during interpreter finalization and it may DECREF things. - // In odd finalization scenarios it might end up running after the interpreter has - // completely shut down, In that case, we should not decref these objects because pymalloc - // is gone. This also applies across sub-interpreters, we should only DECREF when the - // original owning interpreter is active. - auto *cur_istate = get_interpreter_state_unchecked(); - if (cur_istate && cur_istate == istate) { - Py_CLEAR(function_record_py_type); - } - } }; enum class holder_enum_t : uint8_t { @@ -576,6 +549,10 @@ inline void translate_local_exception(std::exception_ptr p) { } #endif +// Sentinel value for the `dtor` parameter of `atomic_get_or_create_in_state_dict`. +// Indicates no destructor was explicitly provided (distinct from nullptr, which means "leak"). +#define PYBIND11_DTOR_USE_DELETE (reinterpret_cast(1)) + // Get or create per-storage capsule in the current interpreter's state dict. // - The storage is interpreter-dependent: different interpreters will have different storage. // This is important when using multiple-interpreters, to avoid sharing unshareable objects @@ -592,9 +569,14 @@ inline void translate_local_exception(std::exception_ptr p) { // // Returns: pair of (pointer to storage, bool indicating if newly created). // The bool follows std::map::insert convention: true = created, false = existed. +// `dtor`: optional destructor called when the interpreter shuts down. +// - If not provided: the storage will be deleted using `delete`. +// - If nullptr: the storage will be leaked (useful for singletons that outlive the interpreter). +// - If a function: that function will be called with the capsule object. template std::pair atomic_get_or_create_in_state_dict(const char *key, - void (*dtor)(PyObject *) = nullptr) { + void (*dtor)(PyObject *) + = PYBIND11_DTOR_USE_DELETE) { error_scope err_scope; // preserve any existing Python error states auto state_dict = reinterpret_borrow(get_python_state_dict()); @@ -640,7 +622,7 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state // dict will incref it. We need to set the caller's destructor on it, which will be // called when the interpreter shuts down. - if (created && dtor) { + if (created && dtor != PYBIND11_DTOR_USE_DELETE) { if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) { throw error_already_set(); } @@ -657,6 +639,8 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, return std::pair(static_cast(raw_ptr), created); } +#undef PYBIND11_DTOR_USE_DELETE + template class internals_pp_manager { public: @@ -713,42 +697,75 @@ class internals_pp_manager { // this could be called without an active interpreter, just use what was cached if (!tstate || tstate->interp == last_istate_tls()) { auto tpp = internals_p_tls(); - - delete tpp; + { + std::lock_guard lock(pp_set_mutex_); + pps_have_created_content_.erase(tpp); // untrack deleted pp + } + delete tpp; // may call back into Python } unref(); return; } #endif - delete internals_singleton_pp_; + { + std::lock_guard lock(pp_set_mutex_); + pps_have_created_content_.erase(internals_singleton_pp_); // untrack deleted pp + } + delete internals_singleton_pp_; // may call back into Python unref(); } -private: - internals_pp_manager(char const *id, on_fetch_function *on_fetch) - : holder_id_(id), on_fetch_(on_fetch) {} + void create_pp_content_once(std::unique_ptr *const pp) { + // Assume the GIL is held here. May call back into Python. We cannot hold the lock with our + // mutex here. So there may be multiple threads creating the content at the same time. Only + // one will install its content to pp below. Others will be freed when going out of scope. + auto tmp = std::unique_ptr(new InternalsType()); - static void internals_shutdown(PyObject *capsule) { - auto *pp = static_cast *>( - PyCapsule_GetPointer(capsule, nullptr)); - if (pp) { - pp->reset(); + { + // Lock scope must not include Python calls, which may require the GIL and cause + // deadlocks. + std::lock_guard lock(pp_set_mutex_); + + if (*pp) { + // Already created in another thread. + return; + } + + // At this point, pp->get() is nullptr. + // The content is either not yet created, or was previously destroyed via pp->reset(). + + // Detect re-creation of internals after destruction during interpreter shutdown. + // If pybind11 code (e.g., tp_traverse/tp_clear calling py::cast) runs after internals + // have been destroyed, a new empty internals would be created, causing type lookup + // failures. See also get_or_create_pp_in_state_dict() comments. + if (pps_have_created_content_.find(pp) != pps_have_created_content_.end()) { + pybind11_fail( + "pybind11::detail::internals_pp_manager::create_pp_content_once() " + "FAILED: reentrant call detected while fetching pybind11 internals!"); + } + + // Each interpreter can only create its internals once. + pps_have_created_content_.insert(pp); + // Install the created content. + pp->swap(tmp); } - // We reset the unique_ptr's contents but cannot delete the unique_ptr itself here. - // The pp_manager in this module (and possibly other modules sharing internals) holds - // a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now. - // - // For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is - // called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the - // unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension - // loaded into an external interpreter), destroy() is never called and the unique_ptr - // shell (8 bytes, not its contents) is leaked. - // (See PR #5958 for ideas to eliminate this leak.) } +private: + internals_pp_manager(char const *id, on_fetch_function *on_fetch) + : holder_id_(id), on_fetch_(on_fetch) {} + std::unique_ptr *get_or_create_pp_in_state_dict() { + // The `unique_ptr` is intentionally leaked on interpreter shutdown. + // Once an instance is created, it will never be deleted until the process exits (compare + // to interpreter shutdown in multiple-interpreter scenarios). + // We cannot guarantee the destruction order of capsules in the interpreter state dict on + // interpreter shutdown, so deleting internals too early could cause undefined behavior + // when other pybind11 objects access `get_internals()` during finalization (which would + // recreate empty internals). See also create_pp_content_once() above. + // See https://github.com/pybind/pybind11/pull/5958#discussion_r2717645230. auto result = atomic_get_or_create_in_state_dict>( - holder_id_, &internals_shutdown); + holder_id_, /*dtor=*/nullptr /* leak the capsule content */); auto *pp = result.first; bool created = result.second; // Only call on_fetch_ when fetching existing internals, not when creating new ones. @@ -774,7 +791,12 @@ class internals_pp_manager { on_fetch_function *on_fetch_ = nullptr; // Pointer-to-pointer to the singleton internals for the first seen interpreter (may not be the // main interpreter) - std::unique_ptr *internals_singleton_pp_; + std::unique_ptr *internals_singleton_pp_ = nullptr; + + // Track pointer-to-pointers whose internals have been created, to detect re-entrancy. + // Use instance member over static due to singleton pattern of this class. + std::unordered_set *> pps_have_created_content_; + std::mutex pp_set_mutex_; }; // If We loaded the internals through `state_dict`, our `error_already_set` @@ -815,7 +837,8 @@ PYBIND11_NOINLINE internals &get_internals() { // Slow path, something needs fetched from the state dict or created gil_scoped_acquire_simple gil; error_scope err_scope; - internals_ptr.reset(new internals()); + + ppmgr.create_pp_content_once(&internals_ptr); if (!internals_ptr->instance_base) { // This calls get_internals, so cannot be called from within the internals constructor @@ -826,6 +849,31 @@ PYBIND11_NOINLINE internals &get_internals() { return *internals_ptr; } +/// Return the PyObject* for the internals capsule (borrowed reference). +/// Returns nullptr if the capsule doesn't exist yet. +inline PyObject *get_internals_capsule() { + auto state_dict = reinterpret_borrow(get_python_state_dict()); + return dict_getitemstring(state_dict.ptr(), PYBIND11_INTERNALS_ID); +} + +/// Return the key used for local_internals in the state dict. +/// This function ensures a consistent key is used across all call sites within the same +/// compilation unit. The key includes the address of a static variable to make it unique per +/// module (DSO), matching the behavior of get_local_internals_pp_manager(). +inline const std::string &get_local_internals_key() { + static const std::string key + = PYBIND11_MODULE_LOCAL_ID + std::to_string(reinterpret_cast(&key)); + return key; +} + +/// Return the PyObject* for the local_internals capsule (borrowed reference). +/// Returns nullptr if the capsule doesn't exist yet. +inline PyObject *get_local_internals_capsule() { + const auto &key = get_local_internals_key(); + auto state_dict = reinterpret_borrow(get_python_state_dict()); + return dict_getitemstring(state_dict.ptr(), key.c_str()); +} + inline void ensure_internals() { pybind11::detail::get_internals_pp_manager().unref(); #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT @@ -837,12 +885,10 @@ inline void ensure_internals() { } inline internals_pp_manager &get_local_internals_pp_manager() { - // Use the address of this static itself as part of the key, so that the value is uniquely tied + // Use the address of a static variable as part of the key, so that the value is uniquely tied // to where the module is loaded in memory - static const std::string this_module_idstr - = PYBIND11_MODULE_LOCAL_ID - + std::to_string(reinterpret_cast(&this_module_idstr)); - return internals_pp_manager::get_instance(this_module_idstr.c_str(), nullptr); + return internals_pp_manager::get_instance(get_local_internals_key().c_str(), + nullptr); } /// Works like `get_internals`, but for things which are locally registered. @@ -850,7 +896,10 @@ inline local_internals &get_local_internals() { auto &ppmgr = get_local_internals_pp_manager(); auto &internals_ptr = *ppmgr.get_pp(); if (!internals_ptr) { - internals_ptr.reset(new local_internals()); + gil_scoped_acquire_simple gil; + error_scope err_scope; + + ppmgr.create_pp_content_once(&internals_ptr); } return *internals_ptr; } diff --git a/tests/env.py b/tests/env.py index ee932ad77a..8c06178307 100644 --- a/tests/env.py +++ b/tests/env.py @@ -29,3 +29,31 @@ or GRAALPY or (CPYTHON and PY_GIL_DISABLED and (3, 13) <= sys.version_info < (3, 14)) ) + + +def check_script_success_in_subprocess(code: str, *, rerun: int = 8) -> None: + """Runs the given code in a subprocess.""" + import os + import subprocess + import sys + import textwrap + + code = textwrap.dedent(code).strip() + try: + for _ in range(rerun): # run flakily failing test multiple times + subprocess.check_output( + [sys.executable, "-c", code], + cwd=os.getcwd(), + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as ex: + raise RuntimeError( + f"Subprocess failed with exit code {ex.returncode}.\n\n" + f"Code:\n" + f"```python\n" + f"{code}\n" + f"```\n\n" + f"Output:\n" + f"{ex.output}" + ) from None diff --git a/tests/test_custom_type_setup.cpp b/tests/test_custom_type_setup.cpp index 35d30abcf7..15516b21b0 100644 --- a/tests/test_custom_type_setup.cpp +++ b/tests/test_custom_type_setup.cpp @@ -7,22 +7,67 @@ BSD-style license that can be found in the LICENSE file. */ +#include #include #include "pybind11_tests.h" +#include + namespace py = pybind11; namespace { +struct ContainerOwnsPythonObjects { + std::vector list; -struct OwnsPythonObjects { - py::object value = py::none(); + void append(const py::object &obj) { list.emplace_back(obj); } + py::object at(py::ssize_t index) const { + if (index >= size() || index < 0) { + throw py::index_error("Index out of range"); + } + return list.at(py::size_t(index)); + } + py::ssize_t size() const { return py::ssize_t_cast(list.size()); } + void clear() { list.clear(); } }; + +void add_gc_checkers_with_weakrefs(const py::object &obj) { + py::handle global_capsule = py::detail::get_internals_capsule(); + if (!global_capsule) { + throw std::runtime_error("No global internals capsule found"); + } + (void) py::weakref(obj, py::cpp_function([global_capsule](py::handle weakref) -> void { + py::handle current_global_capsule = py::detail::get_internals_capsule(); + if (!current_global_capsule.is(global_capsule)) { + throw std::runtime_error( + "Global internals capsule was destroyed prematurely"); + } + weakref.dec_ref(); + })) + .release(); + + py::handle local_capsule = py::detail::get_local_internals_capsule(); + if (!local_capsule) { + throw std::runtime_error("No local internals capsule found"); + } + (void) py::weakref( + obj, py::cpp_function([local_capsule](py::handle weakref) -> void { + py::handle current_local_capsule = py::detail::get_local_internals_capsule(); + if (!current_local_capsule.is(local_capsule)) { + throw std::runtime_error("Local internals capsule was destroyed prematurely"); + } + weakref.dec_ref(); + })) + .release(); +} } // namespace TEST_SUBMODULE(custom_type_setup, m) { - py::class_ cls( - m, "OwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) { + py::class_ cls( + m, + "ContainerOwnsPythonObjects", + // Please review/update docs/advanced/classes.rst after making changes here. + py::custom_type_setup([](PyHeapTypeObject *heap_type) { auto *type = &heap_type->ht_type; type->tp_flags |= Py_TPFLAGS_HAVE_GC; type->tp_traverse = [](PyObject *self_base, visitproc visit, void *arg) { @@ -31,19 +76,29 @@ TEST_SUBMODULE(custom_type_setup, m) { Py_VISIT(Py_TYPE(self_base)); #endif if (py::detail::is_holder_constructed(self_base)) { - auto &self = py::cast(py::handle(self_base)); - Py_VISIT(self.value.ptr()); + auto &self = py::cast(py::handle(self_base)); + for (auto &item : self.list) { + Py_VISIT(item.ptr()); + } } return 0; }; type->tp_clear = [](PyObject *self_base) { if (py::detail::is_holder_constructed(self_base)) { - auto &self = py::cast(py::handle(self_base)); - self.value = py::none(); + auto &self = py::cast(py::handle(self_base)); + for (auto &item : self.list) { + Py_CLEAR(item.ptr()); + } + self.list.clear(); } return 0; }; })); cls.def(py::init<>()); - cls.def_readwrite("value", &OwnsPythonObjects::value); + cls.def("append", &ContainerOwnsPythonObjects::append); + cls.def("at", &ContainerOwnsPythonObjects::at); + cls.def("size", &ContainerOwnsPythonObjects::size); + cls.def("clear", &ContainerOwnsPythonObjects::clear); + + m.def("add_gc_checkers_with_weakrefs", &add_gc_checkers_with_weakrefs); } diff --git a/tests/test_custom_type_setup.py b/tests/test_custom_type_setup.py index bb2865cade..4c6b9510ae 100644 --- a/tests/test_custom_type_setup.py +++ b/tests/test_custom_type_setup.py @@ -1,11 +1,14 @@ from __future__ import annotations import gc +import os +import sys import weakref import pytest -import env # noqa: F401 +import env +import pybind11_tests from pybind11_tests import custom_type_setup as m @@ -36,15 +39,46 @@ def add_ref(obj): # PyPy does not seem to reliably garbage collect. @pytest.mark.skipif("env.PYPY or env.GRAALPY") def test_self_cycle(gc_tester): - obj = m.OwnsPythonObjects() - obj.value = obj + obj = m.ContainerOwnsPythonObjects() + obj.append(obj) gc_tester(obj) # PyPy does not seem to reliably garbage collect. @pytest.mark.skipif("env.PYPY or env.GRAALPY") def test_indirect_cycle(gc_tester): - obj = m.OwnsPythonObjects() - obj_list = [obj] - obj.value = obj_list + obj = m.ContainerOwnsPythonObjects() + obj.append([obj]) gc_tester(obj) + + +@pytest.mark.skipif( + env.IOS or sys.platform.startswith("emscripten"), + reason="Requires subprocess support", +) +@pytest.mark.skipif("env.PYPY or env.GRAALPY") +def test_py_cast_useable_on_shutdown(): + """Test that py::cast works during interpreter shutdown. + + See PR #5972 and https://github.com/pybind/pybind11/pull/5958#discussion_r2717645230. + """ + env.check_script_success_in_subprocess( + f""" + import sys + + sys.path.insert(0, {os.path.dirname(env.__file__)!r}) + sys.path.insert(0, {os.path.dirname(pybind11_tests.__file__)!r}) + + from pybind11_tests import custom_type_setup as m + + # Create a self-referential cycle that will be collected during shutdown. + # The tp_traverse and tp_clear callbacks call py::cast, which requires + # internals to still be valid. + obj = m.ContainerOwnsPythonObjects() + obj.append(obj) + + # Add weakref callbacks that verify the capsule is still alive when the + # pybind11 object is garbage collected during shutdown. + m.add_gc_checkers_with_weakrefs(obj) + """ + ) diff --git a/tests/test_multiple_interpreters.py b/tests/test_multiple_interpreters.py index 44877e772a..56d303a36e 100644 --- a/tests/test_multiple_interpreters.py +++ b/tests/test_multiple_interpreters.py @@ -3,7 +3,6 @@ import contextlib import os import pickle -import subprocess import sys import textwrap @@ -219,6 +218,7 @@ def test_dependent_subinterpreters(): def test(): import sys + sys.path.insert(0, {os.path.dirname(env.__file__)!r}) sys.path.insert(0, {os.path.dirname(pybind11_tests.__file__)!r}) import collections @@ -269,36 +269,13 @@ def test_import_module_with_singleton_per_interpreter(): interp.exec(code) -def check_script_success_in_subprocess(code: str, *, rerun: int = 8) -> None: - """Runs the given code in a subprocess.""" - code = textwrap.dedent(code).strip() - try: - for _ in range(rerun): # run flakily failing test multiple times - subprocess.check_output( - [sys.executable, "-c", code], - cwd=os.getcwd(), - stderr=subprocess.STDOUT, - text=True, - ) - except subprocess.CalledProcessError as ex: - raise RuntimeError( - f"Subprocess failed with exit code {ex.returncode}.\n\n" - f"Code:\n" - f"```python\n" - f"{code}\n" - f"```\n\n" - f"Output:\n" - f"{ex.output}" - ) from None - - @pytest.mark.skipif( sys.platform.startswith("emscripten"), reason="Requires loadable modules" ) @pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") def test_import_in_subinterpreter_after_main(): """Tests that importing a module in a subinterpreter after the main interpreter works correctly""" - check_script_success_in_subprocess( + env.check_script_success_in_subprocess( PREAMBLE_CODE + textwrap.dedent( """ @@ -319,7 +296,7 @@ def test_import_in_subinterpreter_after_main(): ) ) - check_script_success_in_subprocess( + env.check_script_success_in_subprocess( PREAMBLE_CODE + textwrap.dedent( """ @@ -354,7 +331,7 @@ def test_import_in_subinterpreter_after_main(): @pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") def test_import_in_subinterpreter_before_main(): """Tests that importing a module in a subinterpreter before the main interpreter works correctly""" - check_script_success_in_subprocess( + env.check_script_success_in_subprocess( PREAMBLE_CODE + textwrap.dedent( """ @@ -375,7 +352,7 @@ def test_import_in_subinterpreter_before_main(): ) ) - check_script_success_in_subprocess( + env.check_script_success_in_subprocess( PREAMBLE_CODE + textwrap.dedent( """ @@ -401,7 +378,7 @@ def test_import_in_subinterpreter_before_main(): ) ) - check_script_success_in_subprocess( + env.check_script_success_in_subprocess( PREAMBLE_CODE + textwrap.dedent( """ @@ -434,7 +411,7 @@ def test_import_in_subinterpreter_before_main(): @pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") def test_import_in_subinterpreter_concurrently(): """Tests that importing a module in multiple subinterpreters concurrently works correctly""" - check_script_success_in_subprocess( + env.check_script_success_in_subprocess( PREAMBLE_CODE + textwrap.dedent( """