diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index a7becfb4..f42348cd 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -69,7 +69,7 @@ jobs:
         shell: bash
         run: |
           COVERAGE_PROCESS_START=$GITHUB_WORKSPACE/.coveragerc \
-            PYTHONPATH='.:tests' python -m pytest -r s
+            PYTHONPATH='.:tests' python -m pytest -r s -vs
           coverage combine --append
           coverage xml -i
       - name: Publish coverage results
diff --git a/CHANGES.md b/CHANGES.md
index 517c5db5..a6b0b443 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,6 +1,9 @@
 In development
 ==============
 
+- Make pickling of functions depending on globals in notebooks more
+  deterministic. ([PR#560](https://github.com/cloudpipe/cloudpickle/pull/560))
+
 3.1.2
 =====
 
diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py
index e600b35f..9f9f7971 100644
--- a/cloudpickle/cloudpickle.py
+++ b/cloudpickle/cloudpickle.py
@@ -835,6 +835,13 @@ def _code_reduce(obj):
     # See the inline comment in _class_setstate for details.
     co_name = "".join(obj.co_name)
 
+    # co_filename is not used in the constructor of code objects, so we can
+    # safely set it to indicate that this is dynamic code. This also makes
+    # the payload deterministic, independent of where the function is defined,
+    # which is especially useful when defining classes in jupyter/ipython
+    # cells which do not have a deterministic filename.
+    co_filename = ""
+
     # Create shallow copies of these tuple to make cloudpickle payload deterministic.
     # When creating a code object during load, copies of these four tuples are
     # created, while in the main process, these tuples can be shared.
@@ -857,7 +864,7 @@ def _code_reduce(obj):
             obj.co_consts,
             co_names,
             co_varnames,
-            obj.co_filename,
+            co_filename,
             co_name,
             obj.co_qualname,
             obj.co_firstlineno,
@@ -880,7 +887,7 @@ def _code_reduce(obj):
             obj.co_consts,
             co_names,
             co_varnames,
-            obj.co_filename,
+            co_filename,
             co_name,
             obj.co_firstlineno,
             obj.co_linetable,
@@ -901,7 +908,7 @@ def _code_reduce(obj):
             obj.co_code,
             obj.co_consts,
             co_varnames,
-            obj.co_filename,
+            co_filename,
             co_name,
             obj.co_firstlineno,
             obj.co_lnotab,
@@ -925,7 +932,7 @@ def _code_reduce(obj):
             obj.co_consts,
             co_names,
             co_varnames,
-            obj.co_filename,
+            co_filename,
             co_name,
             obj.co_firstlineno,
             obj.co_lnotab,
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 35c529fe..7735fd92 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,6 +7,8 @@ pytest-cov
 psutil
 # To be able to test tornado coroutines
 tornado
+# To be able to test behavior in jupyter-notebooks
+ipykernel
 # To be able to test numpy specific things
 # but do not build numpy from source on Python nightly
 numpy >=1.18.5; python_version <= '3.12'
diff --git a/tests/cloudpickle_ipykernel_test.py b/tests/cloudpickle_ipykernel_test.py
new file mode 100644
index 00000000..8a5cbfd7
--- /dev/null
+++ b/tests/cloudpickle_ipykernel_test.py
@@ -0,0 +1,100 @@
+import sys
+import time
+import pytest
+import platform
+import textwrap
+from queue import Empty
+
+from .testutils import check_deterministic_pickle
+
+if sys.platform == "win32":
+    if sys.version_info < (3, 11):
+        pytest.skip(
+            "ipykernel requires Python 3.11 or later on Windows",
+            allow_module_level=True
+        )
+ipykernel = pytest.importorskip("ipykernel")
+
+
+def run_in_notebook(code, timeout=10):
+
+    km = ipykernel.connect.jupyter_client.KernelManager()
+    km.start_kernel()
+    kc = km.client()
+    kc.start_channels()
+    status, output, err = "kernel_started", None, None
+    try:
+        assert km.is_alive() and kc.is_alive()
+        kc.wait_for_ready()
+        idx = kc.execute(code)
+        running = True
+        while running:
+            try:
+                res = kc.iopub_channel.get_msg(timeout=timeout)
+            except Empty:
+                status = "timeout"
+                break
+            if res['parent_header'].get('msg_id') != idx:
+                continue
+            content = res['content']
+            if content.get("name", "state") == "stdout":
+                output = content['text']
+            if "traceback" in content:
+                err = "\n".join(content['traceback'])
+                status = "error"
+            running = res['content'].get('execution_state', None) != "idle"
+    finally:
+        kc.shutdown()
+        kc.stop_channels()
+        km.shutdown_kernel(now=True, restart=False)
+        assert not km.is_alive()
+    if status not in ["error", "timeout"]:
+        status = "ok" if not running else "exec_error"
+    return status, output, err
+
+
+@pytest.mark.skipif(
+    platform.python_implementation() == "PyPy",
+    reason="Skip PyPy because tests are too slow",
+)
+@pytest.mark.parametrize("code, expected", [
+    ("1 + 1", "ok"),
+    ("raise ValueError('This is a test error')", "error"),
+    ("import time; time.sleep(100)", "timeout")
+
+])
+def test_run_in_notebook(code, expected):
+    code = textwrap.dedent(code)
+
+    t_start = time.time()
+    status, output, err = run_in_notebook(code, timeout=1)
+    duration = time.time() - t_start
+    assert status == expected, (
+        f"Unexpected status: {status}, output: {output}, err: {err}, duration: {duration}"
+    )
+    assert duration < 10, "Timeout not enforced properly"
+    if expected == "error":
+        assert "This is a test error" in err
+
+
+def test_deterministic_payload_for_dynamic_func_in_notebook():
+    code = textwrap.dedent("""
+        import cloudpickle
+
+        MY_PI = 3.1415
+
+        def get_pi():
+            return MY_PI
+
+        print(cloudpickle.dumps(get_pi))
+    """)
+
+    status, output, err = run_in_notebook(code)
+    assert status == "ok"
+    payload = eval(output.strip(), {})
+
+    status, output, err = run_in_notebook(code)
+    assert status == "ok"
+    payload2 = eval(output.strip(), {})
+
+    check_deterministic_pickle(payload, payload2)