From 56154168724c824530e041308c73365f6165aa64 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Tue, 9 May 2023 11:23:49 -0500 Subject: [PATCH 1/6] Try dask-expr --- .github/workflows/tests.yml | 3 +++ tests/benchmarks/test_csv.py | 2 +- tests/benchmarks/test_h2o.py | 2 +- tests/benchmarks/test_join.py | 2 +- tests/benchmarks/test_parquet.py | 2 +- tests/benchmarks/test_xgboost.py | 2 +- tests/runtime/test_xgboost.py | 2 +- tests/utils_test.py | 2 +- tests/workflows/test_uber_lyft.py | 2 +- 9 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 79d2bf0a3b..4446efda0b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -98,6 +98,9 @@ jobs: # as that's the only place where the optuna + dask integration is being used. python -m pip install git+https://github.com/optuna/optuna + - name: Install dask-expr + run: python -m pip install git+https://github.com/mrocklin/dask-expr + - name: Run Coiled Runtime Tests id: test env: diff --git a/tests/benchmarks/test_csv.py b/tests/benchmarks/test_csv.py index 53b5975094..c34015a211 100644 --- a/tests/benchmarks/test_csv.py +++ b/tests/benchmarks/test_csv.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pandas as pd from ..utils_test import run_up_to_nthreads diff --git a/tests/benchmarks/test_h2o.py b/tests/benchmarks/test_h2o.py index 58683ce07f..5001d61d0c 100644 --- a/tests/benchmarks/test_h2o.py +++ b/tests/benchmarks/test_h2o.py @@ -6,7 +6,7 @@ """ import os -import dask.dataframe as dd +import dask_expr as dd import pandas as pd import pytest diff --git a/tests/benchmarks/test_join.py b/tests/benchmarks/test_join.py index c8d48cdd72..8f783fc1a0 100644 --- a/tests/benchmarks/test_join.py +++ b/tests/benchmarks/test_join.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pytest from ..utils_test import cluster_memory, run_up_to_nthreads, timeseries_of_size, wait diff --git a/tests/benchmarks/test_parquet.py b/tests/benchmarks/test_parquet.py index 9545e2b2e6..54530f2099 100644 --- a/tests/benchmarks/test_parquet.py +++ b/tests/benchmarks/test_parquet.py @@ -5,8 +5,8 @@ import uuid import boto3 -import dask.dataframe as dd import dask.datasets +import dask_expr as dd import distributed import fsspec import pandas diff --git a/tests/benchmarks/test_xgboost.py b/tests/benchmarks/test_xgboost.py index 17298d908a..03bf67978a 100644 --- a/tests/benchmarks/test_xgboost.py +++ b/tests/benchmarks/test_xgboost.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import numpy as np import pandas as pd import pytest diff --git a/tests/runtime/test_xgboost.py b/tests/runtime/test_xgboost.py index 2daadd78bb..14d1c1f711 100644 --- a/tests/runtime/test_xgboost.py +++ b/tests/runtime/test_xgboost.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pytest # `coiled-runtime=0.0.4` don't contain `xgboost` diff --git a/tests/utils_test.py b/tests/utils_test.py index e7f60eccb6..4001f67644 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -4,7 +4,7 @@ import dask import dask.array as da -import dask.dataframe as dd +import dask_expr as dd import distributed import numpy as np import pandas as pd diff --git a/tests/workflows/test_uber_lyft.py b/tests/workflows/test_uber_lyft.py index 2613e70e51..fc9482b627 100644 --- a/tests/workflows/test_uber_lyft.py +++ b/tests/workflows/test_uber_lyft.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pytest From c56b8739daf6058589b8f8b4f6cabd976df9d479 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Tue, 9 May 2023 11:28:39 -0500 Subject: [PATCH 2/6] Try dask-expr --- tests/benchmarks/test_parquet.py | 3 +-- tests/stability/test_deadlock.py | 6 +++--- tests/utils_test.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/benchmarks/test_parquet.py b/tests/benchmarks/test_parquet.py index 54530f2099..5d3fdf45ba 100644 --- a/tests/benchmarks/test_parquet.py +++ b/tests/benchmarks/test_parquet.py @@ -5,7 +5,6 @@ import uuid import boto3 -import dask.datasets import dask_expr as dd import distributed import fsspec @@ -79,7 +78,7 @@ def test_read_hive_partitioned_data(parquet_client): @run_up_to_nthreads("parquet_cluster", 100, reason="fixed dataset") def test_write_wide_data(parquet_client, s3_url): # Write a ~700 partition, ~200 GB dataset with a lot of columns - ddf = dask.datasets.timeseries( + ddf = dd.datasets.timeseries( dtypes={ **{f"name-{i}": str for i in range(25)}, **{f"price-{i}": float for i in range(25)}, diff --git a/tests/stability/test_deadlock.py b/tests/stability/test_deadlock.py index 74910a4ff4..ecc5f145e4 100644 --- a/tests/stability/test_deadlock.py +++ b/tests/stability/test_deadlock.py @@ -1,6 +1,6 @@ import uuid -import dask +import dask_expr as dx import distributed import pytest from coiled import Cluster @@ -27,12 +27,12 @@ def test_repeated_merge_spill( ) as cluster: with Client(cluster) as client: with upload_cluster_dump(client), benchmark_all(client): - ddf = dask.datasets.timeseries( + ddf = dx.datasets.timeseries( "2020", "2025", partition_freq="2w", ) - ddf2 = dask.datasets.timeseries( + ddf2 = dx.datasets.timeseries( "2020", "2023", partition_freq="2w", diff --git a/tests/utils_test.py b/tests/utils_test.py index 4001f67644..567f1a4d8e 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd import pytest -from dask.datasets import timeseries from dask.sizeof import sizeof from dask.utils import format_bytes, parse_bytes +from dask_expr.datasets import timeseries def scaled_array_shape( From e96390f1e9b1af3095f3bdfc02a91a446b26662e Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Tue, 9 May 2023 12:55:04 -0500 Subject: [PATCH 3/6] Rerun CI From 718b6158275d8831e5492e36dab3c5d6d3451a82 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Tue, 9 May 2023 14:43:42 -0500 Subject: [PATCH 4/6] Rerun CI From e85b8e72d1192f18e4062a597f6a1b5377dd0e6b Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Tue, 16 May 2023 16:32:46 -0500 Subject: [PATCH 5/6] Rerun CI From 8982eb154b77f5ff1e63d1ef75b569fcec5f362d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 19 Jun 2023 11:20:39 +0200 Subject: [PATCH 6/6] Fix incompatibility --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 48e87b3790..75ccec3d47 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -94,7 +94,7 @@ jobs: echo EXTRA_OPTIONS=$EXTRA_OPTIONS >> $GITHUB_ENV - name: Install dask-expr - run: python -m pip install git+https://github.com/dask-contrib/dask-expr + run: python -m pip install git+https://github.com/phofl/dask-expr.git@benchmarks - name: Run Coiled Runtime Tests id: test