diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9b76cdbff5..75ccec3d47 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -93,6 +93,9 @@ jobs: echo $EXTRA_OPTIONS echo EXTRA_OPTIONS=$EXTRA_OPTIONS >> $GITHUB_ENV + - name: Install dask-expr + run: python -m pip install git+https://github.com/phofl/dask-expr.git@benchmarks + - name: Run Coiled Runtime Tests id: test env: diff --git a/tests/benchmarks/test_csv.py b/tests/benchmarks/test_csv.py index 53b5975094..c34015a211 100644 --- a/tests/benchmarks/test_csv.py +++ b/tests/benchmarks/test_csv.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pandas as pd from ..utils_test import run_up_to_nthreads diff --git a/tests/benchmarks/test_h2o.py b/tests/benchmarks/test_h2o.py index 58683ce07f..5001d61d0c 100644 --- a/tests/benchmarks/test_h2o.py +++ b/tests/benchmarks/test_h2o.py @@ -6,7 +6,7 @@ """ import os -import dask.dataframe as dd +import dask_expr as dd import pandas as pd import pytest diff --git a/tests/benchmarks/test_join.py b/tests/benchmarks/test_join.py index c8d48cdd72..8f783fc1a0 100644 --- a/tests/benchmarks/test_join.py +++ b/tests/benchmarks/test_join.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pytest from ..utils_test import cluster_memory, run_up_to_nthreads, timeseries_of_size, wait diff --git a/tests/benchmarks/test_parquet.py b/tests/benchmarks/test_parquet.py index 9545e2b2e6..5d3fdf45ba 100644 --- a/tests/benchmarks/test_parquet.py +++ b/tests/benchmarks/test_parquet.py @@ -5,8 +5,7 @@ import uuid import boto3 -import dask.dataframe as dd -import dask.datasets +import dask_expr as dd import distributed import fsspec import pandas @@ -79,7 +78,7 @@ def test_read_hive_partitioned_data(parquet_client): @run_up_to_nthreads("parquet_cluster", 100, reason="fixed dataset") def test_write_wide_data(parquet_client, s3_url): # Write a ~700 partition, ~200 GB dataset with a lot of columns - ddf = dask.datasets.timeseries( + ddf = dd.datasets.timeseries( dtypes={ **{f"name-{i}": str for i in range(25)}, **{f"price-{i}": float for i in range(25)}, diff --git a/tests/benchmarks/test_xgboost.py b/tests/benchmarks/test_xgboost.py index 17298d908a..03bf67978a 100644 --- a/tests/benchmarks/test_xgboost.py +++ b/tests/benchmarks/test_xgboost.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import numpy as np import pandas as pd import pytest diff --git a/tests/runtime/test_xgboost.py b/tests/runtime/test_xgboost.py index 2daadd78bb..14d1c1f711 100644 --- a/tests/runtime/test_xgboost.py +++ b/tests/runtime/test_xgboost.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pytest # `coiled-runtime=0.0.4` don't contain `xgboost` diff --git a/tests/stability/test_deadlock.py b/tests/stability/test_deadlock.py index 74910a4ff4..ecc5f145e4 100644 --- a/tests/stability/test_deadlock.py +++ b/tests/stability/test_deadlock.py @@ -1,6 +1,6 @@ import uuid -import dask +import dask_expr as dx import distributed import pytest from coiled import Cluster @@ -27,12 +27,12 @@ def test_repeated_merge_spill( ) as cluster: with Client(cluster) as client: with upload_cluster_dump(client), benchmark_all(client): - ddf = dask.datasets.timeseries( + ddf = dx.datasets.timeseries( "2020", "2025", partition_freq="2w", ) - ddf2 = dask.datasets.timeseries( + ddf2 = dx.datasets.timeseries( "2020", "2023", partition_freq="2w", diff --git a/tests/utils_test.py b/tests/utils_test.py index 8dcd94a952..f46f421427 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -4,14 +4,14 @@ import dask import dask.array as da -import dask.dataframe as dd +import dask_expr as dd import distributed import numpy as np import pandas as pd import pytest -from dask.datasets import timeseries from dask.sizeof import sizeof from dask.utils import format_bytes, parse_bytes +from dask_expr.datasets import timeseries def scaled_array_shape( diff --git a/tests/workflows/test_uber_lyft.py b/tests/workflows/test_uber_lyft.py index 2613e70e51..fc9482b627 100644 --- a/tests/workflows/test_uber_lyft.py +++ b/tests/workflows/test_uber_lyft.py @@ -1,4 +1,4 @@ -import dask.dataframe as dd +import dask_expr as dd import pytest