diff --git a/.github/workflows/ab_tests.yml b/.github/workflows/ab_tests.yml
index 901f92eaac..bf67fc5954 100644
--- a/.github/workflows/ab_tests.yml
+++ b/.github/workflows/ab_tests.yml
@@ -52,12 +52,7 @@ jobs:
     timeout-minutes: 120
     strategy:
       fail-fast: false
-      # AWS implements limiters to how many EC2 instances you can spawn in parallel *on
-      # the same AWS account*. If such limit is reached, jobs will randomly fail when
-      # trying to create the Coiled clusters, and restarting failed jobs won't fix the
-      # problem. Additionally, there are problems with Coiled itself triggered by
-      # limitations that are never actually reached with real paying users.
-      max-parallel: 5
+      max-parallel: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).max_parallel }}
       matrix:
         os: [ubuntu-latest]
         python-version: ["3.9"]
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index e68ee28b6d..f0f8bfb513 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -102,7 +102,7 @@ jobs:
           DB_NAME: ${{ matrix.os }}-${{ matrix.runtime-version }}-py${{ matrix.python-version }}.db
           BENCHMARK: true
           CLUSTER_DUMP: always
-        run: bash ci/scripts/run_tests.sh ${{ matrix.pytest_args }}
+        run: bash ci/scripts/run_tests.sh -n 4 --dist loadscope ${{ matrix.pytest_args }}
 
       - name: Dump coiled.Cluster kwargs
         run: cat cluster_kwargs.merged.yaml
diff --git a/AB_environments/README.md b/AB_environments/README.md
index 00eca399d5..31bcf32fdb 100644
--- a/AB_environments/README.md
+++ b/AB_environments/README.md
@@ -112,9 +112,15 @@ automatically create a verbatim copy of AB_baseline and then compare the two in
 tests. Set it to false to save some money if you are already confident that the
 'repeat' setting is high enough.
 
-Finally, the files offers a `categories` list. These are the subdirectories of `tests/`
+The file offers a `categories` list. These are the subdirectories of `tests/`
 which you wish to run.
 
+Finally, the `max_parallel` setting lets you tweak maximum test parallelism, both in
+github actions and in pytest-xdist. Reducing parallelism is useful when testing on very
+large clusters (e.g. to avoid having 20 clusters with 1000 workers each at the same
+time).
+
+
 ### 5. (optional) Tweak tests
 
 Nothing prevents you from changing the tests themselves.
@@ -154,6 +160,9 @@ categories:
   - runtime
   - benchmarks
   - stability
+max_parallel:
+  ci_jobs: 5
+  pytest_workers_per_job: 4
 ```
 
 ### 6. Run CI
diff --git a/AB_environments/config.yaml b/AB_environments/config.yaml
index e9e4f5f3b1..700b2fa226 100644
--- a/AB_environments/config.yaml
+++ b/AB_environments/config.yaml
@@ -14,3 +14,16 @@ categories:
   - benchmarks
   # - runtime
   # - stability
+
+# AWS implements limiters to how many EC2 instances you can spawn in parallel on the
+# same AWS account. If such limit is reached, tests will randomly fail when trying to
+# create the Coiled clusters, and restarting failed jobs won't fix the problem.
+# Additionally, there are problems with Coiled itself triggered by limitations that are
+# never actually reached with real paying users.
+max_parallel:
+  # Number of parallel A/B test jobs per branch.
+  ci_jobs: 5
+  # Number of parallel test_*.py modules per A/B test job.
+  # Each module typically spawns one Coiled cluster at a time.
+  # Set to 1 to disable pytest-xdist.
+  pytest_workers_per_job: 4
diff --git a/ci/scripts/discover_ab_environments.py b/ci/scripts/discover_ab_environments.py
index a9b18e3575..f3377a347a 100644
--- a/ci/scripts/discover_ab_environments.py
+++ b/ci/scripts/discover_ab_environments.py
@@ -12,9 +12,19 @@ class JSONOutput(TypedDict):
     run_AB: bool
     repeat: list[int]
     runtime: list[str]
+    max_parallel: int
     pytest_args: list[str]
 
 
+DO_NOT_RUN: JSONOutput = {
+    "run_AB": False,
+    "repeat": [],
+    "runtime": [],
+    "max_parallel": 1,
+    "pytest_args": [],
+}
+
+
 def build_json() -> JSONOutput:
     with open("AB_environments/config.yaml") as fh:
         cfg = yaml.safe_load(fh)
@@ -26,7 +36,7 @@ def build_json() -> JSONOutput:
             raise ValueError(f"Not a valid test category: {category}")
 
     if not cfg["repeat"] or not cfg["categories"]:
-        return {"run_AB": False, "repeat": [], "runtime": [], "pytest_args": []}
+        return DO_NOT_RUN
 
     runtimes = []
     for conda_fname in sorted(glob.glob("AB_environments/AB_*.conda.yaml")):
@@ -37,7 +47,7 @@ def build_json() -> JSONOutput:
         runtimes.append(env_name)
 
     if not runtimes:
-        return {"run_AB": False, "repeat": [], "runtime": [], "pytest_args": []}
+        return DO_NOT_RUN
 
     if "AB_baseline" not in runtimes:
         # If any A/B environments are defined, AB_baseline is required
@@ -46,11 +56,15 @@ def build_json() -> JSONOutput:
     if cfg["test_null_hypothesis"]:
         runtimes += ["AB_null_hypothesis"]
 
+    n = cfg["max_parallel"]["pytest_workers_per_job"]
+    xdist_args = f"-n {n} --dist loadscope " if n > 1 else ""
+
     return {
         "run_AB": True,
         "repeat": list(range(1, cfg["repeat"] + 1)),
         "runtime": runtimes,
-        "pytest_args": [" ".join(f"tests/{c}" for c in cfg["categories"])],
+        "max_parallel": cfg["max_parallel"]["ci_jobs"],
+        "pytest_args": [xdist_args + " ".join(f"tests/{c}" for c in cfg["categories"])],
     }
 
 
diff --git a/ci/scripts/run_tests.sh b/ci/scripts/run_tests.sh
index 867ad2d3e9..7ad23f22a1 100644
--- a/ci/scripts/run_tests.sh
+++ b/ci/scripts/run_tests.sh
@@ -16,4 +16,4 @@ then
     EXTRA_OPTIONS="$EXTRA_OPTIONS --benchmark"
 fi
 
-python -m pytest -n 4 --dist loadscope $EXTRA_OPTIONS $@
+python -m pytest $EXTRA_OPTIONS $@
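
For illustration only (not part of the diff): a sketch of the matrix that `build_json()` would emit after this change, assuming a config with `repeat: 1`, `test_null_hypothesis: false`, `categories: [benchmarks]`, the `max_parallel` block shown above, and two environment files `AB_baseline.conda.yaml` and `AB_test_feature.conda.yaml` (all of these values and file names are assumptions).

```python
# Hypothetical JSONOutput from ci/scripts/discover_ab_environments.py under the
# assumptions stated above; actual values depend on the real config.yaml and on
# which AB_*.conda.yaml files exist.
expected_matrix = {
    "run_AB": True,
    "repeat": [1],  # list(range(1, repeat + 1)) with repeat: 1 (assumed)
    "runtime": ["AB_baseline", "AB_test_feature"],  # assumed environment names
    # max_parallel.ci_jobs feeds the job-level `max-parallel` via fromJson() in ab_tests.yml
    "max_parallel": 5,
    # pytest_workers_per_job > 1 prepends the pytest-xdist flags to the test paths
    "pytest_args": ["-n 4 --dist loadscope tests/benchmarks"],
}
```

The xdist flags travel through `pytest_args` into `run_tests.sh`, which after this change no longer hardcodes `-n 4 --dist loadscope` and simply forwards its arguments to pytest.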