From 6ff8e34bd736cf26cd8f8d00f0682b6b5048cb45 Mon Sep 17 00:00:00 2001
From: crusaderky <crusaderky@gmail.com>
Date: Wed, 30 Nov 2022 15:06:34 +0000
Subject: [PATCH 1/5] Tweak parallelism from A/B config

---
 .github/workflows/ab_tests.yml         |  7 +------
 .github/workflows/tests.yml            |  2 +-
 AB_environments/README.md              |  3 +++
 AB_environments/config.yaml            | 15 ++++++++++++++-
 ci/scripts/discover_ab_environments.py | 20 +++++++++++++++++---
 ci/scripts/run_tests.sh                |  2 +-
 6 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ab_tests.yml b/.github/workflows/ab_tests.yml
index c31ac6ea7a..21935e7f98 100644
--- a/.github/workflows/ab_tests.yml
+++ b/.github/workflows/ab_tests.yml
@@ -52,12 +52,7 @@ jobs:
     timeout-minutes: 120
     strategy:
       fail-fast: false
-      # AWS implements limiters to how many EC2 instances you can spawn in parallel *on
-      # the same AWS account*. If such limit is reached, jobs will randomly fail when
-      # trying to create the Coiled clusters, and restarting failed jobs won't fix the
-      # problem. Additionally, there are problems with Coiled itself triggered by
-      # limitations that are never actually reached with real paying users.
-      max-parallel: 5
+      max-parallel: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).max_parallel }}
       matrix:
         os: [ubuntu-latest]
         python-version: ["3.9"]
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d06598e51a..afe0e965c8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -102,7 +102,7 @@ jobs:
           DB_NAME: ${{ matrix.os }}-${{ matrix.runtime-version }}-py${{ matrix.python-version }}.db
           BENCHMARK: true
           CLUSTER_DUMP: always
-        run: bash ci/scripts/run_tests.sh ${{ matrix.pytest_args }}
+        run: bash ci/scripts/run_tests.sh -n 4 --dist loadscope ${{ matrix.pytest_args }}
 
       - name: Upload benchmark results
         uses: actions/upload-artifact@v3
diff --git a/AB_environments/README.md b/AB_environments/README.md
index a647c7be85..79a4652c35 100644
--- a/AB_environments/README.md
+++ b/AB_environments/README.md
@@ -121,6 +121,9 @@ categories:
   - runtime
   - benchmarks
   - stability
+max_parallel:
+  ci_jobs: 5
+  modules_per_job: 4
 ```
 
 ### 6. Run CI
diff --git a/AB_environments/config.yaml b/AB_environments/config.yaml
index e9e4f5f3b1..5ba867fc21 100644
--- a/AB_environments/config.yaml
+++ b/AB_environments/config.yaml
@@ -2,7 +2,7 @@
 # Lower values are faster and cheaper but will result in higher variance.
 # This must remain set to 0 in the main branch, thus completely disabling
 # A/B tests, in order to avoid unnecessary runs.
-repeat: 0
+repeat: 1
 
 # Set to true to automatically create a verbatim copy of AB_baseline and then compare
 # the two in the A/B tests. Set to false to save some money if you are already confident
@@ -14,3 +14,16 @@ categories:
   - benchmarks
   # - runtime
   # - stability
+
+# AWS implements limiters to how many EC2 instances you can spawn in parallel on the
+# same AWS account. If such limit is reached, tests will randomly fail when trying to
+# create the Coiled clusters, and restarting failed jobs won't fix the problem.
+# Additionally, there are problems with Coiled itself triggered by limitations that are
+# never actually reached with real paying users.
+max_parallel:
+  # Number of parallel A/B test jobs per branch.
+  ci_jobs: 5
+  # Number of parallel test_*.py modules per A/B test job.
+  # Each module typically spawns one Coiled cluster at a time.
+  # Set to 1 to disable pytest-xdist.
+  modules_per_job: 4
diff --git a/ci/scripts/discover_ab_environments.py b/ci/scripts/discover_ab_environments.py
index a9b18e3575..98f8bdea11 100644
--- a/ci/scripts/discover_ab_environments.py
+++ b/ci/scripts/discover_ab_environments.py
@@ -12,9 +12,19 @@ class JSONOutput(TypedDict):
     run_AB: bool
     repeat: list[int]
     runtime: list[str]
+    max_parallel: int
     pytest_args: list[str]
 
 
+DO_NOT_RUN: JSONOutput = {
+    "run_AB": False,
+    "repeat": [],
+    "runtime": [],
+    "max_parallel": 1,
+    "pytest_args": [],
+}
+
+
 def build_json() -> JSONOutput:
     with open("AB_environments/config.yaml") as fh:
         cfg = yaml.safe_load(fh)
@@ -26,7 +36,7 @@ def build_json() -> JSONOutput:
             raise ValueError("fNot a valid test category: {category}")
 
     if not cfg["repeat"] or not cfg["categories"]:
-        return {"run_AB": False, "repeat": [], "runtime": [], "pytest_args": []}
+        return DO_NOT_RUN
 
     runtimes = []
     for conda_fname in sorted(glob.glob("AB_environments/AB_*.conda.yaml")):
@@ -37,7 +47,7 @@ def build_json() -> JSONOutput:
         runtimes.append(env_name)
 
     if not runtimes:
-        return {"run_AB": False, "repeat": [], "runtime": [], "pytest_args": []}
+        return DO_NOT_RUN
 
     if "AB_baseline" not in runtimes:
         # If any A/B environments are defined, AB_baseline is required
@@ -46,11 +56,15 @@ def build_json() -> JSONOutput:
     if cfg["test_null_hypothesis"]:
         runtimes += ["AB_null_hypothesis"]
 
+    n = cfg["max_parallel"]["modules_per_job"]
+    xdist_args = f"-n {n} --dist loadscope " if n > 1 else ""
+
     return {
         "run_AB": True,
         "repeat": list(range(1, cfg["repeat"] + 1)),
         "runtime": runtimes,
-        "pytest_args": [" ".join(f"tests/{c}" for c in cfg["categories"])],
+        "max_parallel": cfg["max_parallel"]["ci_jobs"],
+        "pytest_args": [xdist_args + " ".join(f"tests/{c}" for c in cfg["categories"])],
     }
 
 
diff --git a/ci/scripts/run_tests.sh b/ci/scripts/run_tests.sh
index 867ad2d3e9..7ad23f22a1 100644
--- a/ci/scripts/run_tests.sh
+++ b/ci/scripts/run_tests.sh
@@ -16,4 +16,4 @@ then
   EXTRA_OPTIONS="$EXTRA_OPTIONS --benchmark"
 fi
 
-python -m pytest -n 4 --dist loadscope $EXTRA_OPTIONS $@
+python -m pytest $EXTRA_OPTIONS $@

From e537d779964b6337feee71266f5d9160ea7f4627 Mon Sep 17 00:00:00 2001
From: crusaderky <crusaderky@gmail.com>
Date: Thu, 1 Dec 2022 11:22:27 +0000
Subject: [PATCH 2/5] Test disable pytest-xdist

---
 AB_environments/AB_sample.cluster.yaml | 14 --------------
 AB_environments/AB_sample.conda.yaml   | 17 -----------------
 AB_environments/AB_sample.dask.yaml    | 13 -------------
 AB_environments/config.yaml            |  4 ++--
 4 files changed, 2 insertions(+), 46 deletions(-)
 delete mode 100644 AB_environments/AB_sample.cluster.yaml
 delete mode 100644 AB_environments/AB_sample.conda.yaml
 delete mode 100644 AB_environments/AB_sample.dask.yaml

diff --git a/AB_environments/AB_sample.cluster.yaml b/AB_environments/AB_sample.cluster.yaml
deleted file mode 100644
index b9e7e1cebb..0000000000
--- a/AB_environments/AB_sample.cluster.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-# Sample cluster creation options file for A/B testing.
-# Change contents/delete/rename as needed.
-
-# Every A/B environment *must* present these three files:
-# - AB_<name>.conda.yaml
-# - AB_<name>.dask.yaml
-# - AB_<name>.cluster.yaml
-
-# Overrides ../cluster_kwargs.yaml.
-# Leave empty if you don't want to override anything.
-
-# small_cluster:
-#   n_workers: 5
-#   worker_vm_types: [m6i.xlarge]  # 4CPU, 16GiB
diff --git a/AB_environments/AB_sample.conda.yaml b/AB_environments/AB_sample.conda.yaml
deleted file mode 100644
index 8c421008a7..0000000000
--- a/AB_environments/AB_sample.conda.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-# Sample conda environment file for A/B testing.
-# Change contents/delete/rename as needed.
-
-# Every A/B environment *must* present these three files:
-# - AB_<name>.conda.yaml
-# - AB_<name>.dask.yaml
-# - AB_<name>.cluster.yaml
-
-# See notes in AB_baseline.conda.yaml
-channels:
-  - conda-forge
-dependencies:
-    - python=3.9
-    - coiled-runtime=0.1.1
-    - pip:
-      - dask==2022.11.1
-      - distributed==2022.11.1
diff --git a/AB_environments/AB_sample.dask.yaml b/AB_environments/AB_sample.dask.yaml
deleted file mode 100644
index baa367a5f5..0000000000
--- a/AB_environments/AB_sample.dask.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-# Sample dask config file for A/B testing.
-# Change contents/delete/rename as needed.
-
-# Every A/B environment *must* present these three files:
-# - AB_<name>.conda.yaml
-# - AB_<name>.dask.yaml
-# - AB_<name>.cluster.yaml
-
-# Leave empty if you don't want to override anything.
-
-# distributed:
-#   scheduler:
-#     worker-saturation: 1.2
diff --git a/AB_environments/config.yaml b/AB_environments/config.yaml
index 5ba867fc21..adf8a3ff24 100644
--- a/AB_environments/config.yaml
+++ b/AB_environments/config.yaml
@@ -7,7 +7,7 @@ repeat: 1
 # Set to true to automatically create a verbatim copy of AB_baseline and then compare
 # the two in the A/B tests. Set to false to save some money if you are already confident
 # that the 'repeat' setting is high enough.
-test_null_hypothesis: true
+test_null_hypothesis: false
 
 # Tests categories to run. These are subdirectories of tests/.
 categories:
@@ -26,4 +26,4 @@ max_parallel:
   # Number of parallel test_*.py modules per A/B test job.
   # Each module typically spawns one Coiled cluster at a time.
   # Set to 1 to disable pytest-xdist.
-  modules_per_job: 4
+  modules_per_job: 1

From 9cff90e57a7b168642f34adb5d81c0a180f9bd17 Mon Sep 17 00:00:00 2001
From: crusaderky <crusaderky@gmail.com>
Date: Thu, 1 Dec 2022 12:08:50 +0000
Subject: [PATCH 3/5] fix

---
 conftest.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/conftest.py b/conftest.py
index c39f142b47..f8faaa6b67 100644
--- a/conftest.py
+++ b/conftest.py
@@ -463,7 +463,8 @@ def cluster_kwargs() -> dict:
         override_fname = os.path.join(base_dir, override_fname)
         with open(override_fname) as fh:
             override_config = yaml.safe_load(fh)
-        dask.config.update(config, override_config)
+        if override_config:  # Empty files convert to None when read with yaml
+            dask.config.update(config, override_config)
 
     default = config.pop("default")
     config = {k: dask.config.merge(default, v) for k, v in config.items()}

From c662ef433c73189d1014a52234bb7e000ce51f08 Mon Sep 17 00:00:00 2001
From: crusaderky <crusaderky@gmail.com>
Date: Thu, 1 Dec 2022 16:50:29 +0000
Subject: [PATCH 4/5] revert temp tests

---
 AB_environments/AB_sample.cluster.yaml | 14 ++++++++++++++
 AB_environments/AB_sample.conda.yaml   | 17 +++++++++++++++++
 AB_environments/AB_sample.dask.yaml    | 13 +++++++++++++
 AB_environments/config.yaml            |  6 +++---
 4 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 AB_environments/AB_sample.cluster.yaml
 create mode 100644 AB_environments/AB_sample.conda.yaml
 create mode 100644 AB_environments/AB_sample.dask.yaml

diff --git a/AB_environments/AB_sample.cluster.yaml b/AB_environments/AB_sample.cluster.yaml
new file mode 100644
index 0000000000..b9e7e1cebb
--- /dev/null
+++ b/AB_environments/AB_sample.cluster.yaml
@@ -0,0 +1,14 @@
+# Sample cluster creation options file for A/B testing.
+# Change contents/delete/rename as needed.
+
+# Every A/B environment *must* present these three files:
+# - AB_<name>.conda.yaml
+# - AB_<name>.dask.yaml
+# - AB_<name>.cluster.yaml
+
+# Overrides ../cluster_kwargs.yaml.
+# Leave empty if you don't want to override anything.
+
+# small_cluster:
+#   n_workers: 5
+#   worker_vm_types: [m6i.xlarge]  # 4CPU, 16GiB
diff --git a/AB_environments/AB_sample.conda.yaml b/AB_environments/AB_sample.conda.yaml
new file mode 100644
index 0000000000..8c421008a7
--- /dev/null
+++ b/AB_environments/AB_sample.conda.yaml
@@ -0,0 +1,17 @@
+# Sample conda environment file for A/B testing.
+# Change contents/delete/rename as needed.
+
+# Every A/B environment *must* present these three files:
+# - AB_<name>.conda.yaml
+# - AB_<name>.dask.yaml
+# - AB_<name>.cluster.yaml
+
+# See notes in AB_baseline.conda.yaml
+channels:
+  - conda-forge
+dependencies:
+    - python=3.9
+    - coiled-runtime=0.1.1
+    - pip:
+      - dask==2022.11.1
+      - distributed==2022.11.1
diff --git a/AB_environments/AB_sample.dask.yaml b/AB_environments/AB_sample.dask.yaml
new file mode 100644
index 0000000000..baa367a5f5
--- /dev/null
+++ b/AB_environments/AB_sample.dask.yaml
@@ -0,0 +1,13 @@
+# Sample dask config file for A/B testing.
+# Change contents/delete/rename as needed.
+
+# Every A/B environment *must* present these three files:
+# - AB_<name>.conda.yaml
+# - AB_<name>.dask.yaml
+# - AB_<name>.cluster.yaml
+
+# Leave empty if you don't want to override anything.
+
+# distributed:
+#   scheduler:
+#     worker-saturation: 1.2
diff --git a/AB_environments/config.yaml b/AB_environments/config.yaml
index adf8a3ff24..93189153ad 100644
--- a/AB_environments/config.yaml
+++ b/AB_environments/config.yaml
@@ -2,12 +2,12 @@
 # Lower values are faster and cheaper but will result in higher variance.
 # This must remain set to 0 in the main branch, thus completely disabling
 # A/B tests, in order to avoid unnecessary runs.
-repeat: 1
+repeat: 0
 
 # Set to true to automatically create a verbatim copy of AB_baseline and then compare
 # the two in the A/B tests. Set to false to save some money if you are already confident
 # that the 'repeat' setting is high enough.
-test_null_hypothesis: false
+test_null_hypothesis: true
 
 # Tests categories to run. These are subdirectories of tests/.
 categories:
@@ -26,4 +26,4 @@ max_parallel:
   # Number of parallel test_*.py modules per A/B test job.
   # Each module typically spawns one Coiled cluster at a time.
   # Set to 1 to disable pytest-xdist.
-  modules_per_job: 1
+  modules_per_job: 4

From d4db652e82bdcc3ac7d0be85c6d61f215cb4b0fa Mon Sep 17 00:00:00 2001
From: crusaderky <crusaderky@gmail.com>
Date: Wed, 14 Dec 2022 17:15:04 +0000
Subject: [PATCH 5/5] Code review

---
 AB_environments/README.md              | 10 ++++++++--
 AB_environments/config.yaml            |  2 +-
 ci/scripts/discover_ab_environments.py |  2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/AB_environments/README.md b/AB_environments/README.md
index 3a2bdfebb9..31bcf32fdb 100644
--- a/AB_environments/README.md
+++ b/AB_environments/README.md
@@ -112,9 +112,15 @@ automatically create a verbatim copy of AB_baseline and then compare the two in
 tests. Set it to false to save some money if you are already confident that the 'repeat'
 setting is high enough.
 
-Finally, the files offers a `categories` list. These are the subdirectories of `tests/`
+The file offers a `categories` list. These are the subdirectories of `tests/`
 which you wish to run.
 
+Finally, the `max_parallel` setting lets you tweak maximum test parallelism, both in
+github actions and in pytest-xdist. Reducing parallelism is useful when testing on very
+large clusters (e.g. to avoid having 20 clusters with 1000 workers each at the same
+time).
+
+
 ### 5. (optional) Tweak tests
 Nothing prevents you from changing the tests themselves.
 
@@ -156,7 +162,7 @@ categories:
   - stability
 max_parallel:
   ci_jobs: 5
-  modules_per_job: 4
+  pytest_workers_per_job: 4
 ```
 
 ### 6. Run CI
diff --git a/AB_environments/config.yaml b/AB_environments/config.yaml
index 93189153ad..700b2fa226 100644
--- a/AB_environments/config.yaml
+++ b/AB_environments/config.yaml
@@ -26,4 +26,4 @@ max_parallel:
   # Number of parallel test_*.py modules per A/B test job.
   # Each module typically spawns one Coiled cluster at a time.
   # Set to 1 to disable pytest-xdist.
-  modules_per_job: 4
+  pytest_workers_per_job: 4
diff --git a/ci/scripts/discover_ab_environments.py b/ci/scripts/discover_ab_environments.py
index 98f8bdea11..f3377a347a 100644
--- a/ci/scripts/discover_ab_environments.py
+++ b/ci/scripts/discover_ab_environments.py
@@ -56,7 +56,7 @@ def build_json() -> JSONOutput:
     if cfg["test_null_hypothesis"]:
         runtimes += ["AB_null_hypothesis"]
 
-    n = cfg["max_parallel"]["modules_per_job"]
+    n = cfg["max_parallel"]["pytest_workers_per_job"]
     xdist_args = f"-n {n} --dist loadscope " if n > 1 else ""
 
     return {