diff --git a/.coveragerc b/.coveragerc index c6aa107297..28208602d7 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,8 +1,12 @@ [run] parallel = true branch = true -source = openproblems -omit = */__init__.py +source = + openproblems + test +omit = + */__init__.py + test/utils/*.py [report] exclude_lines = diff --git a/.dockerignore b/.dockerignore index 4e785b3bf1..f42541ded2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,3 +2,13 @@ nf-openproblems workflow website .github +.snakemake +static +test +*.egg-info +.coverage* +.pytest_cache +.idea +.vscode +*.md +!./README.md diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c69bbe6c5a..6553e28774 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,63 +5,74 @@ updates: schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems-github-actions" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" - directory: "/docker/openproblems-python-batch-integration" + directory: "/docker/openproblems-python-pytorch" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems-python-extras" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems-python-scvi" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" - directory: "/docker/openproblems-python-tf2.4" + directory: "/docker/openproblems-python-tensorflow" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems-r-base" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems-r-extras" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "pip" directory: "/docker/openproblems-r-pytorch" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" open-pull-requests-limit: 1 + rebase-strategy: "disabled" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 203bf09d76..ad07e4dd5d 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,12 +12,9 @@ ### Testing -* [ ] This submission was written on a forked copy of SingleCellOpenProblems -* [ ] GitHub Actions "Run Benchmark" tests are passing on this base branch of this pull - request (include link to passed test: ) -* [ ] If this pull request is not ready for review (including passing the "Run - Benchmark" tests), I will open this PR as a draft (click on the down arrow next to the - "Create Pull Request" button) +* [ ] This submission was written in a forked copy of openproblems +* [ ] Nextflow test pipeline is passing on this base branch of this pull + request (include link to passed test on NF Tower found in GitHub Actions summary: ) ### Submission guidelines @@ -31,11 +28,6 @@ This PR will be evaluated on the basis of the following checks: * [ ] The task addresses a valid open problem in 
single-cell analysis -* [ ] The latest version of master is merged and tested -* [ ] The methods/metrics are imported to `__init__.py` and were tested in the pipeline -* [ ] Method and metric decorators are annotated with paper title, year, author, code - version, and date -* [ ] The README gives an outline of the methods, metrics and datasets in the folder * [ ] The README provides a satisfactory task explanation (for new tasks) * [ ] The sample test data is appropriate to test implementation of all methods and metrics (for new tasks) diff --git a/.github/workflows/check_r_dependencies.yml b/.github/workflows/check_r_dependencies.yml index 91d87dd44a..bb447a2a24 100644 --- a/.github/workflows/check_r_dependencies.yml +++ b/.github/workflows/check_r_dependencies.yml @@ -12,6 +12,14 @@ on: branches: - 'main' +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + pull-requests: write + jobs: check-r-dependencies: runs-on: ubuntu-latest @@ -54,5 +62,5 @@ jobs: title: "Update ${{ env.PKG_CHANGED }}" committer: "openproblems-bio " author: "openproblems-bio " - commit-message: "Update ${{ env.PKG_CHANGED }}" + commit-message: "Update ${{ env.PKG_CHANGED }} # ci skip" draft: true diff --git a/.github/workflows/comment_pull_request.yml b/.github/workflows/comment_pull_request.yml new file mode 100644 index 0000000000..7b80d44f59 --- /dev/null +++ b/.github/workflows/comment_pull_request.yml @@ -0,0 +1,20 @@ +name: Comment on Pull Request Status + +on: + pull_request_target: + types: [opened, synchronize, reopened, ready_for_review] + +permissions: + pull-requests: write + +jobs: + comment_pr: + + runs-on: ubuntu-latest + + steps: + - uses: thollander/actions-comment-pull-request@v2 + with: + message: | + [![Current build status](https://img.shields.io/github/actions/workflow/status/${{ github.event.pull_request.head.repo.full_name }}/run_tests.yml?branch=${{ github.event.pull_request.head.ref }})](https://github.com/${{ github.event.pull_request.head.repo.full_name }}/actions/workflows/run_tests.yml?query=branch%3A${{ github.event.pull_request.head.ref }}) + comment_tag: build_status diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index b38848967e..501e225900 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -6,26 +6,25 @@ on: pull_request: types: [opened, synchronize, reopened, ready_for_review] +permissions: + contents: write + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: pre-commit: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest container: image: singlecellopenproblems/openproblems-github-actions:latest options: --user root if: >- - !endsWith(github.event.head_commit.message, '# ci skip') && - ( - startsWith(github.ref, 'refs/heads') || - github.event.pull_request.draft == false - ) + !endsWith(github.event.head_commit.message, '# ci skip') steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.1 - with: - access_token: ${{ github.token }} - uses: actions/checkout@v3 with: diff --git a/.github/workflows/process_results.yml b/.github/workflows/process_results.yml index 2a8bf1868a..a8a62d8038 100644 --- a/.github/workflows/process_results.yml +++ b/.github/workflows/process_results.yml @@ -7,6 +7,13 @@ on: branches: - 'test_process' +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + jobs: 
process_results: runs-on: ubuntu-latest @@ -23,12 +30,13 @@ jobs: - name: Checkout website repo uses: actions/checkout@v3 with: - fetch-depth: 0 + fetch-depth: 1 repository: openproblems-bio/website + ref: main path: website token: ${{ secrets.GH_ACTIONS_WEBSITE_PAT }} - - name: Set up Git branch + - name: Set up website Git branch working-directory: website run: | git checkout -b $UPDATE_BRANCH_NAME @@ -36,7 +44,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: ${{ matrix.config.python }} + python-version: "3.8" - name: Install AWS CLI run: | @@ -66,25 +74,18 @@ jobs: S3_URI="s3://openproblems-nextflow/cwd_example" fi aws s3 cp --quiet --recursive "${S3_URI}" /tmp/results/ - rm -r website/data/results/*/ - rm -r website/content/benchmarks/*/ - python openproblems/workflow/parse_nextflow.py /tmp website/data/results - python openproblems/workflow/generate_website_markdown.py website/content/benchmarks + python openproblems/workflow/parse_nextflow.py /tmp website/results - - name: AWS S3 cleanup - if: "github.event_name == 'repository_dispatch'" - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-west-2 - run: | - aws s3 rm --recursive "s3://openproblems-nextflow/work_main" - aws s3 rm --recursive "s3://openproblems-nextflow/cwd_example" - aws s3 cp --recursive "s3://openproblems-nextflow/cwd_main" "s3://openproblems-nextflow/cwd_example" - aws s3 rm --recursive "s3://openproblems-nextflow/cwd_main" + - name: Upload results + uses: actions/upload-artifact@main + with: + name: results + path: website/results - name: Push to openproblems-bio/website - if: "github.event_name == 'repository_dispatch'" + if: | + github.event_name == 'repository_dispatch' || + endsWith(github.event.head_commit.message, '# publish') shell: bash working-directory: './website' env: @@ -92,30 +93,29 @@ jobs: run: | git push origin "${UPDATE_BRANCH_NAME}" - - name: Create Pull Request - if: "github.event_name == 'repository_dispatch'" + - name: Create website Pull Request + if: | + github.event_name == 'repository_dispatch' || + endsWith(github.event.head_commit.message, '# publish') uses: peter-evans/create-pull-request@v4 with: branch: ${{ env.UPDATE_BRANCH_NAME }} delete-branch: true - base: main title: '[auto] Update benchmark results' - reviewers: scottgigante, dburkhardt + reviewers: scottgigante-immunai,rcannood path: './website' token: ${{ secrets.GH_ACTIONS_WEBSITE_PAT }} author: "openproblems-bio " - commit-message: "Update benchmark results # ci skip" - - - name: Upload results on test - if: "github.event_name != 'repository_dispatch'" - uses: actions/upload-artifact@main - with: - name: results - path: website/data/results + commit-message: "Update benchmark results" - - name: Upload markdown on test - if: "github.event_name != 'repository_dispatch'" - uses: actions/upload-artifact@main - with: - name: markdown - path: website/content/benchmarks + - name: AWS S3 cleanup + if: "github.event_name == 'repository_dispatch'" + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-west-2 + run: | + aws s3 rm --recursive "s3://openproblems-nextflow/work_main" + aws s3 rm --recursive "s3://openproblems-nextflow/cwd_example" + aws s3 cp --recursive "s3://openproblems-nextflow/cwd_main" "s3://openproblems-nextflow/cwd_example" + aws s3 rm --recursive "s3://openproblems-nextflow/cwd_main" diff 
--git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 217af3fb4e..227b547475 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -6,42 +6,41 @@ on: - '*' branches: - '**' - pull_request: - types: [opened, synchronize, reopened, ready_for_review] + pull_request_review: + types: + - 'submitted' -jobs: - cancel_previous_runs: - if: | - !endsWith(github.event.head_commit.message, '# ci skip') && - ( - startsWith(github.ref, 'refs/heads') || - startsWith(github.ref, 'refs/tags') || - github.event.pull_request.draft == false - ) - runs-on: ubuntu-latest - steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.10.1 - with: - access_token: ${{ github.token }} +permissions: + contents: write + packages: write + pull-requests: write +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: build_images: - needs: cancel_previous_runs - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest if: | !endsWith(github.event.head_commit.message, '# ci skip') && + !startsWith(github.ref, 'refs/heads/test_process') && + !startsWith(github.ref, 'refs/heads/test_website') && ( - startsWith(github.ref, 'refs/heads/test_docker') || + (github.event_name != 'pull_request_review') || ( - startsWith(github.ref, 'refs/heads/main') && - github.repository == 'openproblems-bio/openproblems' + github.event.review.state == 'approved' && + github.event.pull_request.head.repo.owner.id == github.event.pull_request.base.repo.owner.id ) ) env: BRANCH_NAME: "auto_update_docker_${{ github.run_number }}" + outputs: + images: ${{ steps.export-images.outputs.images }} + steps: - uses: actions/checkout@v3 with: @@ -55,6 +54,20 @@ jobs: with: python-version: "3.8" + - name: Set up environment + run: | + echo "LINUX_VERSION=$(uname -rs)" >> $GITHUB_ENV + echo "REPO_OWNER_LOWER=${OWNER,,}" >> ${GITHUB_ENV} + env: + OWNER: '${{ github.repository_owner }}' + + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: ${{ env.pythonLocation }} + key: python-pip-${{ env.LINUX_VERSION }}-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} + restore-keys: python-pip-${{ env.LINUX_VERSION }}-${{ env.pythonLocation }}- + - name: Install package & dependencies run: | python -m pip install --upgrade pip @@ -62,15 +75,43 @@ jobs: pip install --editable .[evaluate] python -c "import openproblems" + - name: Pull Docker images + if: | + ( + startsWith(github.ref, 'refs/heads/test_benchmark') || + startsWith(github.ref, 'refs/heads/test_full_benchmark') + ) + run: | + cd workflow + snakemake -j $(nproc) docker_pull + cd .. + + - name: Update Docker images + if: | + !( + startsWith(github.ref, 'refs/heads/test_docker') || + ( + startsWith(github.ref, 'refs/heads/main') && + github.repository == 'openproblems-bio/openproblems' + ) + ) + run: | + cd workflow + SNAKEMAKE_COPY_SOURCE=1 snakemake -j $(nproc) docker + cd .. + - name: Build Docker images - if: "!startsWith(github.ref, 'refs/heads/main')" + if: | + startsWith(github.ref, 'refs/heads/test_docker') run: | cd workflow snakemake -j $(nproc) docker_build cd .. 
- name: Build and push Docker images - if: "startsWith(github.ref, 'refs/heads/main')" + if: | + startsWith(github.ref, 'refs/heads/main') && + github.repository == 'openproblems-bio/openproblems' env: DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} run: | @@ -89,15 +130,85 @@ jobs: title: '[auto] Update docker version' reviewers: scottgigante, dburkhardt author: "openproblems-bio " - commit-message: Update docker version # ci skip + commit-message: "Update docker version # ci skip" add-paths: docker/.version - - name: Upload check results on fail - if: failure() - uses: actions/upload-artifact@main + - name: Log in to the Container registry + uses: docker/login-action@v2 with: - name: ${{ matrix.config.name }}_results - path: check + registry: ghcr.io + username: ${{ env.REPO_OWNER_LOWER }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Export docker images + id: export-images + run: | + IMAGES="$(find ./docker -mindepth 1 -type d -exec basename {} \;)" + for image in ${IMAGES}; do + GHCR_IMAGE="ghcr.io/${REPO_OWNER_LOWER}/${image}:${{ github.run_id }}" + docker tag singlecellopenproblems/$image $GHCR_IMAGE + docker push $GHCR_IMAGE & + PIDS+=("${!}") + done + for pid in "${PIDS[@]}"; do + # ensure exited with status 0 + wait $pid + done + # convert to JSON + echo "images=[\"$(paste -s -d ' ' <(echo $IMAGES) | sed 's/ */\",\"/g')\"]" >> $GITHUB_OUTPUT + shell: bash -e {0} + + create_matrix: + runs-on: ubuntu-latest + if: | + !endsWith(github.event.head_commit.message, '# ci skip') && + !startsWith(github.ref, 'refs/heads/test_docker') && + !startsWith(github.ref, 'refs/heads/test_benchmark') && + !startsWith(github.ref, 'refs/heads/test_full_benchmark') && + !startsWith(github.ref, 'refs/heads/test_process') && + !startsWith(github.ref, 'refs/heads/test_website') && + ( + github.event_name != 'pull_request_review' || + github.event.review.state == 'approved' + ) + + outputs: + matrix: ${{ steps.generate-matrix.outputs.matrix }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + + - name: Set up environment + run: | + echo "LINUX_VERSION=$(uname -rs)" >> $GITHUB_ENV + + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: ${{ env.pythonLocation }} + key: python-pip-${{ env.LINUX_VERSION }}-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} + restore-keys: python-pip-${{ env.LINUX_VERSION }}-${{ env.pythonLocation }}- + + - name: Install package & dependencies + run: | + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install --editable .[evaluate] + python -c "import openproblems" + + + - name: Create test matrix + id: generate-matrix + run: | + set -eo pipefail + echo "matrix=$(python scripts/generate_test_matrix.py)" >> $GITHUB_OUTPUT run_tester: runs-on: ubuntu-latest @@ -119,24 +230,27 @@ jobs: - /usr/local/lib/android:/opt/remove/android options: --user root - needs: build_images + needs: + - build_images + - create_matrix if: | always() && - (needs.build_images.result == 'success' || needs.build_images.result == 'skipped') && !endsWith(github.event.head_commit.message, '# ci skip') && + needs.create_matrix.result == 'success' && ( - startsWith(github.ref, 'refs/heads') || - startsWith(github.ref, 'refs/tags') || - github.event.pull_request.draft == false - ) && - !startsWith(github.ref, 'refs/heads/test_docker') && - !startsWith(github.ref, 'refs/heads/test_benchmark') && - !startsWith(github.ref, 'refs/heads/test_process') 
+ needs.build_images.result == 'success' || + needs.build_images.result == 'skipped' + ) + + strategy: + fail-fast: false + matrix: + tests: ${{ fromJSON(needs.create_matrix.outputs.matrix) }} steps: - name: Clear space on runner run: | - sudo rm -rf /opt/remove/*/* + sudo find /opt/remove -mindepth 2 -maxdepth 2 -type d -exec rm -rf {} \; - uses: actions/checkout@v3 with: @@ -144,16 +258,50 @@ jobs: - name: Set up environment run: | - echo "LINUX_VERSION=$(uname -a)" >> $GITHUB_ENV + echo "LINUX_VERSION=$(uname -rs)" >> $GITHUB_ENV + echo "pythonLocation=$(which python)" >> $GITHUB_ENV echo "PYTHON_VERSION=$(python --version)" >> $GITHUB_ENV echo "R_VERSION=$(R --version | head -n 1)" >> $GITHUB_ENV + echo "REPO_OWNER_LOWER=${OWNER,,}" >> ${GITHUB_ENV} + env: + OWNER: '${{ github.repository_owner }}' + shell: bash -e {0} + + - name: Log in to the Container registry + uses: docker/login-action@v2 + if: "needs.build_images.result == 'success'" + with: + registry: ghcr.io + username: ${{ env.REPO_OWNER_LOWER }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Download docker images + if: "needs.build_images.result == 'success'" + env: + RUN_ID: ${{ github.run_id }} + run: | + for image in $(find ./docker -mindepth 1 -type d -exec basename {} \;); do + GHCR_IMAGE="ghcr.io/${REPO_OWNER_LOWER}/${image}:${RUN_ID}" + docker pull $GHCR_IMAGE & + PIDS+=("${!}") + done + for pid in "${PIDS[@]}"; do + # ensure exited with status 0 + wait $pid + done + # tag images + for image in $(find ./docker -mindepth 1 -type d -exec basename {} \;); do + GHCR_IMAGE="ghcr.io/${REPO_OWNER_LOWER}/${image}:${RUN_ID}" + docker tag $GHCR_IMAGE singlecellopenproblems/${image}:latest + done + shell: bash -e {0} - name: Cache Python packages uses: actions/cache@v3 with: - path: ${{ env.PYTHON_VERSION }} - key: ${{env.LINUX_VERSION}}-pip-${{ env.PYTHON_VERSION }}-${{ hashFiles('setup.py') }} - restore-keys: ${{env.LINUX_VERSION}}-pip-${{ env.PYTHON_VERSION }}- + path: ${{ env.pythonLocation }} + key: python-pip-${{env.LINUX_VERSION}}-pip-${{ env.PYTHON_VERSION }}-${{ hashFiles('setup.py') }} + restore-keys: python-pip-${{env.LINUX_VERSION}}-pip-${{ env.PYTHON_VERSION }}- - name: Install package & dependencies run: | @@ -172,6 +320,8 @@ jobs: ${{ env.LINUX_VERSION }}-renv-${{ env.R_VERSION }}- - name: Install R packages + env: + BIOCVERSION: '3.16' run: | if (!requireNamespace("renv", quietly = TRUE)) install.packages("renv") renv::restore() @@ -180,47 +330,78 @@ jobs: install_renv("docker/openproblems-github-actions/r_requirements.txt") shell: Rscript {0} - - name: Pull Docker images - if: "startsWith(github.ref, 'refs/heads/main') && github.repository == 'openproblems-bio/openproblems'" - run: | - cd workflow - snakemake -j $(nproc) docker_pull - cd .. - - name: Update Docker images - if: "!(startsWith(github.ref, 'refs/heads/main') && github.repository == 'openproblems-bio/openproblems')" + if: "needs.build_images.result == 'skipped'" run: | cd workflow snakemake -j $(nproc) docker cd .. 
- name: Run tests - run: pytest --cov=openproblems --cov-report=xml -vv --durations=15 --tb=native - - - name: Upload coverage - continue-on-error: ${{ github.repository != 'openproblems-bio/openproblems' }} - run: codecov --no-color --required --flags unittests + timeout-minutes: 60 + run: | + pytest --cov=openproblems --cov=test --cov-report=xml -vv --durations=15 --tb=native -k "${{ matrix.tests }}" + mkdir -p coverage + mv coverage.xml "$(echo 'coverage_${{ matrix.tests }}.xml' | sed 's/[^a-z0-9\.]/_/g')" - - name: Upload check results on fail - if: failure() + - name: Upload coverage to GitHub Actions uses: actions/upload-artifact@main with: - name: ${{ matrix.config.name }}_results - path: results + path: coverage_*.xml + name: coverage - - run_test_benchmark: + upload_coverage: needs: run_tester - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest + if: >- + always() && + !endsWith(github.event.head_commit.message, '# ci skip') && + needs.run_tester.result == 'success' + + steps: + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Download coverage from GitHub Actions + uses: actions/download-artifact@v3 + with: + name: coverage + + - name: Upload coverage + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + flags: unittests + fail_ci_if_error: ${{ github.repository == 'openproblems-bio/openproblems' }} + verbose: true + override_pr: ${{ github.event.pull_request.number }} + + - name: Delete coverage artifacts + uses: geekyeggo/delete-artifact@v2 + with: + name: coverage + + setup_benchmark: + needs: + - run_tester + - build_images + runs-on: ubuntu-latest if: >- always() && !endsWith(github.event.head_commit.message, '# ci skip') && - github.event_name == 'push' && + needs.build_images.result == 'success' && + github.event_name != 'pull_request_review' && ( needs.run_tester.result == 'success' || - startsWith(github.ref, 'refs/heads/test_benchmark') + needs.run_tester.result == 'skipped' ) + outputs: + branch: ${{ steps.setup-environment.outputs.branch }} + run_name: ${{ steps.setup-environment.outputs.run_name }} + steps: - name: Check dependabot run: | @@ -229,18 +410,6 @@ jobs: exit 1 fi - - uses: actions/checkout@v3 - with: - fetch-depth: 1000 - - - name: Clear space on runner - run: ./scripts/clear_runner_diskspace.sh - - - name: Install system dependencies - run: | - sudo apt-get update -qq || (sudo rm /etc/apt/sources.list.d/* && sudo apt-get update -yqq) - sudo apt-get install -qy --no-install-recommends libhdf5-dev pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev - - name: Check Tower authentication env: TOWER_ACCESS_TOKEN: ${{ secrets.TOWER_ACCESS_KEY }} @@ -267,110 +436,62 @@ jobs: exit 1 fi - - name: Set up Python - uses: actions/setup-python@v4 + - uses: actions/checkout@v3 with: - python-version: "3.8" + fetch-depth: 1 - - name: Set up Java ${{ matrix.java_version }} - uses: actions/setup-java@v3 - with: - java-version: 15 - architecture: x64 - distribution: zulu + - name: Clear space on runner + run: ./scripts/clear_runner_diskspace.sh - name: Set up environment + id: setup-environment run: | - SCRIPTS_PATH=$(python3 -c 'import os, sysconfig; print(sysconfig.get_path("scripts",f"{os.name}_user"))') - echo "PATH=${SCRIPTS_PATH}:${PATH}" >> $GITHUB_ENV - echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV - echo "PYTHON_LOCATION=$(which python3)" >> $GITHUB_ENV - echo "UBUNTU_VERSION=`grep DISTRIB_RELEASE /etc/lsb-release | sed 's/.*=//g'`" >> $GITHUB_ENV # If not on the base repository, append first 6 characters of username 
to the image name # to avoid clashes on ECR REPO_PARSED=$(echo ${{ github.repository }} | awk '{print $1}' FS=/ | head -c 6) BRANCH_PARSED=$(echo ${{ github.ref }} | sed 's:refs/[a-z]*/::' | sed 's:[^a-zA-Z0-9]:-:g') - if [[ "${{ github.repository }}" == "openproblems-bio/openproblems" ]]; then + if [[ ${{ startsWith(github.ref, 'refs/tags') || startsWith(github.ref, 'refs/heads/test_full_benchmark') }} == true ]]; then + BRANCH="prod" + elif [[ "${{ github.repository }}" == "openproblems-bio/openproblems" ]]; then BRANCH=`echo $BRANCH_PARSED | head -c 40` else BRANCH="${REPO_PARSED}-`echo $BRANCH_PARSED | head -c 33`" fi BRANCH=`echo $BRANCH | sed 's/[^a-zA-Z0-9]*$//'` echo "BRANCH=${BRANCH}" >> $GITHUB_ENV + echo "branch=${BRANCH}" >> $GITHUB_OUTPUT + RUN_NAME="$(echo "$BRANCH" | sed "s/[^a-zA-Z0-9]/_/g")_$(git rev-parse --short HEAD)_${GITHUB_RUN_ATTEMPT}" + echo "run_name=${RUN_NAME}" >> $GITHUB_OUTPUT + echo "REPO_OWNER_LOWER=${OWNER,,}" >> ${GITHUB_ENV} + env: + OWNER: '${{ github.repository_owner }}' - - name: Cache Python packages - uses: actions/cache@v3 + - name: Log in to the Container registry + uses: docker/login-action@v2 with: - path: ${{ env.pythonLocation }} - key: ${{ env.UBUNTU_VERSION }}-pip-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: ${{ env.UBUNTU_VERSION}}-pip-${{ env.pythonLocation }}- + registry: ghcr.io + username: ${{ env.REPO_OWNER_LOWER }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Install Nextflow + - name: Download docker images env: - CAPSULE_LOG: none - NXF_VER: 22.04.0 - run: | - mkdir /tmp/nextflow - cd /tmp/nextflow - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow - nextflow -version - - - name: Install AWS CLI - run: | - mkdir /tmp/awscli - cd /tmp/awscli - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" - unzip -q awscliv2.zip - sudo ./aws/install || sudo ./aws/install --update - aws --version - - - name: Set up S3FS - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-west-2 + RUN_ID: ${{ github.run_id }} run: | - sudo apt-get install -qy --no-install-recommends s3fs - echo $AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY > ~/.passwd-s3fs - chmod 600 ~/.passwd-s3fs - sudo mkdir -p /mnt/openproblems-nextflow - sudo chown $USER /mnt/openproblems-nextflow - s3fs -o umask=0277,uid=$(id -u) openproblems-nextflow /mnt/openproblems-nextflow - # Create bucket/ work/ and cwd/ - for dir in bucket work cwd; do - mkdir -p /mnt/openproblems-nextflow/${dir}/${BRANCH} + for image in $(find ./docker -mindepth 1 -type d -exec basename {} \;); do + GHCR_IMAGE="ghcr.io/${REPO_OWNER_LOWER}/${image}:${RUN_ID}" + docker pull $GHCR_IMAGE & + PIDS+=("${!}") done - ls -l /mnt/openproblems-nextflow/*/${BRANCH} - - - name: Install package & dependencies - run: | - python -m pip install --upgrade pip - pip install -U wheel setuptools - pip install -U --editable .[evaluate,process] - python -c "import openproblems" - openproblems-cli --version - openproblems-cli --test-hash - - - name: Pull Docker images - if: | - startsWith(github.ref, 'refs/heads/main') && - github.repository == 'openproblems-bio/openproblems' - run: | - cd workflow - snakemake -j $(nproc) docker_pull - cd .. 
- - - name: Build Docker images - if: | - !( - startsWith(github.ref, 'refs/heads/main') && - github.repository == 'openproblems-bio/openproblems' - ) - run: | - cd workflow - SNAKEMAKE_COPY_SOURCE=1 snakemake -j $(nproc) docker - cd .. + for pid in "${PIDS[@]}"; do + # ensure exited with status 0 + wait $pid + done + # tag images + for image in $(find ./docker -mindepth 1 -type d -exec basename {} \;); do + GHCR_IMAGE="ghcr.io/${REPO_OWNER_LOWER}/${image}:${RUN_ID}" + docker tag $GHCR_IMAGE singlecellopenproblems/${image}:latest + done + shell: bash -e {0} - name: Upload Docker images env: @@ -383,88 +504,59 @@ jobs: docker login --username AWS --password-stdin $ECR_ENDPOINT for image in $(cd docker && ls -1d */ | tr -d '/'); do docker tag singlecellopenproblems/${image} ${ECR_ENDPOINT}/openproblems:${BRANCH}-${image} - docker push --quiet ${ECR_ENDPOINT}/openproblems:${BRANCH}-${image} + docker push --quiet ${ECR_ENDPOINT}/openproblems:${BRANCH}-${image} & + PIDS+=("${!}") done + for pid in "${PIDS[@]}"; do + # ensure exited with status 0 + wait $pid + done + shell: bash -e {0} - - name: Run test benchmark - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - TOWER_ACCESS_TOKEN: ${{ secrets.TOWER_ACCESS_KEY }} - TOWER_WORKSPACE_ID: 53907369739130 - AWS_DEFAULT_REGION: us-west-2 - NXF_DEFAULT_DSL: 1 - run: | - RUN_NAME="$(echo "$BRANCH" | sed "s/[^a-zA-Z0-9]/_/g")_$(git rev-parse --short HEAD)_${GITHUB_RUN_ATTEMPT}" - cd /mnt/openproblems-nextflow/cwd/${BRANCH} - nextflow run \ - -revision v1.8 \ - -with-tower \ - -ansi-log false -resume \ - -profile aws,test \ - -work-dir "/mnt/openproblems-nextflow/work/${BRANCH}" \ - -bucket-dir "s3://openproblems-nextflow/bucket/${BRANCH}" \ - -name "${RUN_NAME}" \ - -e.PYTHONPATH="${PYTHONPATH}" \ - openproblems-bio/nf-openproblems \ - --branch ${BRANCH} | \ - tee >(grep --color=never --line-buffered "Monitor the execution with Nextflow Tower using this url" >> $GITHUB_STEP_SUMMARY) - shell: /bin/bash -eou pipefail {0} - - - name: Parse results - run: | - python workflow/parse_nextflow.py /mnt/openproblems-nextflow/cwd/${BRANCH} /tmp/website - python workflow/generate_website_markdown.py /tmp/website - - - name: Rename nextflow log - if: always() - run: | - mv /mnt/openproblems-nextflow/cwd/${{ env.BRANCH }}/.nextflow.log /tmp/nextflow.log - - - name: Upload nextflow log - if: always() - uses: actions/upload-artifact@main - with: - name: nextflow.log - path: /tmp/nextflow.log - - run_full_benchmark: - needs: run_test_benchmark + run_benchmark: + needs: + - setup_benchmark runs-on: ubuntu-latest if: >- always() && - (needs.run_test_benchmark.result == 'success' || needs.run_test_benchmark.result == 'skipped') && - !endsWith(github.event.head_commit.message, '# ci skip') && - github.event_name == 'push' && - ( - startsWith(github.ref, 'refs/tags') || - startsWith(github.ref, 'refs/heads/test_benchmark') - ) + needs.setup_benchmark.result == 'success' steps: - - - name: Run full benchmark + - name: Run benchmark env: TOWER_WATCH_URL: https://tower.nf/orgs/openproblems-bio/workspaces/openproblems-bio/watch TOWER_WORKSPACE_ID: 53907369739130 - TOWER_ACTION_ID: 7jylKuFGbSN65qSA4NfdFY + TOWER_TEST_ACTION_ID: "6yMzmbRXXDZMoVqVkEozQo" + TOWER_FULL_ACTION_ID: "6znCmebL2EBgWJTQz0H7pz" + BRANCH: ${{ needs.setup_benchmark.outputs.branch }} run: | + if [[ ${{ startsWith(github.ref, 'refs/tags') || startsWith(github.ref, 'refs/heads/test_full_benchmark') }} == true ]]; then + 
TOWER_ACTION_ID="${TOWER_FULL_ACTION_ID}" + WORKDIR="s3://openproblems-nextflow/work_main" + else + TOWER_ACTION_ID="${TOWER_TEST_ACTION_ID}" + WORKDIR="s3://openproblems-nextflow/work/${BRANCH}" + fi generate_parameters() { cat <> $GITHUB_STEP_SUMMARY + echo "$OUTPUT" + if [ $(echo "$OUTPUT" | grep "No more than 5 workflow runs at time are allowed") ]; then + echo "Nextflow Tower is currently rate limited. Please wait until fewer jobs are running and then retry failed jobs." + exit 1 + fi + WORKFLOW_ID=$(echo "$OUTPUT" | sed 's/.*"\([a-zA-Z0-9]*\)".*/\1/') + echo "Benchmark running at ${TOWER_WATCH_URL}/${WORKFLOW_ID}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/update_website_content.yml b/.github/workflows/update_website_content.yml new file mode 100644 index 0000000000..77e01b4c36 --- /dev/null +++ b/.github/workflows/update_website_content.yml @@ -0,0 +1,100 @@ +name: Update website content + +on: + push: + branches: + - 'main' + - 'test_website' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + update_content: + runs-on: ubuntu-latest + + env: + UPDATE_BRANCH_NAME: "auto_update_content_${{ github.run_number }}" + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + path: openproblems + + - name: Checkout website repo + uses: actions/checkout@v3 + with: + fetch-depth: 1 + repository: openproblems-bio/website + ref: main + path: website + token: ${{ secrets.GH_ACTIONS_WEBSITE_PAT }} + + - name: Set up website Git branch + working-directory: website + run: | + git checkout -b $UPDATE_BRANCH_NAME + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.8" + + - name: Install package & dependencies + run: | + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U --editable ./openproblems[process] + python -c "import openproblems" + + - name: Parse metadata + run: | + python openproblems/workflow/parse_metadata.py website/results + cp openproblems/main.bib website/bibliography/library.bib + cd website + git diff --exit-code --quiet || echo "CHANGED=true" >> $GITHUB_ENV + + + - name: Upload json + uses: actions/upload-artifact@main + with: + name: json + path: website/content/benchmarks + + - name: Push to openproblems-bio/website + if: | + env.CHANGED == 'true' && + ( + startsWith(github.ref, 'refs/heads/main') || + endsWith(github.event.head_commit.message, '# publish') + ) + shell: bash + working-directory: './website' + env: + GITHUB_TOKEN: ${{ secrets.GH_ACTIONS_WEBSITE_PAT }} + run: | + git push origin "${UPDATE_BRANCH_NAME}" + + - name: Create website Pull Request + if: | + env.CHANGED == 'true' && + ( + startsWith(github.ref, 'refs/heads/main') || + endsWith(github.event.head_commit.message, '# publish') + ) + uses: peter-evans/create-pull-request@v4 + with: + base: main + branch: ${{ env.UPDATE_BRANCH_NAME }} + delete-branch: true + title: '[auto] Update benchmark content' + reviewers: scottgigante-immunai,rcannood + path: './website' + token: ${{ secrets.GH_ACTIONS_WEBSITE_PAT }} + author: "openproblems-bio " + commit-message: "Update benchmark content # ci skip" diff --git a/.gitignore b/.gitignore index f9bee9a858..ccfb3f5b6a 100644 --- a/.gitignore +++ b/.gitignore @@ -146,14 +146,12 @@ nf-openproblems # Editor .idea +.vscode scratch/ openproblems/results/ openproblems/work/ batch_embed.txt -immune.h5ad +*.h5ad -immune.h5ad -batch_embed.txt -.vscode/launch.json run_bbknn.py diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eca5a31bdb..e722941c34 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,11 +9,11 @@ repos: - id: requirements-txt-fixer files: .*requirements.*\.txt - repo: https://github.com/timothycrosley/isort - rev: 5.10.1 + rev: 5.11.5 hooks: - id: isort - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 22.10.0 hooks: - id: black args: ['--target-version', 'py36'] @@ -21,7 +21,7 @@ repos: rev: v1.5.4 hooks: - id: autopep8 - - repo: https://gitlab.com/pycqa/flake8 + - repo: https://github.com/pycqa/flake8 rev: 3.8.4 hooks: - id: flake8 @@ -39,3 +39,8 @@ repos: hooks: - id: markdownlint-fix exclude: (SAGEMAKER.md|.github/ISSUE_TEMPLATE/bug_report.md|.github/pull_request_template.md) + - repo: https://github.com/FlamingTempura/bibtex-tidy + rev: "8838095" + hooks: + - id: bibtex-tidy + args: ['--omit', 'abstract', '--sort', '--duplicates', '--sort-fields', '--trailing-commas'] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4442a45674..9f08016476 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,6 +26,7 @@ website, or simply star it in GitHub to say "I use it". * [API](#api) * [Writing functions in R](#writing-functions-in-r) * [Adding package dependencies](#adding-package-dependencies) + * [Adding paper references](#adding-paper-references) * [Adding a new dataset](#adding-a-new-dataset) * [Adding a dataset / method / metric to a task](#adding-a-dataset--method--metric-to-a-task) @@ -177,6 +178,9 @@ Metrics should take an AnnData object and return a `float`. function metric(AnnData adata) -> float ``` +Note that the AnnData object is passed to the metric function as a copy, so there is no +need to copy it internally, even if you modify the object. + Task-specific APIs are described in the README for each task. * [Label Projection](openproblems/tasks/label_projection) @@ -218,7 +222,7 @@ _pca = r_function("pca.R") @method( method_name="PCA", paper_name="On lines and planes of closest fit to systems of points in space", - paper_url="https://www.tandfonline.com/doi/abs/10.1080/14786440109462720", + paper_reference="pearson1901pca", paper_year=1901, code_url="https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/prcomp", image="openproblems-r-base", @@ -252,12 +256,24 @@ def f2(adata): import package2 ``` +### Adding paper references + +All papers cited in the `openproblems` repository should be cited in [`main.bib`](main.bib) +and referenced in the corresponding dataset / method / metric decorator by its BibTeX +reference, generally of the form `author1900papername`. BibTeX entries should be retrieved +from [doi2bib.org](https://www.doi2bib.org/) where feasible, except for arXiv and bioRxiv +which provide more correct BibTeX entries on the paper abstract page. + +When referencing a paper in markdown (e.g. in a task README), you should link directly +to the bibliography entry on the Open Problems website using the BibTeX reference, e.g. +[`https://openproblems.bio/bibliography#openproblems`](https://openproblems.bio/bibliography#openproblems). + ### Adding a new dataset Datasets are loaded under `openproblems/data`. Each data loading function should download the appropriate dataset from a stable location (e.g. from Figshare) be decorated with `openproblems.data.utils.loader(data_url="https://data.link", -data_reference="https://doi.org/10.0/123")` in order to cache the result. +data_reference="author1900papername")` in order to cache the result. Data should be provided in a raw count format. 
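To make the loader contract above concrete, here is a minimal, illustrative sketch of a data loading function. The dataset name, download URL, and the `test` subsampling step are hypothetical and not part of the openproblems codebase; only the `openproblems.data.utils.loader` decorator and its `data_url`/`data_reference` arguments come from the description above, and scanpy is assumed to be available for reading the file.

```python
# Hypothetical example loader; the URL, file name, and function name are illustrative only.
import scanpy as sc

from openproblems.data.utils import loader


@loader(
    data_url="https://example.com/my_dataset.h5ad",  # hypothetical stable download link
    data_reference="author1900papername",  # BibTeX key registered in main.bib
)
def load_my_dataset(test=False):
    """Download the dataset and return an AnnData object with raw counts in adata.X."""
    adata = sc.read(
        "my_dataset.h5ad",
        backup_url="https://example.com/my_dataset.h5ad",  # downloads the file if absent
    )
    if test:
        # keep a small subset so test runs stay fast (illustrative only)
        adata = adata[:500, :200].copy()
    return adata
```

The decorator then caches the returned object, so repeated calls during a benchmark run do not re-download the data.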
We assume that `adata.X` contains the raw (count) data for the primary modality; this will also be copied to @@ -304,15 +320,48 @@ ease of use, we provide a collection of common normalization functions in stored in `adata.X` is automatically stored in `adata.layers["counts"]` for later reference in the case the a metric needs to access the unnormalized data. +#### Testing method performance + To test the performance of a dataset, method, or metric, you can use the command-line -interface: +interface `openproblems-cli test`. -```shell -openproblems-cli test --help +First, you must launch a Docker image containing the relevant dependencies for the +dataset/method/metric you wish to test. You can then run `openproblems-cli test` with +any/all of `--dataset`, `--method`, and `--metric` as desired. E.g., + +```bash +cd openproblems +docker run \ + -v $(pwd):/usr/src/singlecellopenproblems -v /tmp:/tmp \ + -it singlecellopenproblems/openproblems-python-extras bash +openproblems-cli test \ + --task label_projection \ + --dataset zebrafish_labs \ + --method logistic_regression_log_cp10k \ + --metric f1 ``` +which will print the benchmark score for the method evaluated by the metric on the +dataset you chose. + +Notes: + +* If you have updated Docker images to run your method, you must first rebuild the + images -- see the [Docker README](docker/README.md) for details. +* If your dataset/method/metric cannot be run on the same docker image, you may wish to + `load`, `run`, and `evaluate` separately. You can do this using each of these commands + independently; however, this workflow is not documented. +* These commands are not guaranteed to work with Apple silicon (M1 chip). +* If your local machine cannot run the test due to memory constraints or OS + incompatibility, you may use your AWS credentials to launch a VM for testing purposes. + See the [EC2 README](./EC2.md) for details. + ### Adding a new task +To add a new task, you must provide a task description, dataset and method API, and at +least one dataset, one method, and one metric. In order to appear on the website, a task +must have at least three methods. + The task directory structure is as follows ```text diff --git a/EC2.md b/EC2.md new file mode 100644 index 0000000000..a28cce7238 --- /dev/null +++ b/EC2.md @@ -0,0 +1,110 @@ +# AWS EC2 Usage Instructions + +The following instructions give a step-by-step guide to launching an AWS virtual machine +with all the required prerequisites to run `openproblems`. + +## Code of conduct + +**Please be respectful of our finite resources!** + +* The use of the `openproblems` AWS account is a privilege, not a right. +* This privilege is given solely for the purposes of testing methods with + `openproblems-cli test`. +* Developers who have their own compute resources should use them; please help us + conserve our resources for those who need them. +* If developers are found to be using resources irresponsibly, we may have to revoke + this privilege. + +## Requirements + +* a Unix-based OS (Mac or Linux), though you should be +able to amend the commands for use on Windows (or consider [Windows Subsystem for +Linux](https://docs.microsoft.com/en-us/windows/wsl/install)). +* The [AWS CLI](https://aws.amazon.com/cli/) +* [jq](https://stedolan.github.io/jq/download/) + +## Instructions + +The following instructions are for `bash`; other shell users may need to modify commands +slightly. + +First, if you have received openproblems AWS credentials, configure AWS to use them.
+Note: `openproblems` uses `us-west-2` as default region. If you have other AWS accounts, +you can configure AWS with multiple accounts by using the `AWS_PROFILE` environment +variable. + +```shell +export AWS_PROFILE=openproblems +aws configure +``` + +Second, create a key pair (only do this once): + +```shell +KEY_NAME="my_openproblems_key" # name this whatever you like, but it must be unique +aws ec2 create-key-pair --key-name $KEY_NAME --key-format pem \ +--query "KeyMaterial" --output text > ${KEY_NAME}.pem +chmod 400 ${KEY_NAME}.pem +``` + +Now, create an instance with your key pair: + +```shell +OWNER_NAME="this_is_your_name" +AWS_EC2_INSTANCE_TYPE="t2.micro" +INSTANCE_ID=$( +aws ec2 run-instances --count 1 --image-id ami-01219569b1bbf9fb2 \ + --instance-type $AWS_EC2_INSTANCE_TYPE --key-name $KEY_NAME \ + --security-group-ids sg-002d2b9db29bb43dd \ + --tag-specifications "ResourceType=instance,Tags=[{Key=owner,Value=${OWNER_NAME}}]" | + jq '.["Instances"][0]["InstanceId"]' | + tr -d '"' +) +``` + +Get the public DNS address for your instance + +```shell +sleep 30 # wait for boot +PUBLIC_DNS_NAME=$( +aws ec2 describe-instances --instance-id $INSTANCE_ID | + jq '.["Reservations"][0]["Instances"][0]["PublicDnsName"]' | + tr -d '"' +) +``` + +Now you can SSH into your instance: + +```shell +# check the status of your instance +aws ec2 describe-instance-status --instance-id ${INSTANCE_ID} | \ +jq '.["InstanceStatuses"][0]["SystemStatus"]' +ssh -i ${KEY_NAME}.pem ubuntu@${PUBLIC_DNS_NAME} +``` + +The instance will by default contain all dependencies to use `openproblems`. You can +run `openproblems` with + +```shell +git clone https://github.com/openproblems-bio/openproblems +cd openproblems +sudo docker run \ + -v $(pwd):/usr/src/singlecellopenproblems -v /tmp:/tmp \ + -it singlecellopenproblems/openproblems bash +openproblems-cli --help +``` + +For more information on using the CLI, see +[CONTRIBUTING.md](CONTRIBUTING.md#testing-method-performance). + +When you are done, make sure to shut down your instance: + +```shell +aws ec2 terminate-instances --instance-ids ${INSTANCE_ID} +``` + +Finally, make sure you don't have any instances left running: + +```shell +aws ec2 describe-instances --filters "Name=tag:owner,Values=${OWNER_NAME}" +``` diff --git a/README.md b/README.md index 5b5a3799a3..2db00acf91 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Open Problems in Single-Cell Analysis -[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/singlecellopenproblems/singlecellopenproblems/Run%20Tests/master?label=Github%20Actions)](https://github.com/openproblems-bio/openproblems/actions) +[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/openproblems-bio/openproblems/run_tests.yml?branch=main)](https://github.com/openproblems-bio/openproblems/actions) [![Coverage Status](https://codecov.io/gh/openproblems-bio/openproblems/branch/main/graph/badge.svg?token=S1ZIME1ZZR)](https://codecov.io/gh/openproblems-bio/openproblems) [![Netlify Status](https://api.netlify.com/api/v1/badges/83b92388-53c7-4fef-9003-e14d94c6ac6f/deploy-status)](https://app.netlify.com/sites/openproblems/deploys) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) @@ -29,7 +29,7 @@ Formalizing and benchmarking open problems in single-cell genomics. 
**Core** (_alphabetically_): * Daniel Burkhardt (@dburkhardt), Cellarity -* Robrecht Cannoodt (@rcannoodt), Data Intuitive +* Robrecht Cannoodt (@rcannood), Data Intuitive * Scott Gigante (@scottgigante-immunai), Immunai * Christopher Lance (@xlancelottx), Helmholtz Munich * Malte Luecken (@LuckyMD), Helmholtz Munich diff --git a/docker/.version b/docker/.version index a918a2aa18..faef31a435 100644 --- a/docker/.version +++ b/docker/.version @@ -1 +1 @@ -0.6.0 +0.7.0 diff --git a/docker/README.md b/docker/README.md index d5949faf26..77a87a7101 100644 --- a/docker/README.md +++ b/docker/README.md @@ -125,7 +125,7 @@ example, to install the `openproblems` base container, you can run the following docker build -f docker/openproblems/Dockerfile -t singlecellopenproblems/openproblems . ``` -or to update all available Docker images: +or to update all available Docker images, updating only when necessary: ```shell cd workflow && snakemake -j 10 docker @@ -215,14 +215,16 @@ You can then run commands within a docker container using `docker run`. Consult [Docker documentation](https://docs.docker.com/engine/reference/commandline/run/) to learn more about the `run` command. -**Using `IMAGE ID`** - ```shell -docker run -it 90a9110c7d69 /bin/bash +cd openproblems +docker run \ + -v $(pwd):/usr/src/singlecellopenproblems -v /tmp:/tmp \ + -it singlecellopenproblems/openproblems-python-extras bash ``` -**Using `RESPOSITORY:TAG`** +You may also specify the docker image by its ID, rather than its name: ```shell -docker run -it singlecellopenproblems/openproblems-python-extras:latest /bin/bash +cd openproblems +docker run -v $(pwd):/usr/src/singlecellopenproblems -v /tmp:/tmp -it 90a9110c7d69 bash ``` diff --git a/docker/openproblems-github-actions/Dockerfile b/docker/openproblems-github-actions/Dockerfile index e5ce9085e1..bce262f0cb 100644 --- a/docker/openproblems-github-actions/Dockerfile +++ b/docker/openproblems-github-actions/Dockerfile @@ -13,13 +13,10 @@ RUN sh -c 'echo \ RUN apt-get update RUN apt-get install -y docker-ce docker-ce-cli containerd.io -# install Python packages +# install dependencies and openproblems COPY ./docker/openproblems-github-actions/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems[test,r,evaluate] # Install R packages COPY ./docker/openproblems-github-actions/r_requirements.txt ./r_requirements.txt RUN R -e "source(\"install_renv.R\"); install_renv(\"r_requirements.txt\")" - -# Install Python packages -RUN pip install --no-cache-dir -U /usr/src/singlecellopenproblems[test,r,evaluate] diff --git a/docker/openproblems-github-actions/r_requirements.txt b/docker/openproblems-github-actions/r_requirements.txt index 7693ca4226..764418061e 100644 --- a/docker/openproblems-github-actions/r_requirements.txt +++ b/docker/openproblems-github-actions/r_requirements.txt @@ -1,6 +1,6 @@ backports@1.4.1 docopt@0.7.1 git2r@0.30.1 -lintr@3.0.1 -styler@1.7.0 +lintr@3.0.2 +styler@1.9.0 tibble@3.1.8 diff --git a/docker/openproblems-python-batch-integration/Dockerfile b/docker/openproblems-python-batch-integration/Dockerfile deleted file mode 100644 index 697905d79c..0000000000 --- a/docker/openproblems-python-batch-integration/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM singlecellopenproblems/openproblems-r-base:latest - -USER root -WORKDIR / - -ARG NB_USER="sagemaker-user" -ARG NB_UID="1000" -ARG NB_GID="100" - -RUN sed -i '$ d' /etc/apt/sources.list -RUN \ 
-apt-get update --allow-releaseinfo-change && \ -apt-get -y install --no-install-recommends gcc git python3-llvmlite && \ -apt-get autoremove -y && \ -rm -rf /var/lib/apt/lists/* - -# Install Python packages -COPY ./docker/openproblems-python-batch-integration/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt -# force reinstall annoy addresses https://github.com/spotify/annoy/issues/513 -RUN pip install --no-cache-dir --force annoy==1.17.0 - -USER $NB_UID -WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-batch-integration/README.md b/docker/openproblems-python-batch-integration/README.md deleted file mode 100644 index 02a18e1c20..0000000000 --- a/docker/openproblems-python-batch-integration/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# openproblems-python-extras Docker image - -Base image: singlecellopenproblems/openproblems-r-base - -OS: Debian Stretch - -Python: 3.8 - -Python packages: - -* scIB -* mnnpy -* scanorama -* bbknn -* scVI diff --git a/docker/openproblems-python-batch-integration/requirements.txt b/docker/openproblems-python-batch-integration/requirements.txt deleted file mode 100644 index 6f1fe411b3..0000000000 --- a/docker/openproblems-python-batch-integration/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -annoy==1.17.1 -bbknn==1.5.* -git+https://github.com/scottgigante-immunai/mnnpy@eb4c551 # branch: patch-2 -git+https://github.com/theislab/scib@a35e300 -scanorama==1.7.0 -scvi-tools~=0.16 # pinned in #313 diff --git a/docker/openproblems-python-bedtools/Dockerfile b/docker/openproblems-python-bedtools/Dockerfile new file mode 100644 index 0000000000..d0dff3ad54 --- /dev/null +++ b/docker/openproblems-python-bedtools/Dockerfile @@ -0,0 +1,28 @@ +FROM singlecellopenproblems/openproblems:latest + +ARG NB_USER="sagemaker-user" +ARG NB_UID="1000" +ARG NB_GID="100" + +USER root +WORKDIR / + +# Install pybedtools dependency +ARG BUILD_PACKAGES="" +ARG PACKAGE_VERSION=2.27.1 +RUN apt-get update && \ + apt-get install --yes git openssl build-essential zlib1g-dev && \ + cd /tmp && \ + git clone https://github.com/arq5x/bedtools2.git && \ + cd bedtools2 && \ + git checkout v$PACKAGE_VERSION && \ + make && \ + mv bin/* /usr/local/bin && \ + cd / + +# install dependencies and openproblems +COPY ./docker/openproblems-python-bedtools/requirements.txt ./requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems + +USER $NB_UID +WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-bedtools/README.md b/docker/openproblems-python-bedtools/README.md new file mode 100644 index 0000000000..d7b9935395 --- /dev/null +++ b/docker/openproblems-python-bedtools/README.md @@ -0,0 +1,12 @@ +# openproblems-python-bedtools Docker image + +Base image: singlecellopenproblems/openproblems + +OS: Debian Stretch + +Python: 3.8 + +Python packages: + +* pybedtools +* pyensembl diff --git a/docker/openproblems-python-bedtools/requirements.txt b/docker/openproblems-python-bedtools/requirements.txt new file mode 100644 index 0000000000..3b2a743754 --- /dev/null +++ b/docker/openproblems-python-bedtools/requirements.txt @@ -0,0 +1,4 @@ +gtfparse==1.3.* +polars==0.14.* +pybedtools==0.9.* +pyensembl==2.0.* diff --git a/docker/openproblems-python-extras/Dockerfile b/docker/openproblems-python-extras/Dockerfile index e7226456a8..ddb8a48542 100644 --- a/docker/openproblems-python-extras/Dockerfile +++ b/docker/openproblems-python-extras/Dockerfile @@ -7,22 +7,9 @@ ARG NB_GID="100" USER root WORKDIR / -# Install
pybedtools dependency -ARG BUILD_PACKAGES="" -ARG PACKAGE_VERSION=2.27.1 -RUN apt-get update && \ - apt-get install --yes git openssl build-essential zlib1g-dev && \ - cd /tmp && \ - git clone https://github.com/arq5x/bedtools2.git && \ - cd bedtools2 && \ - git checkout v$PACKAGE_VERSION && \ - make && \ - mv bin/* /usr/local/bin && \ - cd / - # Install Python packages COPY ./docker/openproblems-python-extras/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems USER $NB_UID WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-extras/README.md b/docker/openproblems-python-extras/README.md index 7e060243fc..1fd2017dd0 100644 --- a/docker/openproblems-python-extras/README.md +++ b/docker/openproblems-python-extras/README.md @@ -9,8 +9,6 @@ Python: 3.8 Python packages: * harmonic-alignment -* pybedtools -* pyensembl * magic-impute * molecular-cross-validation * MulticoreTSNE diff --git a/docker/openproblems-python-extras/requirements.txt b/docker/openproblems-python-extras/requirements.txt index 71b8137fa1..8b5c6930ec 100644 --- a/docker/openproblems-python-extras/requirements.txt +++ b/docker/openproblems-python-extras/requirements.txt @@ -1,16 +1,7 @@ -cmake==3.24.1.1 -git+https://github.com/BayraktarLab/cell2location.git@7e7aa231cc61ff460da14402fa3b9a1fa3ec69ac -git+https://github.com/czbiohub/molecular-cross-validation@04d9df0 +cmake==3.25.0 git+https://github.com/jorvis/Multicore-TSNE@6832575 git+https://github.com/KrishnaswamyLab/harmonic-alignment@v0.1#subdirectory=python -git+https://github.com/michalk8/neuralee@8946abf # contains gradient error fix git+https://github.com/scottgigante-immunai/knn-smoothing@python_package magic-impute==3.0.* phate==1.0.* -pybedtools==0.9.* -pyensembl==2.0.* -scvi-tools==0.16.* -tangram-sc==1.0.* -tensorflow-cpu==2.9.* -torch==1.12.* xgboost==1.6.* diff --git a/docker/openproblems-python-pytorch/Dockerfile b/docker/openproblems-python-pytorch/Dockerfile new file mode 100644 index 0000000000..cdf852acc0 --- /dev/null +++ b/docker/openproblems-python-pytorch/Dockerfile @@ -0,0 +1,15 @@ +FROM singlecellopenproblems/openproblems:latest + +ARG NB_USER="sagemaker-user" +ARG NB_UID="1000" +ARG NB_GID="100" + +USER root +WORKDIR / + +# install dependencies and openproblems +COPY ./docker/openproblems-python-pytorch/requirements.txt ./requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems + +USER $NB_UID +WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-scvi/README.md b/docker/openproblems-python-pytorch/README.md similarity index 64% rename from docker/openproblems-python-scvi/README.md rename to docker/openproblems-python-pytorch/README.md index 546cec9bc0..d566a8efd5 100644 --- a/docker/openproblems-python-scvi/README.md +++ b/docker/openproblems-python-pytorch/README.md @@ -9,3 +9,9 @@ Python: 3.8 Python packages: * scvi-tools +* tangram +* torch +* neuralee +* xgboost +* molecular-cross-validation +* cell2location diff --git a/docker/openproblems-python-pytorch/requirements.txt b/docker/openproblems-python-pytorch/requirements.txt new file mode 100644 index 0000000000..c615e671ae --- /dev/null +++ b/docker/openproblems-python-pytorch/requirements.txt @@ -0,0 +1,12 @@ +git+https://github.com/BayraktarLab/cell2location.git@47c8d6dc90dd3f1ab639861e8617c6ef0b62bb89 +git+https://github.com/czbiohub/molecular-cross-validation@04d9df0 
+git+https://github.com/michalk8/neuralee@8946abf # contains gradient error fix +jax==0.3.25 +jaxlib==0.3.25 +pymde==0.1.18 +scalex==1.0.2 +scikit-misc==0.1.* +scvi-tools~=0.19 +tangram-sc==1.0.* +torch==1.12.* +xgboost==1.6.* diff --git a/docker/openproblems-python-scvi/Dockerfile b/docker/openproblems-python-scvi/Dockerfile deleted file mode 100644 index f7edd2e4dc..0000000000 --- a/docker/openproblems-python-scvi/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM singlecellopenproblems/openproblems:latest - -ARG NB_USER="sagemaker-user" -ARG NB_UID="1000" -ARG NB_GID="100" - -USER root -WORKDIR / - -# Install Python packages -COPY ./docker/openproblems-python-scvi/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -USER $NB_UID -WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-scvi/requirements.txt b/docker/openproblems-python-scvi/requirements.txt deleted file mode 100644 index cd95e5cd0d..0000000000 --- a/docker/openproblems-python-scvi/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -jax==0.3.20 -jaxlib==0.3.20 -scikit-misc==0.1.* -scvi-tools~=0.17 # pinned in #313 -xgboost==1.6.* diff --git a/docker/openproblems-python-tensorflow/Dockerfile b/docker/openproblems-python-tensorflow/Dockerfile new file mode 100644 index 0000000000..170d6bc896 --- /dev/null +++ b/docker/openproblems-python-tensorflow/Dockerfile @@ -0,0 +1,15 @@ +FROM singlecellopenproblems/openproblems:latest + +ARG NB_USER="sagemaker-user" +ARG NB_UID="1000" +ARG NB_GID="100" + +USER root +WORKDIR / + +# install dependencies and openproblems +COPY ./docker/openproblems-python-tensorflow/requirements.txt ./requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems + +USER $NB_UID +WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-tf2.4/README.md b/docker/openproblems-python-tensorflow/README.md similarity index 56% rename from docker/openproblems-python-tf2.4/README.md rename to docker/openproblems-python-tensorflow/README.md index f08a69ea20..3f4c4dbf67 100644 --- a/docker/openproblems-python-tf2.4/README.md +++ b/docker/openproblems-python-tensorflow/README.md @@ -1,4 +1,4 @@ -# openproblems-python-tf2.4 Docker image +# openproblems-python-tensorflow Docker image Base image: singlecellopenproblems/openproblems @@ -8,6 +8,5 @@ Python: 3.8 Python packages: -* keras >=2.4,<2.6 -* tensorflow >=2.4,<2.5 +* tensorflow * dca diff --git a/docker/openproblems-python-tensorflow/requirements.txt b/docker/openproblems-python-tensorflow/requirements.txt new file mode 100644 index 0000000000..f2a476acf9 --- /dev/null +++ b/docker/openproblems-python-tensorflow/requirements.txt @@ -0,0 +1,4 @@ +git+https://github.com/Avsecz/kopt@6a5c890 # master +git+https://github.com/scottgigante-immunai/dca@1f4edbc # patch-1 contains tf version bump +protobuf==3.20.* +tensorflow==2.9.0 diff --git a/docker/openproblems-python-tf2.4/Dockerfile b/docker/openproblems-python-tf2.4/Dockerfile deleted file mode 100644 index dcdabaf28d..0000000000 --- a/docker/openproblems-python-tf2.4/Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -FROM singlecellopenproblems/openproblems:latest - -ARG NB_USER="sagemaker-user" -ARG NB_UID="1000" -ARG NB_GID="100" - -USER root -WORKDIR / - -# Install Python packages -COPY ./docker/openproblems-python-tf2.4/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -# tensorflow downgrades numpy and h5py (and therefore anndata) -RUN pip install --no-cache-dir -e 
/usr/src/singlecellopenproblems - -USER $NB_UID -WORKDIR /home/$NB_USER diff --git a/docker/openproblems-python-tf2.4/requirements.txt b/docker/openproblems-python-tf2.4/requirements.txt deleted file mode 100644 index 9dedbff275..0000000000 --- a/docker/openproblems-python-tf2.4/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -dca==0.3.* -keras>=2.4,<2.6 # pinned in dca -pyyaml==6.0 # pinned in #431 -tensorflow-cpu==2.4.* # pinned in dca diff --git a/docker/openproblems-r-base/Dockerfile b/docker/openproblems-r-base/Dockerfile index 5c70677019..71d909d34d 100644 --- a/docker/openproblems-r-base/Dockerfile +++ b/docker/openproblems-r-base/Dockerfile @@ -27,6 +27,7 @@ RUN apt-get update -qq RUN apt-get install -yq --no-install-suggests --no-install-recommends r-base-dev=4.2\* RUN apt-get clean -y && apt-get autoremove -y ENV R_HOME=/usr/lib/R +ENV BIOCVERSION="3.16" # Install R packages RUN R -e "install.packages('renv'); renv::consent(TRUE)" diff --git a/docker/openproblems-r-base/README.md b/docker/openproblems-r-base/README.md index 785a9ace1b..ebca77780d 100644 --- a/docker/openproblems-r-base/README.md +++ b/docker/openproblems-r-base/README.md @@ -28,4 +28,4 @@ R packages: Python packages: * rpy2 -* anndata2ri>=1.0.6 +* anndata2ri>=1.1 diff --git a/docker/openproblems-r-base/r_requirements.txt b/docker/openproblems-r-base/r_requirements.txt index 1b94692a0c..11d15839fd 100644 --- a/docker/openproblems-r-base/r_requirements.txt +++ b/docker/openproblems-r-base/r_requirements.txt @@ -1,3 +1,3 @@ -bioc::scran@1.24.1 -IRkernel@1.3 -RcppAnnoy@0.0.19 +bioc::scran@1.26.2 +IRkernel@1.3.1 +RcppAnnoy@0.0.20 diff --git a/docker/openproblems-r-extras/Dockerfile b/docker/openproblems-r-extras/Dockerfile index e67fe8eb09..115b056678 100644 --- a/docker/openproblems-r-extras/Dockerfile +++ b/docker/openproblems-r-extras/Dockerfile @@ -10,7 +10,8 @@ WORKDIR / RUN apt-get update && apt-get install -y \ libhdf5-dev hdf5-tools libgeos-dev \ - libharfbuzz-dev libfribidi-dev + libharfbuzz-dev libfribidi-dev \ + libgsl-dev RUN apt-get clean autoclean && \ apt-get autoremove --yes && \ @@ -19,11 +20,12 @@ RUN apt-get clean autoclean && \ # Install R packages COPY ./docker/openproblems-r-extras/r_requirements.txt ./r_requirements.txt +COPY ./scripts/install_renv.R ./install_renv.R RUN R -e "source(\"install_renv.R\"); install_renv(\"r_requirements.txt\")" -# Install Python packages +# install dependencies and openproblems COPY ./docker/openproblems-r-extras/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems[r] # Fix permissions RUN chown -R $NB_USER:$NB_GID /home/$NB_USER diff --git a/docker/openproblems-r-extras/r_requirements.txt b/docker/openproblems-r-extras/r_requirements.txt index ebfd924607..42a8df1066 100644 --- a/docker/openproblems-r-extras/r_requirements.txt +++ b/docker/openproblems-r-extras/r_requirements.txt @@ -1,51 +1,54 @@ -bioc::batchelor@1.12.3 -bioc::ComplexHeatmap@2.12.1 -bioc::scater@1.24.0 -bioc::scran@1.24.1 -bioc::scuttle@1.6.3 -bslib@0.4.0 +bioc::basilisk@1.10.2 +bioc::batchelor@1.14.1 +bioc::ComplexHeatmap@2.14.0 +bioc::dir.expiry +bioc::scater@1.26.1 +bioc::scuttle@1.8.3 +bslib@0.4.2 caret@6.0-93 cli@3.4.1 -conos@1.4.9 +conos@1.5.0 crayon@1.5.2 dbplyr@2.2.1 devtools@2.4.5 -dmcable/spacexr@eeb02a2 # master +dmcable/spacexr@9461a8d # master downlit@0.4.2 dplyr@1.0.10 -e1071@1.7-11 +e1071@1.7-12 ellipsis@0.3.2 -forecast@8.17.0 -hardhat@1.1.0 +forecast@8.18 
+hardhat@1.2.0 here@1.0.1 hexbin@1.28.2 -htmltools@0.5.3 +htmltools@0.5.4 htmlwidgets@1.5.4 igraph@1.3.5 -lifecycle@1.0.2 +lifecycle@1.0.3 +LTLA/basilisk.utils@411502f # required for liana0.1.9 Matrix@1.5-1 pkgdown@2.0.6 -pkgload@1.3.0 +pkgload@1.3.1 profvis@0.3.7 proxy@0.4-27 -ragg@1.2.2 +ragg@1.2.4 Rcpp@1.0.9 RcppTOML@0.1.7 reticulate@1.26 -rlang@1.0.5 +rlang@1.0.6 rliger@1.0.0 rmarkdown@2.2 RSQLite@2.2.4 -saezlab/liana@0.1.6 -saezlab/OmnipathR@679bb79 # master +saezlab/liana@0.1.9 +saezlab/OmnipathR@edf276b # master sass@0.4.2 -sctransform@0.3.4 -Seurat@4.1.1 -SeuratObject@4.1.1 +sctransform@0.3.5 +Seurat@4.3.0 +SeuratObject@4.1.3 shiny@1.4.0.2 sparsesvd@0.2 systemfonts@1.0.4 textshaping@0.3.6 +theislab/kBET@a10ffea # master tibble@3.1.7 tidymodels@0.1.2 tidyverse@1.3.0 diff --git a/docker/openproblems-r-extras/requirements.txt b/docker/openproblems-r-extras/requirements.txt index 9d03431a90..ec3092587e 100644 --- a/docker/openproblems-r-extras/requirements.txt +++ b/docker/openproblems-r-extras/requirements.txt @@ -1,3 +1,4 @@ git+https://github.com/KrishnaswamyLab/harmonic-alignment@v0.1#subdirectory=python -git+https://github.com/theislab/scib@v1.0.2 +rpy2<3.4.3 +scib==1.1.3 xgboost==1.6.* diff --git a/docker/openproblems-r-pytorch/Dockerfile b/docker/openproblems-r-pytorch/Dockerfile index d8300572f7..55568a6eae 100644 --- a/docker/openproblems-r-pytorch/Dockerfile +++ b/docker/openproblems-r-pytorch/Dockerfile @@ -12,10 +12,9 @@ WORKDIR / COPY ./docker/openproblems-r-pytorch/r_requirements.txt ./r_requirements.txt RUN R -e "source(\"install_renv.R\"); install_renv(\"r_requirements.txt\")" -# Install Python packages +# install dependencies and openproblems COPY ./docker/openproblems-r-pytorch/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems[r] USER $NB_UID WORKDIR /home/$NB_USER diff --git a/docker/openproblems-r-pytorch/README.md b/docker/openproblems-r-pytorch/README.md index 03ccbc07d4..33d3e5a30c 100644 --- a/docker/openproblems-r-pytorch/README.md +++ b/docker/openproblems-r-pytorch/README.md @@ -8,12 +8,12 @@ Python: 3.8 R: 4.0 -R packages: - -* batchelor -* sparsesvd -* dplyr - Python packages: -* harmonic-alignment +* harmony-pytorch +* torch +* bbknn +* mnnpy +* scib +* scanorama +* scvi-tools diff --git a/docker/openproblems-r-pytorch/requirements.txt b/docker/openproblems-r-pytorch/requirements.txt index 7121777e82..c35cc20330 100644 --- a/docker/openproblems-r-pytorch/requirements.txt +++ b/docker/openproblems-r-pytorch/requirements.txt @@ -1,3 +1,8 @@ -git+https://github.com/theislab/scib@v1.0.2 +annoy==1.17.1 +bbknn==1.5.* +git+https://github.com/chriscainx/mnnpy@2097dec # master harmony-pytorch==0.1.* -torch==1.12.* +scanorama==1.7.0 +scib==1.1.3 +scvi-tools~=0.19 +torch==1.13.* diff --git a/docker/openproblems/Dockerfile b/docker/openproblems/Dockerfile index 6132ab9208..2434cc5acb 100644 --- a/docker/openproblems/Dockerfile +++ b/docker/openproblems/Dockerfile @@ -27,14 +27,11 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2 RUN unzip -q awscliv2.zip RUN ./aws/install -# install dependencies -COPY ./docker/openproblems/requirements.txt ./requirements.txt -RUN pip install --no-cache-dir -r requirements.txt - -# Install single-cell open problems +# install dependencies and openproblems COPY . 
/usr/src/singlecellopenproblems RUN cd /usr/src/singlecellopenproblems && git clean -fxdq -RUN pip install --no-cache-dir --editable /usr/src/singlecellopenproblems +COPY ./docker/openproblems/requirements.txt ./requirements.txt +RUN pip install --no-cache-dir -r requirements.txt --editable /usr/src/singlecellopenproblems # Overwrite kernel.json to use system Python install COPY ./docker/openproblems/kernelspec.json /usr/local/share/jupyter/kernels/python3/kernel.json diff --git a/docker/openproblems/requirements.txt b/docker/openproblems/requirements.txt index 709868274c..af970653a2 100644 --- a/docker/openproblems/requirements.txt +++ b/docker/openproblems/requirements.txt @@ -1,7 +1,7 @@ -boto3==1.24.* +boto3==1.26.* cmake==3.22.* # pinned in #607 jupyter==1.0.* pip -sagemaker==2.112.* +sagemaker==2.116.* setuptools wheel diff --git a/main.bib b/main.bib new file mode 100644 index 0000000000..88eb6dc1a3 --- /dev/null +++ b/main.bib @@ -0,0 +1,1038 @@ +@misc{10x2018pbmc, + title = {1k PBMCs from a Healthy Donor (v3 chemistry)}, + author = {{10x Genomics}}, + year = {2018}, + url = {https://www.10xgenomics.com/resources/datasets/1-k-pbm-cs-from-a-healthy-donor-v-3-chemistry-3-standard-3-0-0}, +} +@misc{10x2019pbmc, + title = {5k Peripheral Blood Mononuclear Cells (PBMCs) from a Healthy Donor with a Panel of TotalSeq-B Antibodies (v3 chemistry)}, + author = {{10x Genomics}}, + year = {2019}, + url = {https://www.10xgenomics.com/resources/datasets/5-k-peripheral-blood-mononuclear-cells-pbm-cs-from-a-healthy-donor-with-cell-surface-proteins-v-3-chemistry-3-1-standard-3-1-0}, +} +@article{agrawal2021mde, + title = {Minimum-Distortion Embedding}, + author = {Akshay Agrawal and Alnur Ali and Stephen Boyd}, + year = {2021}, + journal = {Foundations and Trends{\textregistered} in Machine Learning}, + publisher = {Now Publishers}, + volume = {14}, + number = {3}, + pages = {211--378}, + doi = {10.1561/2200000090}, + url = {https://doi.org/10.1561/2200000090}, +} +@article{aliee2021autogenes, + title = {{AutoGeneS}: Automatic gene selection using multi-objective optimization for {RNA}-seq deconvolution}, + author = {Hananeh Aliee and Fabian J. 
Theis}, + year = {2021}, + month = jul, + journal = {Cell Systems}, + publisher = {Elsevier {BV}}, + volume = {12}, + number = {7}, + pages = {706--715.e4}, + doi = {10.1016/j.cels.2021.05.006}, + url = {https://doi.org/10.1016/j.cels.2021.05.006}, +} +@article{andersson2020single, + title = {Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography}, + author = {Alma Andersson and Joseph Bergenstr{\aa}hle and Michaela Asp and Ludvig Bergenstr{\aa}hle and Aleksandra Jurek and Jos{\'{e}} Fern{\'{a}}ndez Navarro and Joakim Lundeberg}, + year = {2020}, + month = oct, + journal = {Communications Biology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {3}, + number = {1}, + doi = {10.1038/s42003-020-01247-y}, + url = {https://doi.org/10.1038/s42003-020-01247-y}, +} +@article{batson2019molecular, + title = {Molecular Cross-Validation for Single-Cell RNA-seq}, + author = {Batson, Joshua and Royer, Lo{\"\i}c and Webber, James}, + year = {2019}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/786269}, + url = {https://www.biorxiv.org/content/early/2019/09/30/786269}, + elocation-id = {786269}, + eprint = {https://www.biorxiv.org/content/early/2019/09/30/786269.full.pdf}, +} +@article{biancalani2021deep, + title = {Deep learning and alignment of spatially resolved single-cell transcriptomes with Tangram}, + author = {Tommaso Biancalani and Gabriele Scalia and Lorenzo Buffoni and Raghav Avasthi and Ziqing Lu and Aman Sanger and Neriman Tokcan and Charles R. Vanderburg and {\AA}sa Segerstolpe and Meng Zhang and Inbal Avraham-Davidi and Sanja Vickovic and Mor Nitzan and Sai Ma and Ayshwarya Subramanian and Michal Lipinski and Jason Buenrostro and Nik Bear Brown and Duccio Fanelli and Xiaowei Zhuang and Evan Z. Macosko and Aviv Regev}, + year = {2021}, + month = oct, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {18}, + number = {11}, + pages = {1352--1362}, + doi = {10.1038/s41592-021-01264-7}, + url = {https://doi.org/10.1038/s41592-021-01264-7}, +} +@article{bland2000odds, + title = {Statistics Notes: The odds ratio}, + author = {J. M. Bland}, + year = {2000}, + month = may, + journal = {{BMJ}}, + publisher = {{BMJ}}, + volume = {320}, + number = {7247}, + pages = {1468--1468}, + doi = {10.1136/bmj.320.7247.1468}, + url = {https://doi.org/10.1136/bmj.320.7247.1468}, +} +@article{bttner2018test, + title = {A test metric for assessing single-cell {RNA}-seq batch correction}, + author = {Maren B\"{u}ttner and Zhichao Miao and F. Alexander Wolf and Sarah A. Teichmann and Fabian J. 
Theis}, + year = {2018}, + month = dec, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {1}, + pages = {43--49}, + doi = {10.1038/s41592-018-0254-1}, + url = {https://doi.org/10.1038/s41592-018-0254-1}, +} +@article{cabello2020singlecellsignalr, + title = {{SingleCellSignalR}: inference of intercellular networks from single-cell transcriptomics}, + author = {Simon Cabello-Aguilar and M{\'{e}}lissa Alame and Fabien Kon-Sun-Tack and Caroline Fau and Matthieu Lacroix and Jacques Colinge}, + year = {2020}, + month = mar, + journal = {Nucleic Acids Research}, + publisher = {Oxford University Press ({OUP})}, + volume = {48}, + number = {10}, + pages = {e55--e55}, + doi = {10.1093/nar/gkaa183}, + url = {https://doi.org/10.1093/nar/gkaa183}, +} +@article{cable2021robust, + title = {Robust decomposition of cell type mixtures in spatial transcriptomics}, + author = {Dylan M. Cable and Evan Murray and Luli S. Zou and Aleksandrina Goeva and Evan Z. Macosko and Fei Chen and Rafael A. Irizarry}, + year = {2021}, + month = feb, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {4}, + pages = {517--526}, + doi = {10.1038/s41587-021-00830-w}, + url = {https://doi.org/10.1038/s41587-021-00830-w}, +} +@article{cao2018joint, + title = {Joint profiling of chromatin accessibility and gene expression in thousands of single cells}, + author = {Junyue Cao and Darren A. Cusanovich and Vijay Ramani and Delasa Aghamirzaie and Hannah A. Pliner and Andrew J. Hill and Riza M. Daza and Jose L. McFaline-Figueroa and Jonathan S. Packer and Lena Christiansen and Frank J. Steemers and Andrew C. Adey and Cole Trapnell and Jay Shendure}, + year = {2018}, + month = sep, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {361}, + number = {6409}, + pages = {1380--1385}, + doi = {10.1126/science.aau0730}, + url = {https://doi.org/10.1126/science.aau0730}, +} +@article{cao2020human, + title = {A human cell atlas of fetal gene expression}, + author = {Junyue Cao and Diana R. O'Day and Hannah A. Pliner and Paul D. Kingsley and Mei Deng and Riza M. Daza and Michael A. Zager and Kimberly A. Aldinger and Ronnie Blecher-Gonen and Fan Zhang and Malte Spielmann and James Palis and Dan Doherty and Frank J. Steemers and Ian A. 
Glass and Cole Trapnell and Jay Shendure}, + year = {2020}, + month = nov, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {370}, + number = {6518}, + doi = {10.1126/science.aba7721}, + url = {https://doi.org/10.1126/science.aba7721}, +} +@article{chen2009local, + title = {Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Drawing, and Proximity Analysis}, + author = {Lisha Chen and Andreas Buja}, + year = {2009}, + month = mar, + journal = {Journal of the American Statistical Association}, + publisher = {Informa {UK} Limited}, + volume = {104}, + number = {485}, + pages = {209--219}, + doi = {10.1198/jasa.2009.0111}, + url = {https://doi.org/10.1198/jasa.2009.0111}, +} +@inproceedings{chen2016xgboost, + title = {{XGBoost}}, + author = {Tianqi Chen and Carlos Guestrin}, + year = {2016}, + month = aug, + booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining}, + publisher = {{Acm}}, + doi = {10.1145/2939672.2939785}, + url = {https://doi.org/10.1145/2939672.2939785}, +} +@article{cichocki2009fast, + title = {Fast Local Algorithms for Large Scale Nonnegative Matrix and Tensor Factorizations}, + author = {Andrzej Cichocki and Anh-Huy Phan}, + year = {2009}, + journal = {{IEICE} Transactions on Fundamentals of Electronics, Communications and Computer Sciences}, + publisher = {Institute of Electronics, Information and Communications Engineers ({IEICE})}, + volume = {E92-a}, + number = {3}, + pages = {708--721}, + doi = {10.1587/transfun.e92.a.708}, + url = {https://doi.org/10.1587/transfun.e92.a.708}, +} +@article{coifman2006diffusion, + title = {Diffusion maps}, + author = {Ronald R. Coifman and St{\'{e}}phane Lafon}, + year = {2006}, + month = jul, + journal = {Applied and Computational Harmonic Analysis}, + publisher = {Elsevier {BV}}, + volume = {21}, + number = {1}, + pages = {5--30}, + doi = {10.1016/j.acha.2006.04.006}, + url = {https://doi.org/10.1016/j.acha.2006.04.006}, +} +@article{cover1967nearest, + title = {Nearest neighbor pattern classification}, + author = {T. Cover and P. Hart}, + year = {1967}, + month = jan, + journal = {{IEEE} Transactions on Information Theory}, + publisher = {Institute of Electrical and Electronics Engineers ({IEEE})}, + volume = {13}, + number = {1}, + pages = {21--27}, + doi = {10.1109/tit.1967.1053964}, + url = {https://doi.org/10.1109/tit.1967.1053964}, +} +@inproceedings{davis2006prauc, + title = {The relationship between Precision-Recall and {ROC} curves}, + author = {Jesse Davis and Mark Goadrich}, + year = {2006}, + booktitle = {Proceedings of the 23rd international conference on Machine learning - {ICML} {\textquotesingle}06}, + publisher = {{ACM} Press}, + doi = {10.1145/1143844.1143874}, + url = {https://doi.org/10.1145/1143844.1143874}, +} +@article{dimitrov2022comparison, + title = {Comparison of methods and resources for cell-cell communication inference from single-cell {RNA}-Seq data}, + author = {Daniel Dimitrov and D{\'{e}}nes T\"{u}rei and Martin Garrido-Rodriguez and Paul L. Burmedi and James S. Nagai and Charlotte Boys and Ricardo O. Ramirez Flores and Hyojin Kim and Bence Szalai and Ivan G. 
Costa and Alberto Valdeolivas and Aur{\'{e}}lien Dugourd and Julio Saez-Rodriguez}, + year = {2022}, + month = jun, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {13}, + number = {1}, + doi = {10.1038/s41467-022-30755-0}, + url = {https://doi.org/10.1038/s41467-022-30755-0}, +} +@article{efremova2020cellphonedb, + title = {{CellPhoneDB}: inferring cell{\textendash}cell communication from combined expression of multi-subunit ligand{\textendash}receptor complexes}, + author = {Mirjana Efremova and Miquel Vento-Tormo and Sarah A. Teichmann and Roser Vento-Tormo}, + year = {2020}, + month = feb, + journal = {Nature Protocols}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {4}, + pages = {1484--1506}, + doi = {10.1038/s41596-020-0292-x}, + url = {https://doi.org/10.1038/s41596-020-0292-x}, +} +@article{eraslan2019single, + title = {Single-cell {RNA}-seq denoising using a deep count autoencoder}, + author = {G\"{o}kcen Eraslan and Lukas M. Simon and Maria Mircea and Nikola S. Mueller and Fabian J. Theis}, + year = {2019}, + month = jan, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {10}, + number = {1}, + doi = {10.1038/s41467-018-07931-2}, + url = {https://doi.org/10.1038/s41467-018-07931-2}, +} +@article{gower1975generalized, + title = {Generalized procrustes analysis}, + author = {J. C. Gower}, + year = {1975}, + month = mar, + journal = {Psychometrika}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {1}, + pages = {33--51}, + doi = {10.1007/bf02291478}, + url = {https://doi.org/10.1007/bf02291478}, +} +@article{grandini2020metrics, + title = {Metrics for Multi-Class Classification: an Overview}, + author = {Grandini, Margherita and Bagli, Enrico and Visani, Giorgio}, + year = {2020}, + journal = {arXiv}, + publisher = {Cornell University}, + doi = {10.48550/arxiv.2008.05756}, + url = {https://arxiv.org/abs/2008.05756}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (stat.ML), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, +} +@article{granja2021archr, + title = {{ArchR} is a scalable software package for integrative single-cell chromatin accessibility analysis}, + author = {Jeffrey M. Granja and M. Ryan Corces and Sarah E. Pierce and S. Tansu Bagdatli and Hani Choudhry and Howard Y. Chang and William J. 
Greenleaf}, + year = {2021}, + month = feb, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {3}, + pages = {403--411}, + doi = {10.1038/s41588-021-00790-6}, + url = {https://doi.org/10.1038/s41588-021-00790-6}, +} +@article{grn2014validation, + title = {Validation of noise models for single-cell transcriptomics}, + author = {Dominic Gr\"{u}n and Lennart Kester and Alexander van Oudenaarden}, + year = {2014}, + month = apr, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {11}, + number = {6}, + pages = {637--640}, + doi = {10.1038/nmeth.2930}, + url = {https://doi.org/10.1038/nmeth.2930}, +} +@article{haghverdi2018batch, + title = {Batch effects in single-cell {RNA}-sequencing data are corrected by matching mutual nearest neighbors}, + author = {Laleh Haghverdi and Aaron T L Lun and Michael D Morgan and John C Marioni}, + year = {2018}, + month = apr, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {36}, + number = {5}, + pages = {421--427}, + doi = {10.1038/nbt.4091}, + url = {https://doi.org/10.1038/nbt.4091}, +} +@article{hammarlund2018cengen, + title = {The {CeNGEN} Project: The Complete Gene Expression Map of an Entire Nervous System}, + author = {Marc Hammarlund and Oliver Hobert and David M. Miller and Nenad Sestan}, + year = {2018}, + month = aug, + journal = {Neuron}, + publisher = {Elsevier {BV}}, + volume = {99}, + number = {3}, + pages = {430--433}, + doi = {10.1016/j.neuron.2018.07.042}, + url = {https://doi.org/10.1016/j.neuron.2018.07.042}, +} +@article{hansen2012removing, + title = {Adjusting batch effects in microarray expression data using empirical Bayes methods}, + author = {W. Evan Johnson and Cheng Li and Ariel Rabinovic}, + year = {2006}, + month = apr, + journal = {Biostatistics}, + publisher = {Oxford University Press ({OUP})}, + volume = {8}, + number = {1}, + pages = {118--127}, + doi = {10.1093/biostatistics/kxj037}, + url = {https://doi.org/10.1093/biostatistics/kxj037}, +} +@article{hao2021integrated, + title = {Integrated analysis of multimodal single-cell data}, + author = {Yuhan Hao and Stephanie Hao and Erica Andersen-Nissen and William M. Mauck and Shiwei Zheng and Andrew Butler and Maddie J. Lee and Aaron J. Wilk and Charlotte Darby and Michael Zager and Paul Hoffman and Marlon Stoeckius and Efthymia Papalexi and Eleni P. Mimitou and Jaison Jain and Avi Srivastava and Tim Stuart and Lamar M. Fleming and Bertrand Yeung and Angela J. Rogers and Juliana M. McElrath and Catherine A. Blish and Raphael Gottardo and Peter Smibert and Rahul Satija}, + year = {2021}, + month = jun, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {184}, + number = {13}, + pages = {3573--3587.e29}, + doi = {10.1016/j.cell.2021.04.048}, + url = {https://doi.org/10.1016/j.cell.2021.04.048}, +} +@article{hie2019efficient, + title = {Efficient integration of heterogeneous single-cell transcriptomes using Scanorama}, + author = {Brian Hie and Bryan Bryson and Bonnie Berger}, + year = {2019}, + month = may, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {37}, + number = {6}, + pages = {685--691}, + doi = {10.1038/s41587-019-0113-3}, + url = {https://doi.org/10.1038/s41587-019-0113-3}, +} +@article{hinton1989connectionist, + title = {Connectionist learning procedures}, + author = {Geoffrey E. 
Hinton}, + year = {1989}, + month = sep, + journal = {Artificial Intelligence}, + publisher = {Elsevier {BV}}, + volume = {40}, + number = {1-3}, + pages = {185--234}, + doi = {10.1016/0004-3702(89)90049-0}, + url = {https://doi.org/10.1016/0004-3702(89)90049-0}, +} +@book{hosmer2013applied, + title = {Applied logistic regression}, + author = {Hosmer Jr, D.W. and Lemeshow, S. and Sturdivant, R.X.}, + year = {2013}, + publisher = {John Wiley \& Sons}, + volume = {398}, +} +@article{hou2019scmatch, + title = {{scMatch}: a single-cell gene expression profile annotation tool using reference datasets}, + author = {Rui Hou and Elena Denisenko and Alistair R R Forrest}, + year = {2019}, + month = apr, + journal = {Bioinformatics}, + publisher = {Oxford University Press ({OUP})}, + volume = {35}, + number = {22}, + pages = {4688--4695}, + doi = {10.1093/bioinformatics/btz292}, + url = {https://doi.org/10.1093/bioinformatics/btz292}, + editor = {Janet Kelso}, +} +@string{jan = {Jan}} +@string{feb = {Feb.}} +@string{mar = {Mar.}} +@string{apr = {Apr.}} +@string{may = {May}} +@string{jun = {Jun.}} +@string{jul = {Jul.}} +@string{aug = {Aug.}} +@string{sep = {Sept.}} +@string{oct = {Oct.}} +@string{nov = {Nov.}} +@string{dec = {Dec.}} +@article{hou2020predicting, + title = {Predicting cell-to-cell communication networks using {NATMI}}, + author = {Rui Hou and Elena Denisenko and Huan Ting Ong and Jordan A. Ramilowski and Alistair R. R. Forrest}, + year = {2020}, + month = oct, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {11}, + number = {1}, + doi = {10.1038/s41467-020-18873-z}, + url = {https://doi.org/10.1038/s41467-020-18873-z}, +} +@article{hou2020systematic, + title = {A systematic evaluation of single-cell {RNA}-sequencing imputation methods}, + author = {Wenpin Hou and Zhicheng Ji and Hongkai Ji and Stephanie C. Hicks}, + year = {2020}, + month = aug, + journal = {Genome Biology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {21}, + number = {1}, + doi = {10.1186/s13059-020-02132-x}, + url = {https://doi.org/10.1186/s13059-020-02132-x}, +} +@article{kiselev2019challenges, + title = {Challenges in unsupervised clustering of single-cell {RNA}-seq data}, + author = {Vladimir Yu Kiselev and Tallulah S. Andrews and Martin Hemberg}, + year = {2019}, + month = jan, + journal = {Nature Reviews Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {20}, + number = {5}, + pages = {273--282}, + doi = {10.1038/s41576-018-0088-9}, + url = {https://doi.org/10.1038/s41576-018-0088-9}, +} +@article{kleshchevnikov2022cell2location, + title = {Cell2location maps fine-grained cell types in spatial transcriptomics}, + author = {Vitalii Kleshchevnikov and Artem Shmatko and Emma Dann and Alexander Aivazidis and Hamish W. 
King and Tong Li and Rasa Elmentaite and Artem Lomakin and Veronika Kedlian and Adam Gayoso and Mika Sarkin Jain and Jun Sung Park and Lauma Ramona and Elizabeth Tuck and Anna Arutyunyan and Roser Vento-Tormo and Moritz Gerstung and Louisa James and Oliver Stegle and Omer Ali Bayraktar}, + year = {2022}, + month = jan, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {5}, + pages = {661--671}, + doi = {10.1038/s41587-021-01139-4}, + url = {https://doi.org/10.1038/s41587-021-01139-4}, +} +@article{korsunsky2019fast, + title = {Fast, sensitive and accurate integration of single-cell data with Harmony}, + author = {Ilya Korsunsky and Nghia Millard and Jean Fan and Kamil Slowikowski and Fan Zhang and Kevin Wei and Yuriy Baglaenko and Michael Brenner and Po-ru Loh and Soumya Raychaudhuri}, + year = {2019}, + month = nov, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {12}, + pages = {1289--1296}, + doi = {10.1038/s41592-019-0619-0}, + url = {https://doi.org/10.1038/s41592-019-0619-0}, +} +@article{kraemer2018dimred, + title = {{dimRed} and {coRanking} - Unifying Dimensionality Reduction in R}, + author = {Guido Kraemer and Markus Reichstein and Miguel, D. Mahecha}, + year = {2018}, + journal = {The R Journal}, + publisher = {The R Foundation}, + volume = {10}, + number = {1}, + pages = {342}, + doi = {10.32614/rj-2018-039}, + url = {https://doi.org/10.32614/rj-2018-039}, +} +@article{kruskal1964mds, + title = {Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis}, + author = {J. B. Kruskal}, + year = {1964}, + month = mar, + journal = {Psychometrika}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {29}, + number = {1}, + pages = {1--27}, + doi = {10.1007/bf02289565}, + url = {https://doi.org/10.1007/bf02289565}, +} +@article{lance2022multimodal, + title = {Multimodal single cell data integration challenge: results and lessons learned}, + author = {Lance, Christopher and Luecken, Malte D. and Burkhardt, Daniel B. and Cannoodt, Robrecht and Rautenstrauch, Pia and Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, Nikolay and Ryazantsev, Gleb and Ohler, Uwe and , and Pisco, Angela Oliveira and Bloom, Jonathan and Krishnaswamy, Smita and Theis, Fabian J.}, + year = {2022}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2022.04.11.487796}, + url = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796}, + elocation-id = {2022.04.11.487796}, + eprint = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796.full.pdf}, +} +@book{lawson1995solving, + title = {Solving Least Squares Problems}, + author = {Charles L. Lawson and Richard J. Hanson}, + year = {1995}, + month = jan, + publisher = {Society for Industrial and Applied Mathematics}, + doi = {10.1137/1.9781611971217}, + url = {https://doi.org/10.1137/1.9781611971217}, +} +@article{lee2009quality, + title = {Quality assessment of dimensionality reduction: Rank-based criteria}, + author = {John A. 
Lee and Michel Verleysen}, + year = {2009}, + month = mar, + journal = {Neurocomputing}, + publisher = {Elsevier {BV}}, + volume = {72}, + number = {7-9}, + pages = {1431--1443}, + doi = {10.1016/j.neucom.2008.12.017}, + url = {https://doi.org/10.1016/j.neucom.2008.12.017}, +} +@article{linderman2018zero, + title = {Zero-preserving imputation of scRNA-seq data using low-rank approximation}, + author = {Linderman, George C. and Zhao, Jun and Kluger, Yuval}, + year = {2018}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/397588}, + url = {https://www.biorxiv.org/content/early/2018/08/22/397588}, + elocation-id = {397588}, + eprint = {https://www.biorxiv.org/content/early/2018/08/22/397588.full.pdf}, +} +@article{lopez2018deep, + title = {Deep generative modeling for single-cell transcriptomics}, + author = {Romain Lopez and Jeffrey Regier and Michael B. Cole and Michael I. Jordan and Nir Yosef}, + year = {2018}, + month = nov, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {12}, + pages = {1053--1058}, + doi = {10.1038/s41592-018-0229-2}, + url = {https://doi.org/10.1038/s41592-018-0229-2}, +} +@article{lopez2022destvi, + title = {{DestVI} identifies continuums of cell types in spatial transcriptomics data}, + author = {Romain Lopez and Baoguo Li and Hadas Keren-Shaul and Pierre Boyeau and Merav Kedmi and David Pilzer and Adam Jelinski and Ido Yofe and Eyal David and Allon Wagner and Can Ergen and Yoseph Addadi and Ofra Golani and Franca Ronchese and Michael I. Jordan and Ido Amit and Nir Yosef}, + year = {2022}, + month = apr, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {9}, + pages = {1360--1369}, + doi = {10.1038/s41587-022-01272-8}, + url = {https://doi.org/10.1038/s41587-022-01272-8}, +} +@article{lotfollahi2020query, + title = {Query to reference single-cell integration with transfer learning}, + author = {Lotfollahi, Mohammad and Naghipourfar, Mohsen and Luecken, Malte D. and Khajavi, Matin and B{\"u}ttner, Maren and Avsec, Ziga and Misharin, Alexander V. and Theis, Fabian J.}, + year = {2020}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2020.07.16.205997}, + url = {https://doi.org/10.1101/2020.07.16.205997}, + elocation-id = {2020.07.16.205997}, + eprint = {https://www.biorxiv.org/content/early/2020/07/16/2020.07.16.205997.full.pdf}, +} +@article{luecken2022benchmarking, + title = {Benchmarking atlas-level data integration in single-cell genomics}, + author = {Malte D. Luecken and M. B\"{u}ttner and K. Chaichoompu and A. Danese and M. Interlandi and M. F. Mueller and D. C. Strobl and L. Zappia and M. Dugas and M. Colom{\'{e}}-Tatch{\'{e}} and Fabian J. Theis}, + year = {2021}, + month = dec, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {19}, + number = {1}, + pages = {41--50}, + doi = {10.1038/s41592-021-01336-8}, + url = {https://doi.org/10.1038/s41592-021-01336-8}, +} +@article{lueks2011evaluate, + title = {How to Evaluate Dimensionality Reduction? 
- Improving the Co-ranking Matrix}, + author = {Lueks, Wouter and Mokbel, Bassam and Biehl, Michael and Hammer, Barbara}, + year = {2011}, + journal = {arXiv}, + doi = {10.48550/ARXIV.1110.3917}, + url = {https://arxiv.org/abs/1110.3917}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (cs.LG), Information Retrieval (cs.IR), FOS: Computer and information sciences, FOS: Computer and information sciences}, +} +@misc{lun2019fastmnn, + title = {A description of the theory behind the fastMNN algorithm}, + author = {Lun, Aaron}, + year = {2019}, + url = {https://marionilab.github.io/FurtherMNN2018/theory/description.html}, +} +@article{mcinnes2018umap, + title = {UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction}, + author = {McInnes, Leland and Healy, John and Melville, James}, + year = {2018}, + journal = {arXiv}, + publisher = {Cornell University}, + doi = {10.48550/arxiv.1802.03426}, + url = {https://arxiv.org/abs/1802.03426}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (stat.ML), Computational Geometry (cs.CG), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences}, +} +@inbook{miles2005rsquared, + title = {Encyclopedia of Statistics in Behavioral Science}, + author = {Jeremy Miles}, + year = {2005}, + month = oct, + publisher = {John Wiley {\&} Sons, Ltd}, + doi = {10.1002/0470013192.bsa526}, + url = {https://doi.org/10.1002/0470013192.bsa526}, + chapter = {{R-Squared}, Adjusted {R-Squared}}, +} +@article{moon2019visualizing, + title = {Visualizing structure and transitions in high-dimensional biological data}, + author = {Kevin R. Moon and David van Dijk and Zheng Wang and Scott Gigante and Daniel B. Burkhardt and William S. Chen and Kristina Yim and Antonia van den Elzen and Matthew J. Hirn and Ronald R. Coifman and Natalia B. Ivanova and Guy Wolf and Smita Krishnaswamy}, + year = {2019}, + month = dec, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {37}, + number = {12}, + pages = {1482--1492}, + doi = {10.1038/s41587-019-0336-3}, + url = {https://doi.org/10.1038/s41587-019-0336-3}, +} +@article{narayan2021assessing, + title = {Assessing single-cell transcriptomic variability through density-preserving data visualization}, + author = {Ashwin Narayan and Bonnie Berger and Hyunghoon Cho}, + year = {2021}, + month = jan, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {39}, + number = {6}, + pages = {765--774}, + doi = {10.1038/s41587-020-00801-7}, + url = {https://doi.org/10.1038/s41587-020-00801-7}, +} +@article{nestorowa2016single, + title = {A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation}, + author = {Sonia Nestorowa and Fiona K. Hamey and Blanca Pijuan Sala and Evangelia Diamanti and Mairi Shepherd and Elisa Laurenti and Nicola K. Wilson and David G. Kent and Berthold G\"{o}ttgens}, + year = {2016}, + month = aug, + journal = {Blood}, + publisher = {American Society of Hematology}, + volume = {128}, + number = {8}, + pages = {e20--e31}, + doi = {10.1182/blood-2016-05-716480}, + url = {https://doi.org/10.1182/blood-2016-05-716480}, +} +@article{olsson2016single, + title = {Single-cell analysis of mixed-lineage states leading to a binary cell fate choice}, + author = {Andre Olsson and Meenakshi Venkatasubramanian and Viren K. Chaudhri and Bruce J. 
Aronow and Nathan Salomonis and Harinder Singh and H. Leighton Grimes}, + year = {2016}, + month = aug, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {537}, + number = {7622}, + pages = {698--702}, + doi = {10.1038/nature19348}, + url = {https://doi.org/10.1038/nature19348}, +} +@misc{openproblems, + title = {Open Problems}, + author = {{Open Problems for Single Cell Analysis Consortium}}, + year = {2022}, + url = {https://openproblems.bio}, +} +@article{pearson1901pca, + title = {On lines and planes of closest fit to systems of points in space}, + author = {Karl Pearson}, + year = {1901}, + month = nov, + journal = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science}, + publisher = {Informa {UK} Limited}, + volume = {2}, + number = {11}, + pages = {559--572}, + doi = {10.1080/14786440109462720}, + url = {https://doi.org/10.1080/14786440109462720}, +} +@article{pliner2019supervised, + title = {Supervised classification enables rapid annotation of cell atlases}, + author = {Hannah A. Pliner and Jay Shendure and Cole Trapnell}, + year = {2019}, + month = sep, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {10}, + pages = {983--986}, + doi = {10.1038/s41592-019-0535-3}, + url = {https://doi.org/10.1038/s41592-019-0535-3}, +} +@article{polanski2020bbknn, + title = {{BBKNN}: fast batch alignment of single cell transcriptomes}, + author = {Krzysztof Pola{\'{n}}ski and Matthew D Young and Zhichao Miao and Kerstin B Meyer and Sarah A Teichmann and Jong-Eun Park}, + year = {2019}, + month = aug, + journal = {Bioinformatics}, + publisher = {Oxford University Press ({OUP})}, + doi = {10.1093/bioinformatics/btz625}, + url = {https://doi.org/10.1093/bioinformatics/btz625}, + editor = {Bonnie Berger}, +} +@article{raredon2022computation, + title = {Computation and visualization of cell{\textendash}cell signaling topologies in single-cell systems data using Connectome}, + author = {Micha Sam Brickman Raredon and Junchen Yang and James Garritano and Meng Wang and Dan Kushnir and Jonas Christian Schupp and Taylor S. Adams and Allison M. Greaney and Katherine L. Leiby and Naftali Kaminski and Yuval Kluger and Andre Levchenko and Laura E. Niklason}, + year = {2022}, + month = mar, + journal = {Scientific Reports}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {12}, + number = {1}, + doi = {10.1038/s41598-022-07959-x}, + url = {https://doi.org/10.1038/s41598-022-07959-x}, +} +@article{rodriques2019slide, + title = {Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution}, + author = {Samuel G. Rodriques and Robert R. Stickels and Aleksandrina Goeva and Carly A. Martin and Evan Murray and Charles R. Vanderburg and Joshua Welch and Linlin M. Chen and Fei Chen and Evan Z. 
Macosko}, + year = {2019}, + month = mar, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {363}, + number = {6434}, + pages = {1463--1467}, + doi = {10.1126/science.aaw1219}, + url = {https://doi.org/10.1126/science.aaw1219}, +} +@article{sarkar2021separating, + title = {Separating measurement and expression models clarifies confusion in single-cell {RNA} sequencing analysis}, + author = {Abhishek Sarkar and Matthew Stephens}, + year = {2021}, + month = may, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {6}, + pages = {770--777}, + doi = {10.1038/s41588-021-00873-4}, + url = {https://doi.org/10.1038/s41588-021-00873-4}, +} +@article{schober2018correlation, + title = {Correlation Coefficients}, + author = {Patrick Schober and Christa Boer and Lothar A. Schwarte}, + year = {2018}, + month = may, + journal = {Anesthesia {\&} Analgesia}, + publisher = {Ovid Technologies (Wolters Kluwer Health)}, + volume = {126}, + number = {5}, + pages = {1763--1768}, + doi = {10.1213/ane.0000000000002864}, + url = {https://doi.org/10.1213/ane.0000000000002864}, +} +@inproceedings{stanley2020harmonic, + title = {Harmonic Alignment}, + author = {Jay S. Stanley and Scott Gigante and Guy Wolf and Smita Krishnaswamy}, + year = {2020}, + month = jan, + booktitle = {Proceedings of the 2020 {SIAM} International Conference on Data Mining}, + publisher = {Society for Industrial and Applied Mathematics}, + pages = {316--324}, + doi = {10.1137/1.9781611976236.36}, + url = {https://doi.org/10.1137/1.9781611976236.36}, +} +@article{stoeckius2017simultaneous, + title = {Simultaneous epitope and transcriptome measurement in single cells}, + author = {Marlon Stoeckius and Christoph Hafemeister and William Stephenson and Brian Houck-Loomis and Pratip K Chattopadhyay and Harold Swerdlow and Rahul Satija and Peter Smibert}, + year = {2017}, + month = jul, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {14}, + number = {9}, + pages = {865--868}, + doi = {10.1038/nmeth.4380}, + url = {https://doi.org/10.1038/nmeth.4380}, +} +@article{stuart2019comprehensive, + title = {Comprehensive Integration of Single-Cell Data}, + author = {Stuart, T. and Butler, A. and Hoffman, P. and Hafemeister, C. and Papalexi, E. and Mauck, W.M. and Hao, Y. and Stoeckius, M. and Smibert, P. and Satija, R.}, + year = {2019}, + journal = {Cell}, + volume = {177}, + number = {7}, + pages = {1888--1902.e21}, + doi = {10.1016/j.cell.2019.05.031}, +} +@article{szubert2019structurepreserving, + title = {Structure-preserving visualisation of high dimensional single-cell datasets}, + author = {Benjamin Szubert and Jennifer E. 
Cole and Claudia Monaco and Ignat Drozdov}, + year = {2019}, + month = jun, + journal = {Scientific Reports}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {9}, + number = {1}, + doi = {10.1038/s41598-019-45301-0}, + url = {https://doi.org/10.1038/s41598-019-45301-0}, +} +@article{tabula2018single, + title = {Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris}, + author = {{Tabula Muris Consortium}}, + year = {2018}, + month = oct, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {562}, + number = {7727}, + pages = {367--372}, + doi = {10.1038/s41586-018-0590-4}, + url = {https://doi.org/10.1038/s41586-018-0590-4}, +} +@article{tabula2020single, + title = {A single-cell transcriptomic atlas characterizes ageing tissues in the mouse}, + author = {{Tabula Muris Consortium}}, + year = {2020}, + month = jul, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {583}, + number = {7817}, + pages = {590--595}, + doi = {10.1038/s41586-020-2496-1}, + url = {https://doi.org/10.1038/s41586-020-2496-1}, +} +@article{tasic2016adult, + title = {Adult mouse cortical cell taxonomy revealed by single cell transcriptomics}, + author = {Bosiljka Tasic and Vilas Menon and Thuc Nghi Nguyen and Tae Kyung Kim and Tim Jarsky and Zizhen Yao and Boaz Levi and Lucas T Gray and Staci A Sorensen and Tim Dolbeare and Darren Bertagnolli and Jeff Goldy and Nadiya Shapovalova and Sheana Parry and Changkyu Lee and Kimberly Smith and Amy Bernard and Linda Madisen and Susan M Sunkin and Michael Hawrylycz and Christof Koch and Hongkui Zeng}, + year = {2016}, + month = jan, + journal = {Nature Neuroscience}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {19}, + number = {2}, + pages = {335--346}, + doi = {10.1038/nn.4216}, + url = {https://doi.org/10.1038/nn.4216}, +} +@article{tian2019benchmarking, + title = {Benchmarking single cell {RNA}-sequencing analysis pipelines using mixture control experiments}, + author = {Luyi Tian and Xueyi Dong and Saskia Freytag and Kim-Anh L{\^{e}} Cao and Shian Su and Abolfazl JalalAbadi and Daniela Amann-Zalcenstein and Tom S. Weber and Azadeh Seidi and Jafar S. Jabbari and Shalin H. Naik and Matthew E. Ritchie}, + year = {2019}, + month = may, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {6}, + pages = {479--487}, + doi = {10.1038/s41592-019-0425-8}, + url = {https://doi.org/10.1038/s41592-019-0425-8}, +} +@article{van2018recovering, + title = {Recovering Gene Interactions from Single-Cell Data Using Data Diffusion}, + author = {David van Dijk and Roshan Sharma and Juozas Nainys and Kristina Yim and Pooja Kathail and Ambrose J. Carr and Cassandra Burdziak and Kevin R. Moon and Christine L. 
Chaffer and Diwakar Pattabiraman and Brian Bierie and Linas Mazutis and Guy Wolf and Smita Krishnaswamy and Dana Pe'er}, + year = {2018}, + month = jul, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {174}, + number = {3}, + pages = {716--729.e27}, + doi = {10.1016/j.cell.2018.05.061}, + url = {https://doi.org/10.1016/j.cell.2018.05.061}, +} +@article{vandermaaten2008visualizing, + title = {Visualizing Data using t-SNE}, + author = {{van der} Maaten, Laurens and Hinton, Geoffrey}, + year = {2008}, + journal = {Journal of Machine Learning Research}, + volume = {9}, + number = {86}, + pages = {2579--2605}, + url = {http://jmlr.org/papers/v9/vandermaaten08a.html}, +} +@inproceedings{venna2001neighborhood, + title = {Neighborhood Preservation in Nonlinear Projection Methods: An Experimental Study}, + author = {Jarkko Venna and Samuel Kaski}, + year = {2001}, + booktitle = {Artificial Neural Networks {\textemdash} {ICANN} 2001}, + publisher = {Springer Berlin Heidelberg}, + pages = {485--491}, + doi = {{10.1007/3-540-44668-0\_68}}, + url = {{https://doi.org/10.1007/3-540-44668-0\_68}}, +} +@article{venna2006local, + title = {Local multidimensional scaling}, + author = {Jarkko Venna and Samuel Kaski}, + year = {2006}, + month = jul, + journal = {Neural Networks}, + publisher = {Elsevier {BV}}, + volume = {19}, + number = {6-7}, + pages = {889--899}, + doi = {10.1016/j.neunet.2006.05.014}, + url = {https://doi.org/10.1016/j.neunet.2006.05.014}, +} +@article{wagner2018knearest, + title = {K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data}, + author = {Wagner, Florian and Yan, Yun and Yanai, Itai}, + year = {2018}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/217737}, + url = {https://www.biorxiv.org/content/early/2018/04/09/217737}, + elocation-id = {217737}, + eprint = {https://www.biorxiv.org/content/early/2018/04/09/217737.full.pdf}, +} +@article{wagner2018single, + title = {Single-cell mapping of gene expression landscapes and lineage in the zebrafish embryo}, + author = {Daniel E. Wagner and Caleb Weinreb and Zach M. Collins and James A. Briggs and Sean G. Megason and Allon M. Klein}, + year = {2018}, + month = jun, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {360}, + number = {6392}, + pages = {981--987}, + doi = {10.1126/science.aar4362}, + url = {https://doi.org/10.1126/science.aar4362}, +} +@article{wang2013target, + title = {Target analysis by integration of transcriptome and {ChIP}-seq data with {BETA}}, + author = {Su Wang and Hanfei Sun and Jian Ma and Chongzhi Zang and Chenfei Wang and Juan Wang and Qianzi Tang and Clifford A Meyer and Yong Zhang and X Shirley Liu}, + year = {2013}, + month = nov, + journal = {Nature Protocols}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {8}, + number = {12}, + pages = {2502--2515}, + doi = {10.1038/nprot.2013.150}, + url = {https://doi.org/10.1038/nprot.2013.150}, +} +@article{welch2019single, + title = {Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity}, + author = {Joshua D. Welch and Velina Kozareva and Ashley Ferreira and Charles Vanderburg and Carly Martin and Evan Z. 
Macosko}, + year = {2019}, + month = jun, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {177}, + number = {7}, + pages = {1873--1887.e17}, + doi = {10.1016/j.cell.2019.05.006}, + url = {https://doi.org/10.1016/j.cell.2019.05.006}, +} +@article{wu2021single, + title = {A single-cell and spatially resolved atlas of human breast cancers}, + author = {Sunny Z. Wu and Ghamdan Al-Eryani and Daniel Lee Roden and Simon Junankar and Kate Harvey and Alma Andersson and Aatish Thennavan and Chenfei Wang and James R. Torpy and Nenad Bartonicek and Taopeng Wang and Ludvig Larsson and Dominik Kaczorowski and Neil I. Weisenfeld and Cedric R. Uytingco and Jennifer G. Chew and Zachary W. Bent and Chia-Ling Chan and Vikkitharan Gnanasambandapillai and Charles-Antoine Dutertre and Laurence Gluch and Mun N. Hui and Jane Beith and Andrew Parker and Elizabeth Robbins and Davendra Segara and Caroline Cooper and Cindy Mak and Belinda Chan and Sanjay Warrier and Florent Ginhoux and Ewan Millar and Joseph E. Powell and Stephen R. Williams and X. Shirley Liu and Sandra O'Toole and Elgene Lim and Joakim Lundeberg and Charles M. Perou and Alexander Swarbrick}, + year = {2021}, + month = sep, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {9}, + pages = {1334--1347}, + doi = {10.1038/s41588-021-00911-1}, + url = {https://doi.org/10.1038/s41588-021-00911-1}, +} +@article{xiong2020neuralee, + title = {{NeuralEE}: A {GPU}-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale {scRNA}-Seq Data}, + author = {Jiankang Xiong and Fuzhou Gong and Lin Wan and Liang Ma}, + year = {2020}, + month = oct, + journal = {Frontiers in Genetics}, + publisher = {Frontiers Media {SA}}, + volume = {11}, + doi = {10.3389/fgene.2020.00786}, + url = {https://doi.org/10.3389/fgene.2020.00786}, +} +@article{xiong2021online, + title = {Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space}, + author = {Lei Xiong and Kang Tian and Yuzhe Li and Weixi Ning and Xin Gao and Qiangfeng Cliff Zhang}, + year = {2022}, + month = oct, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {13}, + number = {1}, + doi = {10.1038/s41467-022-33758-z}, + url = {https://doi.org/10.1038/s41467-022-33758-z}, +} +@article{xu2021probabilistic, + title = {Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models}, + author = {Chenling Xu and Romain Lopez and Edouard Mehlman and Jeffrey Regier and Michael I Jordan and Nir Yosef}, + year = {2021}, + month = jan, + journal = {Molecular Systems Biology}, + publisher = {{Embo}}, + volume = {17}, + number = {1}, + doi = {10.15252/msb.20209620}, + url = {https://doi.org/10.15252/msb.20209620}, +} +@article{zhang2021pydrmetrics, + title = {{pyDRMetrics} - A Python toolkit for dimensionality reduction quality assessment}, + author = {Yinsheng Zhang and Qian Shang and Guoming Zhang}, + year = {2021}, + month = feb, + journal = {Heliyon}, + publisher = {Elsevier {BV}}, + volume = {7}, + number = {2}, + pages = {e06199}, + doi = {10.1016/j.heliyon.2021.e06199}, + url = {https://doi.org/10.1016/j.heliyon.2021.e06199}, +} diff --git a/openproblems/api/README.md b/openproblems/api/README.md index 69e9dd5284..ee6cdfd66e 100644 --- a/openproblems/api/README.md +++ b/openproblems/api/README.md @@ -39,7 +39,7 @@ For example: # Download a task-specific dataset 
and save it to `dataset.h5ad` openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch # Run a method on a datasets and save output to `method.h5ad` -openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm +openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k # Evaluate the performance of a previously run method using the `accuracy` metric openproblems-cli evaluate --task label_projection --input method.h5ad accuracy ``` @@ -63,15 +63,15 @@ You can then list the avaiable datasets, methods, and metrics for a partiular ta > openproblems-cli list --datasets --task label_projection pancreas_batch pancreas_random -zebrafish_labels +zebrafish_labs zebrafish_random > openproblems-cli list --methods --task label_projection -knn_classifier_log_cpm +knn_classifier_log_cp10k knn_classifier_scran -logistic_regression_log_cpm +logistic_regression_log_cp10k logistic_regression_scran -mlp_log_cpm +mlp_log_cp10k mlp_scran > openproblems-cli list --metrics --task label_projection @@ -96,15 +96,15 @@ multimodal_data_integration $ openproblems-cli list --datasets --task label_projection pancreas_batch pancreas_random -zebrafish_labels +zebrafish_labs zebrafish_random $ openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch $ openproblems-cli list --methods --task label_projection -logistic_regression_log_cpm +logistic_regression_log_cp10k logistic_regression_scran -mlp_log_cpm +mlp_log_cp10k mlp_scran -$ openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm +$ openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k $ openproblems-cli list --metrics --task label_projection $ openproblems-cli evaluate --task label_projection --input method.h5ad accuracy 0.9521233432512848 @@ -121,7 +121,7 @@ openproblems-cli image --datasets --task label_projection pancreas_batch docker run -dt openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch openproblems-cli list --methods --task label_projection openproblems-cli image --methods --task label_projection logistic_regression_scran -openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cpm +openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k openproblems-cli list --metrics --task label_projection openproblems-cli image --metrics --task label_projection accuracy openproblems-cli evaluate --task label_projection --input method.h5ad accuracy @@ -141,19 +141,19 @@ multimodal_data_integration $ openproblems-cli list --datasets --task label_projection pancreas_batch pancreas_random -zebrafish_labels +zebrafish_labs zebrafish_random $ openproblems-cli image --datasets --task label_projection pancreas_batch openproblems $ docker run -dt singlecellopenproblems/openproblems openproblems-cli load --task label_projection --output dataset.h5ad pancreas_batch $ openproblems-cli list --methods --task label_projection -logistic_regression_log_cpm +logistic_regression_log_cp10k logistic_regression_scran -mlp_log_cpm +mlp_log_cp10k mlp_scran $ openproblems-cli image --methods --task label_projection logistic_regression_scran openproblems-r-base -$ docker run -dt singlecellopenproblems/openproblems-r-base openproblems-cli run --task label_projection 
--input dataset.h5ad --output method.h5ad logistic_regression_log_cpm +$ docker run -dt singlecellopenproblems/openproblems-r-base openproblems-cli run --task label_projection --input dataset.h5ad --output method.h5ad logistic_regression_log_cp10k $ openproblems-cli list --metrics --task label_projection accuracy f1 diff --git a/openproblems/api/hash.py b/openproblems/api/hash.py index 89eac0d8fa..6b524dd6d9 100644 --- a/openproblems/api/hash.py +++ b/openproblems/api/hash.py @@ -4,8 +4,10 @@ import importlib import json import os +import random import scprep import subprocess +import warnings _MODULE = type(os) @@ -30,12 +32,20 @@ def get_module(fun): return fun.__module__ -def git_hash(file): - """Get the git commit hash associated with a file.""" - return _run( - ["git", "log", "-n", "1", "--pretty=format:%H", "--", file], - cwd=os.path.dirname(__file__), - ) +def git_hash(obj): + """Get the git commit hash associated with the latest change to a file.""" + if isinstance(obj, str) and os.path.isfile(obj): + # if it's a file, run git log to get the hash + return _run( + ["git", "log", "-n", "1", "--pretty=format:%H", "--", obj], + cwd=os.path.dirname(__file__), + ) + elif hasattr(obj, "__file__"): + # if it's a module, get the associated file + return git_hash(obj.__file__) + elif callable(obj): + # if it's a function, get the associated module + return git_hash(importlib.import_module(get_module(obj))) def docker_token(image_name): @@ -44,8 +54,10 @@ def docker_token(image_name): [ "curl", "--silent", - f"https://auth.docker.io/token?scope=repository:{image_name}:" - "pull&service=registry.docker.io", + ( + f"https://auth.docker.io/token?scope=repository:{image_name}:" + "pull&service=registry.docker.io" + ), ] ) ) @@ -72,17 +84,24 @@ def docker_labels_from_api(image_name, tag="latest"): def docker_hash(image_name): """Get the docker image hash associated with an image.""" try: - return _run( - [ - "docker", - "inspect", - "-f='{{ index .Config.Labels \"bio.openproblems.hash\"}}'", - image_name, - ] + try: + return _run( + [ + "docker", + "inspect", + "-f='{{ index .Config.Labels \"bio.openproblems.hash\"}}'", + image_name, + ] + ) + except (RuntimeError, FileNotFoundError): # pragma: nocover + # docker is unavailable or the image is not locally available; use the API + return docker_labels_from_api(image_name)["bio.openproblems.hash"] + except Exception: # pragma: nocover + warnings.warn( + "Failed to access docker or the docker API; docker image hash failed. All" + f" jobs using {image_name} will not be cached." ) - except (RuntimeError, FileNotFoundError): # pragma: nocover - # docker is unavailable or the image is not locally available; use the API - return docker_labels_from_api(image_name)["bio.openproblems.hash"] + return str(random.getrandbits(256)) def get_context(obj, context=None): diff --git a/openproblems/api/load.py b/openproblems/api/load.py index a238bfa72e..43afd06c22 100644 --- a/openproblems/api/load.py +++ b/openproblems/api/load.py @@ -1,3 +1,4 @@ +from ..data.utils import write_h5ad from . 
import utils @@ -10,4 +11,4 @@ def load_dataset(task_name, function_name, test): def main(args): """Run the ``load`` subcommand.""" adata = load_dataset(args.task, args.name, args.test) - utils.write_h5ad(adata, args.output) + write_h5ad(adata, args.output) diff --git a/openproblems/api/run.py b/openproblems/api/run.py index 77f4d9dcb9..55a1daef56 100644 --- a/openproblems/api/run.py +++ b/openproblems/api/run.py @@ -1,3 +1,4 @@ +from ..data.utils import write_h5ad from ..utils import temporary from . import utils @@ -23,7 +24,7 @@ def main(args): """Run the ``run`` subcommand.""" adata = anndata.read_h5ad(args.input) adata = run_method(adata, args.task, args.name, args.test) - utils.write_h5ad(adata, args.output) + write_h5ad(adata, args.output) if args.version_file is not None: with open(args.version_file, "w") as handle: handle.write(adata.uns["method_code_version"]) diff --git a/openproblems/api/utils.py b/openproblems/api/utils.py index a6bc3c4222..1953f23633 100644 --- a/openproblems/api/utils.py +++ b/openproblems/api/utils.py @@ -1,5 +1,4 @@ import openproblems -import os class NoSuchFunctionError(RuntimeError): @@ -50,9 +49,3 @@ def print_output(output): print("\n".join(output)) else: print(output) - - -def write_h5ad(adata, filename): - if os.path.isfile(filename): - os.unlink(filename) - adata.write_h5ad(filename) diff --git a/openproblems/data/Wagner_2018_zebrafish_embryo_CRISPR.py b/openproblems/data/Wagner_2018_zebrafish_embryo_CRISPR.py index ef7d56a343..d87d2c5fd8 100644 --- a/openproblems/data/Wagner_2018_zebrafish_embryo_CRISPR.py +++ b/openproblems/data/Wagner_2018_zebrafish_embryo_CRISPR.py @@ -6,7 +6,7 @@ @utils.loader( data_url="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE112294", - data_reference="https://doi.org/10.1126/science.aar4362", + data_reference="wagner2018single", ) def load_zebrafish_chd_tyr(test=False): """Download zebrafish data from GEO accession GSE112294""" diff --git a/openproblems/data/allen_brain_atlas.py b/openproblems/data/allen_brain_atlas.py index 66412172ee..65e9d6822e 100644 --- a/openproblems/data/allen_brain_atlas.py +++ b/openproblems/data/allen_brain_atlas.py @@ -2,14 +2,13 @@ import numpy as np import os -import scanpy as sc import scprep import tempfile URL = "https://figshare.com/ndownloader/files/36509385" -@utils.loader(data_url=URL, data_reference="https://doi.org/10.1038/nn.4216") +@utils.loader(data_url=URL, data_reference="tasic2016adult") def load_mouse_brain_atlas(test=False): """Download Allen Brain (Taisc et al.,2016) data from Figshare. @@ -17,6 +16,8 @@ def load_mouse_brain_atlas(test=False): to the dataset is available at: https://figshare.com/articles/dataset/allen_brain_h5ad/20338089 """ + import scanpy as sc + if test: # load full data first, cached if available adata = load_mouse_brain_atlas(test=False) diff --git a/openproblems/data/cengen.py b/openproblems/data/cengen.py index e47f312fa4..7cdf923527 100644 --- a/openproblems/data/cengen.py +++ b/openproblems/data/cengen.py @@ -1,7 +1,6 @@ from . import utils import os -import scanpy as sc import scprep import tempfile @@ -12,9 +11,7 @@ ) -@utils.loader( - data_url=URL, data_reference="https://doi.org/10.1016/j.neuron.2018.07.042" -) +@utils.loader(data_url=URL, data_reference="hammarlund2018cengen") def load_cengen(test=False): """Download CeNGEN data from GitHub. @@ -22,6 +19,8 @@ def load_cengen(test=False): To learn about WormBase curation efforts for C. 
elegans single cell data visit https://wormbase.github.io/single-cell/ """ + import scanpy as sc + with tempfile.TemporaryDirectory() as tempdir: filepath = os.path.join(tempdir, "cengen.h5ad") scprep.io.download.download_url(URL, filepath) diff --git a/openproblems/data/immune_cells.py b/openproblems/data/immune_cells.py index abed1d0a28..b053e27597 100644 --- a/openproblems/data/immune_cells.py +++ b/openproblems/data/immune_cells.py @@ -1,7 +1,6 @@ from . import utils import os -import scanpy as sc import scprep import tempfile @@ -9,9 +8,11 @@ URL = "https://ndownloader.figshare.com/files/36086786" -@utils.loader(data_url=URL, data_reference="https://doi.org/10.1038/s41592-021-01336-8") +@utils.loader(data_url=URL, data_reference="luecken2022benchmarking") def load_immune(test=False): """Download immune human data from figshare.""" + import scanpy as sc + if test: # load full data first, cached if available adata = load_immune(test=False) @@ -37,7 +38,6 @@ def load_immune(test=False): # NOTE: adata.X contains log-normalized data, so we're moving it adata.layers["log_normalized"] = adata.X adata.X = adata.layers["counts"] - del adata.layers["counts"] # Ensure there are no cells or genes with 0 counts utils.filter_genes_cells(adata) diff --git a/openproblems/data/lung.py b/openproblems/data/lung.py new file mode 100644 index 0000000000..1f1d409eef --- /dev/null +++ b/openproblems/data/lung.py @@ -0,0 +1,45 @@ +from . import utils + +import os +import scprep +import tempfile + +# sparsified from https://figshare.com/articles/dataset/Benchmarking_atlas-level_data_integration_in_single-cell_genomics_-_integration_task_datasets_Immune_and_pancreas_/12420968/2 # noqa: E501 +URL = "https://figshare.com/ndownloader/files/24539942" + + +@utils.loader(data_url=URL, data_reference="luecken2022benchmarking") +def load_lung(test=False): + """Download lung data from figshare.""" + import scanpy as sc + + if test: + # load full data first, cached if available + adata = load_lung(test=False) + + # Subsample immune data to two batches with 250 cells each + adata = adata[:, :500].copy() + batch1 = adata[adata.obs.batch == "4"][:250] + batch2 = adata[adata.obs.batch == "A6"][:250] + adata = batch1.concatenate(batch2) + # Note: could also use 200-500 HVGs rather than 200 random genes + + # Ensure there are no cells or genes with 0 counts + utils.filter_genes_cells(adata) + + return adata + + else: + with tempfile.TemporaryDirectory() as tempdir: + filepath = os.path.join(tempdir, "Lung_atlas_public.h5ad") + scprep.io.download.download_url(URL, filepath) + adata = sc.read(filepath) + + # NOTE: adata.X contains log-normalized data, so we're moving it + adata.layers["log_normalized"] = adata.X + adata.X = adata.layers["counts"] + + # Ensure there are no cells or genes with 0 counts + utils.filter_genes_cells(adata) + + return adata diff --git a/openproblems/data/mouse_blood_olssen_labelled.py b/openproblems/data/mouse_blood_olsson_labelled.py similarity index 92% rename from openproblems/data/mouse_blood_olssen_labelled.py rename to openproblems/data/mouse_blood_olsson_labelled.py index d04ecb353f..dbc6b40daa 100644 --- a/openproblems/data/mouse_blood_olssen_labelled.py +++ b/openproblems/data/mouse_blood_olsson_labelled.py @@ -1,7 +1,6 @@ from . 
import utils import os -import scanpy as sc import scprep import tempfile @@ -10,9 +9,11 @@ URL = "https://figshare.com/ndownloader/files/36872214" -@utils.loader(data_url=URL, data_reference="https://doi.org/10.1038/nature19348") +@utils.loader(data_url=URL, data_reference="olsson2016single") def load_olsson_2016_mouse_blood(test=False): """Download Olsson, 2016_mouse_blood, Nature, 2016 data from Figshare.""" + import scanpy as sc + if test: # load full data first, cached if available adata = load_olsson_2016_mouse_blood(test=False) diff --git a/openproblems/data/mouse_hspc_nestorowa2016.py b/openproblems/data/mouse_hspc_nestorowa2016.py index 81218ef9f0..a43c397146 100644 --- a/openproblems/data/mouse_hspc_nestorowa2016.py +++ b/openproblems/data/mouse_hspc_nestorowa2016.py @@ -1,7 +1,6 @@ from . import utils import os -import scanpy as sc import scprep import tempfile @@ -10,11 +9,11 @@ URL = "https://ndownloader.figshare.com/files/36088649" -@utils.loader( - data_url=URL, data_reference="https://doi.org/10.1182/blood-2016-05-716480" -) +@utils.loader(data_url=URL, data_reference="nestorowa2016single") def load_mouse_hspc_nestorowa2016(test=False): - """Download Nesterova data from Figshare.""" + """Download Nestorowa data from Figshare.""" + import scanpy as sc + if test: # load full data first, cached if available adata = load_mouse_hspc_nestorowa2016(test=False) diff --git a/openproblems/data/multimodal/citeseq.py b/openproblems/data/multimodal/citeseq.py index 7bf3a509c6..23f57a6aa3 100644 --- a/openproblems/data/multimodal/citeseq.py +++ b/openproblems/data/multimodal/citeseq.py @@ -17,7 +17,7 @@ @loader( data_url="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE100866", - data_reference="https://doi.org/10.1038/nmeth.4380", + data_reference="stoeckius2017simultaneous", ) def load_citeseq_cbmc(test=False): """Download CITEseq data from GEO.""" diff --git a/openproblems/data/multimodal/scicar/base.py b/openproblems/data/multimodal/scicar/base.py index 4ff018f649..b0930ef91e 100644 --- a/openproblems/data/multimodal/scicar/base.py +++ b/openproblems/data/multimodal/scicar/base.py @@ -6,7 +6,7 @@ import scprep import tempfile -DATA_REFERENCE = "https://doi.org/10.1126/science.aau0730" +DATA_REFERENCE = "cao2018joint" def load_scicar( diff --git a/openproblems/data/multimodal/utils.py b/openproblems/data/multimodal/utils.py index 6757ba0675..d4b4245d8a 100644 --- a/openproblems/data/multimodal/utils.py +++ b/openproblems/data/multimodal/utils.py @@ -1,7 +1,6 @@ import anndata import numpy as np import pandas as pd -import scanpy as sc import scprep @@ -17,6 +16,8 @@ def subset_mode2_genes(adata, keep_genes): def filter_joint_data_empty_cells(adata): """Remove empty cells and genes from a multimodal dataset.""" + import scanpy as sc + assert np.all(adata.uns["mode2_obs"] == adata.obs.index) # filter cells n_cells_mode1 = scprep.utils.toarray(adata.X.sum(axis=1)).flatten() diff --git a/openproblems/data/pancreas.py b/openproblems/data/pancreas.py index 91288961ee..1e1663eebc 100644 --- a/openproblems/data/pancreas.py +++ b/openproblems/data/pancreas.py @@ -1,9 +1,7 @@ from . 
import utils -import anndata as ad import numpy as np import os -import scanpy as sc import scprep import tempfile @@ -11,18 +9,20 @@ URL = "https://ndownloader.figshare.com/files/36086813" -@utils.loader(data_url=URL, data_reference="https://doi.org/10.1038/s41592-021-01336-8") -def load_pancreas(test=False, integer_only=False): +@utils.loader(data_url=URL, data_reference="luecken2022benchmarking") +def load_pancreas(test=False, keep_techs=None): """Download pancreas data from figshare.""" + import scanpy as sc + if test: # load full data first, cached if available - adata = load_pancreas(test=False, integer_only=integer_only) + adata = load_pancreas( + test=False, + keep_techs=keep_techs or ["celseq", "inDrop4", "smarter"], + ) keep_celltypes = adata.obs["celltype"].dtype.categories[[0, 3]] - keep_techs = adata.obs["tech"].dtype.categories[[0, -3, -2]] - keep_tech_idx = adata.obs["tech"].isin(keep_techs) - keep_celltype_idx = adata.obs["celltype"].isin(keep_celltypes) - adata = adata[keep_tech_idx & keep_celltype_idx].copy() + adata = adata[adata.obs["celltype"].isin(keep_celltypes)].copy() # Subsample pancreas data adata = adata[:, :500].copy() @@ -52,36 +52,14 @@ def load_pancreas(test=False, integer_only=False): scprep.io.download.download_url(URL, filepath) adata = sc.read(filepath) + if keep_techs is not None: + adata = adata[adata.obs["tech"].isin(keep_techs)].copy() + # NOTE: adata.X contains log-normalized data, so we're moving it adata.layers["log_normalized"] = adata.X adata.X = adata.layers["counts"] - del adata.layers["counts"] - - if integer_only: - adata = _get_pancreas_integer(adata) # Ensure there are no cells or genes with 0 counts utils.filter_genes_cells(adata) return adata - - -def _get_pancreas_integer(adata: ad.AnnData): - """Transform counts to integer. - - For some platforms the pancreas data set only have processed counts. - Here we grab those with integer counts. - See https://github.com/theislab/scib-reproducibility/tree/main/notebooks/data_preprocessing/pancreas # noqa: E501 - """ - is_int = ["smartseq2"] - is_int += ["inDrop{}".format(x) for x in range(1, 5)] - - keep = np.zeros(len(adata)).astype(bool) - - for tech in is_int: - idx = adata.obs.tech.values == tech - keep = keep | idx - - adata = adata[keep, :].copy() - - return adata diff --git a/openproblems/data/sample/__init__.py b/openproblems/data/sample/__init__.py new file mode 100644 index 0000000000..723bebb943 --- /dev/null +++ b/openproblems/data/sample/__init__.py @@ -0,0 +1 @@ +from .sample import load_sample_data diff --git a/openproblems/data/sample.py b/openproblems/data/sample/sample.py similarity index 55% rename from openproblems/data/sample.py rename to openproblems/data/sample/sample.py index 008406807f..900d6b9aac 100644 --- a/openproblems/data/sample.py +++ b/openproblems/data/sample/sample.py @@ -1,23 +1,41 @@ -from .multimodal.scicar.cell_lines import rna_cells_url -from .multimodal.scicar.cell_lines import rna_genes_url -from .utils import loader +from ..utils import loader import anndata import numpy as np import pandas as pd +import pathlib import scipy.sparse +SCRIPT_PATH = pathlib.Path(__file__) + @loader( data_url="https://openproblems.bio", data_reference="https://github.com/openproblems-bio/openproblems", ) def load_sample_data(test=True): - """Create a simple dataset to use for testing in multimodal applications.""" - assert test + """Create a simple dataset to use for testing in multimodal applications. 
+ Genes and cells generated by: + ``` + from ..multimodal.scicar.cell_lines import rna_cells_url + from ..multimodal.scicar.cell_lines import rna_genes_url genes = pd.read_csv(rna_genes_url, low_memory=False, index_col=0, nrows=500) cells = pd.read_csv(rna_cells_url, low_memory=False, index_col=0, nrows=200) + ``` + """ + assert test + + genes = pd.read_csv( + SCRIPT_PATH.parent.joinpath("sample_genes.csv.gz"), + low_memory=False, + index_col=0, + ) + cells = pd.read_csv( + SCRIPT_PATH.parent.joinpath("sample_cells.csv.gz"), + low_memory=False, + index_col=0, + ) rna_data = scipy.sparse.csr_matrix( np.random.poisson(0.3, (cells.shape[0], genes.shape[0])).astype(np.float32) diff --git a/openproblems/data/sample/sample_cells.csv.gz b/openproblems/data/sample/sample_cells.csv.gz new file mode 100644 index 0000000000..13b7452b6a Binary files /dev/null and b/openproblems/data/sample/sample_cells.csv.gz differ diff --git a/openproblems/data/sample/sample_genes.csv.gz b/openproblems/data/sample/sample_genes.csv.gz new file mode 100644 index 0000000000..9058128d51 Binary files /dev/null and b/openproblems/data/sample/sample_genes.csv.gz differ diff --git a/openproblems/data/tabula_muris_senis.py b/openproblems/data/tabula_muris_senis.py index 28f709ea88..fd871744d2 100644 --- a/openproblems/data/tabula_muris_senis.py +++ b/openproblems/data/tabula_muris_senis.py @@ -1,67 +1,91 @@ from . import utils +import anndata as ad import os -import pandas as pd -import scanpy as sc +import requests import scprep import tempfile +import time + +COLLECTION_ID = "0b9d8a04-bb9d-44da-aa27-705bb65b54eb" +DOMAIN = "cellxgene.cziscience.com" +API_BASE = f"https://api.{DOMAIN}" +METHOD_ALIASES = {"10x 3' v2": "droplet", "Smart-seq2": "facs"} + + +def _get_json(url, retries=5, sleep=0.05, backoff=2): + try: + res = requests.get(url=url, headers={"Content-Type": "application/json"}) + return res.json() + except Exception: # pragma: nocover + if retries > 0: + time.sleep(sleep) + return _get_json(url, retries - 1, sleep * backoff, backoff) + raise + + +def check_unknown_organs(datasets, organ_list): + known_organs = set([t["label"] for d in datasets for t in d["tissue"]]) + unknown_organs = set(organ_list) - known_organs + if unknown_organs: + raise ValueError( + f"Unknown organs provided in `organ_list': {', '.join(unknown_organs)}." + f" Known organs are {', '.join(known_organs)}" + ) -def get_filenames_and_urls(url_df, method_list=None, organ_list=None): - """Takes in dataframe and returns corresponding filename(s) and url(s). +def matching_dataset(dataset, method_list, organ_list): + # if dataset has multiple methods, skip it + if len(dataset["assay"]) > 1: + return False - Takes in dataframe (with sample information stored), a list of methods, - and a list of organs. - Returns filenames and figshare URLs associated with inputs. - If method_list or organ_list are None, do not filter based on that argument. - """ - subset_df = url_df.copy() - # If method_list specified, filter based on methods in list. - if method_list: - subset_df = subset_df.loc[subset_df.method.isin(method_list)] - # If organ_list specified, filter based on organs in list. 
- if organ_list: - subset_df = subset_df.loc[subset_df.organ.isin(organ_list)] + # if dataset has multiple tissues, skip it + if len(dataset["tissue"]) > 1: + return False - return subset_df + method = dataset["assay"][0]["label"] + method = METHOD_ALIASES[method] + # if organ_list is not empty, check for specific tissue + if len(organ_list) > 0 and dataset["tissue"][0]["label"] not in organ_list: + return False -def make_anndata_from_filename_and_url(filename, url, test=False): - """Takes in filename and url pair. Returns corresponding anndata object.""" - with tempfile.TemporaryDirectory() as tempdir: - filepath = os.path.join(tempdir, filename) - scprep.io.download.download_url(url, filepath) - adata = sc.read_h5ad(filepath) - utils.filter_genes_cells(adata) + # if method_list is not empty, check for specific method + if len(method_list) > 0 and method not in method_list: + return False - if test: - sc.pp.subsample(adata, n_obs=100) - adata = adata[:, :1000] - utils.filter_genes_cells(adata) + return True - return adata +def load_raw_counts(dataset): + import scanpy as sc -def make_anndata_list(subset_df, test): - """Makes anndata from filename/url pair. Adds to list of anndatas. + dataset_id = dataset["id"] + assets_path = ( + f"/curation/v1/collections/{COLLECTION_ID}/datasets/{dataset_id}/assets" + ) + url = f"{API_BASE}{assets_path}" + assets = _get_json(url) + assets = [asset for asset in assets if asset["filetype"] == "H5AD"] + assert len(assets) == 1 + asset = assets[0] - Input dataframe that contains filenames and urls to make anndatas from. - Returns a list of anndata objects. - """ - adata_list = [] - for i in range(len(subset_df)): - row = subset_df.iloc[i] - adata_list.append( - make_anndata_from_filename_and_url(row.filename, row.figshare_url) - ) - if test: - return adata_list[0] - return adata_list + filename = f"{COLLECTION_ID}_{dataset_id}_{asset['filename']}" + with tempfile.TemporaryDirectory() as tempdir: + filepath = os.path.join(tempdir, filename) + scprep.io.download.download_url(asset["presigned_url"], filepath) + adata = sc.read_h5ad(filepath) + + utils.filter_genes_cells(adata) + # If `raw` exists, raw counts are there + if getattr(adata, "raw", None) is not None: + return adata.raw.to_adata() + return adata @utils.loader( data_url="https://tabula-muris-senis.ds.czbiohub.org/", - data_reference="https://doi.org/10.1038/s41586-020-2496-1", + data_reference="tabula2020single", ) def load_tabula_muris_senis(test=False, method_list=None, organ_list=None): """Load tubula_muris_senis datasets into 1 anndata object based on user input. @@ -73,22 +97,38 @@ def load_tabula_muris_senis(test=False, method_list=None, organ_list=None): and droplet-fat anndata sets. (no facs-fat dataset available) """ - # df containing figshare links, method of collection, and organ for each - # tabula muris dataset - url_df = pd.read_csv( - os.path.join( - os.path.dirname(__file__), - "tabula_muris_senis_data_objects", - "tabula_muris_senis_data_objects.csv", - ), - header=0, - ) + if method_list is None: + method_list = [] + if organ_list is None: + organ_list = [] + method_list = [x.lower() for x in method_list] + organ_list = [x.lower() for x in organ_list] + + unknown_methods = set(method_list) - set(["facs", "droplet"]) + if unknown_methods: + raise ValueError( + f"Unknown methods provided in `method_list': {','.join(unknown_methods)}." 
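The new `_get_json` helper above retries the cellxgene curation API with an exponentially growing delay. The same pattern, written as a generic helper (`with_retries` is a sketch for illustration, not part of this patch), makes the recursion easier to see in isolation.

```python
import time


def with_retries(fn, retries=5, sleep=0.05, backoff=2):
    """Call fn(); on failure sleep, multiply the delay, and retry until exhausted."""
    try:
        return fn()
    except Exception:
        if retries > 0:
            time.sleep(sleep)
            return with_retries(fn, retries - 1, sleep * backoff, backoff)
        raise


# Worst-case total wait with the defaults: 0.05 + 0.1 + 0.2 + 0.4 + 0.8 seconds.
attempts = []


def flaky():
    attempts.append(1)
    if len(attempts) < 3:
        raise ConnectionError("transient failure")
    return "ok"


print(with_retries(flaky))  # "ok" on the third attempt
```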
+ " Known methods are `facs' and `droplet'" + ) + + datasets_path = f"/curation/v1/collections/{COLLECTION_ID}" + url = f"{API_BASE}{datasets_path}" + datasets = _get_json(url)["datasets"] + check_unknown_organs(datasets, organ_list) + + adata_list = [] + for dataset in datasets: + if matching_dataset(dataset, method_list, organ_list): + adata_list.append(load_raw_counts(dataset)) + + assert len(adata_list) > 0 + adata = ad.concat(adata_list, join="outer") + + # this obs key causes write errors + del adata.obs["is_primary_data"] - subset_df = get_filenames_and_urls(url_df, method_list, organ_list) - adata_list = make_anndata_list(subset_df, test) - adata = adata_list[0].concatenate(adata_list[1:]) if test: - sc.pp.subsample(adata, n_obs=500) + adata = utils.subsample_even(adata, n_obs=500, even_obs="method") adata = adata[:, :1000] utils.filter_genes_cells(adata) return adata diff --git a/openproblems/data/tabula_muris_senis_data_objects/tabula_muris_senis_data_objects.csv b/openproblems/data/tabula_muris_senis_data_objects/tabula_muris_senis_data_objects.csv deleted file mode 100644 index 5a95df87f0..0000000000 --- a/openproblems/data/tabula_muris_senis_data_objects/tabula_muris_senis_data_objects.csv +++ /dev/null @@ -1,40 +0,0 @@ -filename,figshare_url,method,organ -tabula-muris-senis-facs-processed-official-annotations-Aorta.h5ad,https://ndownloader.figshare.com/files/23872460,facs,aorta -tabula-muris-senis-facs-processed-official-annotations-Kidney.h5ad,https://ndownloader.figshare.com/files/23872484,facs,kidney -tabula-muris-senis-facs-processed-official-annotations-Diaphragm.h5ad,https://ndownloader.figshare.com/files/23872487,facs,diaphragm -tabula-muris-senis-facs-processed-official-annotations-BAT.h5ad,https://ndownloader.figshare.com/files/23872493,facs,BAT -tabula-muris-senis-droplet-processed-official-annotations-Large_Intestine.h5ad,https://ndownloader.figshare.com/files/23872502,droplet,large_intestine -tabula-muris-senis-facs-processed-official-annotations-Spleen.h5ad,https://ndownloader.figshare.com/files/23872511,facs,spleen -tabula-muris-senis-facs-processed-official-annotations-Limb_Muscle.h5ad,https://ndownloader.figshare.com/files/23872517,facs,limb_muscle -tabula-muris-senis-facs-processed-official-annotations-Liver.h5ad,https://ndownloader.figshare.com/files/23872526,facs,liver -tabula-muris-senis-facs-processed-official-annotations-MAT.h5ad,https://ndownloader.figshare.com/files/23872544,facs,MAT -tabula-muris-senis-facs-processed-official-annotations-Thymus.h5ad,https://ndownloader.figshare.com/files/23872559,facs,thymus -tabula-muris-senis-facs-processed-official-annotations-Trachea.h5ad,https://ndownloader.figshare.com/files/23872568,facs,trachea -tabula-muris-senis-droplet-processed-official-annotations-Pancreas.h5ad,https://ndownloader.figshare.com/files/23872580,droplet,pancreas -tabula-muris-senis-facs-processed-official-annotations-GAT.h5ad,https://ndownloader.figshare.com/files/23872583,facs,GAT -tabula-muris-senis-facs-processed-official-annotations-SCAT.h5ad,https://ndownloader.figshare.com/files/23872601,facs,SCAT -tabula-muris-senis-facs-processed-official-annotations-Bladder.h5ad,https://ndownloader.figshare.com/files/23872610,facs,bladder -tabula-muris-senis-facs-processed-official-annotations-Lung.h5ad,https://ndownloader.figshare.com/files/23872619,facs,lung -tabula-muris-senis-facs-processed-official-annotations-Mammary_Gland.h5ad,https://ndownloader.figshare.com/files/23872637,facs,mammary_gland 
-tabula-muris-senis-facs-processed-official-annotations-Pancreas.h5ad,https://ndownloader.figshare.com/files/23872643,facs,pancreas -tabula-muris-senis-droplet-processed-official-annotations-Trachea.h5ad,https://ndownloader.figshare.com/files/23872655,droplet,trachea -tabula-muris-senis-facs-processed-official-annotations-Skin.h5ad,https://ndownloader.figshare.com/files/23872667,facs,skin -tabula-muris-senis-droplet-processed-official-annotations-Skin.h5ad,https://ndownloader.figshare.com/files/23872676,droplet,skin -tabula-muris-senis-facs-processed-official-annotations-Tongue.h5ad,https://ndownloader.figshare.com/files/23872703,facs,tongue -tabula-muris-senis-droplet-processed-official-annotations-Fat.h5ad,https://ndownloader.figshare.com/files/23872715,droplet,fat -tabula-muris-senis-droplet-processed-official-annotations-Thymus.h5ad,https://ndownloader.figshare.com/files/23872745,droplet,thymus -tabula-muris-senis-droplet-processed-official-annotations-Liver.h5ad,https://ndownloader.figshare.com/files/23872763,droplet,liver -tabula-muris-senis-facs-processed-official-annotations-Brain_Non-Myeloid.h5ad,https://ndownloader.figshare.com/files/23872787,facs,brain_non-myeloid -tabula-muris-senis-droplet-processed-official-annotations-Heart_and_Aorta.h5ad,https://ndownloader.figshare.com/files/23872799,droplet,heart_and_aorta -tabula-muris-senis-facs-processed-official-annotations-Heart.h5ad,https://ndownloader.figshare.com/files/23872838,facs,heart -tabula-muris-senis-droplet-processed-official-annotations-Mammary_Gland.h5ad,https://ndownloader.figshare.com/files/23872862,droplet,mammary_gland -tabula-muris-senis-facs-processed-official-annotations-Brain_Myeloid.h5ad,https://ndownloader.figshare.com/files/23872886,facs,brain_myeloid -tabula-muris-senis-droplet-processed-official-annotations-Bladder.h5ad,https://ndownloader.figshare.com/files/23872916,droplet,bladder -tabula-muris-senis-facs-processed-official-annotations-Large_Intestine.h5ad,https://ndownloader.figshare.com/files/23872931,facs,large_intestine -tabula-muris-senis-facs-processed-official-annotations-Marrow.h5ad,https://ndownloader.figshare.com/files/23872976,facs,marrow -tabula-muris-senis-droplet-processed-official-annotations-Lung.h5ad,https://ndownloader.figshare.com/files/23873012,droplet,lung -tabula-muris-senis-droplet-processed-official-annotations-Kidney.h5ad,https://ndownloader.figshare.com/files/23873024,droplet,kidney -tabula-muris-senis-droplet-processed-official-annotations-Limb_Muscle.h5ad,https://ndownloader.figshare.com/files/23873036,droplet,limb_muscle -tabula-muris-senis-droplet-processed-official-annotations-Spleen.h5ad,https://ndownloader.figshare.com/files/23873054,droplet,spleen -tabula-muris-senis-droplet-processed-official-annotations-Tongue.h5ad,https://ndownloader.figshare.com/files/23873081,droplet,tongue -tabula-muris-senis-droplet-processed-official-annotations-Marrow.h5ad,https://ndownloader.figshare.com/files/23873090,droplet,marrow diff --git a/openproblems/data/tenx.py b/openproblems/data/tenx.py index 73cccee21c..1d9f9c3621 100644 --- a/openproblems/data/tenx.py +++ b/openproblems/data/tenx.py @@ -1,7 +1,6 @@ from . 
import utils import os -import scanpy as sc import scprep import tempfile @@ -11,12 +10,13 @@ # TODO(@LuckyMD): document relevant link at figshare.com/articles/* PBMC_5K_URL = "https://ndownloader.figshare.com/files/25555739" -REFERENCE_URL = "https://www.10xgenomics.com/resources/datasets" -@utils.loader(data_url=PBMC_1K_URL, data_reference=REFERENCE_URL) +@utils.loader(data_url=PBMC_1K_URL, data_reference="10x2018pbmc") def load_tenx_1k_pbmc(test=False): """Download PBMC data from Figshare.""" + import scanpy as sc + if test: adata = load_tenx_1k_pbmc(test=False) sc.pp.subsample(adata, n_obs=100) @@ -31,9 +31,11 @@ def load_tenx_1k_pbmc(test=False): return adata -@utils.loader(data_url=PBMC_5K_URL, data_reference=REFERENCE_URL) +@utils.loader(data_url=PBMC_5K_URL, data_reference="10x2019pbmc") def load_tenx_5k_pbmc(test=False): """Download 5k PBMCs from 10x Genomics.""" + import scanpy as sc + if test: # load full data first, cached if available adata = load_tenx_5k_pbmc(test=False) diff --git a/openproblems/data/tnbc_wu2021.py b/openproblems/data/tnbc_wu2021.py index 58dcc53d8b..3f27220138 100644 --- a/openproblems/data/tnbc_wu2021.py +++ b/openproblems/data/tnbc_wu2021.py @@ -2,7 +2,6 @@ import numpy as np import os -import scanpy as sc import scipy.sparse import scprep import tempfile @@ -10,7 +9,7 @@ URL = "https://figshare.com/ndownloader/files/37593188" -@utils.loader(data_url=URL, data_reference="https://doi.org/10.1038/s41588-021-00911-1") +@utils.loader(data_url=URL, data_reference="wu2021single") def load_tnbc_data(test=False): """Download TNBC data (Wu et al., 2021) from Figshare. @@ -19,6 +18,8 @@ def load_tnbc_data(test=False): https://figshare.com/articles/dataset/TNBC_Data_from_Wu_et_al_2021/20338536 """ + import scanpy as sc + if test: # load full data first, cached if available adata = load_tnbc_data(test=False) diff --git a/openproblems/data/utils.py b/openproblems/data/utils.py index ce1366ca92..9c63d2254f 100644 --- a/openproblems/data/utils.py +++ b/openproblems/data/utils.py @@ -4,8 +4,9 @@ import functools import hashlib import logging +import numpy as np import os -import scanpy as sc +import scipy.sparse log = logging.getLogger("openproblems") @@ -27,12 +28,37 @@ def _hash_function(func, *args, **kwargs): def _cache_path(func, *args, **kwargs): + try: + os.mkdir(TEMPDIR) + except OSError: + pass if hasattr(func, "__wrapped__"): func = func.__wrapped__ filename = "openproblems_{}.h5ad".format(_hash_function(func, *args, **kwargs)) return os.path.join(TEMPDIR, filename) +def _fix_matrix_format(X): + if scipy.sparse.issparse(X) and not isinstance(X, scipy.sparse.csr_matrix): + X = X.tocsr() + if isinstance(X, np.matrix): + X = X.A + return X + + +def _fix_adata(adata): + adata.strings_to_categoricals() + if "var_names_all" not in adata.uns: + adata.uns["var_names_all"] = adata.var.index.to_numpy() + adata.X = _fix_matrix_format(adata.X) + for layer in adata.layers: + adata.layers[layer] = _fix_matrix_format(adata.layers[layer]) + for obsm in adata.obsm: + adata.obsm[obsm] = _fix_matrix_format(adata.obsm[obsm]) + if "counts" not in adata.layers: + adata.layers["counts"] = adata.X + + def loader(data_url, data_reference): """Decorate a data loader function. 
@@ -48,31 +74,17 @@ def decorator(func): @functools.wraps(func) def apply_func(*args, **kwargs): filepath = _cache_path(func, *args, **kwargs) + dataset_name = f"{func.__name__}({args}, {kwargs})" if os.path.isfile(filepath): - log.debug( - "Loading cached {}({}, {}) dataset".format( - func.__name__, args, kwargs - ) - ) + log.debug(f"Loading cached {dataset_name} dataset") adata = anndata.read_h5ad(filepath) adata.uns["_from_cache"] = True return adata else: - log.debug( - "Downloading {}({}, {}) dataset".format(func.__name__, args, kwargs) - ) + log.debug(f"Downloading {dataset_name} dataset") adata = func(*args, **kwargs) - adata.strings_to_categoricals() adata.uns["_from_cache"] = False - if "var_names_all" not in adata.uns: - adata.uns["var_names_all"] = adata.var.index.to_numpy() - if "counts" not in adata.layers: - adata.layers["counts"] = adata.X - try: - os.mkdir(TEMPDIR) - except OSError: - pass - adata.write_h5ad(filepath) + write_h5ad(adata, filepath) return adata apply_func.metadata = dict(data_url=data_url, data_reference=data_reference) @@ -83,6 +95,8 @@ def apply_func(*args, **kwargs): def filter_genes_cells(adata): """Remove empty cells and genes.""" + import scanpy as sc + if "var_names_all" not in adata.uns: # fill in original var names before filtering adata.uns["var_names_all"] = adata.var.index.to_numpy() @@ -106,6 +120,8 @@ def subsample_even(adata, n_obs, even_obs): adata : AnnData Subsampled AnnData object """ + import scanpy as sc + values = adata.obs[even_obs].unique() adatas = [] n_obs_per_value = n_obs // len(values) @@ -120,3 +136,10 @@ def subsample_even(adata, n_obs, even_obs): adata_out.varm = adata.varm adata_out.varp = adata.varp return adata_out + + +def write_h5ad(adata, filepath): + if os.path.isfile(filepath): + os.unlink(filepath) + _fix_adata(adata) + adata.write_h5ad(filepath) diff --git a/openproblems/data/zebrafish.py b/openproblems/data/zebrafish.py index 630b9be0a2..23e8540f72 100644 --- a/openproblems/data/zebrafish.py +++ b/openproblems/data/zebrafish.py @@ -11,7 +11,7 @@ ) -@utils.loader(data_url=URL, data_reference="https://doi.org/10.1126/science.aar4362") +@utils.loader(data_url=URL, data_reference="wagner2018single") def load_zebrafish(test=False): """Download zebrafish data from figshare.""" with tempfile.TemporaryDirectory() as tempdir: diff --git a/openproblems/patch.py b/openproblems/patch.py index 326bc38f29..809e187c0d 100644 --- a/openproblems/patch.py +++ b/openproblems/patch.py @@ -52,7 +52,7 @@ def _download_aftp( if timeout: wget_command_list += ["-T", str(timeout)] - log.debug("Running: %s" % (" ".join(wget_command_list))) + log.debug("Running: %s" % " ".join(wget_command_list)) subprocess.call(wget_command_list) return tmp_path diff --git a/openproblems/tasks/__init__.py b/openproblems/tasks/__init__.py index 37fe938346..e576e230dc 100644 --- a/openproblems/tasks/__init__.py +++ b/openproblems/tasks/__init__.py @@ -1,7 +1,7 @@ from . import denoising from . import dimensionality_reduction from . import label_projection -from . import multimodal_data_integration +from . import matching_modalities from . import regulatory_effect_prediction from . 
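`write_h5ad` now funnels every matrix through `_fix_matrix_format` before writing, so cached datasets always hold CSR sparse matrices or plain ndarrays. A self-contained sketch of that normalisation, assuming only numpy and scipy (the `fix_matrix_format` name is illustrative):

```python
import numpy as np
import scipy.sparse


def fix_matrix_format(X):
    """Return CSR for any sparse input and a plain ndarray for np.matrix input."""
    if scipy.sparse.issparse(X) and not isinstance(X, scipy.sparse.csr_matrix):
        X = X.tocsr()
    if isinstance(X, np.matrix):
        X = np.asarray(X)
    return X


print(type(fix_matrix_format(scipy.sparse.random(5, 5, format="csc"))))  # CSR matrix
print(type(fix_matrix_format(np.matrix([[1, 2], [3, 4]]))))              # ndarray
```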
import spatial_decomposition from ._batch_integration import batch_integration_embed diff --git a/openproblems/tasks/_batch_integration/README.md b/openproblems/tasks/_batch_integration/README.md index f67783341d..c17078df0a 100644 --- a/openproblems/tasks/_batch_integration/README.md +++ b/openproblems/tasks/_batch_integration/README.md @@ -28,5 +28,5 @@ Metrics for this task can be divided into those that assess the removal of batch effects, and assessments of the conservation of biological variation. This can be a helpful distinction when devising new metrics. This task, including the subtask structure, was taken from a [benchmarking study of data integration -methods](https://www.biorxiv.org/content/10.1101/2020.05.22.111161v2). This is a useful +methods](https://openproblems.bio/bibliography#luecken2022benchmarking). This is a useful reference for more background reading on the task and the above concepts. diff --git a/openproblems/tasks/_batch_integration/_common/__init__.py b/openproblems/tasks/_batch_integration/_common/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openproblems/tasks/_batch_integration/_common/api.py b/openproblems/tasks/_batch_integration/_common/api.py new file mode 100644 index 0000000000..41ba21b45c --- /dev/null +++ b/openproblems/tasks/_batch_integration/_common/api.py @@ -0,0 +1,80 @@ +from ....data.sample import load_sample_data +from ....tools.decorators import dataset +from .utils import filter_celltypes +from .utils import precompute_hvg + +import numbers +import numpy as np + +MIN_CELLS_PER_CELLTYPE = 50 +N_HVG_UNINT = 2000 + + +def check_neighbors(adata, neighbors_key, connectivities_key, distances_key): + assert neighbors_key in adata.uns + assert adata.uns[neighbors_key]["connectivities_key"] == connectivities_key + assert adata.uns[neighbors_key]["distances_key"] == distances_key + assert connectivities_key in adata.obsp + assert distances_key in adata.obsp + + +def check_dataset( + adata, + do_check_hvg=False, +): + """Check that dataset output fits expected API.""" + + assert "batch" in adata.obs + assert "labels" in adata.obs + assert (adata.obs["labels"].value_counts() >= MIN_CELLS_PER_CELLTYPE).all() + + assert "log_normalized" in adata.layers + assert "counts" in adata.layers + + assert adata.var_names.is_unique + assert adata.obs_names.is_unique + + assert "n_genes_pre" in adata.uns + assert isinstance(adata.uns["n_genes_pre"], numbers.Integral) + assert adata.uns["n_genes_pre"] == adata.n_vars + + assert "organism" in adata.uns + assert adata.uns["organism"] in ["mouse", "human"] + + assert "X_uni_pca" in adata.obsm + + if do_check_hvg: + assert "hvg_unint" in adata.uns + assert len(adata.uns["hvg_unint"]) == min(N_HVG_UNINT, adata.n_vars) + assert np.all(np.isin(adata.uns["hvg_unint"], adata.var.index)) + + check_neighbors(adata, "uni", "uni_connectivities", "uni_distances") + + return True + + +@dataset() +def sample_dataset(): + """Create a simple dataset to use for testing methods in this task.""" + import scanpy as sc + + adata = load_sample_data() + adata.uns["organism"] = "human" + + adata.var.index = adata.var.gene_short_name.astype(str) + adata.var_names_make_unique() + adata.obs_names_make_unique() + + sc.pp.normalize_total(adata) + adata.layers["log_normalized"] = adata.X + + adata.obs["batch"] = np.random.choice(2, adata.shape[0], replace=True).astype(str) + adata.obs["labels"] = np.random.choice(3, adata.shape[0], replace=True).astype(str) + adata = filter_celltypes(adata) + + adata.uns["hvg_unint"] 
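`check_dataset` and `sample_dataset` above rely on the key-naming convention scanpy follows when `key_added="uni"` is passed to `pp.neighbors`: the graph metadata lands in `uns["uni"]` and the matrices in `obsp["uni_connectivities"]` and `obsp["uni_distances"]`. A small demonstration on random data, assuming scanpy and anndata are installed:

```python
import anndata
import numpy as np
import scanpy as sc

adata = anndata.AnnData(np.random.rand(50, 20).astype(np.float32))
sc.pp.pca(adata, n_comps=10)
adata.obsm["X_uni_pca"] = adata.obsm["X_pca"]
sc.pp.neighbors(adata, use_rep="X_uni_pca", key_added="uni")

# These are exactly the keys check_neighbors() asserts for the "uni" graph.
assert adata.uns["uni"]["connectivities_key"] == "uni_connectivities"
assert adata.uns["uni"]["distances_key"] == "uni_distances"
assert "uni_connectivities" in adata.obsp and "uni_distances" in adata.obsp
```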
= precompute_hvg(adata) + adata.uns["n_genes_pre"] = adata.n_vars + + adata.obsm["X_uni_pca"] = sc.pp.pca(adata.X) + sc.pp.neighbors(adata, use_rep="X_uni_pca", key_added="uni") + return adata diff --git a/openproblems/tasks/_batch_integration/_common/datasets/__init__.py b/openproblems/tasks/_batch_integration/_common/datasets/__init__.py new file mode 100644 index 0000000000..f2fbcbc05f --- /dev/null +++ b/openproblems/tasks/_batch_integration/_common/datasets/__init__.py @@ -0,0 +1,3 @@ +from .immune import immune_batch +from .lung import lung_batch +from .pancreas import pancreas_batch diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/immune.py b/openproblems/tasks/_batch_integration/_common/datasets/immune.py similarity index 55% rename from openproblems/tasks/_batch_integration/batch_integration_graph/datasets/immune.py rename to openproblems/tasks/_batch_integration/_common/datasets/immune.py index 8c39c0fe28..4732e9c3a3 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/immune.py +++ b/openproblems/tasks/_batch_integration/_common/datasets/immune.py @@ -1,22 +1,33 @@ from .....data.immune_cells import load_immune from .....tools.decorators import dataset - -import scanpy as sc +from ..utils import filter_celltypes +from ..utils import precompute_hvg +from typing import Optional @dataset( dataset_name="Immune (by batch)", data_url=load_immune.metadata["data_url"], data_reference=load_immune.metadata["data_reference"], - dataset_summary="Human immune cells from peripheral blood and bone marrow " - "taken from 5 datasets comprising 10 batches across technologies (10X, " - "Smart-seq2).", + dataset_summary=( + "Human immune cells from peripheral blood and bone marrow taken from 5 datasets" + " comprising 10 batches across technologies (10X, Smart-seq2)." + ), image="openproblems", ) -def immune_batch(test=False): +def immune_batch( + test: bool = False, + min_celltype_count: Optional[int] = None, + n_hvg: Optional[int] = None, +): + import scanpy as sc + adata = load_immune(test) + adata.uns["organism"] = "human" adata.obs["labels"] = adata.obs["final_annotation"] + adata = filter_celltypes(adata, min_celltype_count=min_celltype_count) + sc.pp.filter_genes(adata, min_counts=1) sc.pp.filter_genes(adata, min_cells=1) adata.var_names_make_unique() @@ -33,4 +44,7 @@ def immune_batch(test=False): sc.pp.neighbors(adata, use_rep="X_uni_pca", key_added="uni") adata.var_names_make_unique() + + adata.uns["hvg_unint"] = precompute_hvg(adata, n_genes=n_hvg) + adata.uns["n_genes_pre"] = adata.n_vars return adata diff --git a/openproblems/tasks/_batch_integration/_common/datasets/lung.py b/openproblems/tasks/_batch_integration/_common/datasets/lung.py new file mode 100644 index 0000000000..0cdea36d46 --- /dev/null +++ b/openproblems/tasks/_batch_integration/_common/datasets/lung.py @@ -0,0 +1,50 @@ +from .....data.lung import load_lung +from .....tools.decorators import dataset +from ..utils import filter_celltypes +from ..utils import precompute_hvg +from typing import Optional + + +@dataset( + dataset_name="Lung (Viera Braga et al.)", + data_url=load_lung.metadata["data_url"], + data_reference=load_lung.metadata["data_reference"], + dataset_summary=( + "Human lung scRNA-seq data from 3 datasets with 32,472 cells. From Vieira Braga" + " et al. Technologies: 10X and Drop-seq." 
+ ), + image="openproblems", +) +def lung_batch( + test: bool = False, + min_celltype_count: Optional[int] = None, + n_hvg: Optional[int] = None, +): + import scanpy as sc + + adata = load_lung(test) + adata.uns["organism"] = "human" + adata.obs["labels"] = adata.obs["cell_type"] + # No need to rename batch column as it already exists + + adata = filter_celltypes(adata, min_celltype_count=min_celltype_count) + + sc.pp.filter_genes(adata, min_counts=1) + sc.pp.filter_genes(adata, min_cells=1) + + adata.X = adata.layers["log_normalized"] + + sc.tl.pca( + adata, + svd_solver="arpack", + return_info=True, + ) + adata.obsm["X_uni_pca"] = adata.obsm["X_pca"] + + sc.pp.neighbors(adata, use_rep="X_uni_pca", key_added="uni") + + adata.var_names_make_unique() + + adata.uns["hvg_unint"] = precompute_hvg(adata, n_genes=n_hvg) + adata.uns["n_genes_pre"] = adata.n_vars + return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/pancreas.py b/openproblems/tasks/_batch_integration/_common/datasets/pancreas.py similarity index 54% rename from openproblems/tasks/_batch_integration/batch_integration_graph/datasets/pancreas.py rename to openproblems/tasks/_batch_integration/_common/datasets/pancreas.py index 6d8af4f505..ff611ea1cc 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/pancreas.py +++ b/openproblems/tasks/_batch_integration/_common/datasets/pancreas.py @@ -1,23 +1,34 @@ from .....data.pancreas import load_pancreas from .....tools.decorators import dataset - -import scanpy as sc +from ..utils import filter_celltypes +from ..utils import precompute_hvg +from typing import Optional @dataset( dataset_name="Pancreas (by batch)", data_url=load_pancreas.metadata["data_url"], data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreatic islet scRNA-seq data from 6 datasets " - "across technologies (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, " - "and SMARTER-seq).", + dataset_summary=( + "Human pancreatic islet scRNA-seq data from 6 datasets across technologies" + " (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, and SMARTER-seq)." 
+ ), image="openproblems", ) -def pancreas_batch(test=False): +def pancreas_batch( + test: bool = False, + min_celltype_count: Optional[int] = None, + n_hvg: Optional[int] = None, +): + import scanpy as sc + adata = load_pancreas(test) + adata.uns["organism"] = "human" adata.obs["labels"] = adata.obs["celltype"] adata.obs["batch"] = adata.obs["tech"] + adata = filter_celltypes(adata, min_celltype_count=min_celltype_count) + sc.pp.filter_genes(adata, min_counts=1) sc.pp.filter_genes(adata, min_cells=1) @@ -33,4 +44,7 @@ def pancreas_batch(test=False): sc.pp.neighbors(adata, use_rep="X_uni_pca", key_added="uni") adata.var_names_make_unique() + + adata.uns["hvg_unint"] = precompute_hvg(adata, n_genes=n_hvg) + adata.uns["n_genes_pre"] = adata.n_vars return adata diff --git a/openproblems/tasks/_batch_integration/_common/methods/__init__.py b/openproblems/tasks/_batch_integration/_common/methods/__init__.py new file mode 100644 index 0000000000..3fc9e1fe16 --- /dev/null +++ b/openproblems/tasks/_batch_integration/_common/methods/__init__.py @@ -0,0 +1,4 @@ +from .baseline import batch_random_integration +from .baseline import celltype_random_integration +from .baseline import no_integration +from .baseline import random_integration diff --git a/openproblems/tasks/_batch_integration/_common/methods/baseline.py b/openproblems/tasks/_batch_integration/_common/methods/baseline.py new file mode 100644 index 0000000000..e46a6dda05 --- /dev/null +++ b/openproblems/tasks/_batch_integration/_common/methods/baseline.py @@ -0,0 +1,117 @@ +from .....tools.decorators import baseline_method +from .....tools.utils import check_version + +import numpy as np + + +def _set_uns(adata): + adata.uns["neighbors"] = adata.uns["uni"] + adata.uns["neighbors"]["connectivities_key"] = "connectivities" + adata.uns["neighbors"]["distances_key"] = "distances" + + +def _randomize_features(X, partition=None): + X_out = X.copy() + if partition is None: + partition = np.full(X.shape[0], 0) + else: + partition = np.asarray(partition) + for partition_name in np.unique(partition): + partition_idx = np.argwhere(partition == partition_name).flatten() + X_out[partition_idx] = X[np.random.permutation(partition_idx)] + return X_out + + +def _randomize_graph(adata, partition=None): + distances, connectivities = ( + adata.obsp["uni_distances"], + adata.obsp["uni_connectivities"], + ) + new_idx = _randomize_features(np.arange(distances.shape[0]), partition=partition) + adata.obsp["distances"] = distances[new_idx][:, new_idx] + adata.obsp["connectivities"] = connectivities[new_idx][:, new_idx] + _set_uns(adata) + return adata + + +def _random_embedding(partition, jitter=0.01): + from sklearn.preprocessing import LabelEncoder + from sklearn.preprocessing import OneHotEncoder + + embedding = OneHotEncoder().fit_transform( + LabelEncoder().fit_transform(partition)[:, None] + ) + if jitter is not None: + embedding = embedding + np.random.uniform(-1 * jitter, jitter, embedding.shape) + return embedding + + +@baseline_method( + method_name="No Integration", + method_summary=( + "Cells are embedded by PCA on the unintegrated data. A graph is built on this" + " PCA embedding." 
+ ), +) +def no_integration(adata, test=False): + adata.obsp["connectivities"] = adata.obsp["uni_connectivities"] + adata.obsp["distances"] = adata.obsp["uni_distances"] + _set_uns(adata) + adata.obsm["X_emb"] = adata.obsm["X_uni_pca"] + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="Random Integration", + method_summary=( + "Feature values, embedding coordinates, and graph connectivity are all randomly" + " permuted" + ), +) +def random_integration(adata, test=False): + adata.X = _randomize_features(adata.X) + adata.obsm["X_emb"] = _randomize_features(adata.obsm["X_uni_pca"]) + adata = _randomize_graph(adata) + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="Random Integration by Celltype", + method_summary=( + "Feature values, embedding coordinates, and graph connectivity are all randomly" + " permuted within each celltype label" + ), +) +def celltype_random_integration(adata, test=False): + adata.obsm["X_emb"] = _randomize_features( + adata.obsm["X_uni_pca"], partition=adata.obs["labels"] + ) + adata.X = _randomize_features(adata.X, partition=adata.obs["labels"]) + adata = _randomize_graph( + adata, + partition=adata.obs["labels"].to_numpy(), + ) + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="Random Integration by Batch", + method_summary=( + "Feature values, embedding coordinates, and graph connectivity are all randomly" + " permuted within each batch label" + ), +) +def batch_random_integration(adata, test=False): + adata.obsm["X_emb"] = _randomize_features( + adata.obsm["X_uni_pca"], partition=adata.obs["batch"] + ) + adata.X = _randomize_features(adata.X, partition=adata.obs["batch"]) + adata = _randomize_graph( + adata, + partition=adata.obs["batch"].to_numpy(), + ) + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/_batch_integration/_common/utils.py b/openproblems/tasks/_batch_integration/_common/utils.py new file mode 100644 index 0000000000..ee5d367ff5 --- /dev/null +++ b/openproblems/tasks/_batch_integration/_common/utils.py @@ -0,0 +1,27 @@ +from . import api +from scanpy.pp import highly_variable_genes +from typing import Optional + + +def filter_celltypes(adata, min_celltype_count: Optional[int] = None): + + min_celltype_count = min_celltype_count or api.MIN_CELLS_PER_CELLTYPE + + celltype_counts = adata.obs["labels"].value_counts() + keep_celltypes = celltype_counts[celltype_counts >= min_celltype_count].index + keep_cells = adata.obs["labels"].isin(keep_celltypes) + return adata[keep_cells].copy() + + +def precompute_hvg(adata, n_genes: Optional[int] = None): + + n_genes = n_genes or api.N_HVG_UNINT + hvg_unint = highly_variable_genes( + adata, + n_top_genes=n_genes, + layer="log_normalized", + flavor="cell_ranger", + batch_key="batch", + inplace=False, + ) + return list(hvg_unint[hvg_unint.highly_variable].index) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/README.md b/openproblems/tasks/_batch_integration/batch_integration_embed/README.md index 8ca0f3b096..88d609bc9b 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/README.md +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/README.md @@ -1,5 +1,3 @@ - - # Batch integration embedding This is a sub-task of the overall batch integration task. 
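The random-integration baselines above all reduce to one operation: permute rows, but only within a partition (all cells, cells of one label, or cells of one batch). The effect is easiest to see on a tiny matrix; `randomize_features` below is a standalone copy of the same logic for illustration.

```python
import numpy as np


def randomize_features(X, partition=None):
    """Shuffle rows of X within each group defined by `partition`."""
    X_out = X.copy()
    partition = np.zeros(len(X)) if partition is None else np.asarray(partition)
    for name in np.unique(partition):
        idx = np.argwhere(partition == name).flatten()
        X_out[idx] = X[np.random.permutation(idx)]
    return X_out


X = np.arange(12).reshape(6, 2)
labels = np.array(["a", "a", "a", "b", "b", "b"])
# Rows move among themselves inside "a" and inside "b", never across groups.
print(randomize_features(X, labels))
```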
Batch (or data) integration @@ -16,7 +14,7 @@ sub-tasks for batch integration can be found for: This sub-task was taken from a [benchmarking study of data integration -methods](https://www.biorxiv.org/content/10.1101/2020.05.22.111161v2). +methods](https://openproblems.bio/bibliography#luecken2022benchmarking). ## API @@ -31,9 +29,12 @@ Datasets should contain the following attributes: * `adata.obs["batch"]` with the batch covariate, and * `adata.obs["label"]` with the cell identity label -* `adata.obsm['X_uni']` with a pre-integration embedding (PCA) +* `adata.obsm['X_uni_pca']` with the PCA embedding of the unintegrated representation +* `adata.obsp['uni_connectivities']` with an unintegrated connectivity matrix generated + by `scanpy.pp.neighbors()` * `adata.layers['log_normalized']` with log-normalized data * `adata.X` with log-normalized data +* `adata.uns["organism"]` with either `"mouse"` or `"human"` Methods should assign output to `adata.obsm['X_emb']`. @@ -59,49 +60,3 @@ Metrics can compare: To reuse metrics functions from `scIB`, [`metrics._utils._get_split`](metrics/_utils.py) separates the combined anndata into an integrated and an unintegrated anndata object. - -## Metrics - -In the following, we will give a short description of the implemented metrics. We split -by metrics capturing batch correction meaning the removal of batch effects and metrics -describing biological conservation, meaning how well the biological differences between -cell states are conserved. - -### Batch correction metrics - -#### kBET - -The kBET algorithm (v.0.99.6, release 4c9dafa) determines whether the label composition -of a k nearest neighborhood of a cell is similar to the expected (global) label -composition (Buettner et al., Nat Meth 2019). The test is repeated for a random subset -of cells, and the results are summarized as a rejection rate over all tested -neighborhoods. - -#### Silhouette batch score - -We consider the absolute silhouette width, s(i), on -batch labels per cell i. Here, 0 indicates that batches are well mixed, and any -deviation from 0 indicates a batch effect. - -#### Principal component regression - -Compare the explained variance by before and after integration. Returns a score between -0 and 1 (scaled=True) with 0 if the variance contribution hasn’t changed. The larger the -score, the more different the variance contributions are before and after integration. - -### Biological conservation metrics - -#### Cell cycle score - -The cell-cycle conservation score evaluates how well the cell-cycle effect can be -captured before and after integration. - -#### Isolated label silhouette - -This score evaluates the compactness for the label(s) that is(are) shared by fewest -batches. This indicates how well rare cell types can be preserved after integration. 
- -#### Cell type ASW - -For the bio-conservation score, the ASW is computed on cell identity labels, measuring -their compactness diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/api.py b/openproblems/tasks/_batch_integration/batch_integration_embed/api.py index e7e47528b7..d4d8bd3e44 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/api.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/api.py @@ -1,46 +1,20 @@ -from ....data.sample import load_sample_data -from ....tools.decorators import dataset +from .._common import api -import numpy as np -import scanpy as sc +check_dataset = api.check_dataset +sample_dataset = api.sample_dataset -def check_dataset(adata): - """Check that dataset output fits expected API.""" - - assert "X_uni_pca" in adata.obsm - assert "batch" in adata.obs - assert "labels" in adata.obs - assert "log_normalized" in adata.layers - - return True - - -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" assert "X_emb" in adata.obsm + # check organism was not removed + assert "organism" in adata.uns return True -@dataset() -def sample_dataset(): - """Create a simple dataset to use for testing methods in this task.""" - adata = load_sample_data() - - adata.var.index = adata.var.gene_short_name.astype(str) - sc.pp.normalize_total(adata) - sc.pp.log1p(adata) - adata.layers["log_normalized"] = adata.X - adata.obsm["X_uni_pca"] = sc.pp.pca(adata.X) - adata.obs["batch"] = np.random.choice(2, adata.shape[0], replace=True).astype(str) - adata.obs["labels"] = np.random.choice(5, adata.shape[0], replace=True).astype(str) - adata.var_names_make_unique() - adata.obs_names_make_unique() - return adata - - def sample_method(adata): """Create sample method output for testing metrics in this task.""" adata.obsm["X_emb"] = adata.obsm["X_uni_pca"] + adata.uns["is_baseline"] = False return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/datasets/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_embed/datasets/__init__.py index 4b86a1c17c..bac200686e 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/datasets/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/datasets/__init__.py @@ -1,2 +1,3 @@ -from ...batch_integration_graph.datasets.immune import immune_batch -from ...batch_integration_graph.datasets.pancreas import pancreas_batch +from ..._common.datasets.immune import immune_batch +from ..._common.datasets.lung import lung_batch +from ..._common.datasets.pancreas import pancreas_batch diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/methods/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/__init__.py index dffc02a1f9..6bff40b9ff 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/methods/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/__init__.py @@ -1,3 +1,8 @@ +from ..._common.methods.baseline import batch_random_integration +from ..._common.methods.baseline import celltype_random_integration +from ..._common.methods.baseline import no_integration +from ..._common.methods.baseline import random_integration +from ...batch_integration_graph.methods.baseline import celltype_random_graph from ...batch_integration_graph.methods.combat import combat_full_scaled from ...batch_integration_graph.methods.combat import 
combat_full_unscaled from ...batch_integration_graph.methods.combat import combat_hvg_scaled @@ -28,3 +33,8 @@ from ...batch_integration_graph.methods.scanvi import scanvi_hvg_unscaled from ...batch_integration_graph.methods.scvi import scvi_full_unscaled from ...batch_integration_graph.methods.scvi import scvi_hvg_unscaled +from .baseline import celltype_random_embedding +from .baseline import celltype_random_embedding_jitter +from .baseline import no_integration_batch +from .scalex import scalex_full +from .scalex import scalex_hvg diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py new file mode 100644 index 0000000000..50148d94fd --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py @@ -0,0 +1,54 @@ +from .....tools.decorators import baseline_method +from .....tools.utils import check_version +from ..._common.methods.baseline import _random_embedding + +import numpy as np +import scanpy as sc + + +@baseline_method( + method_name="Random Embedding by Celltype (with jitter)", + method_summary=( + "Cells are embedded as a one-hot encoding of celltype labels, with a small" + " amount of random noise added to the embedding" + ), +) +def celltype_random_embedding_jitter(adata, test=False): + adata.obsm["X_emb"] = _random_embedding(partition=adata.obs["labels"], jitter=0.01) + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="Random Embedding by Celltype", + method_summary="Cells are embedded as a one-hot encoding of celltype labels", +) +def celltype_random_embedding(adata, test=False): + adata.obsm["X_emb"] = _random_embedding(partition=adata.obs["labels"], jitter=None) + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="No Integration by Batch", + method_summary="Cells are embedded by computing PCA independently on each batch", +) +def no_integration_batch(adata, test=False): + """Compute PCA independently on each batch + + See https://github.com/theislab/scib/issues/351 + """ + adata.obsm["X_emb"] = np.zeros((adata.shape[0], 50), dtype=float) + for batch in adata.obs["batch"].unique(): + batch_idx = adata.obs["batch"] == batch + n_comps = min(50, np.sum(batch_idx)) + solver = "full" if n_comps == np.sum(batch_idx) else "arpack" + adata.obsm["X_emb"][batch_idx, :n_comps] = sc.tl.pca( + adata[batch_idx], + n_comps=n_comps, + use_highly_variable=False, + svd_solver=solver, + copy=True, + ).obsm["X_pca"] + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py new file mode 100644 index 0000000000..f4580c90cf --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/scalex.py @@ -0,0 +1,27 @@ +from ...batch_integration_graph.methods.scalex import _scalex +from ...batch_integration_graph.methods.scalex import _scalex_method +from typing import Optional + + +@_scalex_method(method_name="SCALEX (full)") +def scalex_full(adata, test: bool = False, max_iteration: Optional[int] = None): + return _scalex( + adata, + test=test, + max_iteration=max_iteration, + compute_neighbors=False, + compute_features=False, + n_top_features=0, + ) + + 
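`no_integration_batch` above implements the negative control of embedding each batch with its own PCA (see the linked scib issue). A compact version of the same idea using scikit-learn instead of scanpy, shown only to make the shape bookkeeping explicit; it is a sketch, not the implementation used in the task.

```python
import numpy as np
from sklearn.decomposition import PCA

X = np.random.rand(100, 20)
batch = np.random.choice(["b1", "b2"], size=100)

n_comps = 10
emb = np.zeros((X.shape[0], n_comps))
for b in np.unique(batch):
    idx = batch == b
    k = min(n_comps, int(idx.sum()))  # a tiny batch caps its own dimensionality
    emb[idx, :k] = PCA(n_components=k).fit_transform(X[idx])

print(emb.shape)  # (100, 10); unused columns for small batches stay zero
```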
+@_scalex_method(method_name="SCALEX (hvg)") +def scalex_hvg(adata, test: bool = False, max_iteration: Optional[int] = None): + return _scalex( + adata, + test=test, + max_iteration=max_iteration, + compute_neighbors=False, + compute_features=False, + n_top_features=2000, + ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/__init__.py index 215ac44937..95bc254069 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/__init__.py @@ -1,6 +1,10 @@ +from .ari import ari from .cc_score import cc_score +from .graph_connectivity import graph_connectivity +from .iso_label_f1 import isolated_labels_f1 from .iso_label_sil import isolated_labels_sil from .kBET import kBET +from .nmi import nmi from .pcr import pcr from .sil_batch import silhouette_batch from .silhouette import silhouette diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/_utils.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/_utils.py deleted file mode 100644 index 8a4b33cb72..0000000000 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/_utils.py +++ /dev/null @@ -1,8 +0,0 @@ -def _get_split(adata): - uni = adata - uni.obsm["X_pca"] = uni.obsm["X_uni_pca"] - - if "X_emb" not in adata.obsm: - adata.obsm["X_emb"] = adata.obsm["X_pca"] - - return (uni, adata) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/ari.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/ari.py new file mode 100644 index 0000000000..9bfe349d12 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/ari.py @@ -0,0 +1,20 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import embedding_to_graph + +""" +The Rand index compares the overlap of two clusterings; +it considers both correct clustering overlaps while also counting correct +disagreements between two clusterings. +Similar to NMI, we compared the cell-type labels with the NMI-optimized +Louvain clustering computed on the integrated dataset. +The adjustment of the Rand index corrects for randomly correct labels. +An ARI of 0 or 1 corresponds to random labeling or a perfect match, +respectively. +We also used the scikit-learn (v.0.22.1) implementation of the ARI. +""" + + +@metric(**graph_metrics.ari.metadata) +def ari(adata): + return graph_metrics.ari(embedding_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py index 322891b202..616be47861 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py @@ -1,4 +1,5 @@ from .....tools.decorators import metric +from .utils import get_split """ The cell-cycle conservation score evaluates how well the cell-cycle effect can be @@ -20,15 +21,21 @@ @metric( metric_name="Cell Cycle Score", + metric_summary=( + "The cell-cycle conservation score evaluates how well the cell-cycle effect can" + " be captured before and after integration." 
+ ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) -def cc_score(adata, test=False): - from ._utils import _get_split +def cc_score(adata): from scib.metrics import cell_cycle try: - cc = cell_cycle(*_get_split(adata), "batch", embed="X_emb", organism="human") + cc = cell_cycle( + *get_split(adata), "batch", embed="X_emb", organism=adata.uns["organism"] + ) except ValueError: cc = 0 diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/graph_connectivity.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/graph_connectivity.py new file mode 100644 index 0000000000..4fc69fe1a2 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/graph_connectivity.py @@ -0,0 +1,26 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import embedding_to_graph + +""" +The graph connectivity metric assesses whether the kNN graph representation, +G, of the integrated data directly connects all cells with the same cell +identity label. For each cell identity label c, we created the subset kNN +graph G(Nc;Ec) to contain only cells from a given label. Using these subset +kNN graphs, we computed the graph connectivity score using the equation: + +gc =1/|C| Σc∈C |LCC(G(Nc;Ec))|/|Nc|. + +Here, C represents the set of cell identity labels, |LCC()| is the number +of nodes in the largest connected component of the graph, and |Nc| is the +number of nodes with cell identity c. The resultant score has a range +of (0;1], where 1 indicates that all cells with the same cell identity +are connected in the integrated kNN graph, and the lowest possible score +indicates a graph where no cell is connected. As this score is computed +on the kNN graph, it can be used to evaluate all integration outputs. +""" + + +@metric(**graph_metrics.graph_connectivity.metadata) +def graph_connectivity(adata): + return graph_metrics.graph_connectivity(embedding_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_f1.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_f1.py new file mode 100644 index 0000000000..578bceed26 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_f1.py @@ -0,0 +1,31 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import embedding_to_graph + +""" +We developed two isolated label scores to evaluate how well the data integration methods +dealt with cell identity labels shared by few batches. Specifically, we identified +isolated cell labels as the labels present in the least number of batches in the +integration task. +The score evaluates how well these isolated labels separate from other cell identities. +We implemented the isolated label metric in two versions: +(1) the best clustering of the isolated label (F1 score) and +(2) the global ASW of the isolated label. For the cluster-based score, +we first optimize the cluster assignment of the isolated label using the F1 score +across louvain clustering resolutions ranging from 0.1 to 2 in resolution steps of 0.1. +The optimal F1 score for the isolated label is then used as the metric score. 
+The F1 score is a weighted mean of precision and recall given by the equation: +𝐹1=2×(precision×recall)/(precision+recall). + +It returns a value between 0 and 1, +where 1 shows that all of the isolated label cells and no others are captured in +the cluster. For the isolated label ASW score, we compute the ASW of isolated +versus nonisolated labels on the PCA embedding (ASW metric above) and scale this +score to be between 0 and 1. The final score for each metric version consists of +the mean isolated score of all isolated labels. +""" + + +@metric(**graph_metrics.isolated_labels_f1.metadata) +def isolated_labels_f1(adata): + return graph_metrics.isolated_labels_f1(embedding_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py index c3575de5b8..617e2db6fa 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/iso_label_sil.py @@ -14,8 +14,14 @@ @metric( metric_name="Isolated label Silhouette", + metric_summary=( + "This score evaluates the compactness for the label(s) that is(are) shared by" + " fewest batches. It indicates how well rare cell types can be preserved after" + " integration." + ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) def isolated_labels_sil(adata): from scib.metrics import isolated_labels diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py index 271eeadbd6..9da1f03e20 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/kBET.py @@ -26,6 +26,13 @@ @metric( metric_name="kBET", + metric_summary=( + "kBET determines whether the label composition of a k nearest neighborhood of a" + " cell is similar to the expected (global) label composition. The test is" + " repeated for a random subset of cells, and the results are summarized as a" + " rejection rate over all tested neighborhoods." + ), + paper_reference="bttner2018test", maximize=True, image="openproblems-r-extras", ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/nmi.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/nmi.py new file mode 100644 index 0000000000..71aa9acdb7 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/nmi.py @@ -0,0 +1,19 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import embedding_to_graph + +"""NMI compares the overlap of two clusterings. +We used NMI to compare the cell-type labels with Louvain clusters computed on +the integrated dataset. The overlap was scaled using the mean of the entropy terms +for cell-type and cluster labels. Thus, NMI scores of 0 or 1 correspond to uncorrelated +clustering or a perfect match, respectively. We performed optimized Louvain clustering +for this metric to obtain the best match between clusters and labels. +Louvain clustering was performed at a resolution range of 0.1 to 2 in steps of 0.1, +and the clustering output with the highest NMI with the label set was used. 
We used +the scikit-learn27 (v.0.22.1) implementation of NMI. +""" + + +@metric(**graph_metrics.nmi.metadata) +def nmi(adata): + return graph_metrics.nmi(embedding_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py index 886f26078b..5553754372 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/pcr.py @@ -1,4 +1,5 @@ from .....tools.decorators import metric +from .utils import get_split """ Principal component regression, derived from PCA, has previously been used to quantify @@ -17,11 +18,17 @@ @metric( metric_name="PC Regression", + metric_summary=( + "This compares the explained variance by batch before and after integration. It" + " returns a score between 0 and 1 (scaled=True) with 0 if the variance" + " contribution hasn’t changed. The larger the score, the more different the" + " variance contributions are before and after integration." + ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) def pcr(adata): - from ._utils import _get_split from scib.metrics import pcr_comparison - return pcr_comparison(*_get_split(adata), "batch", embed="X_emb") + return pcr_comparison(*get_split(adata), "batch", embed="X_emb") diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py index c02e5e42aa..45cf2d2f9e 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/sil_batch.py @@ -23,8 +23,14 @@ @metric( metric_name="Batch ASW", + metric_summary=( + "The absolute silhouette width is computed over batch labels per cell. As 0" + " then indicates that batches are well mixed and any deviation from 0 indicates" + " a batch effect, we use the 1-abs(ASW) to map the score to the scale [0;1]." + ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) def silhouette_batch(adata): from scib.metrics import silhouette_batch diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py index 36991e1d67..3b2afb4b0a 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/silhouette.py @@ -11,8 +11,13 @@ @metric( metric_name="Silhouette", + metric_summary=( + "The absolute silhouette with is computed on cell identity labels, measuring" + " their compactness." 
+ ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) def silhouette(adata): from scib.metrics import silhouette diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/utils.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/utils.py new file mode 100644 index 0000000000..455e92ec76 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/utils.py @@ -0,0 +1,16 @@ +def embedding_to_graph(adata): + import scanpy as sc + + if adata.uns["is_baseline"] and "neighbors" in adata.uns: + # precomputed; do nothing + return adata + + sc.pp.neighbors(adata, use_rep="X_emb") + return adata + + +def get_split(adata): + uni = adata + uni.obsm["X_pca"] = uni.obsm["X_uni_pca"] + uni.X = uni.layers["log_normalized"] + return (uni, adata) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/README.md b/openproblems/tasks/_batch_integration/batch_integration_feature/README.md index 76ce3f1e33..2b3ba2a5dc 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_feature/README.md +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/README.md @@ -1,5 +1,3 @@ - - # Batch integration feature This is a sub-task of the overall batch integration task. Batch (or data) integration @@ -7,13 +5,14 @@ integrates datasets across batches that arise from various biological and techni sources. Methods that integrate batches typically have three different types of output: a corrected feature matrix, a joint embedding across batches, and/or an integrated cell-cell similarity graph (e.g., a kNN graph). This sub-task focuses on all methods -that can output feature matrices. Other sub-tasks for batch integration can be found for: +that can output feature matrices. Other sub-tasks for batch integration can be found +for: * [graphs](../batch_integration_graph/), and * [embeddings](../batch_integration_embed/) This sub-task was taken from a [benchmarking study of data integration -methods](https://www.biorxiv.org/content/10.1101/2020.05.22.111161v2). +methods](https://openproblems.bio/bibliography#luecken2022benchmarking). ## API @@ -28,18 +27,26 @@ Datasets should contain the following attributes: * `adata.obs["batch"]` with the batch covariate, and * `adata.obs["label"]` with the cell identity label +* `adata.obsm['X_uni_pca']` with the PCA embedding of the unintegrated representation +* `adata.obsp['uni_connectivities']` with an unintegrated connectivity matrix generated + by `scanpy.pp.neighbors()` * `adata.layers['counts']` with raw, integer UMI count data, * `adata.layers['log_normalized']` with log-normalized data and * `adata.X` with log-normalized data +* `adata.uns['n_genes_pre']` with the number of genes present before integration +* `adata.uns['hvg_unint']` with a list of 2000 highly variable genes + prior to integration (for the hvg conservation metric) Methods should store their batch-corrected gene expression matrix in `adata.X`. +The output should contain at least 2000 features. The `openproblems-python-batch-integration` docker container is used for the methods that can be installed without package conflicts. For R methods, the `openproblems-r-extras` container is used.
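For illustration, a minimal corrected-feature method satisfying this API could look like the sketch below. The function name is hypothetical and the use of ComBat is only an example (ComBat is one of the methods wrapped in `methods/` in this diff); real submissions are additionally registered with the `method` decorator.

```python
# Illustrative sketch of a corrected-feature method for this sub-task.
# Assumes the dataset attributes listed above: adata.obs["batch"] and
# adata.layers["log_normalized"].
import numpy as np
import scanpy as sc
import scipy.sparse


def combat_example(adata, test=False):
    # start from the log-normalized expression and correct it per batch
    X = adata.layers["log_normalized"]
    adata.X = X.toarray() if scipy.sparse.issparse(X) else np.asarray(X)
    sc.pp.combat(adata, key="batch")
    # the corrected matrix stays in adata.X, as this API requires
    return adata
```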
-Most methods in the current task are run in four different scenarios that include scaling +Most methods in the current task are run in four different scenarios that include +caling and highly variable gene selection: * `full_unscaled` @@ -53,10 +60,3 @@ Metrics for this task compare: To reuse metrics functions from `scIB`, [`metrics._utils._get_split`](metrics/_utils.py) separates the combined anndata into an integrated and an unintegrated anndata object. - -## Metrics - -### HVG conservation - -Metric that computes the average percentage of overlapping highly variable genes per -batch before and after integration. diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/api.py b/openproblems/tasks/_batch_integration/batch_integration_feature/api.py index 330a3a28b4..38feaf63a7 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_feature/api.py +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/api.py @@ -1,51 +1,26 @@ -from ....data.sample import load_sample_data -from ....tools.decorators import dataset +from .._common import api -import numpy as np -import scanpy as sc +import functools +check_dataset = functools.partial(api.check_dataset, do_check_hvg=True) +sample_dataset = api.sample_dataset -def check_dataset(adata): - """Check that dataset output fits expected API.""" - assert "batch" in adata.obs - assert "labels" in adata.obs - assert "log_normalized" in adata.layers - assert "counts" in adata.layers - assert adata.var_names.is_unique - assert adata.obs_names.is_unique - - return True - - -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" assert "log_normalized" in adata.layers - assert adata.layers["log_normalized"] is not adata.X + # check hvg_unint is still there + assert "hvg_unint" in adata.uns + # check n_vars is not too small + assert "n_genes_pre" in adata.uns + assert adata.n_vars >= min(api.N_HVG_UNINT, adata.uns["n_genes_pre"]) + if not is_baseline: + assert adata.layers["log_normalized"] is not adata.X return True -@dataset() -def sample_dataset(): - """Create a simple dataset to use for testing methods in this task.""" - adata = load_sample_data() - - adata.var.index = adata.var.gene_short_name.astype(str) - sc.pp.normalize_total(adata) - - adata.obsm["X_uni"] = sc.pp.pca(adata.X) - adata.obs["batch"] = np.random.choice(2, adata.shape[0], replace=True).astype(str) - adata.obs["labels"] = np.random.choice(5, adata.shape[0], replace=True).astype(str) - adata.layers["counts"] = adata.X - adata.layers["log_normalized"] = adata.X.multiply( - 10000 / adata.X.sum(axis=1) - ).tocsr() - adata.var_names_make_unique() - adata.obs_names_make_unique() - return adata - - def sample_method(adata): """Create sample method output for testing metrics in this task.""" adata.X = adata.X.multiply(2) + adata.uns["is_baseline"] = False return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/datasets/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_feature/datasets/__init__.py index 4b86a1c17c..bac200686e 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_feature/datasets/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/datasets/__init__.py @@ -1,2 +1,3 @@ -from ...batch_integration_graph.datasets.immune import immune_batch -from ...batch_integration_graph.datasets.pancreas import pancreas_batch +from ..._common.datasets.immune import immune_batch +from 
..._common.datasets.lung import lung_batch +from ..._common.datasets.pancreas import pancreas_batch diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/methods/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_feature/methods/__init__.py index f1243781f9..2db96e595d 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_feature/methods/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/methods/__init__.py @@ -1,3 +1,14 @@ +# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_full_scaled +# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_full_unscaled +# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_hvg_scaled +# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_hvg_unscaled +from ..._common.methods.baseline import batch_random_integration +from ..._common.methods.baseline import celltype_random_integration +from ..._common.methods.baseline import no_integration +from ..._common.methods.baseline import random_integration +from ...batch_integration_embed.methods.baseline import celltype_random_embedding +from ...batch_integration_embed.methods.baseline import no_integration_batch +from ...batch_integration_graph.methods.baseline import celltype_random_graph from ...batch_integration_graph.methods.combat import combat_full_scaled from ...batch_integration_graph.methods.combat import combat_full_unscaled from ...batch_integration_graph.methods.combat import combat_hvg_scaled @@ -28,6 +39,8 @@ from ...batch_integration_graph.methods.scanorama import scanorama_feature_full_unscaled from ...batch_integration_graph.methods.scanorama import scanorama_feature_hvg_scaled from ...batch_integration_graph.methods.scanorama import scanorama_feature_hvg_unscaled +from .scalex import scalex_full +from .scalex import scalex_hvg # from ...batch_integration_graph.methods.scgen import scgen_full_scaled # from ...batch_integration_graph.methods.scgen import scgen_full_unscaled @@ -38,8 +51,3 @@ # from ...batch_integration_graph.methods.seurat_full import seurat_full_unscaled # from ...batch_integration_graph.methods.seurat_full import seurat_hvg_scaled # from ...batch_integration_graph.methods.seurat_full import seurat_hvg_unscaled - -# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_full_scaled -# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_full_unscaled -# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_hvg_scaled -# from ...batch_integration_graph.methods.seuratrpca import seuratrpca_hvg_unscaled diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py b/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py new file mode 100644 index 0000000000..1e6e425c46 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/methods/scalex.py @@ -0,0 +1,27 @@ +from ...batch_integration_graph.methods.scalex import _scalex +from ...batch_integration_graph.methods.scalex import _scalex_method +from typing import Optional + + +@_scalex_method(method_name="SCALEX (full)") +def scalex_full(adata, test: bool = False, max_iteration: Optional[int] = None): + return _scalex( + adata, + test=test, + max_iteration=max_iteration, + compute_neighbors=False, + compute_features=True, + n_top_features=0, + ) + + +@_scalex_method(method_name="SCALEX (hvg)") +def scalex_hvg(adata, test: bool = False, max_iteration: 
Optional[int] = None): + return _scalex( + adata, + test=test, + max_iteration=max_iteration, + compute_neighbors=False, + compute_features=True, + n_top_features=2000, + ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/__init__.py index fde16aa8c0..8bd5a56992 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/__init__.py @@ -1 +1,11 @@ +from .ari import ari +from .cc_score import cc_score +from .graph_connectivity import graph_connectivity from .hvg_conservation import hvg_conservation +from .iso_label_f1 import isolated_labels_f1 +from .iso_label_sil import isolated_labels_sil +from .kBET import kBET +from .nmi import nmi +from .pcr import pcr +from .sil_batch import silhouette_batch +from .silhouette import silhouette diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/ari.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/ari.py new file mode 100644 index 0000000000..48bef53b28 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/ari.py @@ -0,0 +1,20 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import feature_to_graph + +""" +The Rand index compares the overlap of two clusterings; +it considers both correct clustering overlaps while also counting correct +disagreements between two clusterings. +Similar to NMI, we compared the cell-type labels with the NMI-optimized +Louvain clustering computed on the integrated dataset. +The adjustment of the Rand index corrects for randomly correct labels. +An ARI of 0 or 1 corresponds to random labeling or a perfect match, +respectively. +We also used the scikit-learn (v.0.22.1) implementation of the ARI. +""" + + +@metric(**graph_metrics.ari.metadata) +def ari(adata): + return graph_metrics.ari(feature_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/cc_score.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/cc_score.py new file mode 100644 index 0000000000..778ac40e29 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/cc_score.py @@ -0,0 +1,25 @@ +from .....tools.decorators import metric +from ...batch_integration_embed import metrics as embed_metrics +from .utils import feature_to_embedding + +""" +The cell-cycle conservation score evaluates how well the cell-cycle effect can be +captured before and after integration. We computed cell-cycle scores using Scanpy’s +score_cell_cycle function with a reference gene set from Tirosh et al for the +respective cell-cycle phases. We used the same set of cell-cycle genes for mouse and +human data (using capitalization to convert between the gene symbols). We then computed +the variance contribution of the resulting S and G2/M phase scores using principal +component regression (Principal component regression), which was performed for each +batch separately. The differences in variance before, Varbefore, and after, Varafter, +integration were aggregated into a final score between 0 and 1, using the equation: +CCconservation=1−|Varafter−Varbefore|/Varbefore. 
+ +In this equation, values close to 0 indicate lower conservation and 1 indicates complete +conservation of the variance explained by cell cycle. In other words, the variance +remains unchanged within each batch for complete conservation, while any deviation from +the preintegration variance contribution reduces the score.""" + + +@metric(**embed_metrics.cc_score.metadata) +def cc_score(adata): + return embed_metrics.cc_score(feature_to_embedding(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/graph_connectivity.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/graph_connectivity.py new file mode 100644 index 0000000000..4289f1174d --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/graph_connectivity.py @@ -0,0 +1,26 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import feature_to_graph + +""" +The graph connectivity metric assesses whether the kNN graph representation, +G, of the integrated data directly connects all cells with the same cell +identity label. For each cell identity label c, we created the subset kNN +graph G(Nc;Ec) to contain only cells from a given label. Using these subset +kNN graphs, we computed the graph connectivity score using the equation: + +gc =1/|C| Σc∈C |LCC(G(Nc;Ec))|/|Nc|. + +Here, C represents the set of cell identity labels, |LCC()| is the number +of nodes in the largest connected component of the graph, and |Nc| is the +number of nodes with cell identity c. The resultant score has a range +of (0;1], where 1 indicates that all cells with the same cell identity +are connected in the integrated kNN graph, and the lowest possible score +indicates a graph where no cell is connected. As this score is computed +on the kNN graph, it can be used to evaluate all integration outputs. +""" + + +@metric(**graph_metrics.graph_connectivity.metadata) +def graph_connectivity(adata): + return graph_metrics.graph_connectivity(feature_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/hvg_conservation.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/hvg_conservation.py index d40b36b740..df23837d91 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/hvg_conservation.py +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/hvg_conservation.py @@ -20,13 +20,19 @@ @metric( metric_name="HVG conservation", + metric_summary=( + "This metric computes the average percentage of overlapping highly variable" + " genes per batch before and after integration." 
+ ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", + image="openproblems-r-pytorch", ) def hvg_conservation(adata): from scib.metrics import hvg_overlap adata_unint = adata.copy() adata_unint.X = adata_unint.layers["log_normalized"] + hvg_both = list(set(adata.uns["hvg_unint"]).intersection(adata.var_names)) - return hvg_overlap(adata_unint, adata, "batch") + return hvg_overlap(adata_unint, adata[:, hvg_both], "batch") diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_f1.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_f1.py new file mode 100644 index 0000000000..048ad0996a --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_f1.py @@ -0,0 +1,31 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import feature_to_graph + +""" +We developed two isolated label scores to evaluate how well the data integration methods +dealt with cell identity labels shared by few batches. Specifically, we identified +isolated cell labels as the labels present in the least number of batches in the +integration task. +The score evaluates how well these isolated labels separate from other cell identities. +We implemented the isolated label metric in two versions: +(1) the best clustering of the isolated label (F1 score) and +(2) the global ASW of the isolated label. For the cluster-based score, +we first optimize the cluster assignment of the isolated label using the F1 score +across louvain clustering resolutions ranging from 0.1 to 2 in resolution steps of 0.1. +The optimal F1 score for the isolated label is then used as the metric score. +The F1 score is a weighted mean of precision and recall given by the equation: +𝐹1=2×(precision×recall)/(precision+recall). + +It returns a value between 0 and 1, +where 1 shows that all of the isolated label cells and no others are captured in +the cluster. For the isolated label ASW score, we compute the ASW of isolated +versus nonisolated labels on the PCA embedding (ASW metric above) and scale this +score to be between 0 and 1. The final score for each metric version consists of +the mean isolated score of all isolated labels. +""" + + +@metric(**graph_metrics.isolated_labels_f1.metadata) +def isolated_labels_f1(adata): + return graph_metrics.isolated_labels_f1(feature_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_sil.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_sil.py new file mode 100644 index 0000000000..9f1e3e1115 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/iso_label_sil.py @@ -0,0 +1,19 @@ +from .....tools.decorators import metric +from ...batch_integration_embed import metrics as embed_metrics +from .utils import feature_to_embedding + +""" +Isolated cell labels are defined as the labels present in the least number +of batches in the integration task. The score evaluates how well these isolated labels +separate from other cell identities. + +The isolated label ASW score is obtained by computing the +ASW of isolated versus non-isolated labels on the PCA embedding (ASW metric above) and +scaling this score to be between 0 and 1. The final score for each metric version +consists of the mean isolated score of all isolated labels.
+""" + + +@metric(**embed_metrics.isolated_labels_sil.metadata) +def isolated_labels_sil(adata): + return embed_metrics.isolated_labels_sil(feature_to_embedding(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/kBET.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/kBET.py new file mode 100644 index 0000000000..f8ed86d5a7 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/kBET.py @@ -0,0 +1,31 @@ +from .....tools.decorators import metric +from ...batch_integration_embed import metrics as embed_metrics +from .utils import feature_to_embedding + +""" +The kBET algorithm (v.0.99.6, release 4c9dafa) determines whether the label composition +of a k nearest neighborhood of a cell is similar to the expected (global) label +composition (Buettner et al., Nat Meth 2019). The test is repeated for a random subset +of cells, and the results are summarized as a rejection rate over all tested +neighborhoods. Thus, kBET works on a kNN graph. + +We compute kNN graphs where k = 50 for joint embeddings and corrected feature outputs +via Scanpy preprocessing steps. To test for technical effects and to account for +cell-type frequency shifts across datasets, we applied kBET +separately on the batch variable for each cell identity label. Using the kBET defaults, +a k equal to the median of the number of cells per batch within each label is used for +this computation. Additionally, we set the minimum and maximum thresholds of k to 10 and +100, respectively. As kNN graphs that have been subset by cell identity labels may no +longer be connected, we compute kBET per connected component. If >25% of cells were +assigned to connected components too small for kBET computation (smaller than k × 3), +we assigned a kBET score of 1 to denote poor batch removal. Subsequently, kBET scores +for each label were averaged and subtracted from 1 to give a final kBET score. + +In Open Problems we do not run kBET on graph outputs to avoid computation-intensive +diffusion processes being run. +""" + + +@metric(**embed_metrics.kBET.metadata) +def kBET(adata): + return embed_metrics.kBET(feature_to_embedding(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/nmi.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/nmi.py new file mode 100644 index 0000000000..21b1cc55ba --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/nmi.py @@ -0,0 +1,19 @@ +from .....tools.decorators import metric +from ...batch_integration_graph import metrics as graph_metrics +from .utils import feature_to_graph + +"""NMI compares the overlap of two clusterings. +We used NMI to compare the cell-type labels with Louvain clusters computed on +the integrated dataset. The overlap was scaled using the mean of the entropy terms +for cell-type and cluster labels. Thus, NMI scores of 0 or 1 correspond to uncorrelated +clustering or a perfect match, respectively. We performed optimized Louvain clustering +for this metric to obtain the best match between clusters and labels. +Louvain clustering was performed at a resolution range of 0.1 to 2 in steps of 0.1, +and the clustering output with the highest NMI with the label set was used. We used +the scikit-learn27 (v.0.22.1) implementation of NMI. 
+""" + + +@metric(**graph_metrics.nmi.metadata) +def nmi(adata): + return graph_metrics.nmi(feature_to_graph(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/pcr.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/pcr.py new file mode 100644 index 0000000000..3a556dbe2f --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/pcr.py @@ -0,0 +1,22 @@ +from .....tools.decorators import metric +from ...batch_integration_embed import metrics as embed_metrics +from .utils import feature_to_embedding + +""" +Principal component regression, derived from PCA, has previously been used to quantify +batch removal. Briefly, the R2 was calculated from a linear regression of the +covariate of interest (for example, the batch variable B) onto each principal component. +The variance contribution of the batch effect per principal component was then +calculated as the product of the variance explained by the ith principal component (PC) +and the corresponding R2(PCi|B). The sum across all variance contributions by the batch +effects in all principal components gives the total variance explained by the batch +variable as follows: +Var(𝐶|𝐵)=∑𝑖=1𝐺Var(𝐶|PC𝑖)×𝑅2(PC𝑖|𝐵), + +where Var(C|PCi) is the variance of the data matrix C explained by the ith principal +component.""" + + +@metric(**embed_metrics.pcr.metadata) +def pcr(adata): + return embed_metrics.pcr(feature_to_embedding(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/sil_batch.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/sil_batch.py new file mode 100644 index 0000000000..ac98714333 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/sil_batch.py @@ -0,0 +1,28 @@ +from .....tools.decorators import metric +from ...batch_integration_embed import metrics as embed_metrics +from .utils import feature_to_embedding + +""" +We consider the absolute silhouette width, s(i), on +batch labels per cell i. Here, 0 indicates that batches are well mixed, and any +deviation from 0 indicates a batch effect: +𝑠batch(𝑖)=|𝑠(𝑖)|. + +To ensure higher scores indicate better batch mixing, these scores are scaled by +subtracting them from 1. As we expect batches to integrate within cell identity +clusters, we compute the batchASWj score for each cell label j separately, +using the equation: +batchASW𝑗=1|𝐶𝑗|∑𝑖∈𝐶𝑗1−𝑠batch(𝑖), + +where Cj is the set of cells with the cell label j and |Cj| denotes the number of cells +in that set. + +To obtain the final batchASW score, the label-specific batchASWj scores are averaged: +batchASW=1|𝑀|∑𝑗∈𝑀batchASW𝑗. 
+ +Here, M is the set of unique cell labels.""" + + +@metric(**embed_metrics.silhouette_batch.metadata) +def silhouette_batch(adata): + return embed_metrics.silhouette_batch(feature_to_embedding(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/silhouette.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/silhouette.py new file mode 100644 index 0000000000..dcd29a8f71 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/silhouette.py @@ -0,0 +1,16 @@ +from .....tools.decorators import metric +from ...batch_integration_embed import metrics as embed_metrics +from .utils import feature_to_embedding + +""" +For the bio-conservation score, the ASW was computed on cell identity labels and +scaled to a value between 0 and 1 using the equation: +celltypeASW=(ASW_C+1)/2, + +where C denotes the set of all cell identity labels. +For information about the batch silhouette score, check sil_batch.""" + + +@metric(**embed_metrics.silhouette.metadata) +def silhouette(adata): + return embed_metrics.silhouette(feature_to_embedding(adata)) diff --git a/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/utils.py b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/utils.py new file mode 100644 index 0000000000..d2decfa054 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_feature/metrics/utils.py @@ -0,0 +1,18 @@ +from ...batch_integration_embed.metrics.utils import embedding_to_graph + + +def feature_to_embedding(adata): + import scanpy as sc + + if adata.uns["is_baseline"] and "X_emb" in adata.obsm: + # precomputed; do nothing + return adata + + adata.obsm["X_emb"] = sc.pp.pca(adata.X) + return adata + + +def feature_to_graph(adata): + adata = feature_to_embedding(adata) + adata = embedding_to_graph(adata) + return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/README.md b/openproblems/tasks/_batch_integration/batch_integration_graph/README.md index 97a30d8432..ba39bc2d58 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/README.md +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/README.md @@ -1,9 +1,5 @@ - - # Batch integration (graph) -## The task - This is a sub-task of the overall batch integration task. Batch (or data) integration methods integrate datasets across batches that arise from various biological and technical sources. Methods that integrate batches typically have three different types @@ -17,14 +13,7 @@ sub-tasks for batch integration can be found for: * [corrected features](../batch_integration_feature/) This sub-task was taken from a [benchmarking study of data integration -methods](https://www.biorxiv.org/content/10.1101/2020.05.22.111161v2). - -## The metrics - -Metrics for batch integration (graph) aim to TODO - -* **TODO**: TODO -* **TODO**: TODO +methods](https://openproblems.bio/bibliography#luecken2022benchmarking). 
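For orientation, the snippet below sketches how a method that only produces a joint embedding can be adapted to this sub-task by building a kNN graph on that embedding; the exact fields expected of datasets and methods are listed in the API section that follows. The helper name is illustrative and simply mirrors the `sc.pp.neighbors` call used by the baseline and utility functions elsewhere in this diff.

```python
# Illustrative helper: convert an integrated embedding into the graph output
# scored in this sub-task. Assumes the embedding is stored in adata.obsm["X_emb"].
import scanpy as sc


def embedding_to_graph_output(adata):
    # populates adata.uns["neighbors"], adata.obsp["connectivities"] and
    # adata.obsp["distances"], which the method checks for this task expect
    sc.pp.neighbors(adata, use_rep="X_emb")
    return adata
```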
## API @@ -40,7 +29,7 @@ Datasets should contain the following attributes: * `adata.obs["batch"]` with the batch covariate, * `adata.obs["label"]` with the cell identity label, * `adata.layers['counts']` with raw, integer UMI count data, and -* `adata.obsm['X_uni']` with the PCA embedding of the unintegrated representation +* `adata.obsm['X_uni_pca']` with the PCA embedding of the unintegrated representation * `adata.obsp['uni_connectivities']` with an unintegrated connectivity matrix generated by `scanpy.pp.neighbors()` * `adata.X` with log-normalized data diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/api.py b/openproblems/tasks/_batch_integration/batch_integration_graph/api.py index dd45a42aed..23a1fb6fe6 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/api.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/api.py @@ -1,42 +1,17 @@ -from ....data.sample import load_sample_data -from ....tools.decorators import dataset +from .._common import api -import numpy as np -import scanpy as sc +MIN_CELLS_PER_CELLTYPE = 50 +check_dataset = api.check_dataset +sample_dataset = api.sample_dataset -def check_dataset(adata): - """Check that dataset output fits expected API.""" - assert "X_uni_pca" in adata.obsm - assert "batch" in adata.obs - assert "labels" in adata.obs - assert "uni_connectivities" in adata.obsp - assert "log_normalized" in adata.layers - - return True - - -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" - assert "connectivities" in adata.obsp - assert "distances" in adata.obsp + api.check_neighbors(adata, "neighbors", "connectivities", "distances") return True -@dataset() -def sample_dataset(): - """Create a simple dataset to use for testing methods in this task.""" - adata = load_sample_data() - adata.obsm["X_uni_pca"] = sc.pp.pca(adata.X) - adata.layers["log_normalized"] = adata.X - adata.obs["batch"] = np.random.choice(2, adata.shape[0], replace=True).astype(str) - adata.obs["labels"] = np.random.choice(5, adata.shape[0], replace=True).astype(str) - - sc.pp.neighbors(adata, use_rep="X_uni_pca", key_added="uni") - return adata - - def sample_method(adata): """Create sample method output for testing metrics in this task.""" import scanpy as sc diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/__init__.py index 3369c29cd4..bac200686e 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/__init__.py @@ -1,2 +1,3 @@ -from .immune import immune_batch -from .pancreas import pancreas_batch +from ..._common.datasets.immune import immune_batch +from ..._common.datasets.lung import lung_batch +from ..._common.datasets.pancreas import pancreas_batch diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/__init__.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/__init__.py index 308a6462c6..8fcbb6dac9 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/__init__.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/__init__.py @@ -1,3 +1,8 @@ +from ..._common.methods.baseline import batch_random_integration +from ..._common.methods.baseline import celltype_random_integration +from ..._common.methods.baseline 
import no_integration +from ..._common.methods.baseline import random_integration +from .baseline import celltype_random_graph from .bbknn import bbknn_full_scaled from .bbknn import bbknn_full_unscaled from .bbknn import bbknn_hvg_scaled @@ -29,6 +34,8 @@ from .mnn import mnn_full_unscaled from .mnn import mnn_hvg_scaled from .mnn import mnn_hvg_unscaled +from .scalex import scalex_full +from .scalex import scalex_hvg # from .saucie_embed import saucie_embed_full_scaled # from .saucie_embed import saucie_embed_full_unscaled diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/_utils.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/_utils.py index 3f34a68054..f549bd31f1 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/_utils.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/_utils.py @@ -4,6 +4,8 @@ def hvg_batch(adata, batch_key, target_genes, adataOut): if adata.n_vars < 2000: return adata else: + # uns and var get trampled + uns = adata.uns.copy() var = adata.var.copy() adata = hvg_batch( adata, @@ -13,13 +15,17 @@ def hvg_batch(adata, batch_key, target_genes, adataOut): adataOut=adataOut, ) adata.var = var.loc[adata.var.index] + adata.uns = uns return adata def scale_batch(adata, batch_key): from scib.preprocessing import scale_batch + # uns and var get trampled + uns = adata.uns.copy() var = adata.var.copy() adata = scale_batch(adata, batch_key) adata.var = var.loc[adata.var_names] + adata.uns = uns return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py new file mode 100644 index 0000000000..2c876251a1 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/baseline.py @@ -0,0 +1,19 @@ +from .....tools.decorators import baseline_method +from .....tools.utils import check_version +from ..._common.methods.baseline import _random_embedding + +import scanpy as sc + + +@baseline_method( + method_name="Random Graph by Celltype", + method_summary=( + "Cells are embedded as a one-hot encoding of celltype labels. A graph is then" + " built on this embedding" + ), +) +def celltype_random_graph(adata, test=False): + adata.obsm["X_emb"] = _random_embedding(partition=adata.obs["labels"]) + sc.pp.neighbors(adata, use_rep="X_emb") + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py index 017ca8f766..d495e18788 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/bbknn.py @@ -6,11 +6,17 @@ _bbknn_method = functools.partial( method, + method_summary=( + "BBKNN or batch balanced k nearest neighbours graph is built for each cell by" + " identifying its k nearest neighbours within each defined batch separately," + " creating independent neighbour sets for each cell in each batch. These sets" + " are then combined and processed with the UMAP algorithm for visualisation." 
+ ), paper_name="BBKNN: fast batch alignment of single cell transcriptomes", - paper_url="https://academic.oup.com/bioinformatics/article/36/3/964/5545955", + paper_reference="polanski2020bbknn", paper_year=2020, code_url="https://github.com/Teichlab/bbknn", - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py index 3043a552e1..d8a67d8421 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/combat.py @@ -5,12 +5,22 @@ _combat_method = functools.partial( method, - paper_name="Adjusting batch effects in microarray expression data using " - "empirical Bayes methods", - paper_url="https://academic.oup.com/biostatistics/article/8/1/118/252073", + method_summary=( + "ComBat uses an Empirical Bayes (EB) approach to correct for batch effects. It" + " estimates batch-specific parameters by pooling information across genes in" + " each batch and shrinks the estimates towards the overall mean of the batch" + " effect estimates across all genes. These parameters are then used to adjust" + " the data for batch effects, leading to more accurate and reproducible" + " results." + ), + paper_name=( + "Adjusting batch effects in microarray expression data using empirical Bayes" + " methods" + ), + paper_reference="hansen2012removing", paper_year=2007, code_url="https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.combat.html", - image="openproblems-python-batch-integration", + image="openproblems-r-pytorch", ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py index 1391d076e9..03680f03dd 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/fastmnn.py @@ -7,8 +7,15 @@ _fastmnn_method = functools.partial( method, + method_summary=( + "fastMNN performs a multi-sample PCA to reduce dimensionality, identifying MNN" + " paris in the low-dimensional space, and then correcting the target batch" + " towards the reference using locally weighted correction vectors. The" + " corrected target batch is then merged with the reference. The process is" + " repeated with the next target batch except for the PCA step." 
+ ), paper_name="A description of the theory behind the fastMNN algorithm", - paper_url="https://marionilab.github.io/FurtherMNN2018/theory/description.html", + paper_reference="lun2019fastmnn", paper_year=2019, code_url="https://doi.org/doi:10.18129/B9.bioc.batchelor", image="openproblems-r-extras", diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py index 90edc8d495..188b7d0781 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/harmony.py @@ -6,9 +6,17 @@ _harmony_method = functools.partial( method, - paper_name="Fast, sensitive and accurate integration " - "of single-cell data with Harmony", - paper_url="https://www.nature.com/articles/s41592-019-0619-0", + method_summary=( + "Harmony is a method that uses PCA to group the cells into multi-dataset" + " clusters, and then computes cluster-specific linear correction factors. Each" + " cell is then corrected by its cell-specific linear factor using the" + " cluster-weighted average. The method keeps iterating these four steps until" + " cell clusters are stable." + ), + paper_name=( + "Fast, sensitive and accurate integration of single-cell data with Harmony" + ), + paper_reference="korsunsky2019fast", paper_year=2019, code_url="https://github.com/lilab-bcb/harmony-pytorch", image="openproblems-r-pytorch", diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py index 6617f586ed..9e3c2b012e 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/liger.py @@ -7,9 +7,17 @@ _liger_method = functools.partial( method, - paper_name="Single-Cell Multi-omic Integration Compares and " - "Contrasts Features of Brain Cell Identity", - paper_url="https://doi.org/10.1016/j.cell.2019.05.006", + method_summary=( + "LIGER or linked inference of genomic experimental relationships uses iNMF" + " deriving and implementing a novel coordinate descent algorithm to efficiently" + " do the factorization. Joint clustering is performed and factor loadings are" + " normalised." + ), + paper_name=( + "Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain" + " Cell Identity" + ), + paper_reference="welch2019single", paper_year=2019, code_url="https://github.com/welch-lab/liger", image="openproblems-r-extras", diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py index 99dab39203..a8147ec3ae 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py @@ -5,12 +5,19 @@ _mnn_method = functools.partial( method, - paper_name="Batch effects in single-cell RNA-sequencing " - "data are corrected by matching mutual nearest neighbors", - paper_url="https://www.nature.com/articles/nbt.4091", + method_summary=( + "MNN first detects mutual nearest neighbours in two of the batches and infers a" + " projection of the second onto the first batch. After that, additional batches" + " are added iteratively." 
+ ), + paper_name=( + "Batch effects in single-cell RNA-sequencing data are corrected by matching" + " mutual nearest neighbors" + ), + paper_reference="haghverdi2018batch", paper_year=2018, code_url="https://github.com/chriscainx/mnnpy", - image="openproblems-python-batch-integration", + image="openproblems-r-pytorch", ) @@ -18,7 +25,10 @@ def _mnn(adata): from scib.integration import runMNN from scib.preprocessing import reduce_data + # mnn clears adata.uns + uns = adata.uns adata = runMNN(adata, "batch") + adata.uns = uns reduce_data(adata, umap=False) adata.obsm["X_emb"] = adata.obsm["X_pca"] adata.uns["method_code_version"] = check_version("mnnpy") diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py new file mode 100644 index 0000000000..28eb3ac7e1 --- /dev/null +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scalex.py @@ -0,0 +1,92 @@ +from .....tools.decorators import method +from .....tools.utils import check_version +from typing import Optional + +import functools + +_scalex_method = functools.partial( + method, + method_summary=( + "SCALEX is a method for integrating heterogeneous single-cell data online using" + " a VAE framework. Its generalised encoder disentangles batch-related" + " components from batch-invariant biological components, which are then" + " projected into a common cell-embedding space." + ), + paper_name=( + "Online single-cell data integration through projecting heterogeneous datasets" + " into a common cell-embedding space" + ), + paper_reference="xiong2021online", + paper_year=2022, + code_url="https://github.com/jsxlei/SCALEX", + image="openproblems-python-pytorch", +) + + +def _scalex( + adata, + test: bool = False, + n_top_features: int = 0, + max_iteration: Optional[int] = None, + min_features: Optional[int] = None, + min_cells: Optional[int] = None, + compute_neighbors: bool = False, + compute_features: bool = False, +): + import scalex + import scanpy as sc + + if test: + max_iteration = max_iteration or 2 + else: # pragma: nocover + max_iteration = max_iteration or 30000 + + if test or compute_features: + min_features = min_features or 1 + else: # pragma: nocover + min_features = min_features or 600 + + min_cells = min_cells or 1 + + adata = scalex.SCALEX( + adata, + batch_key="batch", + ignore_umap=True, + impute=adata.obs["batch"].cat.categories[0] if compute_features else False, + max_iteration=max_iteration, + min_features=min_features, + min_cells=min_cells, + n_top_features=n_top_features, + outdir=None, + ) + adata.obsm["X_emb"] = adata.obsm["latent"] + if compute_features: + adata.X = adata.layers["impute"] + if compute_neighbors: + sc.pp.neighbors(adata, use_rep="X_emb") + adata.uns["method_code_version"] = check_version("scalex") + return adata + + +@_scalex_method(method_name="SCALEX (full)") +def scalex_full(adata, test: bool = False, max_iteration: Optional[int] = None): + return _scalex( + adata, + test=test, + max_iteration=max_iteration, + compute_neighbors=True, + compute_features=False, + n_top_features=0, + ) + + +@_scalex_method(method_name="SCALEX (hvg)") +def scalex_hvg(adata, test: bool = False, max_iteration: Optional[int] = None): + return _scalex( + adata, + test=test, + max_iteration=max_iteration, + compute_neighbors=True, + compute_features=False, + n_top_features=2000, + ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py 
b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py index d6e80162b5..add9288ef6 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py @@ -5,12 +5,19 @@ _scanorama_method = functools.partial( method, - paper_name="Efficient integration of heterogeneous single-cell " - "transcriptomes using Scanorama", - paper_url="https://www.nature.com/articles/s41587-019-0113-3", + method_summary=( + "Scanorama is an extension of the MNN method. Unlike MNN, it finds mutual" + " nearest neighbours over all batches and embeds observations into a joint" + " hyperplane." + ), + paper_name=( + "Efficient integration of heterogeneous single-cell transcriptomes using" + " Scanorama" + ), + paper_reference="hie2019efficient", paper_year=2019, code_url="https://github.com/brianhie/scanorama", - image="openproblems-python-batch-integration", + image="openproblems-r-pytorch", ) @@ -18,10 +25,12 @@ def _scanorama(adata, use_rep, pca): from scib.integration import scanorama from scib.preprocessing import reduce_data - # scanorama clears adata.layers + # scanorama clears adata.layers and uns layers = adata.layers + uns = adata.uns adata = scanorama(adata, "batch") adata.layers = layers + adata.uns = uns reduce_data(adata, umap=False, use_rep=use_rep, pca=pca) adata.uns["method_code_version"] = check_version("scanorama") return adata diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py index d5bf463974..3202ac38d5 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanvi.py @@ -6,12 +6,18 @@ _scanvi_method = functools.partial( method, - paper_name="Probabilistic harmonization and annotation of single‐cell " - "transcriptomics data with deep generative models", - paper_url="https://doi.org/10.15252/msb.20209620", + method_summary=( + "ScanVI is an extension of scVI that instead uses a Bayesian semi-supervised" + " approach for more principled cell annotation." + ), + paper_name=( + "Probabilistic harmonization and annotation of single‐cell transcriptomics data" + " with deep generative models" + ), + paper_reference="xu2021probabilistic", paper_year=2021, code_url="https://github.com/YosefLab/scvi-tools", - image="openproblems-python-batch-integration", + image="openproblems-r-pytorch", ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py index 9e9a82a9f2..89263b40bb 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scvi.py @@ -6,11 +6,14 @@ _scvi_method = functools.partial( method, + method_summary=( + "scVI combines a variational autoencoder with a hierarchical Bayesian model."
+ ), paper_name="Deep generative modeling for single-cell transcriptomics", - paper_url="https://www.nature.com/articles/s41592-018-0229-2", + paper_reference="lopez2018deep", paper_year=2018, code_url="https://github.com/YosefLab/scvi-tools", - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py index 13e7eb8ce1..c3e54d89ce 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/ari.py @@ -15,8 +15,14 @@ @metric( metric_name="ARI", + metric_summary=( + "ARI (Adjusted Rand Index) compares the overlap of two clusterings. It" + " considers both correct clustering overlaps and correct disagreements" + " between two clusterings." + ), maximize=True, - image="openproblems-python-batch-integration", + paper_reference="luecken2022benchmarking", + image="openproblems-r-pytorch", ) def ari(adata): from scib.metrics import ari diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/graph_connectivity.py b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/graph_connectivity.py index 3a9732d0e2..d9e5b3901a 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/graph_connectivity.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/graph_connectivity.py @@ -21,8 +21,14 @@ @metric( metric_name="Graph connectivity", + metric_summary=( + "The graph connectivity metric assesses whether the kNN graph representation," + " G, of the integrated data connects all cells with the same cell identity" + " label." + ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) def graph_connectivity(adata): import scib.metrics diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/iso_label_f1.py b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/iso_label_f1.py index df86b043d9..ba08ffafd3 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/iso_label_f1.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/iso_label_f1.py @@ -26,8 +26,14 @@ @metric( metric_name="Isolated label F1", + metric_summary=( + "Isolated cell labels are identified as the labels present in the least number" + " of batches in the integration task. The score evaluates how well these" + " isolated labels separate from other cell identities based on clustering."
+ ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", # only if required + image="openproblems-r-pytorch", ) def isolated_labels_f1(adata): from scib.metrics import isolated_labels @@ -36,7 +42,7 @@ def isolated_labels_f1(adata): adata, label_key="labels", batch_key="batch", - embed="X_pca", + embed=None, cluster=True, verbose=False, ) diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py index bbcdc7cd9d..4b9b110809 100644 --- a/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py +++ b/openproblems/tasks/_batch_integration/batch_integration_graph/metrics/nmi.py @@ -14,8 +14,13 @@ @metric( metric_name="NMI", + metric_summary=( + "NMI compares the overlap of two clusterings. We used NMI to compare the" + " cell-type labels with Louvain clusters computed on the integrated dataset." + ), + paper_reference="luecken2022benchmarking", maximize=True, - image="openproblems-python-batch-integration", + image="openproblems-r-pytorch", ) def nmi(adata): from scib.metrics.clustering import opt_louvain # isort:skip diff --git a/openproblems/tasks/_cell_cell_communication/README.md b/openproblems/tasks/_cell_cell_communication/README.md index c0247e73cb..5494d02aae 100644 --- a/openproblems/tasks/_cell_cell_communication/README.md +++ b/openproblems/tasks/_cell_cell_communication/README.md @@ -1,7 +1,5 @@ # Cell-cell Communication -## The task - The growing availability of single-cell data has sparked an increased interest in the inference of cell-cell communication (CCC), with an ever-growing number of computational tools developed for this purpose. @@ -10,14 +8,14 @@ Different tools propose distinct preprocessing steps with diverse scoring functions, that are challenging to compare and evaluate. Furthermore, each tool typically comes with its own set of prior knowledge. To harmonize these, [Dimitrov et -al, 2022](https://doi.org/10.1038/s41467-022-30755-0) recently developed the -[LIANA](https://github.com/saezlab/liana) framework, which was used +al, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently +developed the [LIANA](https://github.com/saezlab/liana) framework, which was used as a foundation for this task. The challenges in evaluating the tools are further exacerbated by the lack of a gold standard to benchmark the performance of CCC methods. In an attempt to address this, Dimitrov et al use alternative data modalities, including -the spatial proximity of cell types and inferred +the spatial proximity of cell types and downstream cytokine activities, to generate an inferred ground truth. However, these modalities are only approximations of biological reality and come with their own assumptions and limitations. In time, the inclusion of more @@ -40,18 +38,6 @@ More subtasks may be defined that infer communication events on any of the `sour cell type, the `target` cell type, the `ligand` molecule, and the receptor. More aspects of the communication may also be added in the future. -## The metrics - -Metrics for cell-cell communication aim to characterize how good are -the different scoring methods at prioritizing assumed truth predictions. - -* **Odds ratio**: The odds ratio represents the ratio of true and false -positives within a set of prioritized interactions (top ranked hits) versus -the same ratio for the remainder of the interactions. 
Thus, in this -scenario odds ratios quantify the strength of association between the -ability of methods to prioritize interactions and those interactions -assigned to the positive class. - ## API ### Datasets @@ -63,41 +49,53 @@ al](https://doi.org/10.1038/s41467-022-30755-0) for more details. `adata.uns["ccc_target"]` should be a Pandas DataFrame containing: -* `response`: `int`, binary response variable indicating whether an interaction is - assumed to have occurred - -and at least one of the following columns: +* `response`: `int`, binary response variable _[0; 1]_ indicating whether an interaction + is assumed to have occurred + +and at least one of the following columns: * `source`: `str`, name of source cell type in interaction * `target`: `str`, name of target cell type in interaction * `ligand`: `str`, gene symbol of the ligand in an interaction * `receptor`: `str`, gene symbol of the receptor in an interaction -The datasets should also include a -[NCBI taxonomy ID](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi) +The datasets should also include a [NCBI taxonomy ID](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi) in `adata.uns["target_organism"]` - used to convert the (typically human) prior knowledge of the CCC methods to the corresponding gene homologs. `adata.X` should contain the raw counts matrix. -For subtasks including ligands or receptors in the inferred interactions, provide a -prior-k - ### Methods Methods should predict interactions between cell types without using `adata.uns["ccc_target"]`. Predicted interactions should be stored in -`adata.uns["ccc_pred"]` as a Pandas DataFrame containing all of the following columns: +`adata.uns["ccc_pred"]` as a Pandas DataFrame containing: * `score`: `float`, score between `-inf` to `+inf` giving a predicted strength of the inferred interaction + +and at least two of the following columns: + * `source`: `str`, name of source cell type in interaction * `target`: `str`, name of target cell type in interaction * `ligand`: `str`, gene symbol of the ligand in an interaction * `receptor`: `str`, gene symbol of the receptor in an interaction -Methods should infer a score for each _intersecting interaction_ in the harmonized -prior-knowledge resource provided by LIANA. We define _intersecting interactions_ as -those for which the relevant genes are both present in the dataset and the resource. +The relevance of these columns is determined by the subtask in question +via `adata.uns["merge_keys"]`, a list of at least two of the +aforementioned columns, corresponding to the assumed +truth in `adata.uns["ccc_target"]`. + +Methods should infer a score for each _intersecting interaction_, +where these represent the intersecting columns between `adata.uns["ccc_pred"]` and +`adata.uns["ccc_target"]`. + +If `ligand` and/or `receptor` columns are present +in `adata.uns["ccc_target"]`, we further define _intersecting interactions_ as +those for which the relevant genes are present in both the dataset and +the prior-knowledge resource provided by LIANA. + +The predictions of any method which do not uniquely map +to the columns in `adata.uns["merge_keys"]` are to be **aggregated**. +By default, aggregation is carried out as the `max` and `sum` +according to columns in the `merge_keys`.
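To make the aggregation step concrete, here is a minimal sketch, assuming a small hypothetical `ccc_pred` frame and `merge_keys = ["source", "target"]`, of how scores that do not map uniquely to the merge keys could be collapsed with `max` or `sum`, mirroring the `aggregate_method_scores` helper in `_common/utils.py`:

```python
import pandas as pd

# Hypothetical predictions: two ligand-receptor pairs map to the same (source, target)
ccc_pred = pd.DataFrame(
    {
        "source": ["B", "B", "T"],
        "target": ["T", "T", "B"],
        "ligand": ["TGFB1", "IL6", "CD40LG"],
        "receptor": ["TGFBR1", "IL6R", "CD40"],
        "score": [0.9, 0.4, 0.7],
    }
)
merge_keys = ["source", "target"]

# Collapse duplicates so each merge-key combination keeps a single score
agg_max = ccc_pred.groupby(merge_keys, as_index=False).agg(score=("score", "max"))
agg_sum = ccc_pred.groupby(merge_keys, as_index=False).agg(score=("score", "sum"))
```

Whether `max` or `sum` is the more appropriate summary depends on whether a single strong ligand-receptor pair or the overall amount of signalling between two cell types should drive the prediction, which is why both variants are benchmarked.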
The prior-knowledge resource is available via the `cell_cell_communication.utils.ligand_receptor_resource` function, which returns a diff --git a/openproblems/tasks/_cell_cell_communication/_common/api.py b/openproblems/tasks/_cell_cell_communication/_common/api.py index f0fdd504df..ac38c8e064 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/api.py +++ b/openproblems/tasks/_cell_cell_communication/_common/api.py @@ -3,7 +3,6 @@ import numbers import numpy as np import pandas as pd -import scanpy as sc SAMPLE_RECEPTOR_NAMES = [ "LGALS9", @@ -36,8 +35,8 @@ def assert_is_subset( msg = f"{subset_name} is not a subset of {superset_name}. " else: msg = ( - f"Allowed proportion ({prop_missing_allowed}) of missing " - f"{subset_name} elements exceeded ({prop_missing:.2f}). " + f"Allowed proportion ({prop_missing_allowed}) of missing" + f" {subset_name} elements exceeded ({prop_missing:.2f}). " ) x_missing = ",".join([x for x in subset[is_missing]]) raise AssertionError(msg + f"{x_missing} missing from {superset_name}") @@ -61,6 +60,9 @@ def check_dataset(adata, merge_keys): assert "label" in adata.obs assert "ccc_target" in adata.uns + assert "merge_keys" in adata.uns + np.testing.assert_array_equal(adata.uns["merge_keys"], merge_keys) + # check target organism assert "target_organism" in adata.uns assert isinstance(adata.uns["target_organism"], numbers.Integral) @@ -70,31 +72,57 @@ def check_dataset(adata, merge_keys): assert "response" in adata.uns["ccc_target"] assert np.issubdtype(adata.uns["ccc_target"]["response"].dtype, int) assert np.all(np.isin(adata.uns["ccc_target"]["response"], [0, 1])) + assert any(adata.uns["ccc_target"][merge_keys].duplicated()) is False - # check against resource if "ligand" in merge_keys or "receptor" in merge_keys: assert "ligand_receptor_resource" in adata.uns - assert "receptor_genesymbol" in adata.uns["ligand_receptor_resource"] - assert "ligand_genesymbol" in adata.uns["ligand_receptor_resource"] assert "var_names_all" in adata.uns - assert_is_subset( - flatten_complex_subunits( - adata.uns["ligand_receptor_resource"]["receptor_genesymbol"] - ), - adata.uns["var_names_all"], - "resource receptor names", - "gene names", - 0.1, - ) - assert_is_subset( - flatten_complex_subunits( - adata.uns["ligand_receptor_resource"]["ligand_genesymbol"] - ), - adata.uns["var_names_all"], - "resource ligand names", - "gene names", - 0.1, - ) + + if "receptor" in merge_keys: + assert "receptor" in adata.uns["ccc_target"] + assert "receptor_genesymbol" in adata.uns["ligand_receptor_resource"] + + # verify target receptors are in resource + assert_is_subset( + adata.uns["ccc_target"]["receptor"].unique(), + np.unique(adata.uns["ligand_receptor_resource"]["receptor_genesymbol"]), + "target receptor names", + "resource receptor names", + ) + + # verify resource receptors are in the data + assert_is_subset( + flatten_complex_subunits( + adata.uns["ligand_receptor_resource"]["receptor_genesymbol"] + ), + adata.uns["var_names_all"], + "resource receptor names", + "gene names", + 0.1, + ) + + if "ligand" in merge_keys: + assert "ligand" in adata.uns["ccc_target"] + assert "ligand_genesymbol" in adata.uns["ligand_receptor_resource"] + + # verify target ligands are in resource + assert_is_subset( + adata.uns["ccc_target"]["ligand"].unique(), + np.unique(adata.uns["ligand_receptor_resource"]["ligand_genesymbol"]), + "target ligand names", + "resource ligand names", + ) + + # verify resource ligands are in the data + assert_is_subset( + flatten_complex_subunits( + 
adata.uns["ligand_receptor_resource"]["ligand_genesymbol"] + ), + adata.uns["var_names_all"], + "resource ligand names", + "gene names", + 0.1, + ) # check merge keys if "source" in merge_keys: @@ -114,29 +142,10 @@ def check_dataset(adata, merge_keys): "cell types", ) - if "receptor" in merge_keys: - # verify target receptors are in resource - assert "receptor" in adata.uns["ccc_target"] - assert_is_subset( - adata.uns["ccc_target"]["receptor"].unique(), - np.unique(adata.uns["ligand_receptor_resource"]["receptor_genesymbol"]), - "target receptor names", - "resource receptor names", - ) - if "ligand" in merge_keys: - # verify target ligands are in resource - assert "ligand" in adata.uns["ccc_target"] - assert_is_subset( - adata.uns["ccc_target"]["ligand"].unique(), - np.unique(adata.uns["ligand_receptor_resource"]["ligand_genesymbol"]), - "target ligand names", - "resource ligand names", - ) - return True -def check_method(adata, merge_keys): +def check_method(adata, merge_keys, is_baseline=False): """Check that method output fits expected API.""" assert "ccc_pred" in adata.uns @@ -145,6 +154,9 @@ def check_method(adata, merge_keys): assert "score" in adata.uns["ccc_pred"] assert np.all(np.isreal(adata.uns["ccc_pred"]["score"])) + # Check if a single prediction is returned for every merge_key combo + assert (adata.uns["ccc_pred"].groupby(merge_keys).size() == 1).all() + # check merge keys if "ligand" in merge_keys: assert "ligand" in adata.uns["ccc_pred"] @@ -184,7 +196,12 @@ def check_method(adata, merge_keys): def sample_dataset(merge_keys): """Create a simple dataset to use for testing methods in this task.""" + import scanpy as sc + adata = load_sample_data() + rng = np.random.default_rng(seed=1234) + + adata.uns["merge_keys"] = merge_keys # keep only the top 10 most variable sc.pp.highly_variable_genes(adata, n_top_genes=len(SAMPLE_RECEPTOR_NAMES)) @@ -199,13 +216,24 @@ def sample_dataset(merge_keys): # generate target interactions adata.uns["ccc_target"] = pd.DataFrame( { - "response": np.random.binomial(1, 0.2, 50), - "ligand": np.random.choice(adata.var.index, 50), - "receptor": np.random.choice(adata.var.index, 50), - "source": np.random.choice(list(set(adata.obs.label)), 50), - "target": np.random.choice(list(set(adata.obs.label)), 50), + "ligand": rng.choice(adata.var.index, 50), + "receptor": rng.choice(adata.var.index, 50), + "source": rng.choice(list(set(adata.obs.label)), 50), + "target": rng.choice(list(set(adata.obs.label)), 50), } ) + # drop duplicates + adata.uns["ccc_target"] = ( + adata.uns["ccc_target"] + .sort_values(merge_keys) + .reset_index() + .drop_duplicates(subset=merge_keys, keep="first") + .reset_index() + ) + + n_rows = adata.uns["ccc_target"].shape[0] + adata.uns["ccc_target"]["response"] = rng.binomial(1, 0.5, n_rows) + # subset columns adata.uns["ccc_target"] = adata.uns["ccc_target"][["response"] + merge_keys] @@ -214,23 +242,23 @@ def sample_dataset(merge_keys): n_complexes = 5 n_genes = len(adata.var.index) ligand_complexes = [ - "_".join(np.random.choice(adata.var.index, 2)) for _ in range(n_complexes) + "_".join(rng.choice(adata.var.index, 2)) for _ in range(n_complexes) ] receptor_complexes = [ - "_".join(np.random.choice(adata.var.index, 2)) for _ in range(n_complexes) + "_".join(rng.choice(adata.var.index, 2)) for _ in range(n_complexes) ] adata.uns["ligand_receptor_resource"] = pd.DataFrame( { "ligand_genesymbol": np.concatenate( [ ligand_complexes, - np.random.choice(adata.var.index, n_genes, replace=False), + rng.choice(adata.var.index, 
n_genes, replace=False), ] ), "receptor_genesymbol": np.concatenate( [ receptor_complexes, - np.random.choice(adata.var.index, n_genes, replace=False), + rng.choice(adata.var.index, n_genes, replace=False), ] ), } @@ -242,7 +270,7 @@ def sample_dataset(merge_keys): def sample_method(adata, merge_keys): """Create sample method output for testing metrics in this task.""" row_num = 500 - np.random.seed(1234) + rng = np.random.default_rng(seed=1234) ligand_msk = ~adata.uns["ligand_receptor_resource"]["ligand_genesymbol"].isin( adata.var.index @@ -254,15 +282,19 @@ def sample_method(adata, merge_keys): # keep only plausible interactions resource = adata.uns["ligand_receptor_resource"][msk] - df = pd.DataFrame(np.random.random((row_num, 1)), columns=["score"]) - df["source"] = np.random.choice(np.unique(adata.obs[["label"]]), row_num) - df["target"] = np.random.choice(np.unique(adata.obs[["label"]]), row_num) - df["ligand"] = np.random.choice( - np.unique(resource["ligand_genesymbol"].values), row_num - ) - df["receptor"] = np.random.choice( + df = pd.DataFrame(rng.random((row_num, 1)), columns=["score"]) + df["source"] = rng.choice(np.unique(adata.obs[["label"]]), row_num) + df["target"] = rng.choice(np.unique(adata.obs[["label"]]), row_num) + df["ligand"] = rng.choice(np.unique(resource["ligand_genesymbol"].values), row_num) + df["receptor"] = rng.choice( np.unique(resource["receptor_genesymbol"].values), row_num ) + + # remove duplicates + df = df.sort_values(merge_keys + ["score"]).drop_duplicates( + subset=merge_keys, keep="first" + ) + # subset columns df = df[["score"] + merge_keys] diff --git a/openproblems/tasks/_cell_cell_communication/_common/methods/__init__.py b/openproblems/tasks/_cell_cell_communication/_common/methods/__init__.py index 0db596d2be..cd6c163e4f 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/methods/__init__.py +++ b/openproblems/tasks/_cell_cell_communication/_common/methods/__init__.py @@ -1,6 +1,16 @@ -from .liana import cellphonedb -from .liana import connectome -from .liana import liana -from .liana import logfc -from .liana import natmi -from .liana import sca +from .baseline import random_events +from .baseline import true_events +from .liana import cellphonedb_max +from .liana import cellphonedb_sum +from .liana import connectome_max +from .liana import connectome_sum +from .liana import logfc_max +from .liana import logfc_sum +from .liana import magnitude_max +from .liana import magnitude_sum +from .liana import natmi_max +from .liana import natmi_sum +from .liana import sca_max +from .liana import sca_sum +from .liana import specificity_max +from .liana import specificity_sum diff --git a/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py b/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py new file mode 100644 index 0000000000..e6704ca460 --- /dev/null +++ b/openproblems/tasks/_cell_cell_communication/_common/methods/baseline.py @@ -0,0 +1,49 @@ +from .....tools.decorators import baseline_method +from .....tools.utils import check_version + +import numpy as np +import pandas as pd + + +@baseline_method( + method_name="Random Events", + method_summary=( + "Random generation of cell-cell communication events by random selection of" + " ligand, receptor, source, target, and score" + ), +) +def random_events(adata, test=False, n_events=1000): + rng = np.random.default_rng(seed=1) + + ccc_pred = pd.DataFrame( + { + "ligand": rng.choice( + adata.uns["ligand_receptor_resource"]["ligand_genesymbol"], 
n_events + ), + "receptor": rng.choice( + adata.uns["ligand_receptor_resource"]["receptor_genesymbol"], n_events + ), + "source": rng.choice(adata.obs["label"].cat.categories, n_events), + "target": rng.choice(adata.obs["label"].cat.categories, n_events), + "score": rng.uniform(0, 1, n_events), + } + ) + ccc_pred = ccc_pred.loc[~ccc_pred[adata.uns["merge_keys"]].duplicated()] + + adata.uns["ccc_pred"] = ccc_pred + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="True Events", + method_summary=( + "Perfect prediction of cell-cell communication events from target data" + ), +) +def true_events(adata, test=False): + adata.uns["ccc_pred"] = adata.uns["ccc_target"].rename( + {"response": "score"}, axis=1 + ) + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/_cell_cell_communication/_common/methods/liana.R b/openproblems/tasks/_cell_cell_communication/_common/methods/liana.R index 3d2a976907..e7dfc41dad 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/methods/liana.R +++ b/openproblems/tasks/_cell_cell_communication/_common/methods/liana.R @@ -37,7 +37,9 @@ liana_res <- liana_wrap(sce, # Aggregate if a run /w multiple methods if (!is.tibble(liana_res)) { liana_res <- liana_res %>% - liana_aggregate() + liana_aggregate(aggregate_how = aggregate_how) %>% + # inverse distribution + mutate(aggregate_rank = 1 - aggregate_rank) } # Return (Keep Complexes [not subunits] for Consistency) diff --git a/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py b/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py index 636e3ac2aa..7c0f254b65 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py +++ b/openproblems/tasks/_cell_cell_communication/_common/methods/liana.py @@ -1,7 +1,8 @@ from .....tools.conversion import r_function from .....tools.decorators import method -from .....tools.normalize import log_cpm +from .....tools.normalize import log_cp10k from .....tools.utils import check_r_version +from ..utils import aggregate_method_scores from ..utils import ligand_receptor_resource import functools @@ -16,35 +17,39 @@ def _p_filt(x, y): _r_liana = r_function( - "liana.R", args="sce, op_resource, min_expression_prop, idents_col, test, ..." + "liana.R", + args="sce, op_resource, min_expression_prop, idents_col, test, aggregate_how, ...", ) _liana_method = functools.partial( method, - paper_name="Comparison of methods and resources for cell-cell " - "communication inference from single-cell RNA-Seq data", - paper_url="https://www.nature.com/articles/s41467-022-30755-0", + method_summary=( + "RobustRankAggregate generates a consensus rank of all methods implemented in" + " LIANA providing either specificity or magnitude scores."
+ ), + paper_name=( + "Comparison of methods and resources for cell-cell communication inference from" + " single-cell RNA-Seq data" + ), + paper_reference="dimitrov2022comparison", paper_year=2022, code_url="https://github.com/saezlab/liana", image="openproblems-r-extras", ) -@_liana_method( - method_name="LIANA", -) -def liana( +def _liana( adata, score_col="aggregate_rank", - ascending=True, min_expression_prop=0.1, test=False, + aggregate_how=None, **kwargs, ): # log-normalize - adata = log_cpm(adata) - adata.layers["logcounts"] = adata.layers["log_cpm"] - del adata.layers["log_cpm"] + adata = log_cp10k(adata) + adata.layers["logcounts"] = adata.layers["log_cp10k"] + del adata.layers["log_cp10k"] # Run LIANA liana_res = _r_liana( @@ -53,12 +58,12 @@ def liana( min_expression_prop=min_expression_prop, idents_col="label", test=test, + aggregate_how=aggregate_how, **kwargs, ) # Format results liana_res["score"] = liana_res[score_col] - liana_res.sort_values("score", ascending=ascending, inplace=True) adata.uns["ccc_pred"] = liana_res adata.uns["method_code_version"] = check_r_version("liana") @@ -67,18 +72,67 @@ def liana( @_liana_method( - method_name="CellPhoneDB", - paper_name="CellPhoneDB: inferring cell–cell communication from " - "combined expression of multi-subunit ligand–receptor complexes", - paper_url="https://www.nature.com/articles/s41596-020-0292-x", + method_name="Specificity Rank Aggregate (max)", +) +def specificity_max(adata, test=False): + adata = _liana(adata, test=test, aggregate_how="specificity") + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + + return adata + + +@_liana_method( + method_name="Specificity Rank Aggregate (sum)", +) +def specificity_sum(adata, test=False): + adata = _liana(adata, test=test, aggregate_how="specificity") + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata + + +@_liana_method( + method_name="Magnitude Rank Aggregate (max)", +) +def magnitude_max(adata, test=False): + adata = _liana(adata, test=test, aggregate_how="magnitude") + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + + return adata + + +@_liana_method( + method_name="Magnitude Rank Aggregate (sum)", +) +def magnitude_sum(adata, test=False): + adata = _liana(adata, test=test, aggregate_how="magnitude") + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata + + +_cellphonedb_method = functools.partial( + _liana_method, + method_summary=( + "CellPhoneDBv2 calculates a mean of ligand-receptor expression as a measure of" + " interaction magnitude, along with a permutation-based p-value as a measure of" + " specificity. Here, we use the former to prioritize interactions, subsequent" + " to filtering according to p-value less than 0.05." 
+ ), + paper_name=( + "CellPhoneDB: inferring cell–cell communication from combined expression of" + " multi-subunit ligand–receptor complexes" + ), + paper_reference="efremova2020cellphonedb", paper_year=2020, ) -def cellphonedb(adata, test=False): - adata = liana( + + +def _cellphonedb(adata, test=False): + adata = _liana( adata, method="cellphonedb", score_col="lr.mean", - ascending=False, test=test, complex_policy="min", ) @@ -86,51 +140,181 @@ def cellphonedb(adata, test=False): adata.uns["ccc_pred"]["score"] = adata.uns["ccc_pred"].apply( lambda x: _p_filt(x.pvalue, x["lr.mean"]), axis=1 ) - adata.uns["ccc_pred"].sort_values("score", ascending=False, inplace=True) return adata -@_liana_method( - method_name="Connectome", - paper_name="Computation and visualization of cell–cell signaling " - "topologies in single-cell systems data using Connectome", - paper_url="https://www.nature.com/articles/s41598-022-07959-x", +@_cellphonedb_method( + method_name="CellPhoneDB (max)", +) +def cellphonedb_max(adata, test=False): + adata = _cellphonedb(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + + return adata + + +@_cellphonedb_method( + method_name="CellPhoneDB (sum)", +) +def cellphonedb_sum(adata, test=False): + adata = _cellphonedb(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata + + +_connectome_method = functools.partial( + _liana_method, + method_summary=( + "Connectome uses the product of ligand-receptor expression as a measure of" + " magnitude, and the average of the z-transformed expression of ligand and" + " receptor as a measure of specificity." + ), + paper_name=( + "Computation and visualization of cell–cell signaling topologies in single-cell" + " systems data using Connectome" + ), + paper_reference="raredon2022computation", paper_year=2022, ) -def connectome(adata, test=False): - return liana( - adata, method="connectome", score_col="weight_sc", ascending=False, test=test - ) -@_liana_method( - method_name="Mean log2FC", +def _connectome(adata, test=False): + return _liana(adata, method="connectome", score_col="weight_sc", test=test) + + +@_connectome_method( + method_name="Connectome (max)", ) -def logfc(adata, test=False): - return liana( - adata, method="logfc", score_col="logfc_comb", ascending=False, test=test - ) +def connectome_max(adata, test=False): + adata = _connectome(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + return adata -@_liana_method( - method_name="NATMI", + +@_connectome_method( + method_name="Connectome (sum)", +) +def connectome_sum(adata, test=False): + adata = _connectome(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata + + +_logfc_method = functools.partial( + _liana_method, + method_summary=( + "logFC (implemented in LIANA and inspired by iTALK) combines both expression" + " and magnitude, and represents the average of one-versus-the-rest log2-fold" + " change of ligand and receptor expression per cell type." 
+ ), +) + + +def _logfc(adata, test=False): + return _liana(adata, method="logfc", score_col="logfc_comb", test=test) + + +@_logfc_method( + method_name="Log2FC (max)", +) +def logfc_max(adata, test=False): + adata = _logfc(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + + return adata + + +@_logfc_method( + method_name="Log2FC (sum)", +) +def logfc_sum(adata, test=False): + adata = _logfc(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata + + +_natmi_method = functools.partial( + _liana_method, + method_summary=( + "NATMI uses the product of ligand-receptor expression as a measure of" + " magnitude. As a measure of specificity, NATMI proposes $specificity.edge =" + r" \frac{l}{l_s} \cdot \frac{r}{r_s}$; where $l$ and $r$ represent the average" + " expression of ligand and receptor per cell type, and $l_s$ and $r_s$" + " represent the sums of the average ligand and receptor expression across all" + " cell types. We use its specificity measure, as recommended by the authors for" + " single-context predictions." + ), paper_name="Predicting cell-to-cell communication networks using NATMI", - paper_url="https://www.nature.com/articles/s41467-020-18873-z", + paper_reference="hou2020predicting", paper_year=2021, ) -def natmi(adata, test=False): - return liana( - adata, method="natmi", score_col="edge_specificity", ascending=False, test=test - ) -@_liana_method( - method_name="SingleCellSignalR", - paper_name="SingleCellSignalR: inference of intercellular networks " - "from single-cell transcriptomics", - paper_url="https://academic.oup.com/nar/article/48/10/e55/5810485", +def _natmi(adata, test=False): + return _liana(adata, method="natmi", score_col="edge_specificity", test=test) + + +@_natmi_method( + method_name="NATMI (max)", +) +def natmi_max(adata, test=False): + adata = _natmi(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + + return adata + + +@_natmi_method( + method_name="NATMI (sum)", +) +def natmi_sum(adata, test=False): + adata = _natmi(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata + + +_sca_method = functools.partial( + _liana_method, + method_summary=( + "SingleCellSignalR provides a magnitude score as $LRscore =" + r" \frac{\sqrt{lr}}{\mu+\sqrt{lr}}$; where $l$ and $r$ are the average ligand" + r" and receptor expression per cell type, and $\mu$ is the mean of the" + " expression matrix." 
+ ), + paper_name=( + "SingleCellSignalR: inference of intercellular networks from single-cell" + " transcriptomics" + ), + paper_reference="cabello2020singlecellsignalr", paper_year=2021, ) -def sca(adata, test=False): - return liana(adata, method="sca", score_col="LRscore", ascending=False, test=test) + + +def _sca(adata, test=False): + return _liana(adata, method="sca", score_col="LRscore", test=test) + + +@_sca_method( + method_name="SingleCellSignalR (max)", +) +def sca_max(adata, test=False): + adata = _sca(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="max") + + return adata + + +@_sca_method( + method_name="SingleCellSignalR (sum)", +) +def sca_sum(adata, test=False): + adata = _sca(adata, test=test) + adata.uns["ccc_pred"] = aggregate_method_scores(adata, how="sum") + + return adata diff --git a/openproblems/tasks/_cell_cell_communication/_common/metrics/__init__.py b/openproblems/tasks/_cell_cell_communication/_common/metrics/__init__.py index e7de268379..ce716b5cfa 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/metrics/__init__.py +++ b/openproblems/tasks/_cell_cell_communication/_common/metrics/__init__.py @@ -1 +1,2 @@ +from .auprc import auprc from .odds_ratio import odds_ratio diff --git a/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py b/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py new file mode 100644 index 0000000000..8a2ac9d3b9 --- /dev/null +++ b/openproblems/tasks/_cell_cell_communication/_common/metrics/auprc.py @@ -0,0 +1,22 @@ +from .....tools.decorators import metric +from ..utils import join_truth_and_pred + + +@metric( + metric_name="Precision-recall AUC", + metric_summary=( + "Area under the precision-recall curve for the binary classification task" + " predicting interactions." + ), + paper_reference="davis2006prauc", + maximize=True, +) +def auprc(adata): + from sklearn.metrics import auc + from sklearn.metrics import precision_recall_curve + + gt = join_truth_and_pred(adata) + precision, recall, _ = precision_recall_curve( + gt["response"], gt["score"], pos_label=1 + ) + return auc(recall, precision) diff --git a/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py b/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py index 0beaab2335..47bcc63ae9 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py +++ b/openproblems/tasks/_cell_cell_communication/_common/metrics/odds_ratio.py @@ -1,31 +1,55 @@ from .....tools.decorators import metric +from ..utils import join_truth_and_pred import numpy as np -import scipy.stats as stats -@metric(metric_name="Odds Ratio", maximize=True) -def odds_ratio(adata, merge_keys, top_n=100): +def _sigmoid_transform(x): + return 1 - 1 / (1 + x / 2) + + +@metric( + metric_name="Odds Ratio", + metric_summary=( + "The odds ratio represents the ratio of true and false positives within a set" + " of prioritized interactions (top ranked hits) versus the same ratio for the" + " remainder of the interactions. Thus, in this scenario odds ratios quantify" + " the strength of association between the ability of methods to prioritize" + " interactions and those interactions assigned to the positive class." 
+ ), + paper_reference="bland2000odds", + maximize=True, +) +def odds_ratio(adata, top_prop=0.05): # Join benchmark (assumed truth) and ccc results # Get /w ccc_target and a response [0, 1] column - gt = adata.uns["ccc_target"].merge( - adata.uns["ccc_pred"], on=merge_keys, how="right" - ) - gt = gt[gt["response"].notna()] + gt = join_truth_and_pred(adata) + gt = gt.sort_values("score", ascending=False) + top_n = int(adata.uns["ccc_target"].shape[0] * top_prop) # assign the top rank interactions to 1 a = np.zeros(len(gt["score"])) a[0:top_n] = 1 gt.loc[:, ["top_n"]] = a - # Shape to contingency table - table = np.array(gt.pivot_table(index=["top_n", "response"], aggfunc="size")) - - # if positive or negative class is not in top_n - if table.shape != (4,): - return 1 - - # Fisher ET - oddsratio, _ = stats.fisher_exact(table.reshape(2, 2)) - - return oddsratio + top = gt[gt["top_n"] == 1] + tp = np.sum(top.response == 1) + fp = np.sum(top.response == 0) + + bot = gt[gt["top_n"] == 0] + fn = np.sum(bot.response == 1) + tn = np.sum(bot.response == 0) + + numerator = tp * tn + denominator = fp * fn + if denominator == 0: + if numerator == 0: + # undefined + return np.nan + else: + # perfect score + oddsratio = np.inf + else: + oddsratio = numerator / denominator + + return _sigmoid_transform(oddsratio) diff --git a/openproblems/tasks/_cell_cell_communication/_common/utils.py b/openproblems/tasks/_cell_cell_communication/_common/utils.py index 25c43187ec..a33be818db 100644 --- a/openproblems/tasks/_cell_cell_communication/_common/utils.py +++ b/openproblems/tasks/_cell_cell_communication/_common/utils.py @@ -91,7 +91,7 @@ def map_gene_symbols(adata, map_filename: Union[str, pathlib.Path]): ) return anndata.AnnData( - X=scipy.sparse.hstack([adata_one_to_any.X] + many_to_one_X), + X=scipy.sparse.hstack([adata_one_to_any.X] + many_to_one_X).tocsr(), obs=adata.obs, var=pd.DataFrame( index=np.concatenate([adata_one_to_any.var.index, many_to_one_genes]) @@ -99,9 +99,30 @@ def map_gene_symbols(adata, map_filename: Union[str, pathlib.Path]): layers={ layer_name: scipy.sparse.hstack( [adata_one_to_any.layers[layer_name]] + many_to_one_layers[layer_name] - ) + ).tocsr() for layer_name in adata.layers }, uns=adata.uns, obsm=adata.obsm, ) + + +# Join predictions to target +def join_truth_and_pred(adata): + merge_keys = list(adata.uns["merge_keys"]) + gt = adata.uns["ccc_target"].merge(adata.uns["ccc_pred"], on=merge_keys, how="left") + + gt.loc[gt["response"].isna(), "response"] = 0 + gt.loc[gt["score"].isna(), "score"] = np.nanmin(gt["score"]) - np.finfo(float).eps + + return gt + + +def aggregate_method_scores(adata, how): + merge_keys = list(adata.uns["merge_keys"]) + return ( + adata.uns["ccc_pred"] + .groupby(merge_keys) + .agg(score=("score", how)) + .reset_index() + ) diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/README.md b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/README.md index 501284657a..3b005b61d9 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/README.md +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/README.md @@ -1,6 +1,4 @@ -# Cell-cell Communication - -## The task +# Cell-cell Communication (ligand-target) The growing availability of single-cell data has sparked an increased interest in the inference of cell-cell communication (CCC), @@ -10,14 +8,14 @@ Different tools propose distinct preprocessing steps with diverse 
scoring functions, that are challenging to compare and evaluate. Furthermore, each tool typically comes with its own set of prior knowledge. To harmonize these, [Dimitrov et -al, 2022](https://doi.org/10.1038/s41467-022-30755-0) recently developed the -[LIANA](https://github.com/saezlab/liana) framework, which was used +al, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently +developed the [LIANA](https://github.com/saezlab/liana) framework, which was used as a foundation for this task. The challenges in evaluating the tools are further exacerbated by the lack of a gold standard to benchmark the performance of CCC methods. In an attempt to address this, Dimitrov et al use alternative data modalities, including -the spatial proximity of cell types and inferred +the spatial proximity of cell types and downstream cytokine activities, to generate an inferred ground truth. However, these modalities are only approximations of biological reality and come with their own assumptions and limitations. In time, the inclusion of more @@ -31,18 +29,6 @@ the target cell types. This subtask focuses on the prediction of interactions from steady-state, or single-context, single-cell data.** -## The metrics - -Metrics for cell-cell communication aim to characterize how good are -the different scoring methods at prioritizing assumed truth predictions. - -* **Odds ratio**: The odds ratio represents the ratio of true and false -positives within a set of prioritized interactions (top ranked hits) versus -the same ratio for the remainder of the interactions. Thus, in this -scenario odds ratios quantify the strength of association between the -ability of methods to prioritize interactions and those interactions -assigned to the positive class. - ## API ### Datasets @@ -55,8 +41,8 @@ al](https://doi.org/10.1038/s41467-022-30755-0) for more details. `adata.uns["ccc_target"]` should be a Pandas DataFrame containing all the following columns: -* `response`: `int`, binary response variable indicating whether an interaction is - assumed to have occurred +* `response`: `int`, binary response variable _[0; 1]_ indicating whether an interaction + is assumed to have occurred * `ligand`: `str`, gene symbol of the ligand in an interaction * `target`: `str`, name of target cell type in interaction @@ -77,16 +63,17 @@ Methods should predict interactions between cell types without using * `ligand`: `str`, gene symbol of the ligand in an interaction * `target`: `str`, name of target cell type in interaction -Methods should infer a score for each _intersecting interaction_ in the harmonized -prior-knowledge resource provided by LIANA. We define _intersecting interactions_ as -those for which the relevant genes are both present in the dataset and the resource. +Methods should infer a `score` for each _intersecting interaction_ +between a `ligand` and a `target`. +We define _intersecting interactions_ as +those for which the `ligand` genes are present in both the dataset and +the prior-knowledge resource provided by LIANA, while a `target` is any +target cell identity label in the dataset. -The prior-knowledge resource is available via the -`cell_cell_communication.utils.ligand_receptor_resource` function, which returns a -DataFrame containing the columns `ligand_genesymbol` and `receptor_genesymbol`, which -correspond to the ligand and receptor genes, respectively. These may contain complexes -with subunits separated with `_`. Hence, **methods should be able to deal with -complex-containing interactions**. 
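As an illustration of the candidate space for this subtask, the following is a short sketch (with hypothetical gene and label names, not the task's actual code) of how the scoreable (`ligand`, `target`) pairs could be enumerated, keeping only ligands present in both the dataset and the resource:

```python
import itertools

# Hypothetical inputs
resource_ligands = {"TGFB1", "IL6", "CD40LG"}      # ligands in the LIANA resource
dataset_genes = {"TGFB1", "IL6", "EPCAM"}          # genes in adata.var_names
cell_types = ["B cells", "T cells", "Epithelial"]  # adata.obs["label"] categories

# Ligands measurable in the data; any cell identity label can be a target
ligands = sorted(resource_ligands & dataset_genes)
candidate_pairs = list(itertools.product(ligands, cell_types))
```

Complex entries in the resource (subunits separated by `_`) would additionally need to be expanded into their subunit gene symbols before taking this intersection, as noted in the Prior-knowledge section.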
+The predictions of any method which do not uniquely map +to the columns in `adata.uns["merge_keys"]` are to be **aggregated**. +By default, aggregation is carried as the `max` and `sum` +according to columns in the `merge_keys`. ## Prior-knowledge @@ -109,6 +96,13 @@ To ensure the consistency between the IDs in the dataset and those in the resource we use a reference map, obtained via BioConductor-v3.15 `org.Hs.eg.db`, and are provided in `tnbc_wu2021_gene_symbols.csv`. +The prior-knowledge resource is available via the +`cell_cell_communication.utils.ligand_receptor_resource` function, which returns a +DataFrame containing the columns `ligand_genesymbol` and `receptor_genesymbol`, which +correspond to the ligand and receptor genes, respectively. These may contain complexes +with subunits separated with `_`. Hence, **methods should be able to deal with +complex-containing interactions**. + ### Metrics Metrics should evaluate the concordance between `adata.uns["ccc_target"]` and diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/datasets/tnbc_wu2021.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/datasets/tnbc_wu2021.py index 275a24edb2..e58d6241c0 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/datasets/tnbc_wu2021.py +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/datasets/tnbc_wu2021.py @@ -10,10 +10,12 @@ "Triple negative breast cancer atlas", data_url=load_tnbc_data.metadata["data_url"], data_reference=load_tnbc_data.metadata["data_reference"], - dataset_summary="A single-cell atlas of human breast cancers with inferred " - "cytokine activities as assumed true cell-cell communication. Cytokine " - "activities were estimated by fitting a multivariate linear model with " - "cytokine-focused signatures (see Dimitrov et al., 2022).", + dataset_summary=( + "Human breast cancer atlas (Wu et al., 2021), with cytokine activities," + " inferred using a multivariate linear model with cytokine-focused signatures," + " as assumed true cell-cell communication (Dimitrov et al., 2022). 
42512 cells" + " x 28078 features with 29 cell types from 10 patients" + ), image="openproblems-r-extras", ) def tnbc_data(test=False): @@ -21,6 +23,7 @@ def tnbc_data(test=False): adata = map_gene_symbols( adata, pathlib.Path(__file__).parent.joinpath("tnbc_wu2021_gene_symbols.csv") ) + adata.uns["merge_keys"] = ["ligand", "target"] adata.uns["ligand_receptor_resource"] = ligand_receptor_resource( adata.uns["target_organism"] ) diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/methods/__init__.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/methods/__init__.py index 2475b922e7..7e1180f55b 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/methods/__init__.py +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/methods/__init__.py @@ -1,6 +1,16 @@ -from ..._common.methods import cellphonedb -from ..._common.methods import connectome -from ..._common.methods import liana -from ..._common.methods import logfc -from ..._common.methods import natmi -from ..._common.methods import sca +from ..._common.methods import cellphonedb_max +from ..._common.methods import cellphonedb_sum +from ..._common.methods import connectome_max +from ..._common.methods import connectome_sum +from ..._common.methods import logfc_max +from ..._common.methods import logfc_sum +from ..._common.methods import magnitude_max +from ..._common.methods import magnitude_sum +from ..._common.methods import natmi_max +from ..._common.methods import natmi_sum +from ..._common.methods import random_events +from ..._common.methods import sca_max +from ..._common.methods import sca_sum +from ..._common.methods import specificity_max +from ..._common.methods import specificity_sum +from ..._common.methods import true_events diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/__init__.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/__init__.py index e7de268379..b38d36885f 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/__init__.py +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/__init__.py @@ -1 +1,2 @@ -from .odds_ratio import odds_ratio +from ..._common.metrics.auprc import auprc +from ..._common.metrics.odds_ratio import odds_ratio diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/odds_ratio.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/odds_ratio.py deleted file mode 100644 index e5c113c52d..0000000000 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_ligand_target/metrics/odds_ratio.py +++ /dev/null @@ -1,8 +0,0 @@ -from .....tools.decorators import metric -from ..._common.metrics import odds_ratio as _odds_ratio -from ..api import MERGE_KEYS - - -@metric(**_odds_ratio.metadata) -def odds_ratio(adata): - return _odds_ratio(adata, merge_keys=MERGE_KEYS) diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/README.md b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/README.md index 3d35dde314..d996555e64 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/README.md +++ 
b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/README.md @@ -1,6 +1,4 @@ -# Cell-cell Communication - -## The task +# Cell-cell Communication (source-target) The growing availability of single-cell data has sparked an increased interest in the inference of cell-cell communication (CCC), @@ -10,14 +8,14 @@ Different tools propose distinct preprocessing steps with diverse scoring functions, that are challenging to compare and evaluate. Furthermore, each tool typically comes with its own set of prior knowledge. To harmonize these, [Dimitrov et -al, 2022](https://doi.org/10.1038/s41467-022-30755-0) recently developed the -[LIANA](https://github.com/saezlab/liana) framework, which was used +al, 2022](https://openproblems.bio/bibliography#dimitrov2022comparison) recently +developed the [LIANA](https://github.com/saezlab/liana) framework, which was used as a foundation for this task. The challenges in evaluating the tools are further exacerbated by the lack of a gold standard to benchmark the performance of CCC methods. In an attempt to address this, Dimitrov et al use alternative data modalities, including -the spatial proximity of cell types and inferred +the spatial proximity of cell types and downstream cytokine activities, to generate an inferred ground truth. However, these modalities are only approximations of biological reality and come with their own assumptions and limitations. In time, the inclusion of more @@ -30,18 +28,6 @@ spatially-adjacent source cell types and target cell types. This subtask focuses on the prediction of interactions from steady-state, or single-context, single-cell data.** -## The metrics - -Metrics for cell-cell communication aim to characterize how good are -the different scoring methods at prioritizing assumed truth predictions. - -* **Odds ratio**: The odds ratio represents the ratio of true and false -positives within a set of prioritized interactions (top ranked hits) versus -the same ratio for the remainder of the interactions. Thus, in this -scenario odds ratios quantify the strength of association between the -ability of methods to prioritize interactions and those interactions -assigned to the positive class. - ## API ### Datasets @@ -54,8 +40,8 @@ al](https://doi.org/10.1038/s41467-022-30755-0) for more details. `adata.uns["ccc_target"]` should be a Pandas DataFrame containing all of the following columns: -* `response`: `int`, binary response variable indicating whether an interaction is - assumed to have occurred +* `response`: `int`, binary response variable _[0; 1]_ indicating whether an interaction + is assumed to have occurred * `source`: `str`, name of source cell type in interaction * `target`: `str`, name of target cell type in interaction @@ -76,6 +62,15 @@ Methods should predict interactions between cell types without using * `source`: `str`, name of source cell type in interaction * `target`: `str`, name of target cell type in interaction +Methods should infer a `score` for each _intersecting interaction_ +between a `source` and a `target`, which correspond to all possible combinations +of the cell identity labels in the dataset. + +The predictions of any method which do not uniquely map +to the columns in `adata.uns["merge_keys"]` are to be **aggregated**. +By default, aggregation is carried as the `max` and `sum` +according to columns in the `merge_keys`. 
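For this subtask the candidate space is simply every ordered pair of cell identity labels; the following is a minimal sketch (with hypothetical labels) of the frame a method is expected to fill with scores:

```python
import itertools

import pandas as pd

# Hypothetical cell identity labels, i.e. adata.obs["label"] categories
cell_types = ["B cells", "T cells", "Myeloid"]

# Every ordered (source, target) combination is a candidate interaction;
# a method attaches one float "score" per row before storing the result
# in adata.uns["ccc_pred"].
candidates = pd.DataFrame(
    list(itertools.product(cell_types, repeat=2)), columns=["source", "target"]
)
```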
+ ### Prior-knowledge Resource Each dataset should be supplemented with a prior knowledge resource of diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/datasets/allen_brain_atlas.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/datasets/allen_brain_atlas.py index f46e17c1f6..05c90cc6be 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/datasets/allen_brain_atlas.py +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/datasets/allen_brain_atlas.py @@ -7,14 +7,17 @@ "Mouse brain atlas", data_url=load_mouse_brain_atlas.metadata["data_url"], data_reference=load_mouse_brain_atlas.metadata["data_reference"], - dataset_summary="A murine brain atlas with inferred spatially-adjacent " - "cell types as assumed benchmark truth. Adjacent cell types are inferred " - "from z-transformed deconvolution proportion correlations. Generated from " - "murine brain 10x Visium slides (see Dimitrov et al., 2022).", + dataset_summary=( + "A murine brain atlas with adjacent cell types as assumed benchmark truth," + " inferred from deconvolution proportion correlations using matching 10x Visium" + " slides (see Dimitrov et al., 2022). 14249 cells x 34617 features with 23 cell" + " type labels." + ), image="openproblems-r-extras", ) def mouse_brain_atlas(test=False): adata = load_mouse_brain_atlas(test=test) + adata.uns["merge_keys"] = ["source", "target"] adata.uns["ligand_receptor_resource"] = ligand_receptor_resource( adata.uns["target_organism"] ) diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/methods/__init__.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/methods/__init__.py index 2475b922e7..7e1180f55b 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/methods/__init__.py +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/methods/__init__.py @@ -1,6 +1,16 @@ -from ..._common.methods import cellphonedb -from ..._common.methods import connectome -from ..._common.methods import liana -from ..._common.methods import logfc -from ..._common.methods import natmi -from ..._common.methods import sca +from ..._common.methods import cellphonedb_max +from ..._common.methods import cellphonedb_sum +from ..._common.methods import connectome_max +from ..._common.methods import connectome_sum +from ..._common.methods import logfc_max +from ..._common.methods import logfc_sum +from ..._common.methods import magnitude_max +from ..._common.methods import magnitude_sum +from ..._common.methods import natmi_max +from ..._common.methods import natmi_sum +from ..._common.methods import random_events +from ..._common.methods import sca_max +from ..._common.methods import sca_sum +from ..._common.methods import specificity_max +from ..._common.methods import specificity_sum +from ..._common.methods import true_events diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/__init__.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/__init__.py index e7de268379..b38d36885f 100644 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/__init__.py +++ b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/__init__.py @@ -1 +1,2 @@ -from .odds_ratio 
import odds_ratio +from ..._common.metrics.auprc import auprc +from ..._common.metrics.odds_ratio import odds_ratio diff --git a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/odds_ratio.py b/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/odds_ratio.py deleted file mode 100644 index e5c113c52d..0000000000 --- a/openproblems/tasks/_cell_cell_communication/cell_cell_communication_source_target/metrics/odds_ratio.py +++ /dev/null @@ -1,8 +0,0 @@ -from .....tools.decorators import metric -from ..._common.metrics import odds_ratio as _odds_ratio -from ..api import MERGE_KEYS - - -@metric(**_odds_ratio.metadata) -def odds_ratio(adata): - return _odds_ratio(adata, merge_keys=MERGE_KEYS) diff --git a/openproblems/tasks/denoising/README.md b/openproblems/tasks/denoising/README.md index 9bfe698781..9e76488d4f 100644 --- a/openproblems/tasks/denoising/README.md +++ b/openproblems/tasks/denoising/README.md @@ -1,44 +1,32 @@ # Denoising -## The task - Single-cell RNA-Seq protocols only detect a fraction of the mRNA molecules present in each cell. As a result, the measurements (UMI counts) observed for each gene and each cell are associated with generally high levels of technical noise ([Grün et al., -2014](https://www.nature.com/articles/nmeth.2930)). Denoising describes the task of -estimating the true expression level of each gene in each cell. In the single-cell -literature, this task is also referred to as *imputation*, a term which is typically -used for missing data problems in statistics. Similar to the use of the terms "dropout", -"missing data", and "technical zeros", this terminology can create confusion about the -underlying measurement process ([Sarkar and Stephens, -2020](https://www.biorxiv.org/content/10.1101/2020.04.07.030007v2)). +2014](https://openproblems.bio/bibliography#grn2014validation)). Denoising describes the +task of estimating the true expression level of each gene in each cell. In the +single-cell literature, this task is also referred to as *imputation*, a term which is +typically used for missing data problems in statistics. Similar to the use of the terms +"dropout", "missing data", and "technical zeros", this terminology can create confusion +about the underlying measurement process ([Sarkar and Stephens, +2021](https://openproblems.bio/bibliography#sarkar2021separating)). A key challenge in evaluating denoising methods is the general lack of a ground truth. A recent benchmark study ([Hou et al., -2020](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02132-x)) +2020](https://openproblems.bio/bibliography#hou2020systematic)) relied on flow-sorted datasets, mixture control experiments ([Tian et al., -2019](https://www.nature.com/articles/s41592-019-0425-8)), and comparisons with bulk -RNA-Seq data. Since each of these approaches suffers from specific limitations, it is -difficult to combine these different approaches into a single quantitative measure of +2019](https://openproblems.bio/bibliography#tian2019benchmarking)), and comparisons with +bulk RNA-Seq data. Since each of these approaches suffers from specific limitations, it +is difficult to combine these different approaches into a single quantitative measure of denoising accuracy. 
Here, we instead rely on an approach termed molecular cross-validation (MCV), which was specifically developed to quantify denoising accuracy in the absence of a ground truth ([Batson et al., -2019](https://www.biorxiv.org/content/10.1101/786269v1)). In MCV, the observed molecules -in a given scRNA-Seq dataset are first partitioned between a *training* and a *test* -dataset. Next, a denoising method is applied to the training dataset. Finally, denoising -accuracy is measured by comparing the result to the test dataset. The authors show that -both in theory and in practice, the measured denoising accuracy is representative of the -accuracy that would be obtained on a ground truth dataset. - -## The metrics - -Metrics for data denoising aim to assess denoising accuracy by comparing the denoised -*training* set to the randomly sampled *test* set. - -* **MSE**: The mean squared error between the denoised counts of the training dataset - and the true counts of the test dataset after reweighting by the train/test ratio. -* **Poisson**: The Poisson log likelihood of observing the true counts of the test - dataset given the distribution given in the denoised dataset. +2019](https://openproblems.bio/bibliography#batson2019molecular)). In MCV, the observed +molecules in a given scRNA-Seq dataset are first partitioned between a *training* and a +*test* dataset. Next, a denoising method is applied to the training dataset. Finally, +denoising accuracy is measured by comparing the result to the test dataset. The authors +show that both in theory and in practice, the measured denoising accuracy is +representative of the accuracy that would be obtained on a ground truth dataset. ## API diff --git a/openproblems/tasks/denoising/api.py b/openproblems/tasks/denoising/api.py index ec52e4fb7f..816d7eec06 100644 --- a/openproblems/tasks/denoising/api.py +++ b/openproblems/tasks/denoising/api.py @@ -18,7 +18,7 @@ def check_dataset(adata): return True -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" assert "denoised" in adata.obsm assert isinstance(adata.obsm["denoised"], np.ndarray) diff --git a/openproblems/tasks/denoising/datasets/__init__.py b/openproblems/tasks/denoising/datasets/__init__.py index d9891bd987..6f8abdf0a7 100644 --- a/openproblems/tasks/denoising/datasets/__init__.py +++ b/openproblems/tasks/denoising/datasets/__init__.py @@ -1 +1,3 @@ +from .pancreas import pancreas from .pbmc import pbmc +from .tabula_muris_senis import tabula_muris_senis_lung_random diff --git a/openproblems/tasks/denoising/datasets/pancreas.py b/openproblems/tasks/denoising/datasets/pancreas.py new file mode 100644 index 0000000000..187b2e4106 --- /dev/null +++ b/openproblems/tasks/denoising/datasets/pancreas.py @@ -0,0 +1,20 @@ +from ....data.pancreas import load_pancreas +from ....tools.decorators import dataset +from . import utils + + +@dataset( + dataset_name="Pancreas (inDrop)", + data_url=load_pancreas.metadata["data_url"], + data_reference=load_pancreas.metadata["data_reference"], + dataset_summary=( + "Human pancreatic islet scRNA-seq data from 6 datasets across technologies" + " (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, and SMARTER-seq). Here" + " we just use the inDrop1 batch, which includes1937 cells × 15502 genes." 
+ ), + image="openproblems-python-pytorch", +) +def pancreas(test=False): + adata = load_pancreas(test=test, keep_techs=["inDrop1"]) + adata = utils.split_data(adata) + return adata diff --git a/openproblems/tasks/denoising/datasets/pbmc.py b/openproblems/tasks/denoising/datasets/pbmc.py index fcf0fc782b..92ed2c94de 100644 --- a/openproblems/tasks/denoising/datasets/pbmc.py +++ b/openproblems/tasks/denoising/datasets/pbmc.py @@ -8,10 +8,10 @@ data_url=load_tenx_1k_pbmc.metadata["data_url"], data_reference=load_tenx_1k_pbmc.metadata["data_reference"], dataset_summary=( - "1k Peripheral Blood Mononuclear Cells (PBMCs) from a healthy donor. " - "Sequenced on 10X v3 chemistry in November 2018 by 10X Genomics." + "1k Peripheral Blood Mononuclear Cells (PBMCs) from a healthy donor. Sequenced" + " on 10X v3 chemistry in November 2018 by 10X Genomics." ), - image="openproblems-python-extras", + image="openproblems-python-pytorch", ) def pbmc(test=False): adata = load_tenx_1k_pbmc(test=test) diff --git a/openproblems/tasks/denoising/datasets/tabula_muris_senis.py b/openproblems/tasks/denoising/datasets/tabula_muris_senis.py new file mode 100644 index 0000000000..8aca9661a2 --- /dev/null +++ b/openproblems/tasks/denoising/datasets/tabula_muris_senis.py @@ -0,0 +1,24 @@ +from ....data.tabula_muris_senis import load_tabula_muris_senis +from ....tools.decorators import dataset +from . import utils + + +@dataset( + "Tabula Muris Senis Lung", + data_url=load_tabula_muris_senis.metadata["data_url"], + data_reference=load_tabula_muris_senis.metadata["data_reference"], + dataset_summary=( + "All lung cells from Tabula Muris Senis, a 500k cell-atlas from 18 organs and" + " tissues across the mouse lifespan. Here we use just 10x data from lung. 24540" + " cells × 16160 genes across 3 time points." + ), + image="openproblems-python-pytorch", +) +def tabula_muris_senis_lung_random(test=False): + adata = load_tabula_muris_senis( + organ_list=["lung"], + method_list=["droplet"], + test=test, + ) + adata = utils.split_data(adata) + return adata diff --git a/openproblems/tasks/denoising/datasets/utils.py b/openproblems/tasks/denoising/datasets/utils.py index ec31002e72..3a91e9cebb 100644 --- a/openproblems/tasks/denoising/datasets/utils.py +++ b/openproblems/tasks/denoising/datasets/utils.py @@ -1,6 +1,5 @@ import anndata import numpy as np -import scipy.sparse def split_data( @@ -11,6 +10,7 @@ def split_data( Stores "train" and "test" dataset using the AnnData.obsm property. 
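# A minimal, illustrative sketch of the molecular cross-validation split described
# in the denoising README above: each observed count is partitioned binomially into
# a "train" and a "test" portion. split_data itself imports
# molecular_cross_validation.util for the real implementation; the helper below and
# its train_frac default are assumptions for illustration only.
import numpy as np


def binomial_split(counts, train_frac=0.9, seed=0):
    # Thin each count: every molecule is kept in "train" with probability train_frac,
    # and the remainder forms the held-out "test" counts.
    rng = np.random.RandomState(seed)
    counts = np.asarray(counts, dtype=int)
    train = rng.binomial(counts, train_frac)
    test = counts - train
    return train, test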
""" import molecular_cross_validation.util + import scipy.sparse random_state = np.random.RandomState(seed) diff --git a/openproblems/tasks/denoising/methods/__init__.py b/openproblems/tasks/denoising/methods/__init__.py index 611ee9f2dc..b0a3994a61 100644 --- a/openproblems/tasks/denoising/methods/__init__.py +++ b/openproblems/tasks/denoising/methods/__init__.py @@ -1,7 +1,13 @@ -from .alra import alra +from .alra import alra_log +from .alra import alra_log_reversenorm +from .alra import alra_sqrt +from .alra import alra_sqrt_reversenorm +from .baseline import no_denoising +from .baseline import perfect_denoising from .dca import dca from .knn_smoothing import knn_smoothing from .magic import knn_naive from .magic import magic from .magic import magic_approx -from .no_denoising import no_denoising +from .magic import magic_approx_reverse_norm +from .magic import magic_reverse_norm diff --git a/openproblems/tasks/denoising/methods/alra.py b/openproblems/tasks/denoising/methods/alra.py index a4d7058e3a..214dae5381 100644 --- a/openproblems/tasks/denoising/methods/alra.py +++ b/openproblems/tasks/denoising/methods/alra.py @@ -1,42 +1,71 @@ from ....tools.conversion import r_function from ....tools.decorators import method +import functools import logging -_alra = r_function("alra.R") +_r_alra = r_function("alra.R") log = logging.getLogger("openproblems") -@method( - method_name="ALRA", - paper_name="Zero-preserving imputation of scRNA-seq data using " - "low-rank approximation", - paper_url="https://doi.org/10.1101/397588", +_alra_method = functools.partial( + method, + method_summary=( + "ALRA (Adaptively-thresholded Low Rank Approximation) is a method for" + " imputation of missing values in single cell RNA-sequencing data. Given a" + " normalised scRNA-seq expression matrix, it first imputes values using rank-k" + " approximation, using singular value decomposition. Next, a symmetric" + " distribution is fitted to the near-zero imputed values for each gene (row) of" + " the matrix. The right “tail” of this distribution is then used to threshold" + " the accepted nonzero entries. This same threshold is then used to rescale the" + " matrix, once the “biological zeros” have been removed." 
+ ), + paper_name=( + "Zero-preserving imputation of scRNA-seq data using low-rank approximation" + ), + paper_reference="linderman2018zero", paper_year=2018, code_url="https://github.com/KlugerLab/ALRA", image="openproblems-r-extras", ) -def alra(adata, test=False): + + +def _alra(adata, normtype="log", reverse_norm_order=False, test=False): import numpy as np import rpy2.rinterface_lib.embedded import scprep - # libsize and sqrt norm - adata.obsm["train_norm"] = scprep.utils.matrix_transform( - adata.obsm["train"], np.sqrt - ) - adata.obsm["train_norm"], libsize = scprep.normalize.library_size_normalize( - adata.obsm["train_norm"], rescale=1, return_library_size=True - ) - adata.obsm["train_norm"] = adata.obsm["train_norm"].tocsr() + if normtype == "sqrt": + norm_fn = np.sqrt + denorm_fn = np.square + elif normtype == "log": + norm_fn = np.log1p + denorm_fn = np.expm1 + else: + raise NotImplementedError + + X = adata.obsm["train"].copy() + if reverse_norm_order: + # inexplicably, this sometimes performs better + X = scprep.utils.matrix_transform(X, norm_fn) + X, libsize = scprep.normalize.library_size_normalize( + X, rescale=1, return_library_size=True + ) + else: + X, libsize = scprep.normalize.library_size_normalize( + X, rescale=1, return_library_size=True + ) + X = scprep.utils.matrix_transform(X, norm_fn) + + adata.obsm["train_norm"] = X.tocsr() # run alra - # _alra takes sparse array, returns dense array + # _r_alra takes sparse array, returns dense array Y = None attempts = 0 while Y is None: try: - Y = _alra(adata) + Y = _r_alra(adata) except rpy2.rinterface_lib.embedded.RRuntimeError: # pragma: no cover if attempts < 10: attempts += 1 @@ -45,9 +74,38 @@ def alra(adata, test=False): raise # transform back into original space - Y = scprep.utils.matrix_transform(Y, np.square) + # functions are reversed! 
+ Y = scprep.utils.matrix_transform(Y, denorm_fn) Y = scprep.utils.matrix_vector_elementwise_multiply(Y, libsize, axis=0) adata.obsm["denoised"] = Y adata.uns["method_code_version"] = "1.0.0" return adata + + +@_alra_method( + method_name="ALRA (sqrt norm, reversed normalization)", +) +def alra_sqrt_reversenorm(adata, test=False): + return _alra(adata, normtype="sqrt", reverse_norm_order=True, test=False) + + +@_alra_method( + method_name="ALRA (log norm, reversed normalization)", +) +def alra_log_reversenorm(adata, test=False): + return _alra(adata, normtype="log", reverse_norm_order=True, test=False) + + +@_alra_method( + method_name="ALRA (sqrt norm)", +) +def alra_sqrt(adata, test=False): + return _alra(adata, normtype="sqrt", reverse_norm_order=False, test=False) + + +@_alra_method( + method_name="ALRA (log norm)", +) +def alra_log(adata, test=False): + return _alra(adata, normtype="log", reverse_norm_order=False, test=False) diff --git a/openproblems/tasks/denoising/methods/baseline.py b/openproblems/tasks/denoising/methods/baseline.py new file mode 100644 index 0000000000..c9003b525d --- /dev/null +++ b/openproblems/tasks/denoising/methods/baseline.py @@ -0,0 +1,24 @@ +from ....tools.decorators import baseline_method +from ....tools.utils import check_version + + +@baseline_method( + method_name="No denoising", + method_summary="Denoised outputs are defined from the unmodified input data.", +) +def no_denoising(adata, test=False): + """Do nothing.""" + adata.obsm["denoised"] = adata.obsm["train"].toarray() + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="Perfect denoising", + method_summary="Denoised outputs are defined from the target data.", +) +def perfect_denoising(adata, test=False): + """Cheat.""" + adata.obsm["denoised"] = adata.obsm["test"].toarray() + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/denoising/methods/dca.py b/openproblems/tasks/denoising/methods/dca.py index eee6c78354..b72474baf7 100644 --- a/openproblems/tasks/denoising/methods/dca.py +++ b/openproblems/tasks/denoising/methods/dca.py @@ -1,18 +1,19 @@ from ....tools.decorators import method from ....tools.utils import check_version -import scanpy as sc - def _dca(adata, test=False, epochs=None): + from dca.api import dca + + import anndata + if test: epochs = epochs or 30 else: # pragma: nocover epochs = epochs or 300 - from dca.api import dca # make adata object with train counts - adata_train = sc.AnnData(adata.obsm["train"]) + adata_train = anndata.AnnData(adata.obsm["train"]) # run DCA dca(adata_train, epochs=epochs) @@ -25,11 +26,19 @@ def _dca(adata, test=False, epochs=None): @method( method_name="DCA", + method_summary=( + "DCA (Deep Count Autoencoder) is a method to remove the effect of dropout in" + " scRNA-seq data. DCA takes into account the count structure, overdispersed" + " nature and sparsity of scRNA-seq datatypes using a deep autoencoder with a" + " zero-inflated negative binomial (ZINB) loss. The autoencoder is then applied" + " to the dataset, where the mean of the fitted negative binomial distributions" + " is used to fill each entry of the imputed matrix." 
+ ), paper_name="Single-cell RNA-seq denoising using a deep count autoencoder", - paper_url="https://www.nature.com/articles/s41467-018-07931-2", + paper_reference="eraslan2019single", paper_year=2019, code_url="https://github.com/theislab/dca", - image="openproblems-python-tf2.4", + image="openproblems-python-tensorflow", ) def dca(adata, test=False, epochs=None): return _dca(adata, test=test, epochs=epochs) diff --git a/openproblems/tasks/denoising/methods/knn_smoothing.py b/openproblems/tasks/denoising/methods/knn_smoothing.py index 6d8aa86d4c..997d9fc496 100644 --- a/openproblems/tasks/denoising/methods/knn_smoothing.py +++ b/openproblems/tasks/denoising/methods/knn_smoothing.py @@ -3,10 +3,24 @@ @method( - method_name="KNN smoothing", - paper_name="K-nearest neighbor smoothing for high-throughput " - "single-cell RNA-Seq data", - paper_url="https://doi.org/10.1101/217737", + method_name="Iterative KNN smoothing", + method_summary=( + "Iterative kNN-smoothing is a method to repair or denoise noisy scRNA-seq" + " expression matrices. Given a scRNA-seq expression matrix, KNN-smoothing first" + " applies initial normalisation and smoothing. Then, a chosen number of" + " principal components is used to calculate Euclidean distances between cells." + " Minimally sized neighbourhoods are initially determined from these Euclidean" + " distances, and expression profiles are shared between neighbouring cells." + " Then, the resultant smoothed matrix is used as input to the next step of" + " smoothing, where the size (k) of the considered neighbourhoods is increased," + " leading to greater smoothing. This process continues until a chosen maximum k" + " value has been reached, at which point the iteratively smoothed object is" + " then optionally scaled to yield a final result." + ), + paper_name=( + "K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data" + ), + paper_reference="wagner2018knearest", paper_year=2018, code_url="https://github.com/yanailab/knn-smoothing", image="openproblems-python-extras", diff --git a/openproblems/tasks/denoising/methods/magic.py b/openproblems/tasks/denoising/methods/magic.py index fa1feae071..5b99118b4b 100644 --- a/openproblems/tasks/denoising/methods/magic.py +++ b/openproblems/tasks/denoising/methods/magic.py @@ -1,3 +1,4 @@ +from ....tools.decorators import baseline_method from ....tools.decorators import method from ....tools.utils import check_version @@ -7,16 +8,30 @@ _magic_method = functools.partial( method, - paper_name="Recovering Gene Interactions from Single-Cell Data " - "Using Data Diffusion", - paper_url="https://doi.org/10.1016/j.cell.2018.05.061", + method_summary=( + "MAGIC (Markov Affinity-based Graph Imputation of Cells) is a method for" + " imputation and denoising of noisy or dropout-prone single cell RNA-sequencing" + " data. Given a normalised scRNA-seq expression matrix, it first calculates" + " Euclidean distances between each pair of cells in the dataset, which is then" + " augmented using a Gaussian kernel (function) and row-normalised to give a" + " normalised affinity matrix. A t-step markov process is then calculated, by" + " powering this affinity matrix t times. Finally, the powered affinity matrix" + " is right-multiplied by the normalised data, causing the final imputed values" + " to take the value of a per-gene average weighted by the affinities of cells." + " The resultant imputed matrix is then rescaled, to more closely match the" + " magnitude of measurements in the normalised (input) matrix." 
+ ), + paper_name=( + "Recovering Gene Interactions from Single-Cell Data Using Data Diffusion" + ), + paper_reference="van2018recovering", paper_year=2018, code_url="https://github.com/KrishnaswamyLab/MAGIC", image="openproblems-python-extras", ) -def _magic(adata, solver, normtype="sqrt", **kwargs): +def _magic(adata, solver, normtype="sqrt", reverse_norm_order=False, **kwargs): from magic import MAGIC if normtype == "sqrt": @@ -28,11 +43,19 @@ def _magic(adata, solver, normtype="sqrt", **kwargs): else: raise NotImplementedError - X, libsize = scprep.normalize.library_size_normalize( - adata.obsm["train"], rescale=1, return_library_size=True - ) + X = adata.obsm["train"] + if reverse_norm_order: + # inexplicably, this sometimes performs better + X = scprep.utils.matrix_transform(X, norm_fn) + X, libsize = scprep.normalize.library_size_normalize( + X, rescale=1, return_library_size=True + ) + else: + X, libsize = scprep.normalize.library_size_normalize( + X, rescale=1, return_library_size=True + ) + X = scprep.utils.matrix_transform(X, norm_fn) - X = scprep.utils.matrix_transform(X, norm_fn) Y = MAGIC(solver=solver, **kwargs, verbose=False).fit_transform( X, genes="all_genes" ) @@ -52,6 +75,13 @@ def magic(adata, test=False): return _magic(adata, solver="exact", normtype="sqrt") +@_magic_method( + method_name="MAGIC (reversed normalization)", +) +def magic_reverse_norm(adata, test=False): + return _magic(adata, solver="exact", normtype="sqrt", reverse_norm_order=True) + + @_magic_method( method_name="MAGIC (approximate)", ) @@ -59,12 +89,23 @@ def magic_approx(adata, test=False): return _magic(adata, solver="approximate", normtype="sqrt") -@method( - method_name="KNN Smoothing", - paper_name="KNN Smoothing (baseline)", - paper_url="https://openproblems.bio", - paper_year=2022, - code_url="https://github.com/openproblems-bio/openproblems", +@_magic_method( + method_name="MAGIC (approximate, reversed normalization)", +) +def magic_approx_reverse_norm(adata, test=False): + return _magic(adata, solver="approximate", normtype="sqrt", reverse_norm_order=True) + + +@baseline_method( + method_name="KNN smoothing", + method_summary=( + "KNN-smoothing is a method for denoising data based on the k-nearest" + " neighbours. Given a normalised scRNA-seq matrix, KNN-smoothing calculates a" + " k-nearest neighbour matrix using Euclidean distances between cell pairs. Each" + " cell’s denoised expression is then defined as the average expression of each" + " of its neighbours." 
+ ), + is_baseline=False, image="openproblems-python-extras", ) def knn_naive(adata, test=False): diff --git a/openproblems/tasks/denoising/methods/no_denoising.py b/openproblems/tasks/denoising/methods/no_denoising.py deleted file mode 100644 index 40a369af49..0000000000 --- a/openproblems/tasks/denoising/methods/no_denoising.py +++ /dev/null @@ -1,16 +0,0 @@ -from ....tools.decorators import method -from ....tools.utils import check_version - - -@method( - method_name="No denoising", - paper_name="Molecular Cross-Validation for Single-Cell RNA-seq", - paper_url="https://doi.org/10.1101/786269", - paper_year=2019, - code_url="https://github.com/czbiohub/molecular-cross-validation", -) -def no_denoising(adata, test=False): - """Do nothing.""" - adata.obsm["denoised"] = adata.obsm["train"].toarray() - adata.uns["method_code_version"] = check_version("openproblems") - return adata diff --git a/openproblems/tasks/denoising/metrics/mse.py b/openproblems/tasks/denoising/metrics/mse.py index 63c8f17a16..eb4bf6c85b 100644 --- a/openproblems/tasks/denoising/metrics/mse.py +++ b/openproblems/tasks/denoising/metrics/mse.py @@ -1,13 +1,21 @@ from ....tools.decorators import metric -import anndata -import scanpy as sc -import sklearn.metrics - -@metric(metric_name="Mean-squared error", maximize=False) +@metric( + metric_name="Mean-squared error", + metric_summary=( + "The mean squared error between the denoised counts of the training dataset and" + " the true counts of the test dataset after reweighting by the train/test" + " ratio." + ), + paper_reference="batson2019molecular", + maximize=False, +) def mse(adata): + import anndata + import scanpy as sc import scprep + import sklearn.metrics test_data = anndata.AnnData(X=adata.obsm["test"], obs=adata.obs, var=adata.var) denoised_data = anndata.AnnData( diff --git a/openproblems/tasks/denoising/metrics/poisson.py b/openproblems/tasks/denoising/metrics/poisson.py index e4f0f6a749..b6f8aa6b95 100644 --- a/openproblems/tasks/denoising/metrics/poisson.py +++ b/openproblems/tasks/denoising/metrics/poisson.py @@ -1,12 +1,21 @@ from ....tools.decorators import metric -import scprep - -@metric(metric_name="Poisson loss", maximize=False, image="openproblems-python-extras") +@metric( + metric_name="Poisson loss", + metric_summary=( + "The Poisson log likelihood of observing the true counts of the test dataset" + " given the distribution given in the denoised dataset." + ), + paper_reference="batson2019molecular", + maximize=False, + image="openproblems-python-pytorch", +) def poisson(adata): from molecular_cross_validation.mcv_sweep import poisson_nll_loss + import scprep + test_data = adata.obsm["test"] denoised_data = adata.obsm["denoised"] diff --git a/openproblems/tasks/dimensionality_reduction/README.md b/openproblems/tasks/dimensionality_reduction/README.md index 96376edad1..4a82604d30 100644 --- a/openproblems/tasks/dimensionality_reduction/README.md +++ b/openproblems/tasks/dimensionality_reduction/README.md @@ -1,7 +1,5 @@ # Dimensionality reduction for visualisation -## The task - Dimensionality reduction is one of the key challenges in single-cell data representation. Routine single-cell RNA sequencing (scRNA-seq) experiments measure cells in roughly 20,000-30,000 dimensions (i.e., features - mostly gene transcripts but also @@ -9,8 +7,8 @@ other functional elements encoded in mRNA such as lncRNAs). Since its inception, scRNA-seq experiments have been growing in terms of the number of cells measured. 
Originally, cutting-edge SmartSeq experiments would yield a few hundred cells, at best. Now, it is not uncommon to see experiments that yield over [100,000 -cells]() or even [> 1 million -cells.](https://doi.org/10.1126/science.aba7721) +cells](https://openproblems.bio/bibliography#tabula2018single) or even [> 1 million +cells.](https://openproblems.bio/bibliography#cao2020human) Each *feature* in a dataset functions as a single dimension. While each of the ~30,000 dimensions measured in each cell contribute to an underlying data structure, the overall @@ -21,27 +19,22 @@ high dimensional data don’t distinguish data points well). Thus, we need to fi to [dimensionally reduce](https://en.wikipedia.org/wiki/Dimensionality_reduction) the data for visualization and interpretation. -## The metrics - -* **Root mean square error**: the square root of the mean squared difference between - Euclidean distances in the high-dimensional data and Euclidean distances in the - dimension-reduced data. -* **Trustworthiness**: a measurement of similarity between the rank of each point's - nearest neighbors in the high-dimensional data and the reduced data ([Venna & Kaski, - 2001](http://dx.doi.org/10.1007/3-540-44668-0_68)). -* **Density preservation**: similarity between local densities in the high-dimensional - data and the reduced data ([Narayan, Berger & Cho, - 2020](https://doi.org/10.1038/s41587-020-00801-7)) -* **NN Ranking**: a set of metrics from - [pyDRMetrics](https://doi.org/10.17632/jbjd5fmggh.2) relating to the preservation - of nearest neighbors in the high-dimensional data and the reduced data. - ## API -**Datasets** should provide un-normalized raw counts in `adata.X`. +WARNING: unlike most tasks, `adata.X` should contain log CP10k-normalized data. + This is because the ground-truth metrics are computed on normalized data, + which means methods that use this same normalization are likely to score more + highly on these metrics. + +**Datasets** should provide *log CP10k normalized counts* in `adata.X` and store the +original number of genes (i.e., `adata.shape[1]`) in `adata.uns["n_genes"]`. Datasets +should also contain the nearest-neighbor ranking matrix, required for the `nn_ranking` +metrics, as computed by `_utils.ranking_matrix(adata.X)` on normalized counts. **Methods** should assign dimensionally-reduced 2D embedding coordinates to -`adata.obsm['X_emb']`. +`adata.obsm['X_emb']`. They *should not* modify the dimensionality of `adata.X` (e.g. +by subsetting to highly variable features); any such subsetting should be done on a local copy of the +data without modifying the AnnData object that is returned. **Metrics** should calculate the quality or "goodness of fit" of a dimensional reduction **method**. If the un-normalized input counts matrix is required by the matrix it can be @@ -53,14 +46,13 @@ Different methods can require different pre-processing of the data. Standard pre-processing functions are available as part of the `tools` module. Where possible each **method** should first call one of these functions and use the processed `adata.X` slot as the input to the method. Raw counts are also stored in `adata.layers["counts"]` -by the standard pre-processing functions, if a method performs its own pre-processing it -should also do this for use by metrics.
For most methods a standard pre-processing with -the `log_cpm_hvg()` function is used which normalizes the expression matrix to counts -per million (CPM), performs a log transformation and subsets the data to highly-variable -genes (HVGs) as selected by scanpy's `high_variable_genes(adata, n_top_genes=n_genes, -flavor="cell_ranger")` (1000 genes by default). Variants of methods can be created by -applying different pre-processing prior to the method itself (see `phate.py` for an -example). +by the standard pre-processing functions; methods that perform their own pre-processing should do the same. +For most methods, the standard pre-processing from `log_cp10k()`, which normalizes the +expression matrix to counts per 10,000 (CP10k) and log-transforms it, can be used directly via `adata.X`. +Variants of methods can be created by applying different pre-processing prior to the +method itself (see `phate.py` for an example). *Note that using a normalization method +different from that used for the metrics (log CP10k) may lead to artificially poor method +performance.* ## The methods @@ -127,7 +119,7 @@ from [umap-learn](https://umap-learn.readthedocs.io/en/latest/densmap_demo.html) **Variants:** -* The (logCPM-normalized, 1000 HVG) expression matrix +* The (logCP10k-normalized, 1000 HVG) expression matrix * 50 principal components ### Potential of heat-diffusion for affinity-based transition embedding (PHATE) @@ -146,8 +138,8 @@ This implementation is from the [phate package](https://phate.readthedocs.io/en/ **Variants:** -* The square-root CPM transformed expression matrix -* 50 principal components of the logCPM-normalised, 1000 HVG expression matrix +* The square-root CP10k transformed expression matrix +* 50 principal components of the logCP10k-normalised, 1000 HVG expression matrix ### ivis @@ -166,7 +158,7 @@ package](https://neuralee.readthedocs.io/en/latest/).
**Variants:** * Scaled 500 HVGs from a logged expression matrix (no library size normalization) -* LogCPM-normalised, 1000 HVG expression matrix +* LogCP10k-normalised, 1000 HVG expression matrix ### scvis diff --git a/openproblems/tasks/dimensionality_reduction/__init__.py b/openproblems/tasks/dimensionality_reduction/__init__.py index c32292b3b7..6b6af5d52a 100644 --- a/openproblems/tasks/dimensionality_reduction/__init__.py +++ b/openproblems/tasks/dimensionality_reduction/__init__.py @@ -9,7 +9,7 @@ "Reduction of high-dimensional datasets to 2D for visualization & interpretation" ) -DEFAULT_LAYER = "counts" +DEFAULT_LAYER = "log_cp10k" DATASETS = utils.get_callable_members(datasets) METHODS = utils.get_callable_members(methods) diff --git a/openproblems/tasks/dimensionality_reduction/_utils.py b/openproblems/tasks/dimensionality_reduction/_utils.py new file mode 100644 index 0000000000..701621b1fb --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/_utils.py @@ -0,0 +1,27 @@ +from numba import njit + +import numpy as np + + +@njit(cache=True, fastmath=True) +def _ranking_matrix(D: np.ndarray) -> np.ndarray: # pragma: no cover + assert D.shape[0] == D.shape[1] + R = np.zeros(D.shape) + m = len(R) + ks = np.arange(m) + + for i in range(m): + for j in range(m): + R[i, j] = np.sum( + (D[i, :] < D[i, j]) | ((ks < j) & (np.abs(D[i, :] - D[i, j]) <= 1e-12)) + ) + + return R + + +def ranking_matrix(X): + from sklearn.metrics import pairwise_distances + + D = pairwise_distances(X) + R = _ranking_matrix(D) + return R diff --git a/openproblems/tasks/dimensionality_reduction/api.py b/openproblems/tasks/dimensionality_reduction/api.py index 5b5634836e..4fff9852a8 100644 --- a/openproblems/tasks/dimensionality_reduction/api.py +++ b/openproblems/tasks/dimensionality_reduction/api.py @@ -1,19 +1,27 @@ from ...data.sample import load_sample_data from ...tools.decorators import dataset +from ...tools.normalize import log_cp10k +from . 
import _utils import numpy as np -import scanpy as sc def check_dataset(adata): """Check that dataset output fits expected API.""" + assert "n_genes" in adata.uns + assert adata.uns["n_genes"] == adata.shape[1] return True -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" + # check adata.X has not changed + assert adata.uns["n_genes"] == adata.shape[1] + assert adata.X is adata.layers["log_cp10k"] + # check output assert "X_emb" in adata.obsm - assert adata.obsm["X_emb"].shape[1] == 2 + if not is_baseline: + assert adata.obsm["X_emb"].shape[1] == 2 assert np.all(np.isfinite(adata.obsm["X_emb"])) return True @@ -21,11 +29,17 @@ def check_method(adata): @dataset() def sample_dataset(): """Create a simple dataset to use for testing methods in this task.""" - return load_sample_data() + adata = load_sample_data() + adata = log_cp10k(adata) + adata.uns["n_genes"] = adata.shape[1] + adata.obsm["X_ranking"] = _utils.ranking_matrix(adata.X) + return adata def sample_method(adata): """Create sample method output for testing metrics in this task.""" + import scanpy as sc + sc.tl.pca(adata) adata.obsm["X_emb"] = adata.obsm["X_pca"][:, :2] return adata diff --git a/openproblems/tasks/dimensionality_reduction/datasets/__init__.py b/openproblems/tasks/dimensionality_reduction/datasets/__init__.py index 0c41aaa00b..14d605081e 100644 --- a/openproblems/tasks/dimensionality_reduction/datasets/__init__.py +++ b/openproblems/tasks/dimensionality_reduction/datasets/__init__.py @@ -1,3 +1,4 @@ -from .mouse_blood_olssen_labelled import olsson_2016_mouse_blood +from .mouse_blood_olsson_labelled import olsson_2016_mouse_blood from .mouse_hspc_nestorowa2016 import mouse_hspc_nestorowa2016 from .tenx_5k_pbmc import tenx_5k_pbmc +from .zebrafish import zebrafish_labs diff --git a/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olssen_labelled.py b/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olssen_labelled.py deleted file mode 100644 index bb3b3ccb94..0000000000 --- a/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olssen_labelled.py +++ /dev/null @@ -1,13 +0,0 @@ -from ....data.mouse_blood_olssen_labelled import load_olsson_2016_mouse_blood -from ....tools.decorators import dataset - - -@dataset( - "Mouse myeloid lineage differentiation", - data_url=load_olsson_2016_mouse_blood.metadata["data_url"], - data_reference=load_olsson_2016_mouse_blood.metadata["data_reference"], - dataset_summary="Myeloid lineage differentiation from mouse blood. " - "Sequenced by SMARTseq in 2016 by Olsson et al.", -) -def olsson_2016_mouse_blood(test=False): - return load_olsson_2016_mouse_blood(test=test) diff --git a/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olsson_labelled.py b/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olsson_labelled.py new file mode 100644 index 0000000000..2d916fdd96 --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/datasets/mouse_blood_olsson_labelled.py @@ -0,0 +1,21 @@ +from ....data.mouse_blood_olsson_labelled import load_olsson_2016_mouse_blood +from ....tools.decorators import dataset +from ....tools.normalize import log_cp10k +from .._utils import ranking_matrix + + +@dataset( + "Mouse myeloid lineage differentiation", + data_url=load_olsson_2016_mouse_blood.metadata["data_url"], + data_reference=load_olsson_2016_mouse_blood.metadata["data_reference"], + dataset_summary=( + "Myeloid lineage differentiation from mouse blood. 
Sequenced by SMARTseq in" + " 2016 by Olsson et al. 660 cells x 112815 features with 4 cell type labels" + ), +) +def olsson_2016_mouse_blood(test=False): + adata = load_olsson_2016_mouse_blood(test=test) + adata.uns["n_genes"] = adata.shape[1] + adata = log_cp10k(adata) + adata.obsm["X_ranking"] = ranking_matrix(adata.X) + return adata diff --git a/openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py b/openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py index b010474aa0..8e1bc3c15b 100644 --- a/openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py +++ b/openproblems/tasks/dimensionality_reduction/datasets/mouse_hspc_nestorowa2016.py @@ -1,13 +1,21 @@ from ....data.mouse_hspc_nestorowa2016 import load_mouse_hspc_nestorowa2016 from ....tools.decorators import dataset +from ....tools.normalize import log_cp10k +from .._utils import ranking_matrix @dataset( "Mouse hematopoietic stem cell differentiation", data_url=load_mouse_hspc_nestorowa2016.metadata["data_url"], data_reference=load_mouse_hspc_nestorowa2016.metadata["data_reference"], - dataset_summary="1.6k hematopoietic stem and progenitor cells from mouse bone " - "marrow. Sequenced by Smart-seq2.", + dataset_summary=( + "1.6k hematopoietic stem and progenitor cells from mouse bone marrow. Sequenced" + " by Smart-seq2. 1920 cells x 43258 features with 3 cell type labels" + ), ) def mouse_hspc_nestorowa2016(test=False): - return load_mouse_hspc_nestorowa2016(test=test) + adata = load_mouse_hspc_nestorowa2016(test=test) + adata.uns["n_genes"] = adata.shape[1] + adata = log_cp10k(adata) + adata.obsm["X_ranking"] = ranking_matrix(adata.X) + return adata diff --git a/openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py b/openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py index 3b94989deb..a18e67a719 100644 --- a/openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py +++ b/openproblems/tasks/dimensionality_reduction/datasets/tenx_5k_pbmc.py @@ -1,5 +1,7 @@ from ....data.tenx import load_tenx_5k_pbmc from ....tools.decorators import dataset +from ....tools.normalize import log_cp10k +from .._utils import ranking_matrix @dataset( @@ -7,9 +9,14 @@ data_url=load_tenx_5k_pbmc.metadata["data_url"], data_reference=load_tenx_5k_pbmc.metadata["data_reference"], dataset_summary=( - "5k Peripheral Blood Mononuclear Cells (PBMCs) from a healthy donor. " - "Sequenced on 10X v3 chemistry in July 2019 by 10X Genomics." + "5k Peripheral Blood Mononuclear Cells (PBMCs) from a healthy donor. Sequenced" + " on 10X v3 chemistry in July 2019 by 10X Genomics. 
5247 cells x 20822 features" + " with no cell type labels" ), ) def tenx_5k_pbmc(test=False): - return load_tenx_5k_pbmc(test=test) + adata = load_tenx_5k_pbmc(test=test) + adata.uns["n_genes"] = adata.shape[1] + adata = log_cp10k(adata) + adata.obsm["X_ranking"] = ranking_matrix(adata.X) + return adata diff --git a/openproblems/tasks/dimensionality_reduction/datasets/zebrafish.py b/openproblems/tasks/dimensionality_reduction/datasets/zebrafish.py new file mode 100644 index 0000000000..369a589c53 --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/datasets/zebrafish.py @@ -0,0 +1,28 @@ +from ....data.zebrafish import load_zebrafish +from ....tools.decorators import dataset +from ....tools.normalize import log_cp10k +from .._utils import ranking_matrix + + +@dataset( + "Zebrafish", + data_url=load_zebrafish.metadata["data_url"], + data_reference=load_zebrafish.metadata["data_reference"], + dataset_summary=( + "90k cells from zebrafish embryos throughout the first day of development, with" + " and without a knockout of chordin, an important developmental gene." + " Dimensions: 26022 cells, 25258 genes. 24 cell types (avg. 1084±1156 cells per" + " cell type)." + ), +) +def zebrafish_labs(test=False): + import scanpy as sc + + adata = load_zebrafish(test=test) + if not test: + # this dataset is too big + sc.pp.subsample(adata, n_obs=25000) + adata.uns["n_genes"] = adata.shape[1] + adata = log_cp10k(adata) + adata.obsm["X_ranking"] = ranking_matrix(adata.X) + return adata diff --git a/openproblems/tasks/dimensionality_reduction/methods/__init__.py b/openproblems/tasks/dimensionality_reduction/methods/__init__.py index 4240a96951..acc9e64a78 100644 --- a/openproblems/tasks/dimensionality_reduction/methods/__init__.py +++ b/openproblems/tasks/dimensionality_reduction/methods/__init__.py @@ -1,10 +1,26 @@ -from .densmap import densmap_logCPM_1kHVG -from .densmap import densmap_pca_logCPM_1kHVG +from .baseline import random_features +from .baseline import spectral_features +from .baseline import true_features +from .diffusion_map import diffusion_map from .neuralee import neuralee_default -from .neuralee import neuralee_logCPM_1kHVG -from .pca import pca_logCPM_1kHVG +from .neuralee import neuralee_logCP10k_1kHVG +from .pca import pca_logCP10k +from .pca import pca_logCP10k_1kHVG from .phate import phate_default -from .phate import phate_logCPM_1kHVG +from .phate import phate_logCP10k +from .phate import phate_logCP10k_1kHVG from .phate import phate_sqrt -from .tsne import tsne_logCPM_1kHVG -from .umap import umap_logCPM_1kHVG +from .pymde import pymde_distances_log_cp10k +from .pymde import pymde_distances_log_cp10k_hvg +from .pymde import pymde_neighbors_log_cp10k +from .pymde import pymde_neighbors_log_cp10k_hvg +from .tsne import tsne_logCP10k +from .tsne import tsne_logCP10k_1kHVG +from .umap import densmap_logCP10k +from .umap import densmap_logCP10k_1kHVG +from .umap import densmap_pca_logCP10k +from .umap import densmap_pca_logCP10k_1kHVG +from .umap import umap_logCP10k +from .umap import umap_logCP10k_1kHVG +from .umap import umap_pca_logCP10k +from .umap import umap_pca_logCP10k_1kHVG diff --git a/openproblems/tasks/dimensionality_reduction/methods/baseline.py b/openproblems/tasks/dimensionality_reduction/methods/baseline.py new file mode 100644 index 0000000000..849efd853d --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/methods/baseline.py @@ -0,0 +1,64 @@ +from ....tools.decorators import baseline_method +from ....tools.normalize import log_cp10k +from 
....tools.utils import check_version +from .diffusion_map import diffusion_map +from typing import Optional + +import numpy as np + + +@baseline_method( + method_name="Random Features", + method_summary=( + "Randomly generated two-dimensional coordinates from a normal distribution." + ), +) +def random_features(adata, test=False): + adata.obsm["X_emb"] = np.random.normal(0, 1, (adata.shape[0], 2)) + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="True Features", + method_summary="Use of the original feature inputs as the 'embedding'.", +) +def true_features(adata, test=False): + adata.obsm["X_emb"] = adata.X + if test: + adata.obsm["X_emb"] = adata.obsm["X_emb"][:, :100] + + adata.obsm["X_emb"] = adata.obsm["X_emb"].toarray() + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="True Features (logCP10k)", + method_summary="Use of the original feature inputs as the 'embedding'.", +) +def true_features_log_cp10k(adata, test=False): + adata = log_cp10k(adata) + adata.obsm["X_emb"] = adata.X + if test: + adata.obsm["X_emb"] = adata.obsm["X_emb"][:, :100] + + adata.obsm["X_emb"] = adata.obsm["X_emb"].toarray() + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="Spectral Features", + method_summary="Use 1000-dimensional diffusions maps as an embedding", +) +def spectral_features(adata, test=False, n_comps: Optional[int] = None): + + if test: + n_comps = n_comps or 20 + else: + n_comps = n_comps or 1000 + + n_comps = min(n_comps, min(adata.shape) - 2) + + return diffusion_map(adata, n_comps=n_comps) diff --git a/openproblems/tasks/dimensionality_reduction/methods/densmap.py b/openproblems/tasks/dimensionality_reduction/methods/densmap.py deleted file mode 100644 index 900e9f78f2..0000000000 --- a/openproblems/tasks/dimensionality_reduction/methods/densmap.py +++ /dev/null @@ -1,41 +0,0 @@ -from ....tools.decorators import method -from ....tools.normalize import log_cpm_hvg -from ....tools.utils import check_version - -import functools -import scanpy as sc - -_densmap_method = functools.partial( - method, - paper_name="Assessing single-cell transcriptomic variability through" - " density-preserving data visualization", - paper_url="https://www.nature.com/articles/s41587-020-00801-7", - paper_year=2021, - code_url="https://github.com/lmcinnes/umap", - image="openproblems-python-extras", -) - - -def _densmap(adata, obsm=None): - from umap import UMAP - - if obsm: - X = adata.obsm[obsm] - else: - X = adata.X - adata.obsm["X_emb"] = UMAP(densmap=True, random_state=42).fit_transform(X) - adata.uns["method_code_version"] = check_version("umap-learn") - return adata - - -@_densmap_method(method_name="densMAP (logCPM, 1kHVG)") -def densmap_logCPM_1kHVG(adata, test: bool = False): - adata = log_cpm_hvg(adata) - return _densmap(adata) - - -@_densmap_method(method_name="densMAP PCA (logCPM, 1kHVG)") -def densmap_pca_logCPM_1kHVG(adata, test: bool = False): - adata = log_cpm_hvg(adata) - sc.tl.pca(adata, n_comps=50, svd_solver="arpack") - return _densmap(adata, obsm="X_pca") diff --git a/openproblems/tasks/dimensionality_reduction/methods/diffusion_map.py b/openproblems/tasks/dimensionality_reduction/methods/diffusion_map.py new file mode 100644 index 0000000000..429c047b88 --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/methods/diffusion_map.py @@ -0,0 +1,68 @@ +from ....tools.decorators 
import method +from ....tools.normalize import log_cp10k +from ....tools.utils import check_version + + +def _diffusion_map(graph, n_comps, t, n_retries=1): + import numpy as np + import scipy.sparse.linalg + + diag_data = np.asarray(graph.sum(axis=0)) + identity = scipy.sparse.identity(graph.shape[0], dtype=np.float64) + diag = scipy.sparse.spdiags( + 1.0 / np.sqrt(diag_data), 0, graph.shape[0], graph.shape[0] + ) + laplacian = identity - diag * graph * diag + num_lanczos_vectors = max(2 * n_comps + 1, int(np.sqrt(graph.shape[0]))) + try: + eigenvalues, eigenvectors = scipy.sparse.linalg.eigsh( + laplacian, + n_comps, + which="SM", + ncv=num_lanczos_vectors, + tol=1e-4, + v0=np.ones(laplacian.shape[0]), + maxiter=graph.shape[0] * 5, + ) + return (eigenvalues**t) * eigenvectors + except scipy.sparse.linalg.ArpackNoConvergence: + if n_retries > 0: + # add some noise and try again + graph_rand = graph.copy().tocoo() + graph_rand.row = np.random.choice( + graph_rand.shape[0], len(graph_rand.row), replace=True + ) + graph_rand.data *= 0.01 + return _diffusion_map( + graph + graph_rand, n_comps, t, n_retries=n_retries - 1 + ) + else: + raise + + +@method( + method_name="Diffusion maps", + method_summary=( + "Diffusion maps uses an affinity matrix to describe the similarity between data" + " points, which is then transformed into a graph Laplacian. The" + " eigenvalue-weighted eigenvectors of the graph Laplacian are then used to" + " create the embedding. Diffusion maps is calculated on the logCPM expression" + " matrix." + ), + paper_reference="coifman2006diffusion", + paper_name="Diffusion maps", + paper_year=2006, + code_url="https://github.com/openproblems-bio/openproblems", +) +def diffusion_map( + adata, n_comps: int = 2, t: int = 1, test: bool = False, n_retries: int = 1 +): + import umap + + adata = log_cp10k(adata) + + graph = umap.UMAP(transform_mode="graph").fit_transform(adata.X) + + adata.obsm["X_emb"] = _diffusion_map(graph, n_comps, t, n_retries=n_retries) + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/dimensionality_reduction/methods/neuralee.py b/openproblems/tasks/dimensionality_reduction/methods/neuralee.py index 279123b42f..750537aa5c 100644 --- a/openproblems/tasks/dimensionality_reduction/methods/neuralee.py +++ b/openproblems/tasks/dimensionality_reduction/methods/neuralee.py @@ -1,5 +1,5 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm_hvg +from ....tools.normalize import log_cp10k_hvg from ....tools.utils import check_version from anndata import AnnData from typing import Optional @@ -12,13 +12,24 @@ _neuralee_method = functools.partial( method, - paper_name="NeuralEE: A GPU-Accelerated Elastic Embedding " - "Dimensionality Reduction Method for " - "Visualizing Large-Scale scRNA-Seq Data", - paper_url="https://www.frontiersin.org/articles/10.3389/fgene.2020.00786/full", + method_summary=( + "NeuralEE is a neural network implementation of elastic embedding. It is a" + " non-linear method that preserves pairwise distances between data points." + " NeuralEE uses a neural network to optimize an objective function that" + " measures the difference between pairwise distances in the original" + " high-dimensional space and the two-dimensional space. It is computed on both" + " the recommended input from the package authors of 500 HVGs selected from a" + " logged expression matrix (without sequencing depth scaling) and the default" + " logCPM matrix with 1000 HVGs." 
+ ), + paper_name=( + "NeuralEE: A GPU-Accelerated Elastic Embedding Dimensionality Reduction Method" + " for Visualizing Large-Scale scRNA-Seq Data" + ), + paper_reference="xiong2020neuralee", paper_year=2020, code_url="https://github.com/HiBearME/NeuralEE", - image="openproblems-python-extras", + image="openproblems-python-pytorch", ) @@ -49,6 +60,7 @@ def _create_neuralee_dataset( def _neuralee( adata, + genes=None, d: int = 2, test: bool = False, subsample_genes: Optional[int] = None, @@ -58,20 +70,25 @@ def _neuralee( import torch + if genes is not None: + adata_input = adata[:, genes].copy() + else: + adata_input = adata + # this can fail due to sparseness of data; if so, retry with more genes # note that this is a deviation from the true default behavior, which fails # see https://github.com/openproblems-bio/openproblems/issues/375 while True: try: dataset = _create_neuralee_dataset( - adata, normalize=normalize, subsample_genes=subsample_genes + adata_input, normalize=normalize, subsample_genes=subsample_genes ) except ValueError: - if subsample_genes is not None and subsample_genes < adata.n_vars: - subsample_genes = min(adata.n_vars, int(subsample_genes * 1.2)) + if subsample_genes is not None and subsample_genes < adata_input.n_vars: + subsample_genes = min(adata_input.n_vars, int(subsample_genes * 1.2)) log.warning( - "ValueError in neuralee_default. " - f"Increased subsample_genes to {subsample_genes}" + "ValueError in neuralee_default. Increased subsample_genes to" + f" {subsample_genes}" ) else: raise @@ -91,10 +108,21 @@ def _neuralee( @_neuralee_method(method_name="NeuralEE (CPU) (Default)") def neuralee_default(adata: AnnData, test: bool = False) -> AnnData: - return _neuralee(adata, test=test, normalize=True, subsample_genes=500) + # neuralee needs raw counts + adata.X = adata.layers["counts"] + adata = _neuralee(adata, test=test, normalize=True, subsample_genes=500) + # revert to expected values + adata.X = adata.layers["log_cp10k"] + return adata -@_neuralee_method(method_name="NeuralEE (CPU) (logCPM, 1kHVG)") -def neuralee_logCPM_1kHVG(adata: AnnData, test: bool = False) -> AnnData: - adata = log_cpm_hvg(adata) - return _neuralee(adata, test=test, normalize=False, subsample_genes=None) +@_neuralee_method(method_name="NeuralEE (CPU) (logCP10k, 1kHVG)") +def neuralee_logCP10k_1kHVG(adata: AnnData, test: bool = False) -> AnnData: + adata = log_cp10k_hvg(adata) + return _neuralee( + adata, + genes=adata.var["highly_variable"], + test=test, + normalize=False, + subsample_genes=None, + ) diff --git a/openproblems/tasks/dimensionality_reduction/methods/pca.py b/openproblems/tasks/dimensionality_reduction/methods/pca.py index e81772d1f0..939a9babec 100644 --- a/openproblems/tasks/dimensionality_reduction/methods/pca.py +++ b/openproblems/tasks/dimensionality_reduction/methods/pca.py @@ -1,21 +1,50 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm_hvg +from ....tools.normalize import log_cp10k +from ....tools.normalize import log_cp10k_hvg from ....tools.utils import check_version -import scanpy as sc +import functools - -@method( - method_name="Principle Component Analysis (PCA) (logCPM, 1kHVG)", +_pca_method = functools.partial( + method, + method_summary=( + 'PCA or "Principal Component Analysis" is a linear method that finds orthogonal' + " directions in the data that capture the most variance. The first two" + " principal components are chosen as the two-dimensional embedding. 
We select" + " only the first two principal components as the two-dimensional embedding. PCA" + " is calculated on the logCPM expression matrix with and without selecting 1000" + " HVGs." + ), paper_name="On lines and planes of closest fit to systems of points in space", - paper_url="https://doi.org/10.1080/14786440109462720", + paper_reference="pearson1901pca", paper_year=1901, - code_url="https://scikit-learn.org/stable/modules/generated/" - "sklearn.decomposition.PCA.html", + code_url=( + "https://scikit-learn.org/stable/modules/generated/" + "sklearn.decomposition.PCA.html" + ), ) -def pca_logCPM_1kHVG(adata, test: bool = False): - adata = log_cpm_hvg(adata) - sc.tl.pca(adata, n_comps=50, svd_solver="arpack") - adata.obsm["X_emb"] = adata.obsm["X_pca"][:, :2] + + +def _pca(adata, genes=None): + import scanpy as sc + + if genes is not None: + X = adata[:, genes].copy().X + else: + X = adata.X + + adata.obsm["X_emb"] = sc.tl.pca(X, n_comps=2, svd_solver="arpack") adata.uns["method_code_version"] = check_version("scikit-learn") return adata + + +@_pca_method(method_name="PCA (logCP10k)") +def pca_logCP10k(adata, test: bool = False): + adata = log_cp10k(adata) + return _pca(adata) + + +@_pca_method(method_name="PCA (logCP10k, 1kHVG)") +def pca_logCP10k_1kHVG(adata, test: bool = False): + adata = log_cp10k_hvg(adata) + return _pca(adata, genes=adata.var["highly_variable"]) diff --git a/openproblems/tasks/dimensionality_reduction/methods/phate.py b/openproblems/tasks/dimensionality_reduction/methods/phate.py index 9384880d13..7c40f316bc 100644 --- a/openproblems/tasks/dimensionality_reduction/methods/phate.py +++ b/openproblems/tasks/dimensionality_reduction/methods/phate.py @@ -1,6 +1,7 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm_hvg -from ....tools.normalize import sqrt_cpm +from ....tools.normalize import log_cp10k +from ....tools.normalize import log_cp10k_hvg +from ....tools.normalize import sqrt_cp10k from ....tools.utils import check_version from typing import Optional @@ -8,15 +9,29 @@ _phate_method = functools.partial( method, - paper_name="Visualizing Transitions and Structure for Biological Data Exploration", - paper_url="https://www.nature.com/articles/s41587-019-0336-3", + method_summary=( + "PHATE or “Potential of Heat - diffusion for Affinity - based Transition" + " Embedding” uses the potential of heat diffusion to preserve trajectories in a" + " dataset via a diffusion process. It is an affinity - based method that" + " creates an embedding by finding the dominant eigenvalues of a Markov" + " transition matrix. We evaluate several variants including using the" + " recommended square - root transformed CPM matrix as input, this input with" + " the gamma parameter set to zero and the normal logCPM transformed matrix with" + " and without HVG selection." 
+ ), + paper_name=( + "Visualizing Structure and Transitions in High-Dimensional Biological Data" + ), + paper_reference="moon2019visualizing", paper_year=2019, code_url="https://github.com/KrishnaswamyLab/PHATE/", image="openproblems-python-extras", ) -def _phate(adata, test: bool = False, n_pca: Optional[int] = None, gamma: float = 1): +def _phate( + adata, test: bool = False, genes=None, n_pca: Optional[int] = None, gamma: float = 1 +): from phate import PHATE if test: @@ -24,25 +39,42 @@ def _phate(adata, test: bool = False, n_pca: Optional[int] = None, gamma: float else: # pragma: no cover n_pca = n_pca or 100 + if genes is not None: + X = adata[:, genes].copy().X + else: + X = adata.X + phate_op = PHATE(n_pca=n_pca, verbose=False, n_jobs=-1, gamma=gamma) - adata.obsm["X_emb"] = phate_op.fit_transform(adata.X) + adata.obsm["X_emb"] = phate_op.fit_transform(X) adata.uns["method_code_version"] = check_version("phate") return adata @_phate_method(method_name="PHATE (default)") def phate_default(adata, test: bool = False, n_pca: Optional[int] = None): - adata = sqrt_cpm(adata) - return _phate(adata, test=test, n_pca=n_pca) + adata = sqrt_cp10k(adata) + adata = _phate(adata, test=test, n_pca=n_pca) + # revert to expected adata.X + adata = log_cp10k(adata) + return adata @_phate_method(method_name="PHATE (gamma=0)") def phate_sqrt(adata, test: bool = False, n_pca: Optional[int] = None): - adata = sqrt_cpm(adata) - return _phate(adata, test=test, n_pca=n_pca, gamma=0) + adata = sqrt_cp10k(adata) + adata = _phate(adata, test=test, n_pca=n_pca, gamma=0) + # revert to expected adata.X + adata = log_cp10k(adata) + return adata -@_phate_method(method_name="PHATE (logCPM, 1kHVG)") -def phate_logCPM_1kHVG(adata, test: bool = False, n_pca: Optional[int] = None): - adata = log_cpm_hvg(adata) +@_phate_method(method_name="PHATE (logCP10k)") +def phate_logCP10k_1kHVG(adata, test: bool = False, n_pca: Optional[int] = None): + adata = log_cp10k(adata) return _phate(adata, test=test, n_pca=n_pca) + + +@_phate_method(method_name="PHATE (logCP10k, 1kHVG)") +def phate_logCP10k(adata, test: bool = False, n_pca: Optional[int] = None): + adata = log_cp10k_hvg(adata) + return _phate(adata, test=test, genes=adata.var["highly_variable"], n_pca=n_pca) diff --git a/openproblems/tasks/dimensionality_reduction/methods/pymde.py b/openproblems/tasks/dimensionality_reduction/methods/pymde.py new file mode 100644 index 0000000000..8e8ce79aa4 --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/methods/pymde.py @@ -0,0 +1,139 @@ +from ....tools.decorators import method +from ....tools.normalize import log_cp10k +from ....tools.normalize import log_cp10k_hvg +from ....tools.utils import check_version +from typing import Optional + +import functools +import scanpy as sc + +_pymde_method = functools.partial( + method, + method_summary=( + "PyMDE is a Python implementation of minimum-distortion embedding. It is a" + " non-linear method that preserves distances between cells or neighborhoods in" + " the high-dimensional space. It is computed with options to preserve distances" + " between cells or neighbourhoods and with the logCPM matrix with and without" + " HVG selection as input." 
+ ), + paper_name="Minimum-Distortion Embedding", + paper_reference="agrawal2021mde", + paper_year=2021, + code_url="https://pymde.org/", + image="openproblems-python-pytorch", +) + + +def _pymde( + adata, + method: str = "neighbors", + genes=None, + n_pca: Optional[int] = None, + test: bool = False, + max_iter: Optional[int] = None, + memory_size: Optional[int] = None, +): + import pymde + + if genes is not None: + adata_input = adata[:, genes].copy() + else: + adata_input = adata + + embed_kwargs = {} + if test: + n_pca = n_pca or 20 + embed_kwargs["max_iter"] = max_iter or 20 + embed_kwargs["memory_size"] = memory_size or 2 + else: # pragma: nocover + n_pca = n_pca or 100 + if max_iter is not None: + embed_kwargs["max_iter"] = max_iter + if memory_size is not None: + embed_kwargs["memory_size"] = memory_size + sc.tl.pca(adata_input, n_comps=n_pca, svd_solver="arpack") + X = adata_input.obsm["X_pca"] + if method == "neighbors": + mde_fn = pymde.preserve_neighbors + elif method == "distances": + mde_fn = pymde.preserve_distances + else: + raise NotImplementedError + adata.obsm["X_emb"] = ( + mde_fn(X, embedding_dim=2, verbose=True) + .embed(**embed_kwargs, verbose=True) + .detach() + .numpy() + ) + adata.uns["method_code_version"] = check_version("pymde") + return adata + + +@_pymde_method( + method_name="PyMDE Preserve Neighbors (logCP10k)", +) +def pymde_neighbors_log_cp10k( + adata, + test: bool = False, + max_iter: Optional[int] = None, + memory_size: Optional[int] = None, +): + adata = log_cp10k(adata) + return _pymde( + adata, method="neighbors", test=test, max_iter=max_iter, memory_size=memory_size + ) + + +@_pymde_method( + method_name="PyMDE Preserve Neighbors (logCP10k, 1kHVG)", +) +def pymde_neighbors_log_cp10k_hvg( + adata, + test: bool = False, + max_iter: Optional[int] = None, + memory_size: Optional[int] = None, +): + adata = log_cp10k_hvg(adata) + return _pymde( + adata, + method="neighbors", + genes=adata.var["highly_variable"], + test=test, + max_iter=max_iter, + memory_size=memory_size, + ) + + +@_pymde_method( + method_name="PyMDE Preserve Distances (logCP10k)", +) +def pymde_distances_log_cp10k( + adata, + test: bool = False, + max_iter: Optional[int] = None, + memory_size: Optional[int] = None, +): + adata = log_cp10k(adata) + return _pymde( + adata, method="distances", test=test, max_iter=max_iter, memory_size=memory_size + ) + + +@_pymde_method( + method_name="PyMDE Preserve Distances (logCP10k, 1kHVG)", +) +def pymde_distances_log_cp10k_hvg( + adata, + test: bool = False, + max_iter: Optional[int] = None, + memory_size: Optional[int] = None, +): + adata = log_cp10k_hvg(adata) + return _pymde( + adata, + method="distances", + genes=adata.var["highly_variable"], + test=test, + max_iter=max_iter, + memory_size=memory_size, + ) diff --git a/openproblems/tasks/dimensionality_reduction/methods/tsne.py b/openproblems/tasks/dimensionality_reduction/methods/tsne.py index 6b987c6457..095f30f5b8 100644 --- a/openproblems/tasks/dimensionality_reduction/methods/tsne.py +++ b/openproblems/tasks/dimensionality_reduction/methods/tsne.py @@ -1,23 +1,53 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm_hvg +from ....tools.normalize import log_cp10k +from ....tools.normalize import log_cp10k_hvg from ....tools.utils import check_version -import scanpy as sc +import functools - -@method( - method_name="“t-Distributed Stochastic Neighbor Embedding (t-SNE) (logCPM, 1kHVG)", +_tsne_method = functools.partial( + method, + method_summary=( + "t-SNE or 
t-distributed Stochastic Neighbor Embedding converts similarities" + " between data points to joint probabilities and tries to minimize the" + " Kullback-Leibler divergence between the joint probabilities of the" + " low-dimensional embedding and the high-dimensional data. We use the" + " implementation in the scanpy package with the result of PCA on the logCPM" + " expression matrix (with and without HVG selection)." + ), paper_name="Visualizing Data using t-SNE", - paper_url="https://www.jmlr.org/papers/v9/vandermaaten08a.html", + paper_reference="vandermaaten2008visualizing", paper_year=2008, - code_url="https://scikit-learn.org/stable/modules/generated/" - "sklearn.manifold.TSNE.html#sklearn.manifold.TSNE", + code_url=( + "https://scikit-learn.org/stable/modules/generated/" + "sklearn.manifold.TSNE.html#sklearn.manifold.TSNE" + ), image="openproblems-python-extras", ) -def tsne_logCPM_1kHVG(adata, test: bool = False, n_pca=50): - adata = log_cpm_hvg(adata) - sc.tl.pca(adata, n_comps=n_pca, svd_solver="arpack") + + +def _tsne(adata, genes=None, test=False, n_pca=50): + import scanpy as sc + + if genes is not None: + X = adata[:, genes].copy().X + else: + X = adata.X + + adata.obsm["X_pca"] = sc.tl.pca(X, n_comps=n_pca, svd_solver="arpack") sc.tl.tsne(adata, use_rep="X_pca", n_pcs=n_pca) adata.obsm["X_emb"] = adata.obsm["X_tsne"] adata.uns["method_code_version"] = check_version("MulticoreTSNE") return adata + + +@_tsne_method(method_name="t-SNE (logCP10k, 1kHVG)") +def tsne_logCP10k_1kHVG(adata, test: bool = False, n_pca=50): + adata = log_cp10k_hvg(adata) + return _tsne(adata, genes=adata.var["highly_variable"], test=test, n_pca=n_pca) + + +@_tsne_method(method_name="t-SNE (logCP10k)") +def tsne_logCP10k(adata, test: bool = False, n_pca=50): + adata = log_cp10k(adata) + return _tsne(adata, test=test, n_pca=n_pca) diff --git a/openproblems/tasks/dimensionality_reduction/methods/umap.py b/openproblems/tasks/dimensionality_reduction/methods/umap.py index a64533f0b8..31509caeea 100644 --- a/openproblems/tasks/dimensionality_reduction/methods/umap.py +++ b/openproblems/tasks/dimensionality_reduction/methods/umap.py @@ -1,24 +1,108 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm_hvg +from ....tools.normalize import log_cp10k +from ....tools.normalize import log_cp10k_hvg from ....tools.utils import check_version -import scanpy as sc +import functools - -@method( - method_name="Uniform Manifold Approximation and Projection (UMAP), " - "as implemented by scanpy (logCPM, 1kHVG)", - paper_name="UMAP: Uniform Manifold Approximation and Projection for " - "Dimension Reduction", - paper_url="https://arxiv.org/abs/1802.03426", +_umap_method = functools.partial( + method, + method_summary=( + "UMAP or Uniform Manifold Approximation and Projection is an algorithm for" + " dimension reduction based on manifold learning techniques and ideas from" + " topological data analysis. We perform UMAP on the logCPM expression matrix" + " before and after HVG selection and with and without PCA as a pre-processing" + " step." 
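The `_umap` helper below reduces to a single umap-learn call, with `densmap=True` switching to the densMAP variant; a minimal sketch, assuming umap-learn >= 0.5, with toy data and illustrative names:

import numpy as np
from umap import UMAP

X = np.random.default_rng(0).normal(size=(300, 50))  # toy feature matrix

emb = UMAP(n_components=2, random_state=42).fit_transform(X)     # plain UMAP
emb_dens = UMAP(densmap=True, random_state=42).fit_transform(X)  # densMAP variant
print(emb.shape, emb_dens.shape)  # (300, 2) (300, 2)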
+ ), + paper_name=( + "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction" + ), + paper_reference="mcinnes2018umap", paper_year=2018, code_url="https://github.com/lmcinnes/umap", ) -def umap_logCPM_1kHVG(adata, test: bool = False, n_pca=50): - adata = log_cpm_hvg(adata) - sc.tl.pca(adata, n_comps=50, svd_solver="arpack") - sc.pp.neighbors(adata, use_rep="X_pca", n_pcs=n_pca) - sc.tl.umap(adata) - adata.obsm["X_emb"] = adata.obsm["X_umap"] +_densmap_method = functools.partial( + method, + method_summary=( + "densMAP is a modification of UMAP that adds an extra cost term in order to" + " preserve information about the relative local density of the data. It is" + " performed on the same inputs as UMAP." + ), + paper_name=( + "Assessing single-cell transcriptomic variability through density-preserving" + " data visualization" + ), + paper_reference="narayan2021assessing", + paper_year=2021, + code_url="https://github.com/lmcinnes/umap", + image="openproblems-python-extras", +) + + +def _umap(adata, n_comps=None, genes=None, densmap=False): + from umap import UMAP + + import scanpy as sc + + if genes is not None: + adata_input = adata[:, genes].copy() + else: + adata_input = adata + + if n_comps is not None: + sc.tl.pca(adata_input, n_comps=n_comps, svd_solver="arpack") + X = adata_input.obsm["X_pca"] + else: + X = adata_input.X + + adata.obsm["X_emb"] = UMAP(densmap=densmap, random_state=42).fit_transform(X) adata.uns["method_code_version"] = check_version("umap-learn") return adata + + +@_umap_method(method_name="UMAP (logCP10k, 1kHVG)") +def umap_logCP10k_1kHVG(adata, test: bool = False): + adata = log_cp10k_hvg(adata) + return _umap(adata, genes=adata.var["highly_variable"]) + + +@_umap_method(method_name="UMAP PCA (logCP10k, 1kHVG)") +def umap_pca_logCP10k_1kHVG(adata, test: bool = False): + adata = log_cp10k_hvg(adata) + return _umap(adata, n_comps=50, genes=adata.var["highly_variable"]) + + +@_umap_method(method_name="UMAP (logCP10k)") +def umap_logCP10k(adata, test: bool = False): + adata = log_cp10k(adata) + return _umap(adata) + + +@_umap_method(method_name="UMAP PCA (logCP10k)") +def umap_pca_logCP10k(adata, test: bool = False): + adata = log_cp10k(adata) + return _umap(adata, n_comps=50) + + +@_densmap_method(method_name="densMAP (logCP10k, 1kHVG)") +def densmap_logCP10k_1kHVG(adata, test: bool = False): + adata = log_cp10k_hvg(adata) + return _umap(adata, densmap=True, genes=adata.var["highly_variable"]) + + +@_densmap_method(method_name="densMAP PCA (logCP10k, 1kHVG)") +def densmap_pca_logCP10k_1kHVG(adata, test: bool = False): + adata = log_cp10k_hvg(adata) + return _umap(adata, densmap=True, n_comps=50, genes=adata.var["highly_variable"]) + + +@_densmap_method(method_name="densMAP (logCP10k)") +def densmap_logCP10k(adata, test: bool = False): + adata = log_cp10k(adata) + return _umap(adata, densmap=True) + + +@_densmap_method(method_name="densMAP PCA (logCP10k)") +def densmap_pca_logCP10k(adata, test: bool = False): + adata = log_cp10k(adata) + return _umap(adata, densmap=True, n_comps=50) diff --git a/openproblems/tasks/dimensionality_reduction/metrics/__init__.py b/openproblems/tasks/dimensionality_reduction/metrics/__init__.py index 44f7cb478f..943ede344b 100644 --- a/openproblems/tasks/dimensionality_reduction/metrics/__init__.py +++ b/openproblems/tasks/dimensionality_reduction/metrics/__init__.py @@ -1,9 +1,10 @@ from .density import density_preservation +from .distance_correlation import distance_correlation +from .distance_correlation import 
distance_correlation_spectral from .nn_ranking import continuity from .nn_ranking import lcmc from .nn_ranking import qglobal from .nn_ranking import qlocal from .nn_ranking import qnn from .nn_ranking import qnn_auc -from .root_mean_square_error import rmse from .trustworthiness import trustworthiness diff --git a/openproblems/tasks/dimensionality_reduction/metrics/density.py b/openproblems/tasks/dimensionality_reduction/metrics/density.py index a44a248aeb..6225c8ee6d 100644 --- a/openproblems/tasks/dimensionality_reduction/metrics/density.py +++ b/openproblems/tasks/dimensionality_reduction/metrics/density.py @@ -1,7 +1,5 @@ from anndata import AnnData from openproblems.tools.decorators import metric -from scipy.sparse import issparse -from scipy.stats import pearsonr from typing import Optional import numpy as np @@ -49,7 +47,7 @@ def _calculate_radii( # directly taken from: https://github.com/lmcinnes/umap/blob/ # 317ce81dc64aec9e279aa1374ac809d9ced236f6/umap/umap_.py#L1190-L1243 - (knn_indices, knn_dists, rp_forest,) = nearest_neighbors( + knn_indices, knn_dists, _ = nearest_neighbors( X, n_neighbors, "euclidean", @@ -59,7 +57,7 @@ def _calculate_radii( verbose=False, ) - emb_graph, emb_sigmas, emb_rhos, emb_dists = fuzzy_simplicial_set( + emb_graph, _, _, emb_dists = fuzzy_simplicial_set( X, n_neighbors, random_state, @@ -98,18 +96,23 @@ def _calculate_radii( return np.log(epsilon + (re / mu_sum)) -@metric("density preservation", maximize=True, image="openproblems-python-extras") +@metric( + "Density preservation", + metric_summary=( + "Similarity between local densities in the high-dimensional data and the" + " reduced data." + ), + paper_reference="narayan2021assessing", + maximize=True, +) def density_preservation(adata: AnnData) -> float: - from umap import UMAP + from scipy.sparse import issparse + from scipy.stats import pearsonr emb = adata.obsm["X_emb"] - if np.any(np.isnan(emb)): - return 0.0 high_dim = adata.X.A if issparse(adata.X) else adata.X - _, ro, _ = UMAP( - n_neighbors=_K, random_state=_SEED, densmap=True, output_dens=True - ).fit_transform(high_dim) + ro = _calculate_radii(high_dim, n_neighbors=_K, random_state=_SEED) # in principle, we could just call _calculate_radii(high_dim, ...) 
# this is made sure that the test pass (otherwise, there was .02 difference in corr) re = _calculate_radii(emb, n_neighbors=_K, random_state=_SEED) diff --git a/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py b/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py new file mode 100644 index 0000000000..52dd1075ad --- /dev/null +++ b/openproblems/tasks/dimensionality_reduction/metrics/distance_correlation.py @@ -0,0 +1,61 @@ +from ....tools.decorators import metric +from ....tools.normalize import log_cp10k +from ..methods.diffusion_map import diffusion_map + + +def _distance_correlation(X, X_emb): + import scipy.spatial + import scipy.stats + + high_dimensional_distance_vector = scipy.spatial.distance.pdist(X) + low_dimensional_distance_vector = scipy.spatial.distance.pdist(X_emb) + return scipy.stats.spearmanr( + low_dimensional_distance_vector, high_dimensional_distance_vector + )[0] + + +@metric( + metric_name="Distance correlation", + metric_summary=( + "Spearman correlation between all pairwise Euclidean distances in the original" + " and dimension-reduced data" + ), + maximize=True, + paper_reference="schober2018correlation", +) +def distance_correlation(adata, n_svd=500): + """Calculate the distance correlation + + Computes Spearman correlations between distances on the full (or processed) data + matrix and the dimensionally-reduced matrix + """ + import sklearn.decomposition + + adata = log_cp10k(adata) + X = adata.X + if n_svd < min(X.shape): + X = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(X) + else: + X = X.toarray() + return _distance_correlation(X, adata.obsm["X_emb"]) + + +@metric( + metric_name="Distance correlation (spectral)", + metric_summary=( + "Spearman correlation between all pairwise diffusion distances in the original" + " and dimension-reduced data" + ), + maximize=True, + paper_reference="coifman2006diffusion", +) +def distance_correlation_spectral(adata, n_comps=1000): + """Calculate the spectral distance correlation + + Computes Spearman correlations between distances on high-dimensional Laplacian + eigenmaps on the full (or processed) data matrix and the dimensionally-reduced + matrix + """ + n_comps = min(n_comps, min(adata.shape) - 2) + adata_true = diffusion_map(adata.copy(), n_comps=n_comps) + return _distance_correlation(adata_true.obsm["X_emb"], adata.obsm["X_emb"]) diff --git a/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py b/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py index 1a546790b6..2c3cde36ff 100644 --- a/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py +++ b/openproblems/tasks/dimensionality_reduction/metrics/nn_ranking.py @@ -15,11 +15,9 @@ """ from ....tools.decorators import metric -from ....tools.normalize import log_cpm_hvg +from .._utils import ranking_matrix from anndata import AnnData from numba import njit -from scipy.sparse import issparse -from sklearn.metrics import pairwise_distances from typing import Tuple import numpy as np @@ -36,22 +34,6 @@ _K = 30 -@njit(cache=True, fastmath=True) -def _ranking_matrix(D: np.ndarray) -> np.ndarray: # pragma: no cover - assert D.shape[0] == D.shape[1] - R = np.zeros(D.shape) - m = len(R) - ks = np.arange(m) - - for i in range(m): - for j in range(m): - R[i, j] = np.sum( - (D[i, :] < D[i, j]) | ((ks < j) & (np.abs(D[i, :] - D[i, j]) <= 1e-12)) - ) - - return R - - @njit(cache=True, fastmath=True) def _coranking_matrix(R1: np.ndarray, R2: np.ndarray) -> np.ndarray: # pragma: no 
cover assert R1.shape == R2.shape @@ -67,104 +49,158 @@ def _coranking_matrix(R1: np.ndarray, R2: np.ndarray) -> np.ndarray: # pragma: @njit(cache=True, fastmath=True) -def _metrics( - Q: np.ndarray, -) -> Tuple[ - np.ndarray, np.ndarray, np.ndarray, float, np.ndarray, int, float, float -]: # pragma: no cover - Q = Q[1:, 1:] - m = len(Q) +def _continuity(Q: np.ndarray, m: int) -> np.ndarray: # pragma: no cover - T = np.zeros(m - 1) # trustworthiness C = np.zeros(m - 1) # continuity - QNN = np.zeros(m) # Co-k-nearest neighbor size - LCMC = np.zeros(m) # Local Continuity Meta Criterion for k in range(m - 1): - Qs = Q[k:, :k] - # a column vector of weights. weight = rank error = actual_rank - k - W = np.arange(Qs.shape[0]).reshape(-1, 1) - # 1 - normalized hard-k-intrusions. lower-left region. - # weighted by rank error (rank - k) - T[k] = 1 - np.sum(Qs * W) / (k + 1) / m / (m - 1 - k) Qs = Q[:k, k:] # a row vector of weights. weight = rank error = actual_rank - k W = np.arange(Qs.shape[1]).reshape(1, -1) # 1 - normalized hard-k-extrusions. upper-right region - C[k] = 1 - np.sum(Qs * W) / (k + 1) / m / (m - 1 - k) + C[k] = 1 - np.sum(Qs * W) / ((k + 1) * m * (m - 1 - k)) + + return C + + +@njit(cache=True, fastmath=True) +def _qnn(Q: np.ndarray, m: int) -> np.ndarray: # pragma: no cover + + QNN = np.zeros(m) # Co-k-nearest neighbor size for k in range(m): # Q[0,0] is always m. 0-th nearest neighbor is always the point itself. # Exclude Q[0,0] QNN[k] = np.sum(Q[: k + 1, : k + 1]) / ((k + 1) * m) - LCMC[k] = QNN[k] - (k + 1) / (m - 1) + return QNN + + +def _lcmc(QNN: np.ndarray, m: int) -> np.ndarray: + LCMC = QNN - (np.arange(m) + 1) / (m - 1) + return LCMC + + +def _kmax(LCMC: np.ndarray) -> int: kmax = np.argmax(LCMC) + return kmax # type: ignore + + +def _q_local(QNN: np.ndarray, kmax: int) -> float: Qlocal = np.sum(QNN[: kmax + 1]) / (kmax + 1) + return Qlocal + + +def _q_global(QNN: np.ndarray, kmax: int, m: int) -> float: # skip the last. The last is (m-1)-nearest neighbor, including all samples. 
Qglobal = np.sum(QNN[kmax:-1]) / (m - kmax - 1) - AUC = np.mean(QNN) - - return T, C, QNN, AUC, LCMC, kmax, Qlocal, Qglobal + return Qglobal -def _high_dim(adata: AnnData) -> np.ndarray: - adata.X = adata.layers["counts"] - adata = log_cpm_hvg(adata) - high_dim = adata.X - return high_dim.A if issparse(high_dim) else high_dim +def _qnn_auc(QNN: np.ndarray) -> float: + AUC = np.mean(QNN) + return AUC # type: ignore -def _fit( - X: np.ndarray, E: np.ndarray -) -> Tuple[float, float, float, float, float, float, float]: - if np.any(np.isnan(E)): - return 0.0, 0.0, 0.0, 0.5, -np.inf, -np.inf, -np.inf +def _fit(adata: AnnData) -> Tuple[float, float, float, float, float, float, float]: + Rx = adata.obsm["X_ranking"] + E = adata.obsm["X_emb"] - Dx = pairwise_distances(X) - De = pairwise_distances(E) - Rx, Re = _ranking_matrix(Dx), _ranking_matrix(De) + Re = ranking_matrix(E) Q = _coranking_matrix(Rx, Re) + Q = Q[1:, 1:] + m = len(Q) - T, C, QNN, AUC, LCMC, _kmax, Qlocal, Qglobal = _metrics(Q) - - return T[_K], C[_K], QNN[_K], AUC, LCMC[_K], Qlocal, Qglobal + return Q, m -@metric("continuity", maximize=True) +@metric( + "continuity", + metric_summary=( + "Continuity measures error of hard extrusions based on nearest neighbor" + " coranking" + ), + paper_reference="zhang2021pydrmetrics", + maximize=True, +) def continuity(adata: AnnData) -> float: - _, C, _, *_ = _fit(_high_dim(adata), adata.obsm["X_emb"]) + Q, m = _fit(adata) + C = _continuity(Q, m)[_K] return float(np.clip(C, 0.0, 1.0)) # in [0, 1] -@metric("co-KNN size", maximize=True) +@metric( + "co-KNN size", + metric_summary=( + "co-KNN size counts how many points are in both k-nearest neighbors before and" + " after the dimensionality reduction" + ), + paper_reference="zhang2021pydrmetrics", + maximize=True, +) def qnn(adata: AnnData) -> float: - _, _, QNN, *_ = _fit(_high_dim(adata), adata.obsm["X_emb"]) + Q, m = _fit(adata) + QNN = _qnn(Q, m)[_K] # normalized in the code to [0, 1] return float(np.clip(QNN, 0.0, 1.0)) -@metric("co-KNN AUC", maximize=True) +@metric( + "co-KNN AUC", + metric_summary="co-KNN AUC is area under the co-KNN curve", + paper_reference="zhang2021pydrmetrics", + maximize=True, +) def qnn_auc(adata: AnnData) -> float: - _, _, _, AUC, *_ = _fit(_high_dim(adata), adata.obsm["X_emb"]) + Q, m = _fit(adata) + QNN = _qnn(Q, m) + AUC = _qnn_auc(QNN) return float(np.clip(AUC, 0.5, 1.0)) # in [0.5, 1] -@metric("local continuity meta criterion", maximize=True) +@metric( + "local continuity meta criterion", + metric_summary=( + "The local continuity meta criterion is the co-KNN size with baseline removal" + " which favors locality" + ), + paper_reference="zhang2021pydrmetrics", + maximize=True, +) def lcmc(adata: AnnData) -> float: - *_, LCMC, _, _ = _fit(_high_dim(adata), adata.obsm["X_emb"]) + Q, m = _fit(adata) + QNN = _qnn(Q, m) + LCMC = _lcmc(QNN, m)[_K] return LCMC -@metric("local property", maximize=True) +@metric( + "local property", + metric_summary="The local property metric is a summary of the local co-KNN", + paper_reference="zhang2021pydrmetrics", + maximize=True, +) def qlocal(adata: AnnData) -> float: # according to authors, this is usually preferred to # qglobal, because human are more sensitive to nearer neighbors - *_, Qlocal, _ = _fit(_high_dim(adata), adata.obsm["X_emb"]) + Q, m = _fit(adata) + QNN = _qnn(Q, m) + LCMC = _lcmc(QNN, m) + kmax = _kmax(LCMC) + Qlocal = _q_local(QNN, kmax) return Qlocal -@metric("global property", maximize=True) +@metric( + "global property", + metric_summary="The global property 
metric is a summary of the global co-KNN", + paper_reference="zhang2021pydrmetrics", + maximize=True, +) def qglobal(adata: AnnData) -> float: - *_, Qglobal = _fit(_high_dim(adata), adata.obsm["X_emb"]) + Q, m = _fit(adata) + QNN = _qnn(Q, m) + LCMC = _lcmc(QNN, m) + kmax = _kmax(LCMC) + Qglobal = _q_global(QNN, kmax, m) return Qglobal diff --git a/openproblems/tasks/dimensionality_reduction/metrics/root_mean_square_error.py b/openproblems/tasks/dimensionality_reduction/metrics/root_mean_square_error.py deleted file mode 100644 index 9e12d5d798..0000000000 --- a/openproblems/tasks/dimensionality_reduction/metrics/root_mean_square_error.py +++ /dev/null @@ -1,54 +0,0 @@ -from ....tools.decorators import metric - -import numpy as np -import scipy as sp -import sklearn.decomposition -import sklearn.metrics - - -def calculate_squareform_pairwise_distance(data): - """Calculate pairwise distances. - - Compute pairwise distance between points in a matrix / vector and then format this - into a squareform vector. - """ - return sp.spatial.distance.squareform(sp.spatial.distance.pdist(data)) - - -def calculate_rmse(adata, n_svd=200): - """Calculate dimensional reduction stress via root mean square error.""" - X = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) - high_dimensional_distance_matrix = calculate_squareform_pairwise_distance(X) - - low_dimensional_distance_matrix = calculate_squareform_pairwise_distance( - adata.obsm["X_emb"] - ) - - diff = high_dimensional_distance_matrix - low_dimensional_distance_matrix - - kruskel_matrix = np.sqrt(diff**2 / sum(low_dimensional_distance_matrix**2)) - - kruskel_score = np.sqrt(sum(diff**2) / sum(low_dimensional_distance_matrix**2)) - - y_actual = high_dimensional_distance_matrix - y_predic = low_dimensional_distance_matrix - - rms = np.sqrt(sklearn.metrics.mean_squared_error(y_actual, y_predic)) - - return kruskel_matrix, kruskel_score, rms - - -@metric(metric_name="root mean squared error", maximize=True) -def rmse(adata): - """Calculate the root mean squared error. - - Computes (RMSE) between the full (or processed) data matrix and a list of - dimensionally-reduced matrices. - """ - ( - adata.obsp["kruskel_matrix"], - adata.uns["kruskel_score"], - adata.uns["rmse_score"], - ) = calculate_rmse(adata) - - return float(adata.uns["rmse_score"]) diff --git a/openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py b/openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py index 3daf3360cd..6f2387747c 100644 --- a/openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py +++ b/openproblems/tasks/dimensionality_reduction/metrics/trustworthiness.py @@ -1,12 +1,21 @@ from ....tools.decorators import metric from anndata import AnnData -from sklearn import manifold import numpy as np -@metric(metric_name="trustworthiness", maximize=True) +@metric( + metric_name="trustworthiness", + metric_summary=( + "a measurement of similarity between the rank of each point's nearest neighbors" + " in the high-dimensional data and the reduced data." 
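The metric body below simply delegates to scikit-learn's `manifold.trustworthiness`; a self-contained sketch of that call on toy data (the values used here are illustrative, not repository defaults):

import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import trustworthiness

X = np.random.default_rng(0).normal(size=(200, 30))  # "high-dimensional" data
X_emb = PCA(n_components=2).fit_transform(X)         # candidate 2-D embedding

score = trustworthiness(X, X_emb, n_neighbors=15)    # 1.0 = local neighbourhoods fully preserved
print(round(score, 3))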
+ ), + paper_reference="venna2001neighborhood", + maximize=True, +) def trustworthiness(adata: AnnData) -> float: + from sklearn import manifold + high_dim, low_dim = adata.X, adata.obsm["X_emb"] score = manifold.trustworthiness( diff --git a/openproblems/tasks/label_projection/README.md b/openproblems/tasks/label_projection/README.md index 5a9dbc4951..4b17408a74 100644 --- a/openproblems/tasks/label_projection/README.md +++ b/openproblems/tasks/label_projection/README.md @@ -1,21 +1,19 @@ # Label Projection -## The task - A major challenge for integrating single cell datasets is creating matching cell type annotations for each cell. One of the most common strategies for annotating cell types is referred to as -["cluster-then-annotate"](https://www.nature.com/articles/s41576-018-0088-9) whereby +["cluster-then-annotate"](https://openproblems.bio/bibliography#kiselev2019challenges) whereby cells are aggregated into clusters based on feature similarity and then manually characterized based on differential gene expression or previously identified marker genes. Recently, methods have emerged to build on this strategy and annotate cells -using [known marker genes](https://www.nature.com/articles/s41592-019-0535-3). However, +using [known marker genes](https://openproblems.bio/bibliography#pliner2019supervised). However, these strategies pose a difficulty for integrating atlas-scale datasets as the particular annotations may not match. To ensure that the cell type labels in newly generated datasets match existing reference datasets, some methods align cells to a previously annotated [reference -dataset](https://academic.oup.com/bioinformatics/article/35/22/4688/54802990) and then +dataset](https://openproblems.bio/bibliography#hou2019scmatch) and then _project_ labels from the reference to the new dataset. Here, we compare methods for annotation based on a reference dataset. The datasets @@ -24,20 +22,6 @@ with matching labels. These datasets are then split into training and test batch the task of each method is to train a cell type classifer on the training set and project those labels onto the test set. -## The metrics - -Metrics for label projection aim to characterize how well each classifer correctly -assigns cell type labels to cells in the test set. - -* **Accuracy**: Average number of correctly applied labels. -* **F1 score**: The [F1 - score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) - is a weighted average of the precision and recall over all class labels, where an F1 - score reaches its best value at 1 and worst score at 0, where each class contributes - to the score relative to its frequency in the dataset. -* **Macro F1 score**: The macro F1 score is an unweighted F1 score, where each class - contributes equally, regardless of its frequency. 
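The accuracy and F1 descriptions removed here reappear as `metric_summary` strings in the metric modules later in this diff; the difference between the weighted and macro F1 variants is just the `average=` argument, shown on a toy example:

from sklearn.metrics import accuracy_score, f1_score

y_true = ["B cell", "B cell", "T cell", "T cell", "T cell", "NK cell"]
y_pred = ["B cell", "T cell", "T cell", "NK cell", "T cell", "NK cell"]

print(accuracy_score(y_true, y_pred))                # fraction of correctly assigned labels
print(f1_score(y_true, y_pred, average="weighted"))  # classes weighted by their frequency
print(f1_score(y_true, y_pred, average="macro"))     # every class contributes equally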
- ## API Datasets should contain the following attributes: diff --git a/openproblems/tasks/label_projection/api.py b/openproblems/tasks/label_projection/api.py index 6e9cc6517d..2fc5be81d6 100644 --- a/openproblems/tasks/label_projection/api.py +++ b/openproblems/tasks/label_projection/api.py @@ -11,14 +11,14 @@ def check_dataset(adata): assert "batch" in adata.obs assert "is_train" in adata.obs assert np.issubdtype(adata.obs["is_train"].dtype, bool) - assert pd.api.types.is_categorical(adata.obs["batch"]) - assert pd.api.types.is_categorical(adata.obs["labels"]) + assert pd.api.types.is_categorical_dtype(adata.obs["batch"]) + assert pd.api.types.is_categorical_dtype(adata.obs["labels"]) assert np.sum(adata.obs["is_train"]) > 0 assert np.sum(~adata.obs["is_train"]) > 0 return True -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" assert "labels_pred" in adata.obs assert np.issubdtype(adata.obs["is_train"].dtype, bool) diff --git a/openproblems/tasks/label_projection/datasets/__init__.py b/openproblems/tasks/label_projection/datasets/__init__.py index ecd67d700d..5b9afe98b5 100644 --- a/openproblems/tasks/label_projection/datasets/__init__.py +++ b/openproblems/tasks/label_projection/datasets/__init__.py @@ -4,5 +4,5 @@ from .pancreas import pancreas_random from .pancreas import pancreas_random_label_noise from .tabula_muris_senis import tabula_muris_senis_lung_random -from .zebrafish import zebrafish_labels +from .zebrafish import zebrafish_labs from .zebrafish import zebrafish_random diff --git a/openproblems/tasks/label_projection/datasets/cengen.py b/openproblems/tasks/label_projection/datasets/cengen.py index 6089b2fad6..e53acd7b13 100644 --- a/openproblems/tasks/label_projection/datasets/cengen.py +++ b/openproblems/tasks/label_projection/datasets/cengen.py @@ -5,11 +5,14 @@ @dataset( - "CeNGEN (by batch)", + "CeNGEN (split by batch)", data_url=load_cengen.metadata["data_url"], data_reference=load_cengen.metadata["data_reference"], - dataset_summary="100k FACS-isolated C. elegans neurons from 17 experiments " - "sequenced on 10x Genomics. Split into train/test by experimental batch.", + dataset_summary=( + "100k FACS-isolated C. elegans neurons from 17 experiments sequenced on 10x" + " Genomics. Split into train/test by experimental batch. Dimensions: 100955" + " cells, 22469 genes. 169 cell types (avg. 597±800 cells per cell type)." + ), ) def cengen_batch(test=False): adata = load_cengen(test=test) @@ -30,8 +33,11 @@ def cengen_batch(test=False): "CeNGEN (random split)", data_url=load_cengen.metadata["data_url"], data_reference=load_cengen.metadata["data_reference"], - dataset_summary="100k FACS-isolated C. elegans neurons from 17 experiments " - "sequenced on 10x Genomics. Split into train/test randomly.", + dataset_summary=( + "100k FACS-isolated C. elegans neurons from 17 experiments sequenced on 10x" + " Genomics. Split into train/test randomly. Dimensions: 100955 cells, 22469" + " genes. 169 cell types avg. 597±800 cells per cell type)." 
+ ), ) def cengen_random(test=False): adata = load_cengen(test=test) diff --git a/openproblems/tasks/label_projection/datasets/pancreas.py b/openproblems/tasks/label_projection/datasets/pancreas.py index 0981ed8ed0..a84f8e8e4e 100644 --- a/openproblems/tasks/label_projection/datasets/pancreas.py +++ b/openproblems/tasks/label_projection/datasets/pancreas.py @@ -9,9 +9,12 @@ "Pancreas (by batch)", data_url=load_pancreas.metadata["data_url"], data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreatic islet scRNA-seq data from 6 datasets " - "across technologies (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, " - "and SMARTER-seq). Split into train/test by experimental batch.", + dataset_summary=( + "Human pancreatic islet scRNA-seq data from 6 datasets across technologies" + " (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, and SMARTER-seq). Split" + " into train/test by experimental batch. Dimensions: 16382 cells, 18771 genes." + " 14 cell types (avg. 1170±1703 cells per cell type)." + ), ) def pancreas_batch(test=False): adata = load_pancreas(test=test) @@ -32,9 +35,12 @@ def pancreas_batch(test=False): "Pancreas (random split)", data_url=load_pancreas.metadata["data_url"], data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreatic islet scRNA-seq data from 6 datasets " - "across technologies (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, " - "and SMARTER-seq). Split into train/test randomly.", + dataset_summary=( + "Human pancreatic islet scRNA-seq data from 6 datasets across technologies" + " (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, and SMARTER-seq). Split" + " into train/test randomly. Dimensions: 16382 cells, 18771 genes. 14 cell types" + " (avg. 1170±1703 cells per cell type)." + ), ) def pancreas_random(test=False): adata = load_pancreas(test=test) @@ -53,9 +59,12 @@ def pancreas_random(test=False): "Pancreas (random split with label noise)", data_url=load_pancreas.metadata["data_url"], data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreatic islet scRNA-seq data from 6 datasets " - "across technologies (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, " - "and SMARTER-seq). Split into train/test randomly with 20% label noise.", + dataset_summary=( + "Human pancreatic islet scRNA-seq data from 6 datasets across technologies" + " (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, and SMARTER-seq). Split" + " into train/test randomly with 20% label noise. Dimensions: 16382 cells, 18771" + " genes. 14 cell types (avg. 1170±1703 cells per cell type)." + ), ) def pancreas_random_label_noise(test=False): adata = load_pancreas(test=test) diff --git a/openproblems/tasks/label_projection/datasets/tabula_muris_senis.py b/openproblems/tasks/label_projection/datasets/tabula_muris_senis.py index 60780fafa1..62be5b43c1 100644 --- a/openproblems/tasks/label_projection/datasets/tabula_muris_senis.py +++ b/openproblems/tasks/label_projection/datasets/tabula_muris_senis.py @@ -8,15 +8,19 @@ "Tabula Muris Senis Lung (random split)", data_url=load_tabula_muris_senis.metadata["data_url"], data_reference=load_tabula_muris_senis.metadata["data_reference"], - dataset_summary="All lung cells from Tabula Muris Senis, a 500k cell-atlas from 18 " - "organs and tissues across the mouse lifespan. Split into train/test randomly.", + dataset_summary=( + "All lung cells from Tabula Muris Senis, a 500k cell-atlas from 18 organs and" + " tissues across the mouse lifespan. 
Split into train/test randomly." + " Dimensions: 24540 cells, 17985 genes. 39 cell types (avg. 629±999 cells per" + " cell type)." + ), ) def tabula_muris_senis_lung_random(test=False): adata = load_tabula_muris_senis( test=test, organ_list=["lung"], method_list=["droplet"] ) adata.obs["labels"] = adata.obs["free_annotation"] - adata.obs["batch"] = adata.obs["mouse.id"] + adata.obs["batch"] = adata.obs["donor_id"] adata.obs["is_train"] = np.random.choice( [True, False], adata.shape[0], replace=True, p=[0.8, 0.2] ) diff --git a/openproblems/tasks/label_projection/datasets/zebrafish.py b/openproblems/tasks/label_projection/datasets/zebrafish.py index 30999ed261..b09409d223 100644 --- a/openproblems/tasks/label_projection/datasets/zebrafish.py +++ b/openproblems/tasks/label_projection/datasets/zebrafish.py @@ -5,14 +5,17 @@ @dataset( - "Zebrafish (by labels)", + "Zebrafish (by laboratory)", data_url=load_zebrafish.metadata["data_url"], data_reference=load_zebrafish.metadata["data_reference"], - dataset_summary="90k cells from zebrafish embryos throughout the first day of " - "development, with and without a knockout of chordin, an important developmental " - "gene. Split into train/test by laboratory.", + dataset_summary=( + "90k cells from zebrafish embryos throughout the first day of development, with" + " and without a knockout of chordin, an important developmental gene. Split" + " into train/test by laboratory. Dimensions: 26022 cells, 25258 genes. 24 cell" + " types (avg. 1084±1156 cells per cell type)." + ), ) -def zebrafish_labels(test=False): +def zebrafish_labs(test=False): adata = load_zebrafish(test=test) adata.obs["labels"] = adata.obs["cell_type"] adata.obs["batch"] = adata.obs["lab"] @@ -24,9 +27,12 @@ def zebrafish_labels(test=False): "Zebrafish (random split)", data_url=load_zebrafish.metadata["data_url"], data_reference=load_zebrafish.metadata["data_reference"], - dataset_summary="90k cells from zebrafish embryos throughout the first day of " - "development, with and without a knockout of chordin, an important developmental " - "gene. Split into train/test randomly.", + dataset_summary=( + "90k cells from zebrafish embryos throughout the first day of development, with" + " and without a knockout of chordin, an important developmental gene. Split" + " into train/test randomly. Dimensions: 26022 cells, 25258 genes. 24 cell types" + " (avg. 1084±1156 cells per cell type)." 
+ ), ) def zebrafish_random(test=False): adata = load_zebrafish(test=test) diff --git a/openproblems/tasks/label_projection/methods/__init__.py b/openproblems/tasks/label_projection/methods/__init__.py index 7899498aea..eeff54495a 100644 --- a/openproblems/tasks/label_projection/methods/__init__.py +++ b/openproblems/tasks/label_projection/methods/__init__.py @@ -1,10 +1,11 @@ from .baseline import majority_vote from .baseline import random_labels -from .knn_classifier import knn_classifier_log_cpm +from .baseline import true_labels +from .knn_classifier import knn_classifier_log_cp10k from .knn_classifier import knn_classifier_scran -from .logistic_regression import logistic_regression_log_cpm +from .logistic_regression import logistic_regression_log_cp10k from .logistic_regression import logistic_regression_scran -from .mlp import mlp_log_cpm +from .mlp import mlp_log_cp10k from .mlp import mlp_scran from .scvi_tools import scanvi_all_genes from .scvi_tools import scanvi_hvg @@ -13,5 +14,5 @@ from .scvi_tools import scarches_scanvi_xgb_all_genes from .scvi_tools import scarches_scanvi_xgb_hvg from .seurat import seurat -from .xgboost import xgboost_log_cpm +from .xgboost import xgboost_log_cp10k from .xgboost import xgboost_scran diff --git a/openproblems/tasks/label_projection/methods/baseline.py b/openproblems/tasks/label_projection/methods/baseline.py index 6bc0b8a1c5..38b98f0c75 100644 --- a/openproblems/tasks/label_projection/methods/baseline.py +++ b/openproblems/tasks/label_projection/methods/baseline.py @@ -1,15 +1,16 @@ -from ....tools.decorators import method +from ....tools.decorators import baseline_method from ....tools.utils import check_version import numpy as np -@method( +@baseline_method( method_name="Majority Vote", - paper_name="Majority Vote (baseline)", - paper_url="https://openproblems.bio", - paper_year=2022, - code_url="https://github.com/openproblems-bio/openproblems", + method_summary=( + "Assignment of all predicted labels as the most common label in the training" + " data" + ), + is_baseline=False, ) def majority_vote(adata, test=False): majority = adata.obs.labels[adata.obs.is_train].value_counts().index[0] @@ -20,12 +21,12 @@ def majority_vote(adata, test=False): return adata -@method( +@baseline_method( method_name="Random Labels", - paper_name="Random Labels (baseline)", - paper_url="https://openproblems.bio", - paper_year=2022, - code_url="https://github.com/openproblems-bio/openproblems", + method_summary=( + "Random assignment of predicted labels proportionate to label abundance in" + " training data" + ), ) def random_labels(adata, test=False): label_distribution = adata.obs.labels[adata.obs.is_train].value_counts() @@ -40,3 +41,13 @@ def random_labels(adata, test=False): adata.uns["method_code_version"] = check_version("openproblems") return adata + + +@baseline_method( + method_name="True Labels", + method_summary="Perfect assignment of the predicted labels from the test labels", +) +def true_labels(adata, test=False): + adata.obs["labels_pred"] = adata.obs["labels"] + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/label_projection/methods/knn_classifier.py b/openproblems/tasks/label_projection/methods/knn_classifier.py index ffe576bc72..c7c1d2a73d 100644 --- a/openproblems/tasks/label_projection/methods/knn_classifier.py +++ b/openproblems/tasks/label_projection/methods/knn_classifier.py @@ -1,26 +1,38 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm 
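Stripped of the AnnData plumbing, the Majority Vote and Random Labels baselines above amount to a few lines of pandas/NumPy; a toy sketch with illustrative column values:

import numpy as np
import pandas as pd

obs = pd.DataFrame({
    "labels": pd.Categorical(["B cell", "B cell", "T cell", "NK cell", "T cell", "B cell"]),
    "is_train": [True, True, True, True, False, False],
})

# Majority Vote: every cell is assigned the most common training label
majority = obs.labels[obs.is_train].value_counts().index[0]
obs["labels_pred_majority"] = majority

# Random Labels: sample predictions proportional to training-label frequencies
freq = obs.labels[obs.is_train].value_counts(normalize=True)
rng = np.random.default_rng(0)
obs["labels_pred_random"] = rng.choice(freq.index.to_numpy(), size=len(obs), p=freq.to_numpy())
print(obs)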
+from ....tools.normalize import log_cp10k from ....tools.normalize import log_scran_pooling from .sklearn import classifier import functools -import sklearn.neighbors _knn_classifier_method = functools.partial( method, + method_summary=( + 'K-neighbors classifier uses the "k-nearest neighbours" approach, which is a' + " popular machine learning algorithm for classification and regression tasks." + " The assumption underlying KNN in this context is that cells with similar gene" + " expression profiles tend to belong to the same cell type. For each unlabelled" + " cell, this method computes the $k$ labelled cells (in this case, 5) with the" + " smallest distance in PCA space, and assigns that cell the most common cell" + " type among its $k$ nearest neighbors." + ), paper_name="Nearest neighbor pattern classification", - paper_url="https://doi.org/10.1109/TIT.1967.1053964", + paper_reference="cover1967nearest", paper_year=1967, - code_url="https://scikit-learn.org/stable/modules/generated/" - "sklearn.neighbors.KNeighborsClassifier.html", + code_url=( + "https://scikit-learn.org/stable/modules/generated/" + "sklearn.neighbors.KNeighborsClassifier.html" + ), ) @_knn_classifier_method( - method_name="K-neighbors classifier (log CPM)", + method_name="K-neighbors classifier (log CP10k)", ) -def knn_classifier_log_cpm(adata, test=False): - adata = log_cpm(adata) +def knn_classifier_log_cp10k(adata, test=False): + import sklearn.neighbors + + adata = log_cp10k(adata) return classifier(adata, estimator=sklearn.neighbors.KNeighborsClassifier) @@ -29,5 +41,7 @@ def knn_classifier_log_cpm(adata, test=False): image="openproblems-r-base", ) def knn_classifier_scran(adata, test=False): + import sklearn.neighbors + adata = log_scran_pooling(adata) return classifier(adata, estimator=sklearn.neighbors.KNeighborsClassifier) diff --git a/openproblems/tasks/label_projection/methods/logistic_regression.py b/openproblems/tasks/label_projection/methods/logistic_regression.py index 25a2c40a5a..4d7c2cf854 100644 --- a/openproblems/tasks/label_projection/methods/logistic_regression.py +++ b/openproblems/tasks/label_projection/methods/logistic_regression.py @@ -1,22 +1,31 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm +from ....tools.normalize import log_cp10k from ....tools.normalize import log_scran_pooling from .sklearn import classifier import functools -import sklearn.linear_model _logistic_regression_method = functools.partial( method, + method_summary=( + "Logistic Regression estimates parameters of a logistic function for" + " multivariate classification tasks. Here, we use 100-dimensional whitened PCA" + " coordinates as independent variables, and the model minimises the cross" + " entropy loss over all cell type classes. 
" + ), paper_name="Applied Logistic Regression", - paper_url="https://books.google.com/books?id=64JYAwAAQBAJ", + paper_reference="hosmer2013applied", paper_year=2013, - code_url="https://scikit-learn.org/stable/modules/generated/" - "sklearn.linear_model.LogisticRegression.html", + code_url=( + "https://scikit-learn.org/stable/modules/generated/" + "sklearn.linear_model.LogisticRegression.html" + ), ) def _logistic_regression(adata, test=False, max_iter=None): + import sklearn.linear_model + if test: max_iter = max_iter or 100 else: # pragma: no cover @@ -27,10 +36,10 @@ def _logistic_regression(adata, test=False, max_iter=None): @_logistic_regression_method( - method_name="Logistic regression (log CPM)", + method_name="Logistic regression (log CP10k)", ) -def logistic_regression_log_cpm(adata, test=False, max_iter=None): - adata = log_cpm(adata) +def logistic_regression_log_cp10k(adata, test=False, max_iter=None): + adata = log_cp10k(adata) return _logistic_regression(adata, test=test, max_iter=max_iter) diff --git a/openproblems/tasks/label_projection/methods/mlp.py b/openproblems/tasks/label_projection/methods/mlp.py index 87e626bab6..0e92978105 100644 --- a/openproblems/tasks/label_projection/methods/mlp.py +++ b/openproblems/tasks/label_projection/methods/mlp.py @@ -1,22 +1,34 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm +from ....tools.normalize import log_cp10k from ....tools.normalize import log_scran_pooling from .sklearn import classifier import functools -import sklearn.neural_network _mlp_method = functools.partial( method, + method_summary=( + 'MLP or "Multi-Layer Perceptron" is a type of artificial neural network that' + " consists of multiple layers of interconnected neurons. Each neuron computes a" + " weighted sum of all neurons in the previous layer and transforms it with" + " nonlinear activation function. The output layer provides the final" + " prediction, and network weights are updated by gradient descent to minimize" + " the cross entropy loss. Here, the input data is 100-dimensional whitened PCA" + " coordinates for each cell, and we use two hidden layers of 100 neurons each." 
+ ), paper_name="Connectionist learning procedures", - paper_url="https://doi.org/10.1016/0004-3702(89)90049-0", + paper_reference="hinton1989connectionist", paper_year=1990, - code_url="https://scikit-learn.org/stable/modules/generated/" - "sklearn.neural_network.MLPClassifier.html", + code_url=( + "https://scikit-learn.org/stable/modules/generated/" + "sklearn.neural_network.MLPClassifier.html" + ), ) def _mlp(adata, test=False, max_iter=None, hidden_layer_sizes=None): + import sklearn.neural_network + if test: hidden_layer_sizes = hidden_layer_sizes or (20,) max_iter = max_iter or 100 @@ -32,10 +44,10 @@ def _mlp(adata, test=False, max_iter=None, hidden_layer_sizes=None): @_mlp_method( - method_name="Multilayer perceptron (log CPM)", + method_name="Multilayer perceptron (log CP10k)", ) -def mlp_log_cpm(adata, test=False, max_iter=None, hidden_layer_sizes=None): - adata = log_cpm(adata) +def mlp_log_cp10k(adata, test=False, max_iter=None, hidden_layer_sizes=None): + adata = log_cp10k(adata) return _mlp( adata, test=test, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes ) diff --git a/openproblems/tasks/label_projection/methods/scvi_tools.py b/openproblems/tasks/label_projection/methods/scvi_tools.py index 869dd85c78..5348b54435 100644 --- a/openproblems/tasks/label_projection/methods/scvi_tools.py +++ b/openproblems/tasks/label_projection/methods/scvi_tools.py @@ -1,26 +1,45 @@ from ....tools.decorators import method from ....tools.utils import check_version +from .xgboost import _xgboost from typing import Optional import functools _scanvi_method = functools.partial( method, - paper_name="Probabilistic harmonization and annotation of single-cell" - " transcriptomics data with deep generative models", - paper_url="https://doi.org/10.15252/msb.20209620", + method_summary=( + 'scANVI or "single-cell ANnotation using Variational Inference" is a' + " semi-supervised variant of the scVI(Lopez et al. 2018) algorithm. Like scVI," + " scANVI uses deep neural networks and stochastic optimization to model" + " uncertainty caused by technical noise and bias in single - cell" + " transcriptomics measurements. However, scANVI also leverages cell type labels" + " in the generative modelling. In this approach, scANVI is used to predict the" + " cell type labels of the unlabelled test data." + ), + paper_name=( + "Probabilistic harmonization and annotation of single-cell transcriptomics data" + " with deep generative models" + ), + paper_reference="xu2021probabilistic", paper_year=2021, code_url="https://github.com/YosefLab/scvi-tools", - image="openproblems-python-scvi", + image="openproblems-python-pytorch", ) _scanvi_scarches_method = functools.partial( method, + method_summary=( + 'scArches+scANVI or "Single-cell architecture surgery" is a deep learning' + " method for mapping new datasets onto a pre-existing reference model, using" + " transfer learning and parameter optimization. It first uses scANVI to build a" + " reference model from the training data, and then apply scArches to map the" + " test data onto the reference model and make predictions." 
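The semi-supervised setup below hinges on masking test-set labels with an `unlabeled_category` before training; in isolation, that masking step looks like this (toy frame, column names follow the diff):

import numpy as np
import pandas as pd

obs = pd.DataFrame({
    "labels": ["B cell", "T cell", "NK cell", "T cell"],
    "is_train": [True, True, False, False],
})

unlabeled_category = "Unknown"
# keep training labels, hide test labels from the semi-supervised model
obs["scanvi_labels"] = np.where(obs["is_train"], obs["labels"], unlabeled_category)
print(obs["scanvi_labels"].tolist())  # ['B cell', 'T cell', 'Unknown', 'Unknown']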
+ ), paper_name="Query to reference single-cell integration with transfer learning", - paper_url="https://doi.org/10.1101/2020.07.16.205997", + paper_reference="lotfollahi2020query", paper_year=2021, code_url="https://github.com/YosefLab/scvi-tools", - image="openproblems-python-scvi", + image="openproblems-python-pytorch", ) @@ -95,6 +114,7 @@ def _scanvi_scarches( n_layers=None, prediction_method="scanvi", ): + import numpy as np import scvi if test: @@ -106,11 +126,14 @@ def _scanvi_scarches( n_layers = n_layers or 2 n_hidden = n_hidden or 128 + unlabeled_category = "Unknown" + # new obs labels to mask test set + adata.obs["scanvi_labels"] = np.where( + adata.obs["is_train"], adata.obs["labels"], unlabeled_category + ) adata_train = adata[adata.obs["is_train"]].copy() - adata_train.obs["scanvi_labels"] = adata_train.obs["labels"].copy() adata_test = adata[~adata.obs["is_train"]].copy() - adata_test.obs["scanvi_labels"] = "Unknown" scvi.model.SCVI.setup_anndata( adata_train, batch_key="batch", labels_key="scanvi_labels" ) @@ -135,7 +158,9 @@ def _scanvi_scarches( train_kwargs["limit_train_batches"] = 10 train_kwargs["limit_val_batches"] = 10 scvi_model.train(**train_kwargs) - model = scvi.model.SCANVI.from_scvi_model(scvi_model, unlabeled_category="Unknown") + model = scvi.model.SCANVI.from_scvi_model( + scvi_model, unlabeled_category=unlabeled_category + ) model.train(**train_kwargs) query_model = scvi.model.SCANVI.load_query_data(adata_test, model) @@ -149,7 +174,7 @@ def _scanvi_scarches( if prediction_method == "scanvi": preds = _pred_scanvi(adata, query_model) elif prediction_method == "xgboost": - preds = _pred_xgb(adata, adata_train, adata_test, query_model, test=test) + preds = _pred_xgb(adata, query_model, test=test) return preds @@ -166,58 +191,15 @@ def _pred_scanvi(adata, query_model): # note: could extend test option def _pred_xgb( adata, - adata_train, - adata_test, query_model, - label_col="labels", test=False, num_round: Optional[int] = None, ): - import numpy as np - import xgboost as xgb - - df = _classif_df(adata_train, query_model, label_col) - - df["labels_int"] = df["labels"].cat.codes - categories = df["labels"].cat.categories - - # X_train = df.drop(columns="labels") - X_train = df.drop(columns=["labels", "labels_int"]) - # y_train = df["labels"].astype("category") - y_train = df["labels_int"].astype(int) - - X_test = query_model.get_latent_representation(adata_test) - - if test: - num_round = num_round or 2 - else: - num_round = num_round or 5 - - xgbc = xgb.XGBClassifier(tree_method="hist", objective="multi:softprob") - - xgbc.fit(X_train, y_train) - - # adata_test.obs["preds_test"] = xgbc.predict(X_test) - adata_test.obs["preds_test"] = categories[xgbc.predict(X_test)] - - preds = [ - adata_test.obs["preds_test"][idx] if idx in adata_test.obs_names else np.nan - for idx in adata.obs_names - ] - - return preds - - -def _classif_df(adata, trained_model, label_col): - import pandas as pd - - emb_data = trained_model.get_latent_representation(adata) - - df = pd.DataFrame(data=emb_data, index=adata.obs_names) - - df["labels"] = adata.obs[label_col] - - return df + adata.obsm["X_emb"] = query_model.get_latent_representation(adata) + adata = _xgboost( + adata, test=test, obsm="X_emb", num_round=num_round, tree_method="hist" + ) + return adata.obs["labels_pred"] @_scanvi_method(method_name="scANVI (All genes)") diff --git a/openproblems/tasks/label_projection/methods/seurat.py b/openproblems/tasks/label_projection/methods/seurat.py index 874f2c9b54..f234bf17d6 100644 
--- a/openproblems/tasks/label_projection/methods/seurat.py +++ b/openproblems/tasks/label_projection/methods/seurat.py @@ -12,8 +12,17 @@ @method( method_name="Seurat reference mapping (SCTransform)", + method_summary=( + "Seurat reference mapping is a cell type label transfer method provided by the" + " Seurat package. Gene expression counts are first normalised by SCTransform" + " before computing PCA. Then it finds mutual nearest neighbours, known as" + " transfer anchors, between the labelled and unlabelled part of the data in PCA" + " space, and computes each cell’s distance to each of the anchor pairs." + " Finally, it uses the labelled anchors to predict cell types for unlabelled" + " cells based on these distances." + ), paper_name="Integrated analysis of multimodal single-cell data", - paper_url="https://doi.org/10.1016/j.cell.2021.04.048", + paper_reference="hao2021integrated", paper_year=2021, code_url="https://github.com/satijalab/seurat", image="openproblems-r-extras", diff --git a/openproblems/tasks/label_projection/methods/sklearn.py b/openproblems/tasks/label_projection/methods/sklearn.py index 977446167a..18ec00ca65 100644 --- a/openproblems/tasks/label_projection/methods/sklearn.py +++ b/openproblems/tasks/label_projection/methods/sklearn.py @@ -2,12 +2,13 @@ from .utils import pca_op import numpy as np -import sklearn.pipeline -import sklearn.preprocessing def classifier(adata, estimator, n_pca=100, **kwargs): """Run a generic scikit-learn classifier.""" + import sklearn.pipeline + import sklearn.preprocessing + adata_train = adata[adata.obs["is_train"]] adata_test = adata[~adata.obs["is_train"]].copy() diff --git a/openproblems/tasks/label_projection/methods/utils.py b/openproblems/tasks/label_projection/methods/utils.py index 056b586e5d..f1925684f3 100644 --- a/openproblems/tasks/label_projection/methods/utils.py +++ b/openproblems/tasks/label_projection/methods/utils.py @@ -1,8 +1,6 @@ -import scipy.sparse -import sklearn.decomposition - - def pca_op(adata_train, adata_test, n_components=100): + import scipy.sparse + import sklearn.decomposition is_sparse = scipy.sparse.issparse(adata_train.X) diff --git a/openproblems/tasks/label_projection/methods/xgboost.py b/openproblems/tasks/label_projection/methods/xgboost.py index b489f98bce..290a636dd0 100644 --- a/openproblems/tasks/label_projection/methods/xgboost.py +++ b/openproblems/tasks/label_projection/methods/xgboost.py @@ -1,5 +1,5 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm +from ....tools.normalize import log_cp10k from ....tools.normalize import log_scran_pooling from ....tools.utils import check_version from typing import Optional @@ -9,14 +9,26 @@ _xgboost_method = functools.partial( method, + method_summary=( + "XGBoost is a gradient boosting decision tree model that learns multiple tree" + " structures in the form of a series of input features and their values," + " leading to a prediction decision, and averages predictions from all its" + " trees. Here, input features are normalised gene expression values." 
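The `_xgboost` helper below drives the booster through `xgb.DMatrix` and `xgb.train`; the core multi-class call, stripped of the AnnData handling, is roughly as follows (toy data, illustrative values):

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X_train, X_test = rng.normal(size=(200, 50)), rng.normal(size=(50, 50))
y_train = rng.integers(0, 4, size=200)  # integer-encoded cell-type labels

xg_train = xgb.DMatrix(X_train, label=y_train)
xg_test = xgb.DMatrix(X_test)

param = {"objective": "multi:softmax", "num_class": 4}
bst = xgb.train(param, xg_train, num_boost_round=5)
pred = bst.predict(xg_test)  # predicted class indices, one per test row
print(pred[:10])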
+ ), paper_name="XGBoost: A Scalable Tree Boosting System", - paper_url="https://doi.org/10.1145/2939672.2939785", + paper_reference="chen2016xgboost", paper_year=2016, code_url="https://xgboost.readthedocs.io/en/stable/index.html", ) -def _xgboost(adata, test: bool = False, num_round: Optional[int] = None): +def _xgboost( + adata, + test: bool = False, + obsm: Optional[str] = None, + num_round: Optional[int] = None, + **kwargs, +): import xgboost as xgb if test: @@ -30,12 +42,19 @@ def _xgboost(adata, test: bool = False, num_round: Optional[int] = None): adata_train = adata[adata.obs["is_train"]] adata_test = adata[~adata.obs["is_train"]].copy() - xg_train = xgb.DMatrix(adata_train.X, label=adata_train.obs["labels_int"]) - xg_test = xgb.DMatrix(adata_test.X, label=adata_test.obs["labels_int"]) + xg_train = xgb.DMatrix( + adata_train.obsm[obsm] if obsm else adata_train.X, + label=adata_train.obs["labels_int"], + ) + xg_test = xgb.DMatrix( + adata_test.obsm[obsm] if obsm else adata_test.X, + label=adata_test.obs["labels_int"], + ) param = dict( objective="multi:softmax", num_class=len(categories), + **kwargs, ) watchlist = [(xg_train, "train")] @@ -55,11 +74,11 @@ def _xgboost(adata, test: bool = False, num_round: Optional[int] = None): @_xgboost_method( - method_name="XGBoost (log CPM)", + method_name="XGBoost (log CP10k)", image="openproblems-python-extras", ) -def xgboost_log_cpm(adata, test: bool = False, num_round: Optional[int] = None): - adata = log_cpm(adata) +def xgboost_log_cp10k(adata, test: bool = False, num_round: Optional[int] = None): + adata = log_cp10k(adata) return _xgboost(adata, test=test, num_round=num_round) diff --git a/openproblems/tasks/label_projection/metrics/accuracy.py b/openproblems/tasks/label_projection/metrics/accuracy.py index 5e661fa097..37a67a7526 100644 --- a/openproblems/tasks/label_projection/metrics/accuracy.py +++ b/openproblems/tasks/label_projection/metrics/accuracy.py @@ -1,11 +1,17 @@ from ....tools.decorators import metric import numpy as np -import sklearn.preprocessing -@metric(metric_name="Accuracy", maximize=True) +@metric( + metric_name="Accuracy", + metric_summary="Average number of correctly applied labels.", + paper_reference="grandini2020metrics", + maximize=True, +) def accuracy(adata): + import sklearn.preprocessing + encoder = sklearn.preprocessing.LabelEncoder().fit(adata.obs["labels"]) test_data = adata[~adata.obs["is_train"]] diff --git a/openproblems/tasks/label_projection/metrics/f1.py b/openproblems/tasks/label_projection/metrics/f1.py index 94ea5446e7..47ce546c82 100644 --- a/openproblems/tasks/label_projection/metrics/f1.py +++ b/openproblems/tasks/label_projection/metrics/f1.py @@ -1,10 +1,10 @@ from ....tools.decorators import metric -import sklearn.metrics -import sklearn.preprocessing - def _f1(adata, average="weighted"): + import sklearn.metrics + import sklearn.preprocessing + encoder = sklearn.preprocessing.LabelEncoder().fit(adata.obs["labels"]) test_data = adata[~adata.obs["is_train"]] @@ -16,11 +16,29 @@ def _f1(adata, average="weighted"): ) -@metric(metric_name="F1 score", maximize=True) +@metric( + metric_name="F1 score", + metric_summary=( + "The [F1 score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html)" # noqa: E501 + " is a weighted average of the precision and recall over all class labels," + " where an F1 score reaches its best value at 1 and worst score at 0, where" + " each class contributes to the score relative to its frequency in the dataset." 
+ ), + paper_reference="grandini2020metrics", + maximize=True, +) def f1(adata): return _f1(adata, average="weighted") -@metric(metric_name="Macro F1 score", maximize=True) +@metric( + metric_name="Macro F1 score", + metric_summary=( + "The macro F1 score is an unweighted F1 score, where each class contributes" + " equally, regardless of its frequency." + ), + paper_reference="grandini2020metrics", + maximize=True, +) def f1_macro(adata): return _f1(adata, average="macro") diff --git a/openproblems/tasks/multimodal_data_integration/README.md b/openproblems/tasks/matching_modalities/README.md similarity index 60% rename from openproblems/tasks/multimodal_data_integration/README.md rename to openproblems/tasks/matching_modalities/README.md index 88884cdc74..cbc07db2ff 100644 --- a/openproblems/tasks/multimodal_data_integration/README.md +++ b/openproblems/tasks/matching_modalities/README.md @@ -1,16 +1,14 @@ -# Multimodal data integration - -## The task +# Matching modalities Cellular function is regulated by the complex interplay of different types of biological molecules (DNA, RNA, proteins, etc.), which determine the state of a cell. Several recently described technologies allow for simultaneous measurement of different aspects -of cellular state. For example, [sci-CAR](https://doi.org/10.1126/science.aau0730) +of cellular state. For example, [sci-CAR](https://openproblems.bio/bibliography#cao2018joint) jointly profiles RNA expression and chromatin accessibility on the same cell and -[CITE-seq](https://doi.org/10.1038/nmeth.4380) measures surface protein abundance and -RNA expression from each cell. These technologies enable us to better understand -cellular function, however datasets are still rare and there are tradeoffs that these -measurements make for to profile multiple modalities. +[CITE-seq](https://openproblems.bio/bibliography#stoeckius2017simultaneous) measures +surface protein abundance and RNA expression from each cell. These technologies enable +us to better understand cellular function, however datasets are still rare and there are +tradeoffs that these measurements make for to profile multiple modalities. Joint methods can be more expensive or lower throughput or more noisy than measuring a single modality at a time. Therefore it is useful to develop methods that are capable @@ -23,18 +21,6 @@ data as ground truth so that we can evaluate when the observations from the same acquired using different modalities are similar. A perfect result has each of the paired observations sharing the same coordinates in the latent space. -## The metrics - -Metrics for multimodal data integration aim to characterize how well the aligned -datasets correspond to the ground truth. - -* **kNN AUC**: Let $f(i) ∈ F$ be the scRNA-seq measurement of cell $i$, and $g(i) ∈ G$ - be the scATAC- seq measurement of cell $i$. kNN-AUC calculates the average percentage - overlap of neighborhoods of $f(i)$ in $F$ with neighborhoods of $g(i)$ in $G$. Higher - is better. -* **MSE**: Mean squared error (MSE) is the average distance between each pair of matched - observations of the same cell in the learned latent space. Lower is better. 
- ## API Datasets should include matched measurements from two modalities, which are contained in diff --git a/openproblems/tasks/multimodal_data_integration/__init__.py b/openproblems/tasks/matching_modalities/__init__.py similarity index 100% rename from openproblems/tasks/multimodal_data_integration/__init__.py rename to openproblems/tasks/matching_modalities/__init__.py diff --git a/openproblems/tasks/multimodal_data_integration/api.py b/openproblems/tasks/matching_modalities/api.py similarity index 96% rename from openproblems/tasks/multimodal_data_integration/api.py rename to openproblems/tasks/matching_modalities/api.py index ba04842f0d..afc9d38add 100644 --- a/openproblems/tasks/multimodal_data_integration/api.py +++ b/openproblems/tasks/matching_modalities/api.py @@ -14,7 +14,7 @@ def check_dataset(adata): return True -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" assert "aligned" in adata.obsm assert "mode2_aligned" in adata.obsm diff --git a/openproblems/tasks/multimodal_data_integration/datasets/__init__.py b/openproblems/tasks/matching_modalities/datasets/__init__.py similarity index 100% rename from openproblems/tasks/multimodal_data_integration/datasets/__init__.py rename to openproblems/tasks/matching_modalities/datasets/__init__.py diff --git a/openproblems/tasks/multimodal_data_integration/datasets/citeseq.py b/openproblems/tasks/matching_modalities/datasets/citeseq.py similarity index 60% rename from openproblems/tasks/multimodal_data_integration/datasets/citeseq.py rename to openproblems/tasks/matching_modalities/datasets/citeseq.py index 338423ef9e..5923b5eabb 100644 --- a/openproblems/tasks/multimodal_data_integration/datasets/citeseq.py +++ b/openproblems/tasks/matching_modalities/datasets/citeseq.py @@ -6,9 +6,11 @@ "CITE-seq Cord Blood Mononuclear Cells", data_url=load_citeseq_cbmc.metadata["data_url"], data_reference=load_citeseq_cbmc.metadata["data_reference"], - dataset_summary="8k cord blood mononuclear cells sequenced by CITEseq, a multimodal" - " addition to the 10x scRNA-seq platform that allows simultaneous measurement of " - "RNA and protein.", + dataset_summary=( + "8k cord blood mononuclear cells sequenced by CITEseq, a multimodal addition to" + " the 10x scRNA-seq platform that allows simultaneous measurement of RNA and" + " protein." + ), ) def citeseq_cbmc(test=False): return load_citeseq_cbmc(test=test) diff --git a/openproblems/tasks/multimodal_data_integration/datasets/scicar.py b/openproblems/tasks/matching_modalities/datasets/scicar.py similarity index 59% rename from openproblems/tasks/multimodal_data_integration/datasets/scicar.py rename to openproblems/tasks/matching_modalities/datasets/scicar.py index c3891fa12b..dddb373b29 100644 --- a/openproblems/tasks/multimodal_data_integration/datasets/scicar.py +++ b/openproblems/tasks/matching_modalities/datasets/scicar.py @@ -7,9 +7,11 @@ "sciCAR Cell Lines", data_url=load_scicar_cell_lines.metadata["data_url"], data_reference=load_scicar_cell_lines.metadata["data_reference"], - dataset_summary="5k cells from a time-series of dexamethasone treatment sequenced " - "by sci-CAR, a combinatorial indexing-based co-assay that jointly profiles " - "chromatin accessibility and mRNA.", + dataset_summary=( + "5k cells from a time-series of dexamethasone treatment sequenced by sci-CAR, a" + " combinatorial indexing-based co-assay that jointly profiles chromatin" + " accessibility and mRNA." 
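For the renamed matching_modalities task, the `check_method` change above only requires that a method writes co-embedded coordinates for both modalities into `obsm`. A schematic of the expected `AnnData` layout, with purely illustrative sizes and random data (the real datasets come from the CITE-seq and sci-CAR loaders above):

```python
import anndata
import numpy as np

n_cells, n_genes, n_peaks, n_latent = 100, 500, 300, 20
rng = np.random.default_rng(0)

# dataset: modality 1 in X, modality 2 in obsm["mode2"], same cells in both
adata = anndata.AnnData(X=rng.random((n_cells, n_genes)))
adata.obsm["mode2"] = rng.random((n_cells, n_peaks))

# what a method is expected to add before check_method passes
adata.obsm["aligned"] = rng.random((n_cells, n_latent))
adata.obsm["mode2_aligned"] = rng.random((n_cells, n_latent))

assert "aligned" in adata.obsm and "mode2_aligned" in adata.obsm
```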
+ ), ) def scicar_cell_lines(test=False): return load_scicar_cell_lines(test=test) @@ -19,9 +21,11 @@ def scicar_cell_lines(test=False): "sciCAR Mouse Kidney", data_url=load_scicar_mouse_kidney.metadata["data_url"], data_reference=load_scicar_cell_lines.metadata["data_reference"], - dataset_summary="11k cells from adult mouse kidney sequenced " - "by sci-CAR, a combinatorial indexing-based co-assay that jointly profiles " - "chromatin accessibility and mRNA.", + dataset_summary=( + "11k cells from adult mouse kidney sequenced by sci-CAR, a combinatorial" + " indexing-based co-assay that jointly profiles chromatin accessibility and" + " mRNA." + ), ) def scicar_mouse_kidney(test=False): return load_scicar_mouse_kidney(test=test) diff --git a/openproblems/tasks/matching_modalities/methods/__init__.py b/openproblems/tasks/matching_modalities/methods/__init__.py new file mode 100644 index 0000000000..dae56bb780 --- /dev/null +++ b/openproblems/tasks/matching_modalities/methods/__init__.py @@ -0,0 +1,7 @@ +from .baseline import random_features +from .baseline import true_features +from .harmonic_alignment import harmonic_alignment_log_scran_pooling +from .harmonic_alignment import harmonic_alignment_sqrt_cp10k +from .mnn import mnn_log_cp10k +from .mnn import mnn_log_scran_pooling +from .procrustes import procrustes diff --git a/openproblems/tasks/matching_modalities/methods/baseline.py b/openproblems/tasks/matching_modalities/methods/baseline.py new file mode 100644 index 0000000000..0593fbabc3 --- /dev/null +++ b/openproblems/tasks/matching_modalities/methods/baseline.py @@ -0,0 +1,47 @@ +from ....tools.decorators import baseline_method +from ....tools.normalize import log_cp10k +from ....tools.utils import check_version + +import numpy as np + + +@baseline_method( + method_name="Random Features", + method_summary=( + "20-dimensional SVD is computed on the first modality, and is then randomly" + " permuted twice, once for use as the output for each modality, producing" + " random features with no correlation between modalities." + ), +) +def random_features(adata, test=False, n_svd=20): + import sklearn.decomposition + + n_svd = min([n_svd, min(adata.X.shape) - 1, min(adata.obsm["mode2"].shape) - 1]) + adata = log_cp10k(adata) + X_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) + adata.obsm["aligned"] = X_pca[np.random.permutation(np.arange(adata.shape[0]))] + adata.obsm["mode2_aligned"] = X_pca[ + np.random.permutation(np.arange(adata.shape[0])) + ] + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="True Features", + method_summary=( + "20-dimensional SVD is computed on the first modality, and this same embedding" + " is used as output for both modalities, producing perfectly aligned features" + " from each modality." 
+ ), +) +def true_features(adata, test=False, n_svd=20): + import sklearn.decomposition + + n_svd = min([n_svd, min(adata.X.shape) - 1, min(adata.obsm["mode2"].shape) - 1]) + adata = log_cp10k(adata) + X_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) + adata.obsm["aligned"] = X_pca + adata.obsm["mode2_aligned"] = X_pca + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/multimodal_data_integration/methods/harmonic_alignment.py b/openproblems/tasks/matching_modalities/methods/harmonic_alignment.py similarity index 71% rename from openproblems/tasks/multimodal_data_integration/methods/harmonic_alignment.py rename to openproblems/tasks/matching_modalities/methods/harmonic_alignment.py index e17db7332e..46565c9535 100644 --- a/openproblems/tasks/multimodal_data_integration/methods/harmonic_alignment.py +++ b/openproblems/tasks/matching_modalities/methods/harmonic_alignment.py @@ -1,16 +1,23 @@ from ....tools.decorators import method -from ....tools.normalize import log_cpm +from ....tools.normalize import log_cp10k from ....tools.normalize import log_scran_pooling -from ....tools.normalize import sqrt_cpm +from ....tools.normalize import sqrt_cp10k from ....tools.utils import check_version import functools -import sklearn.decomposition _harmonic_alignment_method = functools.partial( method, + method_summary=( + "Harmonic alignment embeds cellular data from each modality into a common space" + " by computing a mapping between the 100-dimensional diffusion maps of each" + " modality. This mapping is computed by computing an isometric transformation" + " of the eigenmaps, and concatenating the resulting diffusion maps together" + " into a joint 200-dimensional space. This joint diffusion map space is used as" + " output for the task." 
+ ), paper_name="Harmonic Alignment", - paper_url="https://doi.org/10.1137/1.9781611976236.36", + paper_reference="stanley2020harmonic", paper_year=2020, code_url="https://github.com/KrishnaswamyLab/harmonic-alignment", ) @@ -20,6 +27,7 @@ def _harmonic_alignment( adata, test=False, n_svd=None, n_eigenvectors=None, n_pca_XY=None, n_filters=None ): import harmonicalignment + import sklearn.decomposition if test: n_svd = n_svd or 20 @@ -51,13 +59,13 @@ def _harmonic_alignment( @_harmonic_alignment_method( - method_name="Harmonic Alignment (sqrt CPM)", image="openproblems-python-extras" + method_name="Harmonic Alignment (sqrt CP10k)", image="openproblems-python-extras" ) -def harmonic_alignment_sqrt_cpm( +def harmonic_alignment_sqrt_cp10k( adata, test=False, n_svd=None, n_eigenvectors=None, n_pca_XY=None, n_filters=None ): - adata = sqrt_cpm(adata) - adata = log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + adata = sqrt_cp10k(adata) + adata = log_cp10k(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") _harmonic_alignment( adata, test=test, @@ -76,7 +84,7 @@ def harmonic_alignment_log_scran_pooling( adata, test=False, n_svd=None, n_eigenvectors=None, n_pca_XY=None, n_filters=None ): adata = log_scran_pooling(adata) - adata = log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + adata = log_cp10k(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") _harmonic_alignment( adata, test=test, diff --git a/openproblems/tasks/multimodal_data_integration/methods/mnn.R b/openproblems/tasks/matching_modalities/methods/mnn.R similarity index 100% rename from openproblems/tasks/multimodal_data_integration/methods/mnn.R rename to openproblems/tasks/matching_modalities/methods/mnn.R diff --git a/openproblems/tasks/matching_modalities/methods/mnn.py b/openproblems/tasks/matching_modalities/methods/mnn.py new file mode 100644 index 0000000000..039129bfe0 --- /dev/null +++ b/openproblems/tasks/matching_modalities/methods/mnn.py @@ -0,0 +1,51 @@ +from ....tools.conversion import r_function +from ....tools.decorators import method +from ....tools.normalize import log_cp10k +from ....tools.normalize import log_scran_pooling +from ....tools.utils import check_r_version + +import functools + +_mnn = r_function("mnn.R") + +_mnn_method = functools.partial( + method, + method_summary=( + "Mutual nearest neighbors (MNN) embeds cellular data from each modality into a" + " common space by computing a mapping between modality-specific 100-dimensional" + " SVD embeddings. The embeddings are integrated using the FastMNN version of" + " the MNN algorithm, which generates an embedding of the second modality mapped" + " to the SVD space of the first. This corrected joint SVD space is used as" + " output for the task." 
+ ), + paper_name=( + "Batch effects in single-cell RNA-sequencing data are corrected by matching" + " mutual nearest neighbors" + ), + paper_reference="haghverdi2018batch", + paper_year=2018, + code_url="https://github.com/LTLA/batchelor", + image="openproblems-r-extras", +) + + +@_mnn_method( + method_name="Mutual Nearest Neighbors (log CP10k)", +) +def mnn_log_cp10k(adata, test=False): + adata = log_cp10k(adata) + adata = log_cp10k(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + adata = _mnn(adata) + adata.uns["method_code_version"] = check_r_version("batchelor") + return adata + + +@_mnn_method( + method_name="Mutual Nearest Neighbors (log scran)", +) +def mnn_log_scran_pooling(adata, test=False): + adata = log_scran_pooling(adata) + adata = log_cp10k(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + adata = _mnn(adata) + adata.uns["method_code_version"] = check_r_version("batchelor") + return adata diff --git a/openproblems/tasks/matching_modalities/methods/procrustes.py b/openproblems/tasks/matching_modalities/methods/procrustes.py new file mode 100644 index 0000000000..a7ccd337d6 --- /dev/null +++ b/openproblems/tasks/matching_modalities/methods/procrustes.py @@ -0,0 +1,41 @@ +from ....tools.decorators import method +from ....tools.normalize import log_cp10k +from ....tools.utils import check_version + + +@method( + method_name="Procrustes superimposition", + method_summary=( + "Procrustes superimposition embeds cellular data from each modality into a" + " common space by aligning the 100-dimensional SVD embeddings to one another by" + " using an isomorphic transformation that minimizes the root mean squared" + " distance between points. The unmodified SVD embedding and the transformed" + " second modality are used as output for the task." 
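The Procrustes method summarised above is a thin wrapper around `scipy.spatial.procrustes`, whose use in the task follows below. A standalone toy example of the underlying call (random matrices, illustrative only):

```python
import numpy as np
import scipy.spatial

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))               # embedding of modality 1 (toy data)
Y = (X @ rng.normal(size=(10, 10))) * 2.0   # distorted stand-in for modality 2

# both matrices are standardised and Y is transformed to best match X;
# disparity is the residual sum of squared differences
X_proc, Y_proc, disparity = scipy.spatial.procrustes(X, Y)
print(X_proc.shape, Y_proc.shape, round(disparity, 3))
```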
+ ), + paper_name="Generalized Procrustes analysis", + paper_reference="gower1975generalized", + paper_year=1975, + code_url=( + "https://docs.scipy.org/doc/scipy/reference/generated/" + "scipy.spatial.procrustes.html" + ), +) +def procrustes(adata, test=False, n_svd=None): + import scipy.spatial + import sklearn.decomposition + + if test: + n_svd = n_svd or 20 + else: # pragma: no cover + n_svd = n_svd or 100 + n_svd = min([n_svd, min(adata.X.shape) - 1, min(adata.obsm["mode2"].shape) - 1]) + adata = log_cp10k(adata) + adata = log_cp10k(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + X_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) + Y_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.obsm["mode2"]) + X_proc, Y_proc, _ = scipy.spatial.procrustes(X_pca, Y_pca) + adata.obsm["aligned"] = X_proc + adata.obsm["mode2_aligned"] = Y_proc + + adata.uns["method_code_version"] = check_version("scipy") + return adata diff --git a/openproblems/tasks/multimodal_data_integration/metrics/__init__.py b/openproblems/tasks/matching_modalities/metrics/__init__.py similarity index 100% rename from openproblems/tasks/multimodal_data_integration/metrics/__init__.py rename to openproblems/tasks/matching_modalities/metrics/__init__.py diff --git a/openproblems/tasks/multimodal_data_integration/metrics/knn_auc.py b/openproblems/tasks/matching_modalities/metrics/knn_auc.py similarity index 71% rename from openproblems/tasks/multimodal_data_integration/metrics/knn_auc.py rename to openproblems/tasks/matching_modalities/metrics/knn_auc.py index 88d3d12c0b..8e783e17fd 100644 --- a/openproblems/tasks/multimodal_data_integration/metrics/knn_auc.py +++ b/openproblems/tasks/matching_modalities/metrics/knn_auc.py @@ -1,12 +1,23 @@ from ....tools.decorators import metric import numpy as np -import sklearn.decomposition -import sklearn.neighbors -@metric(metric_name="kNN Area Under the Curve", maximize=True) +@metric( + metric_name="kNN Area Under the Curve", + metric_summary=( + "Let $f(i) ∈ F$ be the scRNA-seq measurement of cell $i$, and $g(i) ∈ G$ be the" + " scATAC- seq measurement of cell $i$. kNN-AUC calculates the average" + " percentage overlap of neighborhoods of $f(i)$ in $F$ with neighborhoods of" + " $g(i)$ in $G$. Higher is better." + ), + paper_reference="stanley2020harmonic", + maximize=True, +) def knn_auc(adata, proportion_neighbors=0.1, n_svd=100): + import sklearn.decomposition + import sklearn.neighbors + n_svd = min([n_svd, min(adata.X.shape) - 1]) n_neighbors = int(np.ceil(proportion_neighbors * adata.X.shape[0])) X_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) diff --git a/openproblems/tasks/multimodal_data_integration/metrics/mse.py b/openproblems/tasks/matching_modalities/metrics/mse.py similarity index 64% rename from openproblems/tasks/multimodal_data_integration/metrics/mse.py rename to openproblems/tasks/matching_modalities/metrics/mse.py index ed8a1563a9..49dbf462c2 100644 --- a/openproblems/tasks/multimodal_data_integration/metrics/mse.py +++ b/openproblems/tasks/matching_modalities/metrics/mse.py @@ -13,7 +13,15 @@ def _square(X): return scprep.utils.toarray(X) ** 2 -@metric(metric_name="Mean squared error", maximize=False) +@metric( + metric_name="Mean squared error", + metric_summary=( + "Mean squared error (MSE) is the average distance between each pair of matched" + " observations of the same cell in the learned latent space. Lower is better." 
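The kNN-AUC metric above scores how well neighbourhoods agree between the two aligned embeddings. A simplified sketch of the neighbourhood-overlap idea (mean overlap of k-nearest-neighbour sets; the repository's metric additionally turns this into an AUC over neighbourhood sizes):

```python
import numpy as np
import sklearn.neighbors


def mean_knn_overlap(F, G, n_neighbors=10):
    """Toy neighbourhood-overlap score between two embeddings of the same cells."""
    idx_f = (
        sklearn.neighbors.NearestNeighbors(n_neighbors=n_neighbors)
        .fit(F)
        .kneighbors(F, return_distance=False)
    )
    idx_g = (
        sklearn.neighbors.NearestNeighbors(n_neighbors=n_neighbors)
        .fit(G)
        .kneighbors(G, return_distance=False)
    )
    overlaps = [len(set(a) & set(b)) / n_neighbors for a, b in zip(idx_f, idx_g)]
    return float(np.mean(overlaps))


rng = np.random.default_rng(0)
F = rng.normal(size=(200, 20))
print(mean_knn_overlap(F, F))                           # identical embeddings -> 1.0
print(mean_knn_overlap(F, rng.normal(size=(200, 20))))  # unrelated embeddings -> low
```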
+ ), + paper_reference="lance2022multimodal", + maximize=False, +) def mse(adata): X = scprep.utils.toarray(adata.obsm["aligned"]) Y = scprep.utils.toarray(adata.obsm["mode2_aligned"]) diff --git a/openproblems/tasks/multimodal_data_integration/methods/__init__.py b/openproblems/tasks/multimodal_data_integration/methods/__init__.py deleted file mode 100644 index 5c7d74eab8..0000000000 --- a/openproblems/tasks/multimodal_data_integration/methods/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .harmonic_alignment import harmonic_alignment_log_scran_pooling -from .harmonic_alignment import harmonic_alignment_sqrt_cpm -from .mnn import mnn_log_cpm -from .mnn import mnn_log_scran_pooling -from .procrustes import procrustes diff --git a/openproblems/tasks/multimodal_data_integration/methods/mnn.py b/openproblems/tasks/multimodal_data_integration/methods/mnn.py deleted file mode 100644 index 92eeeddf8f..0000000000 --- a/openproblems/tasks/multimodal_data_integration/methods/mnn.py +++ /dev/null @@ -1,41 +0,0 @@ -from ....tools.conversion import r_function -from ....tools.decorators import method -from ....tools.normalize import log_cpm -from ....tools.normalize import log_scran_pooling -from ....tools.utils import check_r_version - -import functools - -_mnn = r_function("mnn.R") - -_mnn_method = functools.partial( - method, - paper_name="Batch effects in single-cell RNA-sequencing data are corrected by " - "matching mutual nearest neighbors", - paper_url="https://www.nature.com/articles/nbt.4091", - paper_year=2018, - code_url="https://github.com/LTLA/batchelor", - image="openproblems-r-extras", -) - - -@_mnn_method( - method_name="Mutual Nearest Neighbors (log CPM)", -) -def mnn_log_cpm(adata, test=False): - adata = log_cpm(adata) - adata = log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") - adata = _mnn(adata) - adata.uns["method_code_version"] = check_r_version("batchelor") - return adata - - -@_mnn_method( - method_name="Mutual Nearest Neighbors (log scran)", -) -def mnn_log_scran_pooling(adata, test=False): - adata = log_scran_pooling(adata) - adata = log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") - adata = _mnn(adata) - adata.uns["method_code_version"] = check_r_version("batchelor") - return adata diff --git a/openproblems/tasks/multimodal_data_integration/methods/procrustes.py b/openproblems/tasks/multimodal_data_integration/methods/procrustes.py deleted file mode 100644 index be3a1e8e0f..0000000000 --- a/openproblems/tasks/multimodal_data_integration/methods/procrustes.py +++ /dev/null @@ -1,32 +0,0 @@ -from ....tools.decorators import method -from ....tools.normalize import log_cpm -from ....tools.utils import check_version - -import scipy.spatial -import sklearn.decomposition - - -@method( - method_name="Procrustes", - paper_name="Generalized Procrustes analysis", - paper_url="https://link.springer.com/content/pdf/10.1007/BF02291478.pdf", - paper_year=1975, - code_url="https://docs.scipy.org/doc/scipy/reference/generated/" - "scipy.spatial.procrustes.html", -) -def procrustes(adata, test=False, n_svd=None): - if test: - n_svd = n_svd or 20 - else: # pragma: no cover - n_svd = n_svd or 100 - n_svd = min([n_svd, min(adata.X.shape) - 1, min(adata.obsm["mode2"].shape) - 1]) - adata = log_cpm(adata) - adata = log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") - X_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) - Y_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.obsm["mode2"]) - X_proc, Y_proc, _ = 
scipy.spatial.procrustes(X_pca, Y_pca) - adata.obsm["aligned"] = X_proc - adata.obsm["mode2_aligned"] = Y_proc - - adata.uns["method_code_version"] = check_version("scipy") - return adata diff --git a/openproblems/tasks/regulatory_effect_prediction/README.md b/openproblems/tasks/regulatory_effect_prediction/README.md index 1bd06f809c..0735377a26 100644 --- a/openproblems/tasks/regulatory_effect_prediction/README.md +++ b/openproblems/tasks/regulatory_effect_prediction/README.md @@ -1,10 +1,10 @@ # Chromatin accessibility prediction Chromatin accessibility prediction refers to the gene expression prediction of a cell or -cell type from ATAC-seq peaks. For a summary or all relevant models, see gene score -method in [Jeffrey M. Granja et -al.](https://www.biorxiv.org/content/10.1101/2020.04.28.066498v1), [Su Wang et -al.](https://pubmed.ncbi.nlm.nih.gov/24263090/) et al. +cell type from ATAC-seq peaks. For a summary of all relevant models, see gene score +methods in [Jeffrey M. Granja et +al.](https://openproblems.bio/bibliography#granja2021archr), [Su Wang et +al.](https://openproblems.bio/bibliography#wang2013target) et al. ## API diff --git a/openproblems/tasks/regulatory_effect_prediction/api.py b/openproblems/tasks/regulatory_effect_prediction/api.py index 28a5dcd262..266d67bc76 100644 --- a/openproblems/tasks/regulatory_effect_prediction/api.py +++ b/openproblems/tasks/regulatory_effect_prediction/api.py @@ -26,7 +26,7 @@ def check_dataset(adata): return True -def check_method(adata): +def check_method(adata, is_baseline=False): """Check that method output fits expected API.""" assert "gene_score" in adata.obsm assert adata.obsm["gene_score"].shape == adata.X.shape diff --git a/openproblems/tasks/regulatory_effect_prediction/datasets/scicar.py b/openproblems/tasks/regulatory_effect_prediction/datasets/scicar.py index e5dff69ea6..32f12fd5cf 100644 --- a/openproblems/tasks/regulatory_effect_prediction/datasets/scicar.py +++ b/openproblems/tasks/regulatory_effect_prediction/datasets/scicar.py @@ -6,9 +6,11 @@ "sciCAR Mouse Kidney with cell clusters", data_url=scicar.load_scicar_mouse_kidney.metadata["data_url"], data_reference=scicar.load_scicar_mouse_kidney.metadata["data_reference"], - dataset_summary="11k cells from adult mouse kidney sequenced " - "by sci-CAR, a combinatorial indexing-based co-assay that jointly profiles " - "chromatin accessibility and mRNA.", + dataset_summary=( + "11k cells from adult mouse kidney sequenced by sci-CAR, a combinatorial" + " indexing-based co-assay that jointly profiles chromatin accessibility and" + " mRNA." 
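The regulatory_effect_prediction `check_method` above requires predictions in `adata.obsm["gene_score"]` with the same shape as `adata.X`; the metrics added further below then report the median per-gene correlation. A toy sketch of that evaluation idea (illustrative sizes and simulated data, not the repository code):

```python
import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
n_cells, n_genes = 50, 100

X = rng.poisson(2.0, size=(n_cells, n_genes)).astype(float)  # observed expression
gene_score = X + rng.normal(scale=0.5, size=X.shape)         # toy prediction

assert gene_score.shape == X.shape  # the shape contract enforced by check_method

# per-gene correlation between prediction and truth, summarised by the median
cors = []
for i in range(n_genes):
    rho, _ = scipy.stats.spearmanr(X[:, i], gene_score[:, i])
    cors.append(rho)
cors = np.array(cors)
print(np.median(cors[~np.isnan(cors)]))
```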
+ ), ) def scicar_mouse_kidney(test=False): adata = scicar.load_scicar_mouse_kidney(test=test) diff --git a/openproblems/tasks/regulatory_effect_prediction/methods/__init__.py b/openproblems/tasks/regulatory_effect_prediction/methods/__init__.py index 953d36fd49..20a47dc3d3 100644 --- a/openproblems/tasks/regulatory_effect_prediction/methods/__init__.py +++ b/openproblems/tasks/regulatory_effect_prediction/methods/__init__.py @@ -1 +1,3 @@ +from .baseline import random_scores +from .baseline import true_scores from .beta import beta diff --git a/openproblems/tasks/regulatory_effect_prediction/methods/baseline.py b/openproblems/tasks/regulatory_effect_prediction/methods/baseline.py new file mode 100644 index 0000000000..f8305c8b60 --- /dev/null +++ b/openproblems/tasks/regulatory_effect_prediction/methods/baseline.py @@ -0,0 +1,29 @@ +from ....tools.decorators import baseline_method +from ....tools.utils import check_version + +import numpy as np + + +@baseline_method( + method_name="Random Scores", + method_summary=( + "Random generation of gene scores by random permutation of gene expression" + " values" + ), +) +def random_scores(adata, test=False): + adata.obsm["gene_score"] = adata.X[ + np.random.permutation(np.arange(adata.X.shape[0])) + ] + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="True Scores", + method_summary="Perfect prediction of gene scores from gene expression values", +) +def true_scores(adata, test=False): + adata.obsm["gene_score"] = adata.X + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/regulatory_effect_prediction/methods/beta.py b/openproblems/tasks/regulatory_effect_prediction/methods/beta.py index 68aa9746ce..30f7a70bcb 100644 --- a/openproblems/tasks/regulatory_effect_prediction/methods/beta.py +++ b/openproblems/tasks/regulatory_effect_prediction/methods/beta.py @@ -3,7 +3,6 @@ import numpy as np import pandas as pd -import scanpy as sc import scipy.sparse @@ -70,6 +69,8 @@ def _get_annotation(adata, retries=3): def _filter_mitochondrial(adata): + import scanpy as sc + if adata.uns["species"] in ["mus_musculus", "homo_sapiens"]: adata.var["mt"] = adata.var.gene_short_name.str.lower().str.startswith( "mt-" @@ -92,6 +93,8 @@ def _filter_n_genes_max(adata): def _filter_n_genes_min(adata): + import scanpy as sc + adata_filter = adata.copy() sc.pp.filter_cells(adata_filter, min_genes=200) if adata_filter.shape[0] > 100: @@ -100,6 +103,8 @@ def _filter_n_genes_min(adata): def _filter_n_cells(adata): + import scanpy as sc + adata_filter = adata.copy() sc.pp.filter_genes(adata_filter, min_cells=5) if adata_filter.shape[1] > 100: @@ -117,6 +122,7 @@ def _filter_has_chr(adata): def _beta(adata, test=False, top_genes=None, threshold=1): """Calculate gene scores and insert into .obsm.""" import pybedtools + import scanpy as sc if test: top_genes = top_genes or 100 @@ -223,13 +229,23 @@ def _beta(adata, test=False, top_genes=None, threshold=1): @method( method_name="BETA", - paper_name="Target analysis by integration of transcriptome " - "and ChIP-seq data with BETA", - paper_url="https://pubmed.ncbi.nlm.nih.gov/24263090/", + method_summary=( + "Binding and expression target analysis (BETA) is a software package that" + " integrates ChIP-seq of TFs or chromatin regulators with differential gene" + " expression data to infer direct target genes. 
BETA has three functions: (i)" + " to predict whether the factor has activating or repressive function; (ii) to" + " infer the factor's target genes; and (iii) to identify the motif of the" + " factor and its collaborators, which might modulate the factor's activating or" + " repressive function." + ), + paper_name=( + "Target analysis by integration of transcriptome and ChIP-seq data with BETA" + ), + paper_reference="wang2013target", paper_year=2013, code_version="1.0", code_url="http://cistrome.org/BETA", - image="openproblems-python-extras", + image="openproblems-python-bedtools", ) def beta(adata, test=False, top_genes=None, threshold=1): adata = _beta(adata, test=test, top_genes=top_genes, threshold=threshold) diff --git a/openproblems/tasks/regulatory_effect_prediction/metrics/correlation.py b/openproblems/tasks/regulatory_effect_prediction/metrics/correlation.py index 29366d8565..ac62c59376 100644 --- a/openproblems/tasks/regulatory_effect_prediction/metrics/correlation.py +++ b/openproblems/tasks/regulatory_effect_prediction/metrics/correlation.py @@ -1,11 +1,12 @@ from ....tools.decorators import metric import numpy as np -import scipy.sparse -import scipy.stats def _correlation(adata, method="pearson"): + import scipy.sparse + import scipy.stats + if method == "pearson": method = scipy.stats.pearsonr else: @@ -25,11 +26,27 @@ def _correlation(adata, method="pearson"): return np.median(cors[~np.isnan(cors)]) -@metric(metric_name="Median Pearson correlation", maximize=True) +@metric( + metric_name="Median Pearson correlation", + metric_summary=( + "Median Pearson correlation between predicted and true gene expression over all" + " genes." + ), + paper_reference="schober2018correlation", + maximize=True, +) def pearson_correlation(adata): return _correlation(adata) -@metric(metric_name="Median Spearman correlation", maximize=True) +@metric( + metric_name="Median Spearman correlation", + metric_summary=( + "Median Spearman correlation between predicted and true gene expression over" + " all genes." + ), + paper_reference="schober2018correlation", + maximize=True, +) def spearman_correlation(adata): return _correlation(adata, method="spearman") diff --git a/openproblems/tasks/spatial_decomposition/README.md b/openproblems/tasks/spatial_decomposition/README.md index 79b3db1d8e..5994fedfd3 100644 --- a/openproblems/tasks/spatial_decomposition/README.md +++ b/openproblems/tasks/spatial_decomposition/README.md @@ -1,7 +1,5 @@ # Spatial Decomposition/Deconvolution -## The task - Spatial decomposition (also often referred to as Spatial deconvolution) is applicable to spatial transcriptomics data where the transcription profile of each capture location (spot, voxel, bead, etc.) do not share a bijective @@ -18,21 +16,6 @@ scNuc-seq) to guide the inference process, while the latter only work with the spatial data. We require that all datasets have an associated reference single cell data set, but methods are free to ignore this information. -## Metrics - -### R2 - -R2 pronounced as "R squared", also known as the "coefficient of determination". R2 -reports the fraction of the true proportion values' (`adata.obsm["proportions_true"]`) -variance that can be explained by the predicted proportion values -(`adata.obsm["proportion_pred"]`). The **best score**, and upper bound, is 1.0. There is -no fixed lower bound for the metric. The _uniform/non-weighted average_ across all cell -types/states is used to summarize performance. 
See the
-[sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html)
-documentation for details on the implementation and the
-[wikipedia](https://en.wikipedia.org/wiki/Coefficient_of_determination) site for more
-general information regarding the metric.
-
 ## API
 
 Datasets consists of 2 `anndata.AnnData` objects, concatenated by key
diff --git a/openproblems/tasks/spatial_decomposition/api.py b/openproblems/tasks/spatial_decomposition/api.py
index 3fa833ac03..53a6a9797a 100644
--- a/openproblems/tasks/spatial_decomposition/api.py
+++ b/openproblems/tasks/spatial_decomposition/api.py
@@ -32,7 +32,7 @@ def check_dataset(adata: AnnData):
     return True
 
 
-def check_method(adata: AnnData):
+def check_method(adata: AnnData, is_baseline=False):
     """Check that method output fits expected API."""
     assert np.all(adata.obs["modality"] == "sp")
     assert "proportions_pred" in adata.obsm
@@ -41,8 +41,8 @@ def check_method(adata: AnnData):
     assert isinstance(adata.obsm["proportions_true"], np.ndarray)
     assert np.all(np.isfinite(adata.obsm["proportions_true"]))
     assert adata.obsm["proportions_pred"].shape == adata.obsm["proportions_true"].shape
-    proportions_sum = np.sum(adata.obsm["proportions_true"], axis=1)
-    np.testing.assert_allclose(proportions_sum, 1)
+    proportions_sum = np.sum(adata.obsm["proportions_pred"], axis=1)
+    np.testing.assert_allclose(proportions_sum, 1, atol=1e-6)
 
     return True
 
diff --git a/openproblems/tasks/spatial_decomposition/datasets/__init__.py b/openproblems/tasks/spatial_decomposition/datasets/__init__.py
index 7b405366a9..1a6f6a7474 100644
--- a/openproblems/tasks/spatial_decomposition/datasets/__init__.py
+++ b/openproblems/tasks/spatial_decomposition/datasets/__init__.py
@@ -2,3 +2,6 @@
 from .pancreas import pancreas_alpha_0_5
 from .pancreas import pancreas_alpha_1
 from .pancreas import pancreas_alpha_5
+from .tabula_muris_senis import tabula_muris_senis_alpha_0_5
+from .tabula_muris_senis import tabula_muris_senis_alpha_1
+from .tabula_muris_senis import tabula_muris_senis_alpha_5
diff --git a/openproblems/tasks/spatial_decomposition/datasets/destvi/generate.py b/openproblems/tasks/spatial_decomposition/datasets/destvi/generate.py
index 84a408d876..089a6a618c 100644
--- a/openproblems/tasks/spatial_decomposition/datasets/destvi/generate.py
+++ b/openproblems/tasks/spatial_decomposition/datasets/destvi/generate.py
@@ -4,14 +4,18 @@
 
 @dataset(
     "DestVI",
-    data_url="https://github.com/romain-lopez/DestVI-reproducibility/"
-    "blob/master/simulations/make_dataset.py",
-    data_reference="https://doi.org/10.1038/s41587-022-01272-8",
-    dataset_summary="scRNA-seq is generated based on learn NB parameters"
-    "from the destVI manuscripts leveraging sparsePCA. Number of cells and"
-    "cell types present in each spatial spot is computed via combination of"
-    "kernel-based parametrization of a categorical distribution and the NB model.",
-    image="openproblems-python-extras",
+    data_url=(
+        "https://github.com/romain-lopez/DestVI-reproducibility/"
+        "blob/master/simulations/make_dataset.py"
+    ),
+    data_reference="lopez2022destvi",
+    dataset_summary=(
+        "scRNA-seq data is generated based on NB parameters learned from the DestVI"
+        " manuscript, leveraging sparsePCA. The number of cells and cell types present"
+        " in each spatial spot is computed via a combination of a kernel-based"
+        " parametrization of a categorical distribution and the NB model."
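The updated `check_method` above now validates `proportions_pred` (rather than the ground truth) and asserts that each spot's predicted proportions sum to one within `atol=1e-6`, which is why the cell2location changes further below row-normalise their abundance estimates. A minimal sketch of that normalisation step, with toy numbers:

```python
import numpy as np

# toy per-spot cell-type abundances (rows = spots, columns = cell types)
abundance = np.array([[4.0, 1.0, 0.0],
                      [2.0, 2.0, 6.0]])

# row-normalise so every spot's predicted proportions sum to 1
proportions = abundance / abundance.sum(axis=1)[:, None]

np.testing.assert_allclose(proportions.sum(axis=1), 1, atol=1e-6)
print(proportions)
```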
+ ), + image="openproblems-python-pytorch", ) def destvi(test=False): from .utils import generate_synthetic_dataset diff --git a/openproblems/tasks/spatial_decomposition/datasets/destvi/utils.py b/openproblems/tasks/spatial_decomposition/datasets/destvi/utils.py index 89a1d36f2b..95a50c079e 100644 --- a/openproblems/tasks/spatial_decomposition/datasets/destvi/utils.py +++ b/openproblems/tasks/spatial_decomposition/datasets/destvi/utils.py @@ -9,15 +9,11 @@ from scipy.sparse import csr_matrix from scipy.spatial.distance import pdist from scipy.spatial.distance import squareform -from sklearn.cluster import AgglomerativeClustering -from sklearn.decomposition import PCA -from sklearn.neighbors import kneighbors_graph from typing import Optional import anndata import numpy as np import pandas as pd -import scanpy as sc def categorical(p, n_samples): @@ -66,6 +62,11 @@ def generate_synthetic_dataset( K_sampled: Optional[int] = None, # cells sampled for each spot seed: int = 0, ): + from sklearn.cluster import AgglomerativeClustering + from sklearn.decomposition import PCA + from sklearn.neighbors import kneighbors_graph + + import scanpy as sc import torch np.random.seed(seed) @@ -137,7 +138,7 @@ def generate_synthetic_dataset( ) sc_anndata.obs["cell_type"] = cell_types_sc[:, :K_sampled].reshape(-1, 1) sc_anndata.obs["label"] = sc_anndata.obs["cell_type"].astype(str).astype("category") - sc_anndata.obs["n_counts"] = np.sum(sc_anndata.X, axis=1) + sc_anndata.obs["n_counts"] = np.sum(sc_anndata.X, axis=1).A.flatten() sc_anndata.obsm["gamma"] = gamma_sc[:, :K_sampled].reshape(-1, gamma.shape[-1]) sc_anndata.obsm["spatial"] = location_sc[:, :K_sampled].reshape(-1, 2) if n_cells is not None: @@ -225,9 +226,6 @@ def generate_synthetic_dataset( st_anndata.uns["key_clustering"] = key_list st_anndata.uns["target_list"] = [1] + target_list - sc_anndata.layers["counts"] = sc_anndata.X.copy() - st_anndata.layers["counts"] = st_anndata.X.copy() - merged_anndata = merge_sc_and_sp(sc_anndata, st_anndata, test=test) return merged_anndata diff --git a/openproblems/tasks/spatial_decomposition/datasets/pancreas.py b/openproblems/tasks/spatial_decomposition/datasets/pancreas.py index 7c27ebf758..9c5c70bcbd 100644 --- a/openproblems/tasks/spatial_decomposition/datasets/pancreas.py +++ b/openproblems/tasks/spatial_decomposition/datasets/pancreas.py @@ -2,54 +2,59 @@ from ....data.utils import filter_genes_cells from ....tools.decorators import dataset from .utils import generate_synthetic_dataset +from typing import List +from typing import Optional -import scanpy as sc +import functools - -@dataset( - "Pancreas (alpha=1)", +_pancreas_dataset = functools.partial( + dataset, data_url=load_pancreas.metadata["data_url"], data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreas cells aggregated from single-cell" - " (Dirichlet alpha=1)", ) -def pancreas_alpha_1(test=False, n_obs=100): - adata = load_pancreas(test=test, integer_only=True) +_DATASET_SUMMARY = ( + "Human pancreas cells aggregated from single-cell (Dirichlet alpha={})" +) + + +def _pancreas_synthetic( + alpha: float, + test: bool = False, + n_obs: int = 100, + keep_techs: Optional[List[str]] = None, +): + import scanpy as sc + + adata = load_pancreas(test=test, keep_techs=keep_techs or ["inDrop3"]) sc.pp.filter_genes(adata, min_counts=10) adata.obs["label"] = adata.obs["celltype"] - merged_adata = generate_synthetic_dataset(adata, n_obs=n_obs, alpha=1, test=test) + merged_adata = generate_synthetic_dataset( + adata, 
n_obs=n_obs, alpha=alpha, test=test + ) filter_genes_cells(merged_adata) return merged_adata -@dataset( - "Pancreas (alpha=5)", - data_url=load_pancreas.metadata["data_url"], - data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreas cells aggregated from single-cell" - " (Dirichlet alpha=5)", +@_pancreas_dataset( + "Pancreas (alpha=1)", + dataset_summary=_DATASET_SUMMARY.format(1), ) -def pancreas_alpha_5(test=False, n_obs=100): - adata = load_pancreas(test=test, integer_only=True) - adata.obs["label"] = adata.obs["celltype"] +def pancreas_alpha_1(test=False, n_obs=100, keep_techs: Optional[List[str]] = None): + return _pancreas_synthetic(test=test, n_obs=n_obs, alpha=1, keep_techs=keep_techs) - merged_adata = generate_synthetic_dataset(adata, n_obs=n_obs, alpha=5) - filter_genes_cells(merged_adata) - return merged_adata + +@_pancreas_dataset( + "Pancreas (alpha=5)", + dataset_summary=_DATASET_SUMMARY.format(5), +) +def pancreas_alpha_5(test=False, n_obs=100, keep_techs: Optional[List[str]] = None): + return _pancreas_synthetic(test=test, n_obs=n_obs, alpha=5, keep_techs=keep_techs) -@dataset( +@_pancreas_dataset( "Pancreas (alpha=0.5)", - data_url=load_pancreas.metadata["data_url"], - data_reference=load_pancreas.metadata["data_reference"], - dataset_summary="Human pancreas cells aggregated from single-cell" - " (Dirichlet alpha=0.5)", + dataset_summary=_DATASET_SUMMARY.format(0.5), ) -def pancreas_alpha_0_5(test=False, n_obs=100): - adata = load_pancreas(test=test, integer_only=True) - adata.obs["label"] = adata.obs["celltype"] - - merged_adata = generate_synthetic_dataset(adata, n_obs=n_obs, alpha=0.5) - filter_genes_cells(merged_adata) - return merged_adata +def pancreas_alpha_0_5(test=False, n_obs=100, keep_techs: Optional[List[str]] = None): + return _pancreas_synthetic(test=test, n_obs=n_obs, alpha=0.5, keep_techs=keep_techs) diff --git a/openproblems/tasks/spatial_decomposition/datasets/tabula_muris_senis.py b/openproblems/tasks/spatial_decomposition/datasets/tabula_muris_senis.py new file mode 100644 index 0000000000..79fda7200b --- /dev/null +++ b/openproblems/tasks/spatial_decomposition/datasets/tabula_muris_senis.py @@ -0,0 +1,53 @@ +from ....data.tabula_muris_senis import load_tabula_muris_senis +from ....data.utils import filter_genes_cells +from ....tools.decorators import dataset +from .utils import generate_synthetic_dataset + +import functools + + +def _tabula_muris_senis(alpha, test, n_obs): + adata = load_tabula_muris_senis( + test=test, organ_list=["lung"], method_list=["droplet"] + ) + adata = adata[adata.obs["age"] == "30m"].copy() + adata.obs["label"] = adata.obs["free_annotation"] + + merged_adata = generate_synthetic_dataset( + adata, n_obs=n_obs, alpha=alpha, test=test + ) + filter_genes_cells(merged_adata) + return merged_adata + + +_tabula_muris_senis_dataset = functools.partial( + dataset, + data_url=load_tabula_muris_senis.metadata["data_url"], + data_reference=load_tabula_muris_senis.metadata["data_reference"], +) + + +@_tabula_muris_senis_dataset( + "Tabula muris senis (alpha=1)", + dataset_summary="Mouse lung cells aggregated from single-cell (Dirichlet alpha=1)", +) +def tabula_muris_senis_alpha_1(test=False, n_obs=100): + return _tabula_muris_senis(alpha=1, test=test, n_obs=n_obs) + + +@_tabula_muris_senis_dataset( + "Tabula muris senis (alpha=5)", + dataset_summary="Mouse lung cells aggregated from single-cell (Dirichlet alpha=5)", +) +def tabula_muris_senis_alpha_5(test=False, n_obs=100): + return 
_tabula_muris_senis(alpha=5, test=test, n_obs=n_obs) + + +@_tabula_muris_senis_dataset( + "Tabula muris senis (alpha=0.5)", + dataset_summary=( + "Mouse lung cells aggregated from single-cell (Dirichlet alpha=0.5)" + ), +) +def tabula_muris_senis_alpha_0_5(test=False, n_obs=100): + return _tabula_muris_senis(alpha=0.5, test=test, n_obs=n_obs) diff --git a/openproblems/tasks/spatial_decomposition/datasets/utils.py b/openproblems/tasks/spatial_decomposition/datasets/utils.py index 07f8b604ca..bdb7ffe648 100644 --- a/openproblems/tasks/spatial_decomposition/datasets/utils.py +++ b/openproblems/tasks/spatial_decomposition/datasets/utils.py @@ -139,7 +139,7 @@ def generate_synthetic_dataset( adata_spatial.obsm["n_cells"] = sp_c adata_merged = merge_sc_and_sp(adata, adata_spatial, test=test) adata_merged.X[adata_merged.X == np.inf] = adata_merged.X.max() # remove inf - adata_merged.layers["counts"] = adata_merged.X.copy() + adata_merged.layers["counts"] = adata_merged.X return adata_merged diff --git a/openproblems/tasks/spatial_decomposition/methods/__init__.py b/openproblems/tasks/spatial_decomposition/methods/__init__.py index bb1e6f57c8..68c1251809 100644 --- a/openproblems/tasks/spatial_decomposition/methods/__init__.py +++ b/openproblems/tasks/spatial_decomposition/methods/__init__.py @@ -1,11 +1,13 @@ +from .baseline import random_proportions +from .baseline import true_proportions from .cell2location import cell2location_amortised_detection_alpha_20 +from .cell2location import cell2location_detection_alpha_1 from .cell2location import cell2location_detection_alpha_20 from .cell2location import cell2location_detection_alpha_20_nb from .cell2location import cell2location_detection_alpha_200 from .destvi import destvi from .nmfreg import nmfreg from .nnls import nnls_scipy -from .random import random_proportion_assignment from .rctd import rctd from .seuratv3 import seuratv3 from .stereoscope import stereoscope diff --git a/openproblems/tasks/spatial_decomposition/methods/baseline.py b/openproblems/tasks/spatial_decomposition/methods/baseline.py new file mode 100644 index 0000000000..f8d1eee962 --- /dev/null +++ b/openproblems/tasks/spatial_decomposition/methods/baseline.py @@ -0,0 +1,37 @@ +from ....tools.decorators import baseline_method +from ....tools.utils import check_version +from ..utils import split_sc_and_sp + +import numpy as np + + +@baseline_method( + method_name="Random Proportions", + method_summary=( + "Random assignment of predicted celltype proportions from a Dirichlet" + " distribution." + ), +) +def random_proportions(adata, test=False): + adata_sc, adata = split_sc_and_sp(adata) + label_distribution = adata_sc.obs["label"].value_counts() + adata.obsm["proportions_pred"] = np.random.dirichlet( + label_distribution, + size=adata.shape[0], + ) + + adata.uns["method_code_version"] = check_version("openproblems") + return adata + + +@baseline_method( + method_name="True Proportions", + method_summary=( + "Perfect assignment of predicted celltype proportions from the ground truth." 
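The pancreas and Tabula Muris Senis datasets above simulate spots by drawing cell-type proportions from a Dirichlet distribution, and the random-proportions baseline does the same. A small illustration of how the alpha parameter (0.5, 1 and 5 in the datasets above) shapes those mixtures (toy values, not the repository's generator):

```python
import numpy as np

rng = np.random.default_rng(0)
n_cell_types = 5

# smaller alpha -> spots dominated by a single cell type;
# larger alpha -> more even mixtures across cell types
for alpha in (0.5, 1.0, 5.0):
    proportions = rng.dirichlet(np.full(n_cell_types, alpha), size=3)
    print(f"alpha={alpha}")
    print(proportions.round(2))  # each row sums to 1
```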
+ ), +) +def true_proportions(adata, test=False): + _, adata = split_sc_and_sp(adata) + adata.obsm["proportions_pred"] = adata.obsm["proportions_true"] + adata.uns["method_code_version"] = check_version("openproblems") + return adata diff --git a/openproblems/tasks/spatial_decomposition/methods/cell2location.py b/openproblems/tasks/spatial_decomposition/methods/cell2location.py index 3a599cb7e6..caf304f205 100644 --- a/openproblems/tasks/spatial_decomposition/methods/cell2location.py +++ b/openproblems/tasks/spatial_decomposition/methods/cell2location.py @@ -8,11 +8,19 @@ _cell2location_method = functools.partial( method, + method_summary=( + "Cell2location is a decomposition method based on Negative Binomial regression" + " that is able to account for batch effects in estimating the single-cell gene" + " expression signature used for the spatial decomposition step. Note that since" + " batch information is unavailable in this task, here we use either a" + " hard-coded reference, or a negative-binomial learned reference without batch" + " labels. The parameter alpha refers to the detection efficiency prior." + ), paper_name="Cell2location maps fine-grained cell types in spatial transcriptomics", - paper_url="https://doi.org/10.1038/s41587-021-01139-4", + paper_reference="kleshchevnikov2022cell2location", paper_year=2022, code_url="https://github.com/BayraktarLab/cell2location", - image="openproblems-python-extras", + image="openproblems-python-pytorch", ) @@ -22,7 +30,7 @@ def _cell2location( n_cells_per_location=20, hard_coded_reference=True, amortised=False, - num_samples=1000, + num_samples=None, sc_batch_size=2500, st_batch_size=None, test=False, @@ -38,7 +46,7 @@ def _cell2location( if test: max_epochs_sc = max_epochs_sc or 2 max_epochs_st = max_epochs_st or 2 - num_samples = num_samples or 10 + num_samples = num_samples or 2 else: # pragma: nocover max_epochs_sc = max_epochs_sc or 250 max_epochs_st = max_epochs_st or 30000 @@ -147,20 +155,21 @@ def _cell2location( ) adata.obsm["proportions_pred"] = adata.obsm["q05_cell_abundance_w_sf"].values + adata.obsm["proportions_pred"] /= adata.obsm["proportions_pred"].sum(axis=1)[ + :, None + ] adata.uns["method_code_version"] = check_version("cell2location") return adata -@_cell2location_method( - method_name="Cell2location (detection_alpha=20, reference hard-coded)" -) +@_cell2location_method(method_name="Cell2location (alpha=20, reference hard-coded)") def cell2location_detection_alpha_20( adata, detection_alpha=20, n_cells_per_location=20, hard_coded_reference=True, amortised=False, - num_samples=1000, + num_samples=None, sc_batch_size=2500, st_batch_size=None, test: bool = False, @@ -182,16 +191,43 @@ def cell2location_detection_alpha_20( ) -@_cell2location_method( - method_name="Cell2location (detection_alpha=20, reference NB without batch info)" -) +@_cell2location_method(method_name="Cell2location (alpha=1, reference hard-coded)") +def cell2location_detection_alpha_1( + adata, + detection_alpha=1, + n_cells_per_location=20, + hard_coded_reference=True, + amortised=False, + num_samples=None, + sc_batch_size=2500, + st_batch_size=None, + test: bool = False, + max_epochs_sc: Optional[int] = None, + max_epochs_st: Optional[int] = None, +): + return _cell2location( + adata, + detection_alpha=detection_alpha, + n_cells_per_location=n_cells_per_location, + hard_coded_reference=hard_coded_reference, + amortised=amortised, + num_samples=num_samples, + sc_batch_size=sc_batch_size, + st_batch_size=st_batch_size, + test=test, + 
max_epochs_sc=max_epochs_sc, + max_epochs_st=max_epochs_st, + ) + + +@_cell2location_method(method_name="Cell2location (alpha=20, NB reference)") def cell2location_detection_alpha_20_nb( adata, detection_alpha=20, n_cells_per_location=20, hard_coded_reference=False, amortised=False, - num_samples=1000, + num_samples=None, sc_batch_size=2500, st_batch_size=None, test: bool = False, @@ -213,16 +249,14 @@ def cell2location_detection_alpha_20_nb( ) -@_cell2location_method( - method_name="Cell2location (detection_alpha=200, reference hard-coded)" -) +@_cell2location_method(method_name="Cell2location (alpha=200, reference hard-coded)") def cell2location_detection_alpha_200( adata, detection_alpha=200, n_cells_per_location=20, hard_coded_reference=True, amortised=False, - num_samples=1000, + num_samples=None, sc_batch_size=2500, st_batch_size=None, test: bool = False, @@ -244,16 +278,14 @@ def cell2location_detection_alpha_200( ) -@_cell2location_method( - method_name="Cell2location, amortised (detection_alpha=20, reference hard-coded)" -) +@_cell2location_method(method_name="Cell2location (alpha=20, amortised, hard-coded)") def cell2location_amortised_detection_alpha_20( adata, detection_alpha=20, n_cells_per_location=20, hard_coded_reference=True, amortised=True, - num_samples=1000, + num_samples=None, sc_batch_size=2500, st_batch_size=1024, test: bool = False, diff --git a/openproblems/tasks/spatial_decomposition/methods/destvi.py b/openproblems/tasks/spatial_decomposition/methods/destvi.py index 9330ba1d92..3b9f7134fe 100644 --- a/openproblems/tasks/spatial_decomposition/methods/destvi.py +++ b/openproblems/tasks/spatial_decomposition/methods/destvi.py @@ -6,12 +6,19 @@ @method( method_name="DestVI", - paper_name="DestVI identifies continuums of cell types in spatial " - "transcriptomics data", - paper_url="https://doi.org/10.1038/s41587-022-01272-8", + method_summary=( + "destVI is a decomposition method that leverages a conditional generative model" + " of spatial transcriptomics down to the sub-cell-type variation level, which" + " is then used to decompose the cell-type proportions determining the spatial" + " organization of a tissue." 
+    ),
+    paper_name=(
+        "DestVI identifies continuums of cell types in spatial transcriptomics data"
+    ),
+    paper_reference="lopez2022destvi",
     paper_year=2022,
     code_url="https://github.com/YosefLab/scvi-tools",
-    image="openproblems-python-extras",
+    image="openproblems-python-pytorch",
 )
 def destvi(
     adata,
@@ -26,8 +33,8 @@ def destvi(
         max_epochs_sp = max_epochs_sp or 10
         max_epochs_sc = max_epochs_sc or 10
     else:  # pragma: nocover
-        max_epochs_sc = max_epochs_sc or 300
-        max_epochs_sp = max_epochs_sp or 2500
+        max_epochs_sc = max_epochs_sc or 500
+        max_epochs_sp = max_epochs_sp or 10000
 
     adata_sc, adata = split_sc_and_sp(adata)
 
@@ -36,15 +43,17 @@ def destvi(
     sc_model.train(
         max_epochs=max_epochs_sc,
         early_stopping=True,
-        early_stopping_monitor="reconstruction_loss_train",
+        train_size=0.9,
+        validation_size=0.1,
+        early_stopping_monitor="elbo_validation",
     )
 
     DestVI.setup_anndata(adata)
     st_model = DestVI.from_rna_model(adata, sc_model)
     st_model.train(
         max_epochs=max_epochs_sp,
-        early_stopping=True,
-        early_stopping_monitor="reconstruction_loss_train",
+        batch_size=min(int(adata.n_obs / 20 + 3), 128),
+        plan_kwargs={"min_kl_weight": 3.0, "max_kl_weight": 3},
     )
     adata.obsm["proportions_pred"] = st_model.get_proportions().to_numpy()
     adata.uns["method_code_version"] = check_version("scvi-tools")
diff --git a/openproblems/tasks/spatial_decomposition/methods/nmfreg.py b/openproblems/tasks/spatial_decomposition/methods/nmfreg.py
index 0876cb37a1..3ba6fddb85 100644
--- a/openproblems/tasks/spatial_decomposition/methods/nmfreg.py
+++ b/openproblems/tasks/spatial_decomposition/methods/nmfreg.py
@@ -7,9 +7,17 @@
 
 @method(
     method_name="NMF-reg",
-    paper_name="Slide-seq: A scalable technology for measuring genome-wide"
-    " expression at high spatial resolution",
-    paper_url="https://doi.org/10.1126/science.aaw1219",
+    method_summary=(
+        "NMFreg is a decomposition method based on Non-negative Matrix Factorization"
+        " Regression (NMFreg) that reconstructs expression of each spatial location as"
+        " a weighted combination of cell-type signatures defined by scRNA-seq. It was"
+        " originally developed for Slide-seq data."
+    ),
+    paper_name=(
+        "Slide-seq: A scalable technology for measuring genome-wide expression at high"
+        " spatial resolution"
+    ),
+    paper_reference="rodriques2019slide",
     paper_year=2019,
     code_url="https://github.com/tudaga/NMFreg_tutorial",
 )
diff --git a/openproblems/tasks/spatial_decomposition/methods/nnls.py b/openproblems/tasks/spatial_decomposition/methods/nnls.py
index d1caf7b532..4996217c90 100644
--- a/openproblems/tasks/spatial_decomposition/methods/nnls.py
+++ b/openproblems/tasks/spatial_decomposition/methods/nnls.py
@@ -9,11 +9,19 @@
 
 @method(
     method_name="Non-Negative Least Squares",
-    paper_name="Solving Least Squares Problems",
-    paper_url="https://doi.org/10.1137/1.9781611971217",
-    paper_year=1987,
-    code_url="https://docs.scipy.org/doc/scipy/"
-    "reference/generated/scipy.optimize.nnls.html",
+    method_summary=(
+        "NNLS is a decomposition method based on Non-Negative Least Squares Regression"
+        " (NNLS). 
It was originally introduced by the method AutoGenes" + ), + paper_name=( + "AutoGeneS: Automatic gene selection using multi-objective optimization for" + " RNA-seq deconvolution" + ), + paper_reference="aliee2021autogenes", + paper_year=2021, + code_url=( + "https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.nnls.html" + ), ) def nnls_scipy(adata, test=False): from scipy.optimize import nnls diff --git a/openproblems/tasks/spatial_decomposition/methods/random.py b/openproblems/tasks/spatial_decomposition/methods/random.py deleted file mode 100644 index bb3f930e78..0000000000 --- a/openproblems/tasks/spatial_decomposition/methods/random.py +++ /dev/null @@ -1,26 +0,0 @@ -from ....tools.decorators import method -from ....tools.utils import check_version -from ..utils import split_sc_and_sp - -import numpy as np - - -@method( - method_name="Random assignment (baseline)", - paper_name="Open Problems for Single Cell Analysis", - paper_url="https://openproblems.bio", - paper_year=2022, - code_url="https://github.com/openproblems-bio/openproblems", -) -def random_proportion_assignment(adata, test=False): - _, adata = split_sc_and_sp(adata) - n_types = adata.obsm["proportions_true"].shape[1] - props = np.random.dirichlet( - np.ones(n_types), - size=adata.shape[0], - ) - - adata.obsm["proportions_pred"] = props - adata.uns["method_code_version"] = check_version("openproblems") - - return adata diff --git a/openproblems/tasks/spatial_decomposition/methods/rctd.R b/openproblems/tasks/spatial_decomposition/methods/rctd.R index 5989b3eb7c..044c87d20e 100644 --- a/openproblems/tasks/spatial_decomposition/methods/rctd.R +++ b/openproblems/tasks/spatial_decomposition/methods/rctd.R @@ -4,6 +4,13 @@ #' #' @param sce_sc SingleCellExperiment single-cell dataset #' @param sce_sp SingleCellExperiment spatial dataset +#' @param fc_cutoff minimum log-fold-change (across cell types) for genes to be +#' included in the platform effect normalization step. +#' @param fc_cutoff_reg minimum log-fold-change (across cell types) for genes to +#' be included in the RCTD step. +#' @param max_cores for parallel processing, the number of cores used. If set to +#' 1, parallel processing is not used. The system will additionally be checked +#' for number of available cores. 
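The Non-Negative Least Squares method registered above fits each spot against cell-type signature profiles under a non-negativity constraint. A self-contained toy example of the underlying `scipy.optimize.nnls` call (simulated signatures, not the repository implementation):

```python
import numpy as np
from scipy.optimize import nnls

rng = np.random.default_rng(0)
n_genes, n_types = 200, 4

# toy cell-type signature matrix (genes x cell types) and one spot that is a
# 60/40 mixture of cell types 0 and 2
signatures = rng.gamma(2.0, size=(n_genes, n_types))
spot = 0.6 * signatures[:, 0] + 0.4 * signatures[:, 2]

# non-negative least squares: minimise ||signatures @ w - spot|| with w >= 0
weights, _ = nnls(signatures, spot)
proportions = weights / weights.sum()
print(proportions.round(2))  # approximately [0.6, 0.0, 0.4, 0.0]
```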
#' @return sce_sp SingleCellExperiment spatial dataset with predictions in obs library(spacexr) @@ -30,9 +37,14 @@ colnames(sp_counts) <- colnames(sce_sp) puck <- SpatialRNA(sp_coords, sp_counts) # create RCTD object from reference and spatialRNA objects my_rctd <- create.RCTD( - puck, reference, - max_cores = 1, - test_mode = FALSE, UMI_min_sigma = 100 + puck, + reference, + max_cores = max_cores, + fc_cutoff = fc_cutoff, + fc_cutoff_reg = fc_cutoff_reg, + test_mode = FALSE, + UMI_min_sigma = 100, + CELL_MIN_INSTANCE = 1 ) # run analysis and get results my_rctd <- run.RCTD(my_rctd) diff --git a/openproblems/tasks/spatial_decomposition/methods/rctd.py b/openproblems/tasks/spatial_decomposition/methods/rctd.py index c05b703463..26ffedf5cc 100644 --- a/openproblems/tasks/spatial_decomposition/methods/rctd.py +++ b/openproblems/tasks/spatial_decomposition/methods/rctd.py @@ -2,34 +2,64 @@ from ....tools.decorators import method from ....tools.utils import check_r_version from ..utils import split_sc_and_sp +from typing import Optional +import multiprocessing import numpy as np -_rctd = r_function("rctd.R", args="sce_sc, sce_sp") +_rctd = r_function("rctd.R", args="sce_sc, sce_sp, fc_cutoff, fc_cutoff_reg, max_cores") @method( method_name="RCTD", + method_summary=( + "RCTD (Robust Cell Type Decomposition) is a decomposition method that uses" + " signatures learnt from single-cell data to decompose spatial expression of" + " tissues. It is able to platform effect normalization step, which normalizes" + " the scRNA-seq cell type profiles to match the platform effects of the spatial" + " transcriptomics dataset." + ), paper_name="Robust decomposition of cell type mixtures in spatial transcriptomics", - paper_url="https://doi.org/10.1038/s41587-021-00830-w", + paper_reference="cable2021robust", paper_year=2020, code_url="https://github.com/dmcable/spacexr", image="openproblems-r-extras", ) -def rctd(adata, test=False): - # exctract single cell reference data +def rctd( + adata, + fc_cutoff: Optional[float] = None, + fc_cutoff_reg: Optional[float] = None, + test=False, +): + if test: + fc_cutoff = fc_cutoff or 0.05 + fc_cutoff_reg = fc_cutoff_reg or 0.075 + else: # pragma: nocover + fc_cutoff = fc_cutoff or 0.5 + fc_cutoff_reg = fc_cutoff_reg or 0.75 + # extract single cell reference data adata_sc, adata = split_sc_and_sp(adata) + labels = np.unique(adata_sc.obs["label"]) # set spatial coordinates for the single cell data adata_sc.obsm["spatial"] = np.ones((adata_sc.shape[0], 2)) + # remove rare cell types to prevent RCTD error + celltype_counts = adata_sc.obs["label"].value_counts() + adata_sc = adata_sc[ + ~adata_sc.obs["label"].isin(celltype_counts[celltype_counts < 25].index) + ].copy() # run RCTD - adata = _rctd(adata_sc, adata) + adata = _rctd( + adata_sc, adata, fc_cutoff, fc_cutoff_reg, max_cores=multiprocessing.cpu_count() + ) # get predicted cell type proportions from obs - cell_type_names = [x for x in adata.obs.columns if x.startswith("xCT")] + cell_type_names = [f"xCT_{label}" for label in labels] # add proportions - adata.obsm["proportions_pred"] = adata.obs[cell_type_names].to_numpy() + adata.obsm["proportions_pred"] = ( + adata.obs.reindex(cell_type_names, axis=1).fillna(0).to_numpy() + ) adata.uns["method_code_version"] = check_r_version("spacexr") diff --git a/openproblems/tasks/spatial_decomposition/methods/seuratv3.R b/openproblems/tasks/spatial_decomposition/methods/seuratv3.R index 7aa7d05273..a75b3961f2 100644 --- a/openproblems/tasks/spatial_decomposition/methods/seuratv3.R 
+++ b/openproblems/tasks/spatial_decomposition/methods/seuratv3.R @@ -2,6 +2,7 @@ #' @param sce_sc SingleCellExperiment single-cell data #' @param sce_sp SingleCellExperiment spatial data #' @param n_pcs int Number of principal components +#' @param sctransform_n_cells int Number of cells sampled to build NB regression options(error = rlang::entrace) @@ -16,20 +17,28 @@ args <- readRDS("/tmp/openproblems_seurat_args.rds") sce_sc <- args$sce_sc sce_sp <- args$sce_sp n_pcs <- args$n_pcs +sctransform_n_cells <- args$sctransform_n_cells # R base for seuratv3.py sce_sc <- as.Seurat(sce_sc, counts = "X", data = NULL) sce_sp <- as.Seurat(sce_sp, counts = "X", data = NULL) # Normalize and do dimred for spatial data -sce_sp <- SCTransform(sce_sp, assay = "originalexp", verbose = TRUE) +sce_sp <- SCTransform( + sce_sp, + assay = "originalexp", + ncells = min(sctransform_n_cells, nrow(sce_sp)), + verbose = TRUE +) sce_sp <- RunPCA(sce_sp, assay = "SCT", verbose = FALSE, n_pcs = n_pcs) # Normalize and do dimred for single cell data sce_sc <- SCTransform( sce_sc, - assay = "originalexp", ncells = min(3000, nrow(sce_sc)), verbose = TRUE + assay = "originalexp", + ncells = min(sctransform_n_cells, nrow(sce_sc)), + verbose = TRUE ) sce_sc <- RunPCA(sce_sc, verbose = FALSE, n_pcs = n_pcs) diff --git a/openproblems/tasks/spatial_decomposition/methods/seuratv3.py b/openproblems/tasks/spatial_decomposition/methods/seuratv3.py index 9e5809d33f..46c1f7e560 100644 --- a/openproblems/tasks/spatial_decomposition/methods/seuratv3.py +++ b/openproblems/tasks/spatial_decomposition/methods/seuratv3.py @@ -7,22 +7,35 @@ import pandas as pd import pathlib -_seuratv3 = r_function("seuratv3_wrapper.R", args="sce_sc, sce_sp, n_pcs, script_path") +_seuratv3 = r_function( + "seuratv3_wrapper.R", args="sce_sc, sce_sp, n_pcs, sctransform_n_cells, script_path" +) @method( method_name="SeuratV3", + method_summary=( + "SeuratV3 is a decomposition method that is based on Canonical Correlation" + " Analysis (CCA)." 
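Two small pandas patterns introduced in the RCTD wrapper above are easy to miss: dropping under-represented reference cell types before fitting, and re-aligning the predicted proportion columns so every original label gets a column (zero-filled if the method dropped it). A toy, self-contained illustration under assumed data; the labels and the 5-cell threshold here are arbitrary:

```python
import anndata
import numpy as np
import pandas as pd

# toy reference with three labels, one of them rare (assumed layout: obs["label"])
adata_sc = anndata.AnnData(
    X=np.random.poisson(1.0, (30, 5)).astype(np.float32),
    obs=pd.DataFrame({"label": ["A"] * 20 + ["B"] * 8 + ["C"] * 2}),
)
labels = np.unique(adata_sc.obs["label"])

# drop cell types with too few reference cells before fitting
celltype_counts = adata_sc.obs["label"].value_counts()
adata_sc = adata_sc[
    ~adata_sc.obs["label"].isin(celltype_counts[celltype_counts < 5].index)
].copy()

# align predictions back to the full label set; types dropped above get proportion 0
pred = pd.DataFrame({"xCT_A": [0.7, 0.4], "xCT_B": [0.3, 0.6]})
proportions_pred = (
    pred.reindex([f"xCT_{label}" for label in labels], axis=1).fillna(0).to_numpy()
)
print(proportions_pred)  # the column for "C" is all zeros
```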
+ ), paper_name="Comprehensive Integration of Single-Cell Data", - paper_url="https://doi.org/10.1016/j.cell.2019.05.031", + paper_reference="stuart2019comprehensive", paper_year=2019, code_url="https://satijalab.org/seurat/archive/v3.2/spatial_vignette.html", image="openproblems-r-extras", ) -def seuratv3(adata, test: bool = False, n_pca: Optional[int] = None): +def seuratv3( + adata, + test: bool = False, + n_pca: Optional[int] = None, + sctransform_n_cells: Optional[int] = None, +): if test: - n_pca = n_pca or 10 + n_pca = n_pca or 2 + sctransform_n_cells = sctransform_n_cells or 50 else: # pragma: nocover n_pca = n_pca or 30 + sctransform_n_cells = sctransform_n_cells or 5000 # extract single cell reference data adata_sc, adata = split_sc_and_sp(adata) # proportions_true gets lost in translation @@ -31,6 +44,7 @@ def seuratv3(adata, test: bool = False, n_pca: Optional[int] = None): adata_sc, adata, n_pcs=n_pca, + sctransform_n_cells=sctransform_n_cells, script_path=pathlib.Path(__file__).parent.joinpath("seuratv3.R").as_posix(), ) # get predicted cell type proportions from obs diff --git a/openproblems/tasks/spatial_decomposition/methods/seuratv3_wrapper.R b/openproblems/tasks/spatial_decomposition/methods/seuratv3_wrapper.R index f5fb26a1aa..f7f42c691e 100644 --- a/openproblems/tasks/spatial_decomposition/methods/seuratv3_wrapper.R +++ b/openproblems/tasks/spatial_decomposition/methods/seuratv3_wrapper.R @@ -6,10 +6,16 @@ #' @param sce_sc SingleCellExperiment single-cell data #' @param sce_sp SingleCellExperiment spatial data #' @param n_pcs int Number of principal components +#' @param sctransform_n_cells int Number of cells sampled to build NB regression #' @param script_path character Path to seuratv3.R saveRDS( - list(sce_sc = sce_sc, sce_sp = sce_sp, n_pcs = n_pcs), + list( + sce_sc = sce_sc, + sce_sp = sce_sp, + n_pcs = n_pcs, + sctransform_n_cells = sctransform_n_cells + ), "/tmp/openproblems_seurat_args.rds" ) # clear memory diff --git a/openproblems/tasks/spatial_decomposition/methods/stereoscope.py b/openproblems/tasks/spatial_decomposition/methods/stereoscope.py index c2695e4253..83b23d1ea1 100644 --- a/openproblems/tasks/spatial_decomposition/methods/stereoscope.py +++ b/openproblems/tasks/spatial_decomposition/methods/stereoscope.py @@ -5,12 +5,20 @@ @method( method_name="Stereoscope", - paper_name="Single-cell and spatial transcriptomics enables probabilistic " - "inference of cell type topography", - paper_url="https://doi.org/10.1038/s41587-022-01272-8", + method_summary=( + "Stereoscope is a decomposition method based on Negative Binomial regression." + " It is similar in scope and implementation to cell2location but less flexible" + " to incorporate additional covariates such as batch effects and other type of" + " experimental design annotations." 
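The Stereoscope wrapper's body is unchanged by this diff; for orientation, this is a rough sketch of the two-stage scvi-tools workflow such wrappers follow. The keyword arguments (e.g. `labels_key`) are assumptions and may differ between scvi-tools versions:

```python
from scvi.external import RNAStereoscope, SpatialStereoscope


def stereoscope_proportions(adata_sc, adata_sp, max_epochs_sc=100, max_epochs_sp=100):
    # stage 1: learn per-cell-type expression profiles on the scRNA-seq reference
    RNAStereoscope.setup_anndata(adata_sc, labels_key="label")
    sc_model = RNAStereoscope(adata_sc)
    sc_model.train(max_epochs=max_epochs_sc)

    # stage 2: transfer the profiles to the spatial data and learn proportions
    SpatialStereoscope.setup_anndata(adata_sp)
    st_model = SpatialStereoscope.from_rna_model(adata_sp, sc_model)
    st_model.train(max_epochs=max_epochs_sp)
    return st_model.get_proportions()
```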
+ ), + paper_name=( + "Single-cell and spatial transcriptomics enables probabilistic inference of" + " cell type topography" + ), + paper_reference="andersson2020single", paper_year=2020, code_url="https://github.com/scverse/scvi-tools", - image="openproblems-python-extras", + image="openproblems-python-pytorch", ) def stereoscope(adata, test=False, max_epochs_sc=None, max_epochs_sp=None): from scvi.external import RNAStereoscope diff --git a/openproblems/tasks/spatial_decomposition/methods/tangram.py b/openproblems/tasks/spatial_decomposition/methods/tangram.py index 329676c4aa..8aabb486e6 100644 --- a/openproblems/tasks/spatial_decomposition/methods/tangram.py +++ b/openproblems/tasks/spatial_decomposition/methods/tangram.py @@ -5,12 +5,20 @@ @method( method_name="Tangram", - paper_name="Deep learning and alignment of spatially resolved single-cell " - "transcriptomes with Tangram", - paper_url="https://doi.org/10.1038/s41592-021-01264-7", + method_summary=( + "Tangram is a method to map gene expression signatures from scRNA-seq data to" + " spatial data. It performs the cell type mapping by learning a similarity" + " matrix between single-cell and spatial locations based on gene expression" + " profiles." + ), + paper_name=( + "Deep learning and alignment of spatially resolved single-cell transcriptomes" + " with Tangram" + ), + paper_reference="biancalani2021deep", paper_year=2021, code_url="https://github.com/broadinstitute/Tangram", - image="openproblems-python-extras", + image="openproblems-python-pytorch", ) def tangram(adata, test=False, num_epochs=None, n_markers=None): # analysis based on: diff --git a/openproblems/tasks/spatial_decomposition/methods/vanillanmf.py b/openproblems/tasks/spatial_decomposition/methods/vanillanmf.py index e9ff3e4d16..81438d68d3 100644 --- a/openproblems/tasks/spatial_decomposition/methods/vanillanmf.py +++ b/openproblems/tasks/spatial_decomposition/methods/vanillanmf.py @@ -6,12 +6,24 @@ @method( method_name="Non-Negative Matrix Factorization (NMF)", - paper_name="Fast local algorithms for large scale nonnegative " - "matrix and tensor factorizations", - paper_url="https://doi.org/10.1587/transfun.E92.A.708", + method_summary=( + "NMF is a decomposition method based on Non-negative Matrix Factorization (NMF)" + " that reconstructs expression of each spatial location as a weighted" + " combination of cell-type signatures defined by scRNA-seq. It is a simpler" + " baseline than NMFreg as it only performs the NMF step based on mean" + " expression signatures of cell types, returning the weights loading of the NMF" + " as (normalized) cell type proportions, without the regression step." 
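As a point of comparison for the NMF summary above, a minimal scikit-learn sketch of proportion estimation by matrix factorization; this is a simplification for illustration, not the repository's `nmf` implementation:

```python
import numpy as np
from sklearn.decomposition import NMF


def nmf_proportions(sp_expr, n_cell_types, max_iter=200, random_state=17):
    """Factorize (spots x genes) into (spots x types) loadings and normalize them."""
    model = NMF(n_components=n_cell_types, max_iter=max_iter, random_state=random_state)
    loadings = model.fit_transform(sp_expr)              # (spots x cell types)
    return loadings / loadings.sum(axis=1, keepdims=True)


props = nmf_proportions(np.random.poisson(2, (50, 200)).astype(float), n_cell_types=4)
assert np.allclose(props.sum(axis=1), 1.0)
```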
+ ), + paper_name=( + "Fast local algorithms for large scale nonnegative matrix and tensor" + " factorizations" + ), + paper_reference="cichocki2009fast", paper_year=2009, - code_url="https://scikit-learn.org/stable/modules/generated/" - "sklearn.decomposition.NMF.html", + code_url=( + "https://scikit-learn.org/stable/modules/generated/" + "sklearn.decomposition.NMF.html" + ), ) def nmf(adata, test=False, max_iter=None, random_state=17): """NMF for spatial deconvolution.""" diff --git a/openproblems/tasks/spatial_decomposition/metrics/r2.py b/openproblems/tasks/spatial_decomposition/metrics/r2.py index 083f23ffde..29e1d4492b 100644 --- a/openproblems/tasks/spatial_decomposition/metrics/r2.py +++ b/openproblems/tasks/spatial_decomposition/metrics/r2.py @@ -1,7 +1,18 @@ from ....tools.decorators import metric -@metric(metric_name="r2", maximize=True) +@metric( + metric_name="r2", + metric_summary=( + "R2, or the “coefficient of determination”, reports the fraction of the true" + " proportion values’ variance that can be explained by the predicted proportion" + " values. The best score, and upper bound, is 1.0. There is no fixed lower" + " bound for the metric. The uniform/non-weighted average across all cell" + " types/states is used to summarise performance." + ), + maximize=True, + paper_reference="miles2005rsquared", +) def r2(adata): import sklearn.metrics diff --git a/openproblems/tools/conversion.py b/openproblems/tools/conversion.py index 7b32dc65f8..d80393accd 100644 --- a/openproblems/tools/conversion.py +++ b/openproblems/tools/conversion.py @@ -20,6 +20,7 @@ def r_function(filename, args="sce"): fun : scprep.run.RFunction Python callable evaluating the R code """ + assert filename.endswith(".R") # get the path to the module that called `r_function` diff --git a/openproblems/tools/decorators.py b/openproblems/tools/decorators.py index 2e102eca4a..ed5c9bec58 100644 --- a/openproblems/tools/decorators.py +++ b/openproblems/tools/decorators.py @@ -4,7 +4,6 @@ import anndata import functools import logging -import memory_profiler import time log = logging.getLogger("openproblems") @@ -25,13 +24,19 @@ def normalize(adata, *args, obsm=None, obs=None, var=None, **kwargs): else: obs = adata.uns[obs] if obs else adata.obs var = adata.uns[var] if var else adata.var - adata_temp = anndata.AnnData(adata.obsm[obsm], obs=obs, var=var) + adata_temp = anndata.AnnData( + adata.obsm[obsm], + obs=obs, + var=var, + layers={"counts": adata.obsm[obsm]}, + ) adata_temp = func(adata_temp, *args, **kwargs) adata.obsm[obsm] = adata.obsm[cache_name] = adata_temp.X else: if func.__name__ in adata.layers: adata.X = adata.layers[func.__name__] else: + adata.X = adata.layers["counts"] adata = func(adata, *args, **kwargs) adata.layers[func.__name__] = adata.X @@ -48,12 +53,14 @@ def _backport_code_version(apply_method, code_version): def method( method_name, + method_summary, paper_name, - paper_url, + paper_reference, paper_year, code_url, code_version=None, image="openproblems", + is_baseline=False, ): """Decorate a method function. 
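To make the updated decorator contract concrete, here is a hypothetical registration under the new signature; the method name, summary text, and BibTeX key below are placeholders, not real entries:

```python
from openproblems.tools.decorators import method
from openproblems.tools.utils import check_version


@method(
    method_name="Example method",
    method_summary=(
        "A short description of what the method does, long enough to satisfy the"
        " metadata length checks in the test suite."
    ),
    paper_name="Title of the paper introducing the method",
    paper_reference="author2023example",  # placeholder key; must exist in main.bib
    paper_year=2023,
    code_url="https://github.com/example/example-method",
)
def example_method(adata, test=False):
    # a real method would write its predictions into adata here
    adata.uns["method_code_version"] = check_version("openproblems")
    return adata
```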
@@ -61,31 +68,39 @@ def method( ---------- method_name : str Unique human readable name of the method + method_summary : str + Short summary of the method paper_name : str Title of the seminal paper describing the method - paper_url : str - Link to the paper, preferably a DOI URL + paper_reference : str + BibTex key from `main.bib` referring to the paper paper_year : int Year the paper was published code_url : str Link to the code base providing the canonical implementation image : str, optional (default: "openproblems") Name of the Docker image to be used for this method + is_baseline : bool, optional (default: False) + If True, this method serves as a baseline for the task """ def decorator(func): @functools.wraps(func) - def apply_method(*args, **kwargs): + def apply_method(adata: anndata.AnnData, *args, **kwargs): log.debug("Running {} method".format(func.__name__)) - return func(*args, **kwargs) + adata = func(adata, *args, **kwargs) + adata.uns["is_baseline"] = is_baseline + return adata apply_method.metadata = dict( method_name=method_name, + method_summary=method_summary, paper_name=paper_name, - paper_url=paper_url, + paper_reference=paper_reference, paper_year=paper_year, code_url=code_url, image=image, + is_baseline=is_baseline, ) apply_method = _backport_code_version(apply_method, code_version) return apply_method @@ -93,7 +108,19 @@ def apply_method(*args, **kwargs): return decorator -def metric(metric_name, maximize, image="openproblems"): +baseline_method = functools.partial( + method, + paper_name="Open Problems for Single Cell Analysis", + paper_reference="openproblems", + paper_year=2022, + code_url="https://github.com/openproblems-bio/openproblems", + is_baseline=True, +) + + +def metric( + metric_name, maximize, metric_summary, paper_reference, image="openproblems" +): """Decorate a metric function. Parameters @@ -105,6 +132,11 @@ def metric(metric_name, maximize, image="openproblems"): ---------- metric_name : str Unique human readable name of the metric + metric_summary : str + Short summary of the metric + paper_reference : str + BibTex key from `main.bib` referring to the seminal paper in which the metric + was defined maximize : bool If True, the metric should be maximized. If False, it should be minimized. 
image : str, optional (default: "openproblems") @@ -113,12 +145,16 @@ def metric(metric_name, maximize, image="openproblems"): def decorator(func): @functools.wraps(func) - def apply_metric(*args, **kwargs): + def apply_metric(adata: anndata.AnnData, *args, **kwargs): log.debug("Running {} metric".format(func.__name__)) - return func(*args, **kwargs) + return func(adata.copy(), *args, **kwargs) apply_metric.metadata = dict( - metric_name=metric_name, maximize=maximize, image=image + metric_name=metric_name, + metric_summary=metric_summary, + paper_reference=paper_reference, + maximize=maximize, + image=image, ) return apply_metric @@ -141,9 +177,10 @@ def dataset( data_url : str Link to the original source of the dataset data_reference : str - Link to the paper describing how the dataset was generated + BibTex key from `main.bib` referring to the paper describing how the dataset was + generated dataset_summary : str - Short (<80 character) summary of the dataset + Short summary of the dataset image : str, optional (default: "openproblems") Name of the Docker image to be used for this dataset """ @@ -176,6 +213,7 @@ def profile(func): result : dict Contains 'result', 'runtime_s', 'memory_mb', 'memory_leaked_mb' """ + import memory_profiler @functools.wraps(func) def decorated(*args, **kwargs): diff --git a/openproblems/tools/normalize.py b/openproblems/tools/normalize.py index e8fba61c23..6477c5cd79 100644 --- a/openproblems/tools/normalize.py +++ b/openproblems/tools/normalize.py @@ -2,7 +2,6 @@ import anndata as ad import logging -import scanpy as sc import scprep log = logging.getLogger("openproblems") @@ -31,6 +30,8 @@ @decorators.normalizer def log_scran_pooling(adata: ad.AnnData) -> ad.AnnData: """Normalize data with scran via rpy2.""" + import scanpy as sc + scprep.run.install_bioconductor("scran") adata.obs["size_factors"] = _scran(adata) adata.X = scprep.utils.matrix_vector_elementwise_multiply( @@ -40,43 +41,52 @@ def log_scran_pooling(adata: ad.AnnData) -> ad.AnnData: return adata -def _cpm(adata: ad.AnnData): - adata.layers["counts"] = adata.X.copy() - sc.pp.normalize_total(adata, target_sum=1e6, key_added="size_factors") +def _cp10k(adata: ad.AnnData): + import scanpy as sc + + adata.X = sc.pp.normalize_total( + adata, target_sum=1e4, key_added="size_factors", inplace=False + )["X"] @decorators.normalizer -def cpm(adata: ad.AnnData) -> ad.AnnData: - """Normalize data to counts per million.""" - _cpm(adata) +def cp10k(adata: ad.AnnData) -> ad.AnnData: + """Normalize data to counts per 10,000.""" + _cp10k(adata) return adata @decorators.normalizer -def log_cpm(adata: ad.AnnData) -> ad.AnnData: - """Normalize data to log counts per million.""" - _cpm(adata) +def log_cp10k(adata: ad.AnnData) -> ad.AnnData: + """Normalize data to log counts per 10,000.""" + import scanpy as sc + + _cp10k(adata) sc.pp.log1p(adata) return adata @decorators.normalizer -def sqrt_cpm(adata: ad.AnnData) -> ad.AnnData: - """Normalize data to sqrt counts per million.""" - _cpm(adata) +def sqrt_cp10k(adata: ad.AnnData) -> ad.AnnData: + """Normalize data to sqrt counts per 10,000.""" + _cp10k(adata) adata.X = scprep.transform.sqrt(adata.X) return adata @decorators.normalizer -def log_cpm_hvg(adata: ad.AnnData, n_genes: int = 1000) -> ad.AnnData: - """Normalize logCPM HVG +def log_cp10k_hvg(adata: ad.AnnData, n_genes: int = 1000) -> ad.AnnData: + """Normalize logCP10k HVG - Normalize data to log counts per million and select n_genes highly - variable genes + Normalize data to log counts per 10,000 and annotate 
n_genes highly + variable genes. In order to subset the data to HVGs, use + ``` + adata = adata[:, adata.var["highly_variable"]].copy() + ``` """ + import scanpy as sc - adata = log_cpm(adata) + adata = log_cp10k(adata) if adata.n_vars < n_genes: log.warning( @@ -85,6 +95,5 @@ def log_cpm_hvg(adata: ad.AnnData, n_genes: int = 1000) -> ad.AnnData: n_genes = int(adata.n_vars * 0.5) sc.pp.highly_variable_genes(adata, n_top_genes=n_genes, flavor="cell_ranger") - adata = adata[:, adata.var["highly_variable"]].copy() return adata diff --git a/openproblems/utils.py b/openproblems/utils.py index 3cbc56d356..6a9e23fda4 100644 --- a/openproblems/utils.py +++ b/openproblems/utils.py @@ -18,8 +18,8 @@ def temporary(func, version=None, *args, **kwargs): raise TypeError("temporary() missing 1 required keyword argument: 'version'") if packaging.version.parse(__version__) >= packaging.version.parse(version): raise RuntimeError( - "Temporary function {}.{} is temporary and should not be used " - "after version {} (current version: {})".format( + "Temporary function {}.{} is temporary and should not be used after version" + " {} (current version: {})".format( func.__module__, func.__name__, version, __version__ ) ) @@ -48,3 +48,8 @@ def get_members(module): def get_callable_members(module): """Get all callable public members from a module.""" return [member for member in get_members(module) if callable(member)] + + +def get_member_id(member): + """Get the submodule or function name for a task, dataset, method or metric""" + return member.__name__.split(".")[-1] diff --git a/openproblems/version.py b/openproblems/version.py index 906d362f7d..49e0fc1e09 100644 --- a/openproblems/version.py +++ b/openproblems/version.py @@ -1 +1 @@ -__version__ = "0.6.0" +__version__ = "0.7.0" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..e396c1f8ad --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[tool.isort] +profile = "black" +force_single_line = true +force_alphabetical_sort = true + +[tool.black] +experimental_string_processing = true diff --git a/pytest.ini b/pytest.ini index 447d92b2a1..0947bd0a12 100644 --- a/pytest.ini +++ b/pytest.ini @@ -10,4 +10,8 @@ filterwarnings = ignore:X\.dtype being converted to np\.float32 from float64:FutureWarning ignore:is_categorical is deprecated and will be removed in a future version:FutureWarning ignore:The use of (converter|py2rpy|rpy2py) in module rpy2.robjects.conversion is deprecated.:DeprecationWarning + ignore:`Model\.state_updates` will be removed in a future version\.:UserWarning + ignore:Tensorflow not installed. ParametricUMAP will be unavailable:ImportWarning + ignore:Deprecated call to `pkg_resources\.declare_namespace:DeprecationWarning + ignore:pkg_resources is deprecated as an API:DeprecationWarning always:Container failed with AssertionError\. 
Retrying [0-9]* more time:RuntimeWarning diff --git a/scripts/generate_test_matrix.py b/scripts/generate_test_matrix.py new file mode 100644 index 0000000000..06395af4a9 --- /dev/null +++ b/scripts/generate_test_matrix.py @@ -0,0 +1,22 @@ +import json +import openproblems + +_CORE_TEST_SUITES = ["test_core"] +_TASK_TEST_SUITES = ["test_task"] + + +def generate_matrix(): + suites = _CORE_TEST_SUITES.copy() + for task in openproblems.TASKS: + task_name = openproblems.utils.get_member_id(task) + suites.extend([f"{suite} and {task_name}" for suite in _TASK_TEST_SUITES]) + return suites + + +def main(): + matrix = generate_matrix() + print(json.dumps(matrix)) + + +if __name__ == "__main__": + main() diff --git a/scripts/install_renv.R b/scripts/install_renv.R index b3c6698850..f9d7f66889 100644 --- a/scripts/install_renv.R +++ b/scripts/install_renv.R @@ -1,3 +1,7 @@ +if (nchar(Sys.getenv("BIOCVERSION")) > 0) { + renv::settings$bioconductor.version(Sys.getenv("BIOCVERSION")) +} + as_integer_version <- function(v) { class(v) <- "list" v[[1]] @@ -15,7 +19,7 @@ compare_version <- function(v1, v2) { } check_available <- function(remote) { - remote <- renv:::renv_remotes_resolve(remote) + remote <- with_retries(renv:::renv_remotes_resolve, spec = remote) tryCatch( { version <- packageVersion(remote$Package) @@ -35,12 +39,59 @@ strip_comments <- function(remote) { gsub("\\s*#.*", "", remote) } +with_retries <- function(func, + attempts = 10, + sleep_once = 3, + sleep_multiple = 60, + backoff = 2, + ...) { + result <- NULL + attempt <- 1 + sleep <- sleep_once + while (is.null(result) && attempt < attempts) { + attempt <- attempt + 1 + try( + result <- func(...) + ) + closeAllConnections() + Sys.sleep(sleep) + if (sleep == sleep_once) { + sleep <- sleep_multiple + } else { + sleep <- sleep * backoff + } + } + if (is.null(result)) { + # last attempt + result <- func(...) + } + result +} + +patch_renv <- function() { + if (!requireNamespace("memoise", quietly = TRUE)) install.packages("memoise") + # set the new env between renv imports and base env, only if not already done + if (!is(renv:::renv_remotes_resolve, "memoised")) { + # memoize renv_remotes_resolve + renv_remotes_resolve_memoised <- memoise::memoise( + renv:::renv_remotes_resolve + ) + assignInNamespace( + "renv_remotes_resolve", + renv_remotes_resolve_memoised, + "renv" + ) + } +} + install_renv <- function(requirements_file, ...) { + patch_renv() remotes <- scan(requirements_file, what = character(), sep = "\n") remotes <- sapply(remotes, strip_comments) remotes_installed <- sapply(remotes, check_available) remotes_to_install <- remotes[!remotes_installed] + message(paste0("Installing ", length(remotes_to_install), " packages")) if (length(remotes_to_install) > 0) { - renv::install(remotes_to_install, ...) + with_retries(renv::install, packages = remotes_to_install, ...) 
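The `with_retries` helper added to `install_renv.R` above reads roughly as follows in Python, for readers less used to R; the defaults mirror the R version, but the function itself is illustrative only:

```python
import time


def with_retries(func, *args, attempts=10, sleep_once=3, sleep_multiple=60, backoff=2, **kwargs):
    """Call func, sleeping between failed attempts; the final attempt may raise."""
    sleep = sleep_once
    for _ in range(attempts - 1):
        try:
            return func(*args, **kwargs)
        except Exception:
            time.sleep(sleep)
            # the first retry waits briefly, later retries back off exponentially
            sleep = sleep_multiple if sleep == sleep_once else sleep * backoff
    return func(*args, **kwargs)
```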
} } diff --git a/scripts/upgrade_renv.R b/scripts/upgrade_renv.R index 6c74adb033..fccc679b7d 100644 --- a/scripts/upgrade_renv.R +++ b/scripts/upgrade_renv.R @@ -1,3 +1,7 @@ +if (nchar(Sys.getenv("BIOCVERSION")) > 0) { + renv::settings$bioconductor.version(Sys.getenv("BIOCVERSION")) +} + upgraded_remote_version <- function(remote) { if (remote$Source == "Repository") { out <- paste0(remote$Package, "@", remote$Version) diff --git a/setup.cfg b/setup.cfg index b0c7316e85..81cbb08f72 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,9 +2,9 @@ ignore = # top-level module docstring D100, D104, - # space before : conflicts with black + # space before: conflicts with black E203, - # import not in alphabetical : conflicts with isort + # import not in alphabetical: conflicts with isort H306 per-file-ignores = # imported but unused @@ -19,8 +19,3 @@ exclude = build, dist, Snakefile - -[isort] -profile = black -force_single_line = true -force_alphabetical_sort = true diff --git a/setup.py b/setup.py index 6012e3335f..6ea808be24 100644 --- a/setup.py +++ b/setup.py @@ -4,38 +4,43 @@ import os install_requires = [ - "numpy>=1.22,<1.24", - "scikit-learn==1.1.*", "anndata==0.8.*", - "scprep>=1.2.1", - "scipy>=1.8,<1.10", - "scanpy>=1.6", - "louvain==0.7.*", - "python-igraph<0.10", + "colorama==0.4.*", "decorator<5.0", # pinned in #324 + "louvain==0.8.*", + "matplotlib<3.7.0", "memory-profiler==0.60", - "colorama==0.4.*", + "numpy>=1.21,<1.24", "packaging==21.3", + "pandas==1.3.5", + "python-igraph==0.10.*", + "scanpy>=1.6", + "scipy>=1.7,<1.10", + "scikit-learn>=1.0,<1.2", + "scprep>=1.2.2", "umap-learn==0.5.*", + "requests==2.28.*", ] r_requires = [ - "rpy2<3.5.5", - "anndata2ri==1.1.*", + "rpy2>=3.4,<3.4.3", + "anndata2ri==1.0.6", ] -evaluate_requires = ["snakemake>=7.8,<7.15", "tabulate<0.9"] +evaluate_requires = ["snakemake>=7.8,<7.17", "tabulate<0.9"] -process_requires = ["numpyencoder==0.3.*"] +process_requires = ["numpyencoder==0.3.*", "gitpython==3.1.*"] test_requires = [ "pytest==7.1.*", "pytest-cov>=3.0,<4.1", - "black==22.8.0", - "coverage==6.4.*", + "pytest-timestamper==0.0.9", + "black==22.10.0", + "coverage>=6.4,<6.6", "codecov==2.1.*", "parameterized==0.8.*", "requests==2.28.*", + "bibtexparser==1.4.*", ] version_py = os.path.join(os.path.dirname(__file__), "openproblems", "version.py") diff --git a/test/docker_run.sh b/test/docker_run.sh index da7108c9c4..c15a73c5e6 100644 --- a/test/docker_run.sh +++ b/test/docker_run.sh @@ -15,6 +15,7 @@ if [ ! 
-f ~/.install_complete ]; then if echo "$FREEZE" | grep -q annoy; then python3 -m pip install --force "$(echo "$FREEZE" | grep annoy)" fi + python3 -m pip check touch ~/.install_complete fi diff --git a/test/test_1_methods.py b/test/test_1_methods.py deleted file mode 100644 index e765724f9d..0000000000 --- a/test/test_1_methods.py +++ /dev/null @@ -1,82 +0,0 @@ -import openproblems -import os -import parameterized -import pytest -import utils.docker -import utils.git -import utils.name - -pytestmark = pytest.mark.skipif( - len(utils.git.list_modified_tasks()) == 0, reason="No tasks have been modified" -) -RETRIES = ( - int(os.environ["PYTEST_MAX_RETRIES"]) if "PYTEST_MAX_RETRIES" in os.environ else 2 -) - - -@parameterized.parameterized.expand( - [ - ( - task.__name__.split(".")[-1], - method.__name__, - method.metadata["image"], - ) - for task in utils.git.list_modified_tasks() - for method in task.METHODS - ], - name_func=utils.name.name_test, - skip_on_empty=True, -) -@utils.docker.docker_test(timeout=600, retries=RETRIES) -def test_method(task_name, method_name, image): - """Test application of a method.""" - import anndata - import openproblems.utils - - task = getattr(openproblems.tasks, task_name) - method = getattr(task.methods, method_name) - adata = task.api.sample_dataset() - openproblems.log.debug( - "Testing {} method from {} task".format(method.__name__, task.__name__) - ) - adata = method(adata, test=True) - assert isinstance(adata, anndata.AnnData) - assert task.api.check_method(adata) - if "method_code_version" not in adata.uns: - openproblems.utils.future_warning( - "Setting code_version in the method decorator is deprecated. " - "Store code version in `adata.uns['method_code_version']` instead.", - error_version="1.0", - error_category=TypeError, - ) - assert method.metadata["code_version"] is not None - else: - assert adata.uns["method_code_version"] != "ModuleNotFound" - - -@parameterized.parameterized.expand( - [(method,) for task in openproblems.TASKS for method in task.METHODS], - name_func=utils.name.name_test, -) -def test_method_metadata(method): - """Test for existence of method metadata.""" - assert hasattr(method, "metadata") - for attr in [ - "method_name", - "paper_name", - "paper_url", - "paper_year", - "code_url", - "image", - ]: - assert attr in method.metadata - - assert isinstance(method.metadata["image"], str) - assert method.metadata["image"].startswith("openproblems") - assert isinstance(method.metadata["method_name"], str) - assert isinstance(method.metadata["paper_name"], str) - assert isinstance(method.metadata["paper_year"], int) - assert isinstance(method.metadata["paper_url"], str) - assert utils.asserts.assert_url_accessible(method.metadata["paper_url"]) - assert isinstance(method.metadata["code_url"], str) - assert utils.asserts.assert_url_accessible(method.metadata["code_url"]) diff --git a/test/test_1_metrics.py b/test/test_1_metrics.py deleted file mode 100644 index 97ac695c99..0000000000 --- a/test/test_1_metrics.py +++ /dev/null @@ -1,53 +0,0 @@ -import openproblems -import parameterized -import pytest -import utils.git -import utils.name - -pytestmark = pytest.mark.skipif( - len(utils.git.list_modified_tasks()) == 0, reason="No tasks have been modified" -) - - -@parameterized.parameterized.expand( - [(metric,) for task in openproblems.TASKS for metric in task.METRICS], - name_func=utils.name.name_test, -) -def test_metric_metadata(metric): - """Test for existence of metric metadata.""" - assert hasattr(metric, "metadata") - for attr in 
["metric_name", "maximize", "image"]: - assert attr in metric.metadata - assert isinstance(metric.metadata["maximize"], bool) - assert isinstance(metric.metadata["metric_name"], str) - assert isinstance(metric.metadata["image"], str) - assert metric.metadata["image"].startswith("openproblems") - - -@parameterized.parameterized.expand( - [ - ( - task.__name__.split(".")[-1], - metric.__name__, - metric.metadata["image"], - ) - for task in utils.git.list_modified_tasks() - for metric in task.METRICS - ], - name_func=utils.name.name_test, - skip_on_empty=True, -) -@utils.docker.docker_test -def test_metric(task_name, metric_name, image): - """Test computation of a metric.""" - import numbers - - task = getattr(openproblems.tasks, task_name) - metric = getattr(task.metrics, metric_name) - adata = task.api.sample_dataset() - adata = task.api.sample_method(adata) - openproblems.log.debug( - "Testing {} metric from {} task".format(metric.__name__, task.__name__) - ) - m = metric(adata) - assert isinstance(m, numbers.Number) diff --git a/test/test_4_dimensionality_reduction.py b/test/test_4_dimensionality_reduction.py deleted file mode 100644 index e713c8d6ae..0000000000 --- a/test/test_4_dimensionality_reduction.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Specific tests for the dimensionality_reduction task""" -import openproblems -import pytest -import utils.docker -import utils.git - -# global skip -TASK = openproblems.tasks.dimensionality_reduction -pytestmark = pytest.mark.skipif( - TASK not in utils.git.list_modified_tasks(), - reason="Relevant task has not been modified", -) - - -@utils.docker.docker_test(image=TASK.metrics.trustworthiness.metadata["image"]) -def test_trustworthiness_sparse(): - from scipy.sparse import csr_matrix - - task = openproblems.tasks.dimensionality_reduction - metric = task.metrics.trustworthiness - - adata = task.api.sample_dataset() - adata = task.api.sample_method(adata) - openproblems.log.debug( - "Testing {} metric from {} task".format(metric.__name__, task.__name__) - ) - adata.X = csr_matrix(adata.X) - m = metric(adata) - - assert isinstance(m, float) - assert 0 <= m <= 1 - - -@utils.docker.docker_test(image=TASK.metrics.density_preservation.metadata["image"]) -def test_density_preservation_matches_densmap(): - from openproblems.tasks.dimensionality_reduction.metrics.density import _K - from openproblems.tasks.dimensionality_reduction.metrics.density import _SEED - from scipy.stats import pearsonr - from umap import UMAP - - import numpy as np - - task = openproblems.tasks.dimensionality_reduction - metric = openproblems.tasks.dimensionality_reduction.metrics.density_preservation - - adata = task.api.sample_dataset() - adata = task.api.sample_method(adata) - openproblems.log.debug( - "Testing {} metric from {} task".format(metric.__name__, task.__name__) - ) - - (emb, ro, re) = UMAP( - n_neighbors=_K, random_state=_SEED, densmap=True, output_dens=True - ).fit_transform(adata.X) - expected = pearsonr(ro, re)[0] - - adata.obsm["X_emb"] = emb - actual = metric(adata) - - np.testing.assert_allclose(expected, actual) diff --git a/test/test_5_cli.py b/test/test_core_cli.py similarity index 80% rename from test/test_5_cli.py rename to test/test_core_cli.py index 389b35d35c..71e15c726b 100644 --- a/test/test_5_cli.py +++ b/test/test_core_cli.py @@ -1,7 +1,9 @@ from openproblems.api.hash import docker_labels_from_api +from openproblems.api.hash import git_hash from openproblems.api.main import main from openproblems.api.utils import print_output +import importlib import 
numpy as np import openproblems import os @@ -24,11 +26,13 @@ def test_print(capsys): def test_tasks(capsys): """Test task listing.""" result = np.array(main(["tasks"], do_print=False)) - expected = np.array([task.__name__.split(".")[-1] for task in openproblems.TASKS]) + expected = np.array( + [openproblems.utils.get_member_id(task) for task in openproblems.TASKS] + ) assert np.all(result == expected) result = np.array(main(["tasks"], do_print=True)) - expected = ( - "\n".join([task.__name__.split(".")[-1] for task in openproblems.TASKS]) + "\n" + expected = "\n".join( + [openproblems.utils.get_member_id(task) for task in openproblems.TASKS] + [""] ) captured = capsys.readouterr() assert captured.out == expected @@ -42,7 +46,7 @@ def test_list(task): """Test function listing.""" result = np.array( main( - ["list", "--task", task.__name__.split(".")[-1], "--datasets"], + ["list", "--task", openproblems.utils.get_member_id(task), "--datasets"], do_print=False, ) ) @@ -51,7 +55,7 @@ def test_list(task): result = np.array( main( - ["list", "--task", task.__name__.split(".")[-1], "--methods"], + ["list", "--task", openproblems.utils.get_member_id(task), "--methods"], do_print=False, ) ) @@ -60,7 +64,7 @@ def test_list(task): result = np.array( main( - ["list", "--task", task.__name__.split(".")[-1], "--metrics"], + ["list", "--task", openproblems.utils.get_member_id(task), "--metrics"], do_print=False, ) ) @@ -73,7 +77,7 @@ def _test_image(task, function_type, function): [ "image", "--task", - task.__name__.split(".")[-1], + openproblems.utils.get_member_id(task), function_type, function.__name__, ], @@ -130,7 +134,7 @@ def test_help(capsys): @parameterized.parameterized.expand( [ ("label_projection", "--datasets", "pancreas_batch"), - ("multimodal_data_integration", "--methods", "mnn_log_scran_pooling"), + ("matching_modalities", "--methods", "mnn_log_scran_pooling"), ], name_func=utils.name.name_test, ) @@ -149,13 +153,33 @@ def test_hash(task, function_type, function_name): def test_hash_docker_api(): assert docker_labels_from_api("circleci/python", tag="3.8-bullseye") is None + labels = docker_labels_from_api("singlecellopenproblems/openproblems", tag="latest") + assert "bio.openproblems.build" in labels + assert "bio.openproblems.hash" in labels + assert isinstance(labels["bio.openproblems.build"], str) + assert isinstance(labels["bio.openproblems.hash"], str) + assert labels["bio.openproblems.build"] in ["github_actions", "local"] + + +@parameterized.parameterized.expand( + [ + (openproblems.tasks.label_projection.datasets.zebrafish_labs,), + (openproblems.tasks.label_projection.methods.knn_classifier_log_cp10k,), + ], + name_func=utils.name.name_test, +) +def test_git_hash(func): + h1 = git_hash(func) + module = importlib.import_module(func.__wrapped__.__module__) + assert git_hash(module) == h1 + assert git_hash(module.__file__) == h1 @parameterized.parameterized.expand( [ (dataset, method, metric) - for dataset in ["zebrafish_labels", None] - for method in ["logistic_regression_log_cpm", None] + for dataset in ["zebrafish_labs", None] + for method in ["logistic_regression_log_cp10k", None] for metric in ["accuracy", None] ], name_func=utils.name.name_test, @@ -198,7 +222,7 @@ def __zero_metric(*args): [ "evaluate", "--task", - task.__name__.split(".")[-1], + openproblems.utils.get_member_id(task), "--input", dataset_file, metric_name, @@ -226,7 +250,7 @@ def test_pipeline(): "--test", "--output", dataset_file, - "zebrafish_labels", + "zebrafish_labs", ], do_print=False, ) @@ -242,7 
+266,7 @@ def test_pipeline(): method_file, "--version-file", version_file, - "logistic_regression_log_cpm", + "logistic_regression_log_cp10k", ], do_print=False, ) diff --git a/test/test_core_metadata.py b/test/test_core_metadata.py new file mode 100644 index 0000000000..6d8876c8ca --- /dev/null +++ b/test/test_core_metadata.py @@ -0,0 +1,101 @@ +import openproblems +import openproblems.utils +import parameterized +import utils +import utils.asserts +import utils.cache +import utils.git +import utils.name + +DATASET_SUMMARY_MINLEN = 40 +DATASET_SUMMARY_MAXLEN = 400 + +METHOD_NAME_MAXLEN = 50 +METHOD_SUMMARY_MINLEN = 40 +METHOD_SUMMARY_MAXLEN = 1000 + +METRIC_SUMMARY_MINLEN = 40 +METRIC_SUMMARY_MAXLEN = 400 + + +@parameterized.parameterized.expand( + [(dataset,) for task in openproblems.TASKS for dataset in task.DATASETS], + name_func=utils.name.name_test, +) +def test_dataset_metadata(dataset): + """Test for existence of dataset metadata.""" + assert hasattr(dataset, "metadata") + for attr in [ + "dataset_name", + "data_url", + "data_reference", + "dataset_summary", + "image", + ]: + assert attr in dataset.metadata + assert dataset.metadata[attr] is not None + + assert isinstance(dataset.metadata["dataset_name"], str) + assert isinstance(dataset.metadata["image"], str) + assert dataset.metadata["image"].startswith("openproblems") + assert isinstance(dataset.metadata["dataset_summary"], str) + assert len(dataset.metadata["dataset_summary"]) > DATASET_SUMMARY_MINLEN + assert len(dataset.metadata["dataset_summary"]) < DATASET_SUMMARY_MAXLEN + assert isinstance(dataset.metadata["data_url"], str) + assert utils.asserts.assert_url_accessible(dataset.metadata["data_url"]) + assert isinstance(dataset.metadata["data_reference"], str) + assert utils.asserts.assert_valid_reference(dataset.metadata["data_reference"]) + + +@parameterized.parameterized.expand( + [(method,) for task in openproblems.TASKS for method in task.METHODS], + name_func=utils.name.name_test, +) +def test_method_metadata(method): + """Test for existence of method metadata.""" + assert hasattr(method, "metadata") + for attr in [ + "method_name", + "paper_name", + "paper_reference", + "paper_year", + "code_url", + "image", + "is_baseline", + ]: + assert attr in method.metadata + + assert isinstance(method.metadata["image"], str) + assert method.metadata["image"].startswith("openproblems") + assert isinstance(method.metadata["method_name"], str) + assert len(method.metadata["method_name"]) < METHOD_NAME_MAXLEN + assert isinstance(method.metadata["method_summary"], str) + assert len(method.metadata["method_summary"]) > METHOD_SUMMARY_MINLEN + assert len(method.metadata["method_summary"]) < METHOD_SUMMARY_MAXLEN + assert isinstance(method.metadata["paper_name"], str) + assert isinstance(method.metadata["paper_year"], int) + assert isinstance(method.metadata["paper_reference"], str) + assert utils.asserts.assert_valid_reference(method.metadata["paper_reference"]) + assert isinstance(method.metadata["code_url"], str) + assert utils.asserts.assert_url_accessible(method.metadata["code_url"]) + assert isinstance(method.metadata["is_baseline"], bool) + + +@parameterized.parameterized.expand( + [(metric,) for task in openproblems.TASKS for metric in task.METRICS], + name_func=utils.name.name_test, +) +def test_metric_metadata(metric): + """Test for existence of metric metadata.""" + assert hasattr(metric, "metadata") + for attr in ["metric_name", "maximize", "image"]: + assert attr in metric.metadata + assert 
isinstance(metric.metadata["maximize"], bool) + assert isinstance(metric.metadata["metric_name"], str) + assert isinstance(metric.metadata["metric_summary"], str) + assert len(metric.metadata["metric_summary"]) > METRIC_SUMMARY_MINLEN + assert len(metric.metadata["metric_summary"]) < METRIC_SUMMARY_MAXLEN + assert isinstance(metric.metadata["image"], str) + assert metric.metadata["image"].startswith("openproblems") + assert isinstance(metric.metadata["paper_reference"], str) + assert utils.asserts.assert_valid_reference(metric.metadata["paper_reference"]) diff --git a/test/test_0_tasks.py b/test/test_core_tasks.py similarity index 96% rename from test/test_0_tasks.py rename to test/test_core_tasks.py index f8851f56ec..949a371031 100644 --- a/test/test_0_tasks.py +++ b/test/test_core_tasks.py @@ -29,7 +29,7 @@ def test_members(self): assert len(self.task._task_summary) < TASK_SUMMARY_MAXLEN assert hasattr(self.task, "DEFAULT_LAYER") assert isinstance(self.task.DEFAULT_LAYER, str) - assert self.task.DEFAULT_LAYER in ["counts", "log_normalized"] + assert self.task.DEFAULT_LAYER in ["counts", "log_normalized", "log_cp10k"] assert hasattr(self.task, "api") assert isinstance(self.task.api, MODULE) for list_name in ["DATASETS", "METHODS", "METRICS"]: @@ -40,7 +40,7 @@ def test_members(self): for method in method_list: assert callable(method) - def test_task_api_members(self): + def test_api_members(self): """Test that task.api has the required members""" assert hasattr(self.task.api, "check_dataset") assert hasattr(self.task.api, "check_method") @@ -52,7 +52,7 @@ def test_task_api_members(self): assert callable(self.task.api.sample_method) assert hasattr(self.task.api.sample_dataset, "metadata") - def test_task_api_is_consistent(self): + def test_api_is_consistent(self): """Test that a task's API is self-consistent""" adata = self.task.api.sample_dataset() assert self.task.api.check_dataset(adata) diff --git a/test/test_5_tools.py b/test/test_core_tools.py similarity index 83% rename from test/test_5_tools.py rename to test/test_core_tools.py index d0667fd9a8..560bcd8352 100644 --- a/test/test_5_tools.py +++ b/test/test_core_tools.py @@ -17,11 +17,12 @@ def _dense_data(X): @parameterized.parameterized_class( - ("normalizer"), + ("normalizer", "sparse"), [ - (staticmethod(normalizer),) - for normalizer in openproblems.utils.get_callable_members( - openproblems.tools.normalize + (staticmethod(normalizer), sparse) + for normalizer, sparse in zip( + openproblems.utils.get_callable_members(openproblems.tools.normalize), + [True, False], ) ], class_name_func=utils.name.name_test, @@ -32,16 +33,29 @@ class TestNormalizeX(unittest.TestCase): @classmethod def setUpClass(cls): """Generate and normalize data.""" - cls.adata = utils.data.data() + cls.adata = utils.data.data(sparse=cls.sparse) + cls.counts = cls.adata.layers["counts"].copy() cls.cache_name = cls.normalizer.__name__ assert utils.asserts.assert_finite(cls.adata.X) assert cls.cache_name not in cls.adata.layers cls.adata = cls.normalizer(cls.adata) + def test_shape(self): + """Test that normalized data is the same shape as the input.""" + assert self.adata.X.shape == self.counts.shape + + def test_class(self): + """Test that normalized data is the same class as the input.""" + assert isinstance(self.adata.X, type(self.counts)) + def test_finite(self): """Test that normalized data is finite.""" assert utils.asserts.assert_finite(self.adata.X) + def test_not_inplace(self): + """Test that normalization does not happen inplace.""" + 
utils.asserts.assert_array_equal(self.adata.layers["counts"], self.counts) + def test_layers(self): """Test that normalized data is cached in adata.layers.""" assert self.cache_name in self.adata.layers @@ -67,7 +81,7 @@ def test_cache(self): @parameterized.parameterized_class( - ("normalizer"), + "normalizer", [ (staticmethod(normalizer),) for normalizer in openproblems.utils.get_callable_members( diff --git a/test/test_5_utils.py b/test/test_core_utils.py similarity index 93% rename from test/test_5_utils.py rename to test/test_core_utils.py index 8ea5084879..89fb9cfb24 100644 --- a/test/test_5_utils.py +++ b/test/test_core_utils.py @@ -10,7 +10,7 @@ def test_temporary_version_missing(): """Test temporary decorator behavior with missing version.""" @openproblems.utils.temporary - def test_fn(): + def test_fn(): # pragma: nocover pass np.testing.assert_raises_regex( @@ -27,13 +27,13 @@ def test_temporary_version_future(): temp_version = "{}.{}".format(version.major - 1, 0) @openproblems.utils.temporary(version=temp_version) - def test_fn(): + def test_fn(): # pragma: nocover pass np.testing.assert_raises_regex( RuntimeError, - "Temporary function {}.{} is temporary and should not be used " - "after version {}".format(test_fn.__module__, test_fn.__name__, temp_version), + "Temporary function {}.{} is temporary and should not be used after version {}" + .format(test_fn.__module__, test_fn.__name__, temp_version), test_fn, ) diff --git a/test/test_2_load_data.py b/test/test_task_1_load_data.py similarity index 80% rename from test/test_2_load_data.py rename to test/test_task_1_load_data.py index bff0acee8c..9dfd3e6099 100644 --- a/test/test_2_load_data.py +++ b/test/test_task_1_load_data.py @@ -1,25 +1,19 @@ import openproblems import parameterized -import pytest import utils.docker -import utils.git import utils.name -pytestmark = pytest.mark.skipif( - len(utils.git.list_modified_tasks()) == 0, reason="No tasks have been modified" -) - @parameterized.parameterized.expand( [ ( - task.__name__.split(".")[-1], + openproblems.utils.get_member_id(task), dataset.__name__, test, utils.TEMPDIR.name, dataset.metadata["image"], ) - for task in utils.git.list_modified_tasks() + for task in openproblems.TASKS for dataset in task.DATASETS for test in [True] ], @@ -27,7 +21,7 @@ skip_on_empty=True, ) @utils.docker.docker_test(retries=2) -def test_load_dataset(task_name, dataset_name, test, tempdir, image): +def test_load_dataset(task_name, dataset_name, test, tempdir, image): # pragma: nocover """Test loading and caching of a dataset.""" import utils.asserts import utils.cache diff --git a/test/test_3_datasets.py b/test/test_task_2_datasets.py similarity index 70% rename from test/test_3_datasets.py rename to test/test_task_2_datasets.py index cc0638baa4..6eeb87afd7 100644 --- a/test/test_3_datasets.py +++ b/test/test_task_2_datasets.py @@ -12,13 +12,6 @@ import utils.git import utils.name -DATASET_SUMMARY_MINLEN = 40 -DATASET_SUMMARY_MAXLEN = 280 - -pytestmark = pytest.mark.skipif( - len(utils.git.list_modified_tasks()) == 0, reason="No tasks have been modified" -) - def _assert_not_bytes(X): if isinstance(X, pd.Series): @@ -43,7 +36,7 @@ def _assert_not_bytes(X): ("dataset", "task", "test", "tempdir"), [ (staticmethod(dataset), task, test, utils.TEMPDIR.name) - for task in utils.git.list_modified_tasks() + for task in openproblems.TASKS for dataset in task.DATASETS for test in [True] ], @@ -63,7 +56,7 @@ def setUpClass(cls): test=cls.test, dependency="test_load_dataset", ) - except AssertionError as e: 
+ except AssertionError as e: # pragma: nocover if str(e) == "Intermediate file missing. Did test_load_dataset fail?": pytest.skip("Dataset not loaded successfully") else: @@ -94,6 +87,7 @@ def test_adata_shape(self): def test_sparse(self): """Ensure output is sparse.""" assert scipy.sparse.issparse(self.adata.X) + assert isinstance(self.adata.X, scipy.sparse.csr_matrix) def test_not_bytes(self): """Ensure output does not contain byte strings.""" @@ -124,32 +118,3 @@ def test_normalize(self, normalizer): adata = self.adata.copy() adata = normalizer(adata) utils.asserts.assert_finite(adata.X) - - -@parameterized.parameterized.expand( - [(dataset,) for task in openproblems.TASKS for dataset in task.DATASETS], - name_func=utils.name.name_test, -) -def test_dataset_metadata(dataset): - """Test for existence of dataset metadata.""" - assert hasattr(dataset, "metadata") - for attr in [ - "dataset_name", - "data_url", - "data_reference", - "dataset_summary", - "image", - ]: - assert attr in dataset.metadata - assert dataset.metadata[attr] is not None - - assert isinstance(dataset.metadata["dataset_name"], str) - assert isinstance(dataset.metadata["image"], str) - assert dataset.metadata["image"].startswith("openproblems") - assert isinstance(dataset.metadata["dataset_summary"], str) - assert len(dataset.metadata["dataset_summary"]) > DATASET_SUMMARY_MINLEN - assert len(dataset.metadata["dataset_summary"]) < DATASET_SUMMARY_MAXLEN - assert isinstance(dataset.metadata["data_url"], str) - assert utils.asserts.assert_url_accessible(dataset.metadata["data_url"]) - assert isinstance(dataset.metadata["data_reference"], str) - assert utils.asserts.assert_url_accessible(dataset.metadata["data_reference"]) diff --git a/test/test_4_cell_cell_communication.py b/test/test_task_cell_cell_communication_source_target.py similarity index 82% rename from test/test_4_cell_cell_communication.py rename to test/test_task_cell_cell_communication_source_target.py index 192069df79..453ce9b370 100644 --- a/test/test_4_cell_cell_communication.py +++ b/test/test_task_cell_cell_communication_source_target.py @@ -6,21 +6,12 @@ import openproblems.tasks._cell_cell_communication._common.utils import os import pandas as pd -import pytest import tempfile import unittest import utils.docker import utils.git -# global skip -SUBTASKS = [ - openproblems.tasks.cell_cell_communication_source_target, - openproblems.tasks.cell_cell_communication_ligand_target, -] -pytestmark = pytest.mark.skipif( - any([task not in utils.git.list_modified_tasks() for task in SUBTASKS]), - reason="Relevant task has not been modified", -) +TASK = openproblems.tasks.cell_cell_communication_source_target class TestApi(unittest.TestCase): @@ -37,8 +28,8 @@ def test_assert_is_subset(self): ) self.assertRaisesRegex( AssertionError, - r"test_subset is not a subset of test_superset\. " - "d missing from test_superset", + r"test_subset is not a subset of test_superset\. d missing from" + r" test_superset", common.api.assert_is_subset, ["a", "b", "c", "d"], ["a", "b", "c"], @@ -48,8 +39,8 @@ def test_assert_is_subset(self): ) self.assertRaisesRegex( AssertionError, - r"Allowed proportion \(0.24\) of missing test_subset elements exceeded " - r"\(0\.25\)\. d missing from test_superset", + r"Allowed proportion \(0.24\) of missing test_subset elements exceeded" + r" \(0\.25\)\. 
d missing from test_superset", common.api.assert_is_subset, ["a", "b", "c", "d"], ["a", "b", "c"], @@ -59,7 +50,7 @@ def test_assert_is_subset(self): ) def test_map_gene_symbols(self): - adata = common.api.sample_dataset(SUBTASKS[0].api.MERGE_KEYS) + adata = common.api.sample_dataset(TASK.api.MERGE_KEYS) index = adata.var.index.to_numpy() index[0] = "many_to_one_1" index[1] = "many_to_one_2" @@ -111,19 +102,31 @@ def test_map_gene_symbols(self): self.assertNotIn("one_from_none", adata_mapped.var.index) -@utils.docker.docker_test(image=SUBTASKS[0].metrics.odds_ratio.metadata["image"]) -def test_odds_ratio_no_match(): +@utils.docker.docker_test(image=TASK.metrics.odds_ratio.metadata["image"]) +def test_odds_ratio_no_match(): # pragma: nocover import numpy as np task = openproblems.tasks.cell_cell_communication_source_target metric = task.metrics.odds_ratio - adata = task.api.sample_dataset() - adata = task.api.sample_method(adata) openproblems.log.debug( "Testing {} metric from {} task".format(metric.__name__, task.__name__) ) - adata.uns["ccc_target"]["response"] = np.nan - m = metric(adata) + adata = task.api.sample_dataset() + + # check expected output + adata = task.api.sample_method(adata) + m = metric(adata, top_prop=0.4) + assert np.issubdtype("float64", m) + assert m == 0.7 + + # force perfect score + adata = task.methods.true_events(adata) + m = metric(adata, top_prop=0.4) assert m == 1 + + # force exception + adata.uns["ccc_target"]["response"] = 0 + m = metric(adata, top_prop=0.4) + assert m is np.nan diff --git a/test/test_task_dimensionality_reduction.py b/test/test_task_dimensionality_reduction.py new file mode 100644 index 0000000000..b96b4234c2 --- /dev/null +++ b/test/test_task_dimensionality_reduction.py @@ -0,0 +1,108 @@ +"""Specific tests for the dimensionality_reduction task""" +import openproblems +import parameterized +import utils.docker +import utils.git + +# global skip +TASK = openproblems.tasks.dimensionality_reduction + + +@utils.docker.docker_test(image=TASK.metrics.trustworthiness.metadata["image"]) +def test_trustworthiness_sparse(): # pragma: nocover + from scipy.sparse import csr_matrix + + task = openproblems.tasks.dimensionality_reduction + metric = task.metrics.trustworthiness + + adata = task.api.sample_dataset() + adata = task.api.sample_method(adata) + openproblems.log.debug( + "Testing {} metric from {} task".format(metric.__name__, task.__name__) + ) + adata.X = csr_matrix(adata.X) + m = metric(adata) + + assert isinstance(m, float) + assert 0 <= m <= 1 + + +def test_density_preservation_matches_densmap(): + from openproblems.tasks.dimensionality_reduction.metrics.density import _K + from openproblems.tasks.dimensionality_reduction.metrics.density import _SEED + from scipy.stats import pearsonr + from umap import UMAP + + import numpy as np + + task = openproblems.tasks.dimensionality_reduction + metric = openproblems.tasks.dimensionality_reduction.metrics.density_preservation + + adata = task.api.sample_dataset() + adata = task.api.sample_method(adata) + openproblems.log.debug( + "Testing {} metric from {} task".format(metric.__name__, task.__name__) + ) + + (emb, ro, re) = UMAP( + n_neighbors=_K, random_state=_SEED, densmap=True, output_dens=True + ).fit_transform(adata.X) + expected = pearsonr(ro, re)[0] + + adata.obsm["X_emb"] = emb + actual = metric(adata) + + np.testing.assert_allclose(expected, actual, rtol=1e-3) + + +@parameterized.parameterized.expand( + [(200,), (1000,)], + name_func=utils.name.name_test, +) +def 
test_distance_correlation_with_svd(n_svd): + import numpy as np + + task = openproblems.tasks.dimensionality_reduction + metric = openproblems.tasks.dimensionality_reduction.metrics.distance_correlation + + adata = task.api.sample_dataset() + adata = task.api.sample_method(adata) + adata.obsm["X_emb"] = adata.X.toarray() + + expected = 1 + actual = metric(adata, n_svd=n_svd) + + np.testing.assert_allclose(expected, actual, rtol=1e-3) + + +def test_density_preservation_perfect(): + import numpy as np + + task = openproblems.tasks.dimensionality_reduction + metric = openproblems.tasks.dimensionality_reduction.metrics.density_preservation + + adata = task.api.sample_dataset() + adata = task.api.sample_method(adata) + + adata.obsm["X_emb"] = adata.X.toarray() + actual = metric(adata) + + np.testing.assert_allclose(1, actual) + + +def test_diffusion_map_no_convergence(): + import numpy as np + import scipy.sparse.linalg + + adata = ( + openproblems.tasks.dimensionality_reduction.datasets.olsson_2016_mouse_blood() + ) + # no exception with retries + adata = openproblems.tasks.dimensionality_reduction.methods.diffusion_map(adata) + # exception with no retries + np.testing.assert_raises( + scipy.sparse.linalg.ArpackNoConvergence, + openproblems.tasks.dimensionality_reduction.methods.diffusion_map, + adata, + n_retries=0, + ) diff --git a/test/test_task_methods.py b/test/test_task_methods.py new file mode 100644 index 0000000000..bdd18283e2 --- /dev/null +++ b/test/test_task_methods.py @@ -0,0 +1,51 @@ +import openproblems +import os +import parameterized +import utils.asserts +import utils.docker +import utils.git +import utils.name + +RETRIES = ( + int(os.environ["PYTEST_MAX_RETRIES"]) if "PYTEST_MAX_RETRIES" in os.environ else 2 +) + + +@parameterized.parameterized.expand( + [ + ( + openproblems.utils.get_member_id(task), + method.__name__, + method.metadata["image"], + ) + for task in openproblems.TASKS + for method in task.METHODS + ], + name_func=utils.name.name_test, + skip_on_empty=True, +) +@utils.docker.docker_test(timeout=600, retries=RETRIES) +def test_method(task_name, method_name, image): # pragma: nocover + """Test application of a method.""" + import anndata + import openproblems.utils + + task = getattr(openproblems.tasks, task_name) + method = getattr(task.methods, method_name) + adata = task.api.sample_dataset() + openproblems.log.debug( + "Testing {} method from {} task".format(method.__name__, task.__name__) + ) + adata = method(adata, test=True) + assert isinstance(adata, anndata.AnnData) + assert task.api.check_method(adata, is_baseline=method.metadata["is_baseline"]) + if "method_code_version" not in adata.uns: + openproblems.utils.future_warning( + "Setting code_version in the method decorator is deprecated. 
Store code" + " version in `adata.uns['method_code_version']` instead.", + error_version="1.0", + error_category=TypeError, + ) + assert method.metadata["code_version"] is not None + else: + assert adata.uns["method_code_version"] != "ModuleNotFound" diff --git a/test/test_task_metrics.py b/test/test_task_metrics.py new file mode 100644 index 0000000000..d149ed7dfe --- /dev/null +++ b/test/test_task_metrics.py @@ -0,0 +1,34 @@ +import openproblems +import parameterized +import utils.asserts +import utils.docker +import utils.name + + +@parameterized.parameterized.expand( + [ + ( + openproblems.utils.get_member_id(task), + metric.__name__, + metric.metadata["image"], + ) + for task in openproblems.TASKS + for metric in task.METRICS + ], + name_func=utils.name.name_test, + skip_on_empty=True, +) +@utils.docker.docker_test +def test_metric(task_name, metric_name, image): # pragma: nocover + """Test computation of a metric.""" + import numbers + + task = getattr(openproblems.tasks, task_name) + metric = getattr(task.metrics, metric_name) + adata = task.api.sample_dataset() + adata = task.api.sample_method(adata) + openproblems.log.debug( + "Testing {} metric from {} task".format(metric.__name__, task.__name__) + ) + m = metric(adata) + assert isinstance(m, numbers.Number) diff --git a/test/utils/asserts.py b/test/utils/asserts.py index d5663eed29..bc4c11408b 100644 --- a/test/utils/asserts.py +++ b/test/utils/asserts.py @@ -2,12 +2,17 @@ import functools import numpy as np +import pathlib import scipy.sparse _REQUEST_HEADERS = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) " - "Gecko/20100101 Firefox/71.0" + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0" + ) } +FILEPATH = pathlib.Path(__file__) + +_MISSING_DOIS = ["vandermaaten2008visualizing", "hosmer2013applied"] def assert_array_equal(X, Y): @@ -43,3 +48,22 @@ def assert_url_accessible(url): with requests.head(url, headers=_REQUEST_HEADERS) as response: assert _response_ok(response), (url, response.status_code) return True + + +@functools.lru_cache(None) +def _load_bibliography(): + import bibtexparser + + bib_path = FILEPATH.parents[2].joinpath("main.bib") + with open(bib_path, "r") as handle: + return bibtexparser.load(handle) + + +def assert_valid_reference(ref): + bib = _load_bibliography() + assert ref in bib.entries_dict + bibentry = bib.entries_dict[ref] + if not (bibentry["ENTRYTYPE"] == "misc" or ref in _MISSING_DOIS): + assert "doi" in bibentry + assert assert_url_accessible(f"https://doi.org/{bibentry['doi']}") + return True diff --git a/test/utils/cache.py b/test/utils/cache.py index 93b31098a6..fcdfaa15d3 100644 --- a/test/utils/cache.py +++ b/test/utils/cache.py @@ -1,10 +1,11 @@ import anndata +import openproblems import os def _cache_name(tempdir, task, dataset, test=None, method=None): if not isinstance(task, str): - task = task.__name__.split(".")[-1] + task = openproblems.utils.get_member_id(task) if not isinstance(dataset, str): dataset = dataset.__name__ if method is not None: diff --git a/test/utils/data.py b/test/utils/data.py index 2e1d8921b9..8b71216fc3 100644 --- a/test/utils/data.py +++ b/test/utils/data.py @@ -1,10 +1,14 @@ import anndata import numpy as np +import scipy.sparse -def data(obsm=None): +def data(sparse=False, obsm=None): """Create fake data.""" - adata = anndata.AnnData(np.random.poisson(2, (100, 30)).astype(np.float32)) + data = np.random.poisson(2, (100, 30)).astype(np.float32) + if sparse: + data = 
scipy.sparse.csr_matrix(data) + adata = anndata.AnnData(data, layers={"counts": data}) if obsm is not None: adata.obsm[obsm] = adata.X * 2 + 1 adata.uns["{}_obs".format(obsm)] = np.arange(adata.shape[0]) + 5 diff --git a/test/utils/docker.py b/test/utils/docker.py index 1c6476191f..974409923e 100644 --- a/test/utils/docker.py +++ b/test/utils/docker.py @@ -105,8 +105,8 @@ def image_requires_docker(image): else: if not docker_available(): raise RuntimeError( - "The Dockerfile for image {} is newer than the " - "latest push, but Docker is not available.".format(image) + "The Dockerfile for image {} is newer than the latest push, but Docker" + " is not available.".format(image) ) if docker_image_age(image) < git_file_age: import sys @@ -225,8 +225,8 @@ def run_image(image, script, *args, timeout=None, retries=0): if retries > 0 and not isinstance(e, exceptions.TimeoutError): time = "time" if retries == 1 else "times" warnings.warn( - f"Container failed with {type(e).__name__}. " - f"Retrying {retries} more {time}", + f"Container failed with {type(e).__name__}. Retrying {retries} more" + f" {time}", RuntimeWarning, ) retries -= 1 diff --git a/test/utils/git.py b/test/utils/git.py index 3c20a6b575..261e83e0e2 100644 --- a/test/utils/git.py +++ b/test/utils/git.py @@ -1,7 +1,6 @@ from . import run import functools -import openproblems import os TESTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -59,22 +58,6 @@ def task_dir(task): return os.path.relpath(os.path.dirname(task.__file__), BASEDIR) -def task_modified(task): - """Check if the task has changed relative to base/main.""" - return git_has_diff(task_dir(task)) - - -def core_modified(): - """Check if the core repo has changed relative to base/main. - - We exclude all task directories as well as any md files and the website. - """ - task_exclusions = [f":^{task_dir(task)}" for task in openproblems.TASKS] - diff_target = ["./openproblems", "./docker", "./test", ":^*.md", ":^website"] - diff_target += task_exclusions - return git_has_diff(diff_target) - - def git_rev_parse(branch): """Get the current commit of a branch""" return run.run( @@ -94,18 +77,3 @@ def is_pull_request(): if "GITHUB_EVENT_NAME" in os.environ: return os.environ["GITHUB_EVENT_NAME"] == "pull_request" return False - - -@functools.lru_cache(None) -def list_modified_tasks(): - """List tasks for which testing must be run. - - Return all tasks if the core repo has changed, - otherwise just those that have changed relative to base/main. - - If we are currently in a pull request or at the HEAD of base/main, test all tasks. 
- """ - if is_pull_request() or core_modified() or is_main_head(): - return openproblems.TASKS - - return [task for task in openproblems.TASKS if task_modified(task)] diff --git a/test/utils/name.py b/test/utils/name.py index 446bcb9d58..1ea94355c6 100644 --- a/test/utils/name.py +++ b/test/utils/name.py @@ -17,9 +17,10 @@ def name_test(testcase_func, param_num, param): """Get a human readable name for a parameterized test.""" args = param.values() if isinstance(param, dict) else param.args - return "%s_%s" % ( + name_params = [ testcase_func.__name__, parameterized.parameterized.to_safe_name( "_".join(object_name(x) for x in args if x != TEMPDIR.name) ), - ) + ] + return "_".join(name_params) diff --git a/workflow/Snakefile b/workflow/Snakefile index c8378a3cea..d67769e8e9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -27,6 +27,7 @@ rule docker_refresh: input: tools.refresh_images rule refresh_docker_image: + threads: 0 priority: 50 input: dockerfile = "{}/{{image}}/refresh.Dockerfile".format(tools.IMAGES_DIR), @@ -39,7 +40,7 @@ rule refresh_docker_image: label = tools.build_type, hash = tools.build_hash, shell: - "docker build --label bio.openproblems.build={params.label} --label bio.openproblems.hash={params.hash} -f {input.dockerfile} -t {params.user}/{wildcards.image} .." + "docker build --progress=plain --label bio.openproblems.build={params.label} --label bio.openproblems.hash={params.hash} -f {input.dockerfile} -t {params.user}/{wildcards.image} .." rule refresh_dockerfile: priority: 50 @@ -78,6 +79,7 @@ rule update_docker_image: "touch {output}" rule build_docker_image: + threads: 0.5 input: dockerfile = "{}/{{image}}/Dockerfile".format(tools.IMAGES_DIR), requirements = tools.docker_build_requirements, @@ -90,7 +92,7 @@ rule build_docker_image: label = tools.build_type, hash = tools.build_hash, shell: - "docker build --label bio.openproblems.build={params.label} --label bio.openproblems.hash={params.hash} -f {input.dockerfile} -t {params.user}/{wildcards.image} .." + "docker build --progress=plain --label bio.openproblems.build={params.label} --label bio.openproblems.hash={params.hash} -f {input.dockerfile} -t {params.user}/{wildcards.image} .." 
rule password_docker: output: @@ -108,6 +110,7 @@ rule login_docker: "docker login --username=singlecellopenproblems --password=$(cat {input})" rule push_docker_image: + threads: 0 input: build = "{}/{{image}}/.docker_update".format(tools.IMAGES_DIR), login = ".docker_login", @@ -117,6 +120,7 @@ rule push_docker_image: "docker push --quiet singlecellopenproblems/{wildcards.image}" rule pull_docker_image: + threads: 0 output: temp(touch("{}/{{image}}/.docker_pull".format(tools.IMAGES_DIR))) shell: diff --git a/workflow/generate_website_markdown.py b/workflow/generate_website_markdown.py deleted file mode 100644 index 9bb3bbf643..0000000000 --- a/workflow/generate_website_markdown.py +++ /dev/null @@ -1,72 +0,0 @@ -import openproblems -import os -import pathlib -import re -import sys -import workflow_utils - -INDEX_TOML_TEMPLATE = """+++ -title = "{task_name}" -summary = "{task_summary}" -headless = false -theme = "op" -+++ -""" - -DATASET_TOML_TEMPLATE = """+++ -title = "{dataset_name}" -summary = "{dataset_summary}" -+++ -""" - -API_PATTERN = re.compile(r"^#.*API$") -HEADING_PATTERN = re.compile(r"^# ") - - -def write_index_md(task, outdir): - output_md = INDEX_TOML_TEMPLATE.format( - task_name=task._task_name, task_summary=task._task_summary - ) - readme_file = task.__file__.replace("__init__.py", "README.md") - with open(readme_file, "r") as readme_handle: - for line in readme_handle: - if HEADING_PATTERN.match(line): - # exclude top-level headings - continue - if API_PATTERN.match(line): - # exclude everything after ## API - break - output_md += line - - output_file = os.path.join(outdir, "_index.md") - with open(output_file, "w") as output_handle: - output_handle.write(output_md) - - -def write_dataset_md(dataset, outdir): - output_md = DATASET_TOML_TEMPLATE.format( - dataset_name=dataset.metadata["dataset_name"], - dataset_summary=dataset.metadata["dataset_summary"], - ) - - dataset_name = dataset.__name__.split(".")[-1] - output_file = os.path.join(outdir, f"{dataset_name}.md") - with open(output_file, "w") as output_handle: - output_handle.write(output_md) - - -def main(outdir): - for task in openproblems.TASKS: - if workflow_utils.task_is_incomplete(task): - # don't write md for incomplete tasks - continue - task_outdir = os.path.join(outdir, task.__name__.split(".")[-1]) - if not os.path.isdir(task_outdir): - pathlib.Path(task_outdir).mkdir(parents=True, exist_ok=True) - write_index_md(task, task_outdir) - for dataset in task.DATASETS: - write_dataset_md(dataset, task_outdir) - - -if __name__ == "__main__": - main(sys.argv[1]) diff --git a/workflow/parse_metadata.py b/workflow/parse_metadata.py new file mode 100644 index 0000000000..ebfda7a60f --- /dev/null +++ b/workflow/parse_metadata.py @@ -0,0 +1,87 @@ +import json +import openproblems +import openproblems.api.hash +import pathlib +import re +import sys +import workflow_utils + +API_PATTERN = re.compile(r"^#.*API$") +HEADING_PATTERN = re.compile(r"^# ") + + +def get_task_description(task): + description = "" + readme_file = task.__file__.replace("__init__.py", "README.md") + with open(readme_file, "r") as readme_handle: + for line in readme_handle: + if HEADING_PATTERN.match(line): + # exclude top-level headings + continue + if API_PATTERN.match(line): + # exclude everything after ## API + break + description += line + return description + + +def write_task_json(task, outdir: pathlib.Path): + data = { + "task_id": openproblems.utils.get_member_id(task), + "commit_sha": openproblems.api.hash.git_hash(task), + "task_name": 
task._task_name,
+        "task_summary": task._task_summary,
+        "task_description": get_task_description(task),
+        "repo": "openproblems-bio/openproblems",
+    }
+    with open(outdir.joinpath("task_info.json"), "w") as handle:
+        json.dump(data, handle, indent=4)
+
+
+def _write_function_json(task, outdir: pathlib.Path, functions, function_type: str):
+    data = []
+    for function in functions:
+        function.metadata.update(
+            {
+                "task_id": openproblems.utils.get_member_id(task),
+                "commit_sha": openproblems.api.hash.git_hash(function),
+                f"{function_type}_id": openproblems.utils.get_member_id(function),
+                "implementation_url": (
+                    "https://github.com/openproblems-bio/openproblems/"
+                    f"blob/main/{function.__module__.replace('.', '/')}.py"
+                ),
+            }
+        )
+        data.append(function.metadata)
+
+    with open(outdir.joinpath(f"{function_type}_info.json"), "w") as handle:
+        json.dump(data, handle, indent=4)
+
+
+def write_dataset_json(task, outdir: pathlib.Path):
+    _write_function_json(task, outdir, task.DATASETS, "dataset")
+
+
+def write_method_json(task, outdir: pathlib.Path):
+    _write_function_json(task, outdir, task.METHODS, "method")
+
+
+def write_metric_json(task, outdir: pathlib.Path):
+    _write_function_json(task, outdir, task.METRICS, "metric")
+
+
+def main(outdir: pathlib.Path):
+    for task in openproblems.TASKS:
+        if workflow_utils.task_is_incomplete(task):
+            # don't write json for incomplete tasks
+            continue
+        task_outdir = outdir.joinpath(openproblems.utils.get_member_id(task), "data")
+        task_outdir.mkdir(parents=True, exist_ok=True)
+        write_task_json(task, task_outdir)
+        write_dataset_json(task, task_outdir)
+        write_method_json(task, task_outdir)
+        write_metric_json(task, task_outdir)
+
+
+if __name__ == "__main__":
+    main(pathlib.Path(sys.argv[1]))
diff --git a/workflow/parse_nextflow.py b/workflow/parse_nextflow.py
index bc9ef3a21a..9b9cb31ec6 100644
--- a/workflow/parse_nextflow.py
+++ b/workflow/parse_nextflow.py
@@ -1,10 +1,44 @@
+"""
+Schema:
+
+# content/benchmarks/{task.__name__}/data/results.json
+[
+    {
+        "task_id": task.__name__,
+        "commit_sha": "abc123",
+        "method_id": method.__name__,
+        "dataset_id": dataset.__name__,
+        "submission_time": "1970-01-01 00:00:00.000",
+        "code_version": openproblems.__version__,
+        "resources": {
+            "duration_sec": 100.0,
+            "cpu_pct": 100.0,
+            "peak_memory_mb": 1000.0,
+            "disk_read_mb": 1000.0,
+            "disk_write_mb": 1000.0,
+        },
+        "metric_values": {
+            metric.__name__: 1.0,
+            ...
+        },
+        "scaled_scores": {
+            metric.__name__: 1.0,
+            ...
+        },
+        "mean_score": 1.0
+    },
+    ...
+]
+"""
 import collections
+import copy
 import json
 import numpy as np
 import numpyencoder
 import openproblems.api.utils
 import os
 import pandas as pd
+import pathlib
 import sys
 import warnings
 import workflow_utils
@@ -26,36 +60,36 @@ def dump_json(obj, fp):
 size_units = {"B": 1, "KB": 10**3, "MB": 10**6, "GB": 10**9, "TB": 10**12}


-def parse_size_to_gb(size):
-    """Convert a file size to an integer in GB.
+def parse_size_to_mb(size):
+    """Convert a file size to an integer in MB.

     Example
     -------
-    >>> parse_size_to_gb("1000 MB")
-    1
+    >>> parse_size_to_mb("1 GB")
+    1000
     """
     number, unit = [string.strip() for string in size.split()]
-    return int(float(number) * size_units[unit]) / size_units["GB"]
+    return int(float(number) * size_units[unit]) / size_units["MB"]


 time_units = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24}


-def parse_time_to_min(time):
-    """Convert a duration to an integer in minutes.
+def parse_time_to_sec(time):
+    """Convert a duration to an integer in seconds.
Example ------- >>> parse_time_to_min("2m 30s") - 2.5 + 150 """ if " " in time: - return sum([parse_time_to_min(t) for t in time.split(" ")]) + return sum([parse_time_to_sec(t) for t in time.split(" ")]) time = time.strip() for unit, value in time_units.items(): if time.endswith(unit): number = float(time.replace(unit, "")) - return number * value / time_units["m"] + return number * value / time_units["s"] def read_trace(filename): @@ -83,11 +117,14 @@ def read_trace(filename): def parse_trace_to_dict(df): """Parse the trace dataframe and convert to dict.""" + print(f"Parsing {df.shape[0]} trace records") results = collections.defaultdict(lambda: collections.defaultdict(dict)) for task_name in df["task"].unique(): df_task = df.loc[df["task"] == task_name] + print(f"{task_name}: {df_task.shape[0]} records") for dataset_name in df_task["dataset"].unique(): df_dataset = df_task.loc[df_task["dataset"] == dataset_name] + print(f"{task_name}.{dataset_name}: {df_task.shape[0]} records") for _, row in df_dataset.iterrows(): method_name = row["method"] results[task_name][dataset_name][method_name] = row.to_dict() @@ -97,13 +134,13 @@ def parse_trace_to_dict(df): return results -def parse_metric_results(results_path, results): +def parse_metric_results(results_path: pathlib.Path, results): """Add metric results to the trace output.""" missing_traces = [] - for filename in os.listdir(os.path.join(results_path, "results/metrics")): - with open( - os.path.join(results_path, "results/metrics", filename), "r" - ) as handle: + metric_filenames = os.listdir(results_path.joinpath("results", "metrics")) + print(f"Loading {len(metric_filenames)} metric results") + for filename in sorted(metric_filenames): + with open(results_path.joinpath("results", "metrics", filename), "r") as handle: result = float(handle.read().strip()) task_name, dataset_name, method_name, metric_name = filename.replace( ".metric.txt", "" @@ -124,12 +161,12 @@ def parse_metric_results(results_path, results): return results -def parse_method_versions(results_path, results): +def parse_method_versions(results_path: pathlib.Path, results): """Add method versions to the trace output.""" missing_traces = [] - for filename in os.listdir(os.path.join(results_path, "results/method_versions")): + for filename in os.listdir(results_path.joinpath("results", "method_versions")): with open( - os.path.join(results_path, "results/method_versions", filename), "r" + results_path.joinpath("results", "method_versions", filename), "r" ) as handle: code_version = handle.read().strip() task_name, dataset_name, method_name = filename.replace( @@ -150,116 +187,128 @@ def parse_method_versions(results_path, results): return results -def compute_ranking(task_name, dataset_results): - """Rank all methods on a specific dataset.""" - rankings = np.zeros(len(dataset_results)) - metric_names = list(dataset_results.values())[0]["metrics"].keys() +def normalize_scores(task_name, dataset_results): + """Normalize method scores to [0, 1] based on baseline method scores.""" + for method_name in dataset_results: + # store original unnormalized results + dataset_results[method_name]["metrics_raw"] = copy.copy( + dataset_results[method_name]["metrics"] + ) + metric_names = list(list(dataset_results.values())[0]["metrics"].keys()) + for metric_name in metric_names: - metric = openproblems.api.utils.get_function(task_name, "metrics", metric_name) - sorted_order = np.argsort( + try: + metric = openproblems.api.utils.get_function( + task_name, "metrics", metric_name + ) + except 
openproblems.api.utils.NoSuchFunctionError as e: + print(f"[WARN] {e}") + del dataset_results[method_name]["metrics"][metric_name] + continue + metric_scores = np.array( [ dataset_results[method_name]["metrics"][metric_name] for method_name in dataset_results ] ) - if metric.metadata["maximize"]: - sorted_order = sorted_order[::-1] - rankings += np.argsort(sorted_order) - - method_names = list(dataset_results.keys()) - final_ranking = { - method_names[method_idx]: rank + 1 - for method_idx, rank in zip( - np.argsort(rankings), np.arange(len(dataset_results)) + baseline_methods = [] + for method_name in list(dataset_results.keys()): + try: + method = openproblems.api.utils.get_function( + task_name, + "methods", + method_name, + ) + except openproblems.api.utils.NoSuchFunctionError as e: + print(f"[WARN] {e}") + del dataset_results[method_name] + if method.metadata["is_baseline"]: + baseline_methods.append(method_name) + if len(baseline_methods) < 2: + # just use all methods as a fallback + baseline_methods = dataset_results.keys() + baseline_scores = np.array( + [ + dataset_results[method_name]["metrics"][metric_name] + for method_name in baseline_methods + ] ) - } - return final_ranking + baseline_min = np.nanmin(baseline_scores) + baseline_range = np.nanmax(baseline_scores) - baseline_min + metric_scores -= baseline_min + metric_scores /= np.where(baseline_range != 0, baseline_range, 1) + if not metric.metadata["maximize"]: + metric_scores = 1 - metric_scores + for method_name, score in zip(dataset_results, metric_scores): + dataset_results[method_name]["metrics"][metric_name] = score + return dataset_results + + +def fix_values(metric_result): + if np.isnan(metric_result): + return "NaN" + if np.isneginf(metric_result): + return "-Inf" + if np.isinf(metric_result): + return "Inf" + return metric_result + + +def fix_values_scaled(metric_result): + if np.isnan(metric_result) or np.isinf(metric_result): + return 0 + return metric_result def dataset_results_to_json(task_name, dataset_name, dataset_results): - """Convert the raw dataset results to pretty JSON for web.""" - dataset = openproblems.api.utils.get_function(task_name, "datasets", dataset_name) - output = dict( - name=dataset.metadata["dataset_name"], - data_url=dataset.metadata["data_url"], - data_reference=dataset.metadata["data_reference"], - headers=dict(names=["Rank"], fixed=["Name", "Paper", "Website", "Code"]), - results=list(), - ) - ranking = compute_ranking(task_name, dataset_results) - metric_names = set() - for method_name, rank in ranking.items(): - method_results = dataset_results[method_name] - method = openproblems.api.utils.get_function(task_name, "methods", method_name) + dataset_results = normalize_scores(task_name, dataset_results) + out = [] + for method_name, method_results in dataset_results.items(): + raw = {k: fix_values(v) for k, v in method_results["metrics_raw"].items()} + scaled = {k: fix_values_scaled(v) for k, v in method_results["metrics"].items()} + resources = { + "duration_sec": parse_time_to_sec(method_results["duration"]), + "cpu_pct": float(method_results["%cpu"].replace("%", "")), + "peak_memory_mb": parse_size_to_mb(method_results["peak_rss"]), + "disk_read_mb": parse_size_to_mb(method_results["rchar"]), + "disk_write_mb": parse_size_to_mb(method_results["wchar"]), + } result = { - "Name": method.metadata["method_name"], - "Paper": method.metadata["paper_name"], - "Paper URL": method.metadata["paper_url"], - "Year": method.metadata["paper_year"], - "Library": method.metadata["code_url"], - 
"Implementation": "https://github.com/openproblems-bio/openproblems/" - f"blob/main/{method.__module__.replace('.', '/')}", - "Version": method_results["code_version"], - "Runtime (min)": parse_time_to_min(method_results["realtime"]), - "CPU (%)": float(method_results["%cpu"].replace("%", "")), - "Memory (GB)": parse_size_to_gb(method_results["peak_rss"]), - "Rank": rank, + "task_id": task_name, + "commit_sha": workflow_utils.get_sha(), + "method_id": method_name, + "dataset_id": dataset_name, + "submission_time": method_results["submit"], + "code_version": method_results["code_version"], + "resources": resources, + "metric_values": raw, + "scaled_scores": scaled, + "mean_score": np.array(list(scaled.values())).mean(), } - for metric_name, metric_result in method_results["metrics"].items(): - metric = openproblems.api.utils.get_function( - task_name, "metrics", metric_name - ) - result[metric.metadata["metric_name"]] = metric_result - metric_names.add(metric.metadata["metric_name"]) - output["results"].append(result) - output["headers"]["names"].extend(list(metric_names)) - output["headers"]["names"].extend( - [ - "Memory (GB)", - "Runtime (min)", - "CPU (%)", - "Name", - "Paper", - "Code", - "Year", - ] - ) - return output + out.append(result) + return out -def results_to_json(results, outdir): +def results_to_json(results, outdir: pathlib.Path): """Convert the full results to pretty JSON for web.""" - if not os.path.isdir(outdir): - os.mkdir(outdir) for task_name, task_results in results.items(): - if workflow_utils.task_is_incomplete( - openproblems.api.utils.str_to_task(task_name) - ): - # don't write results for incomplete tasks - continue + task_results_out = [] + task_dir = outdir.joinpath(task_name, "data") + task_dir.mkdir(parents=True, exist_ok=True) for dataset_name, dataset_results in task_results.items(): results_dir = os.path.join(outdir, task_name) if not os.path.isdir(results_dir): os.mkdir(results_dir) - filename = os.path.join(results_dir, "{}.json".format(dataset_name)) - try: - dataset_results_json = dataset_results_to_json( - task_name, dataset_name, dataset_results - ) - except openproblems.api.utils.NoSuchFunctionError: - continue - with open(filename, "w") as handle: - dump_json( - dataset_results_json, - handle, - ) + task_results_out.extend( + dataset_results_to_json(task_name, dataset_name, dataset_results) + ) + with open(task_dir.joinpath("results.json"), "w") as handle: + dump_json(task_results_out, handle) -def main(results_path, outdir): +def main(results_path: pathlib.Path, outdir: pathlib.Path): """Parse the nextflow output.""" - df = read_trace( - os.path.join(results_path, "results/pipeline_info/execution_trace.txt") - ) + df = read_trace(results_path.joinpath("results/pipeline_info/execution_trace.txt")) results = parse_trace_to_dict(df) results = parse_metric_results(results_path, results) results = parse_method_versions(results_path, results) @@ -268,4 +317,4 @@ def main(results_path, outdir): if __name__ == "__main__": - main(sys.argv[1], sys.argv[2]) + main(pathlib.Path(sys.argv[1]), pathlib.Path(sys.argv[2])) diff --git a/workflow/snakemake_tools.py b/workflow/snakemake_tools.py index 8c9b92e42b..81b5036ee6 100644 --- a/workflow/snakemake_tools.py +++ b/workflow/snakemake_tools.py @@ -173,9 +173,9 @@ def docker_image_age(image, pull_on_error=True): return docker_image_age(image, pull_on_error=False) elif date_string == "": warnings.warn( - "Docker image singlecellopenproblems/{} not found; " - "assuming needs rebuild. 
If you think this message is in error, " - "you can fix this by running `snakemake -j 1 docker_pull`".format(image) + "Docker image singlecellopenproblems/{} not found; assuming needs" + " rebuild. If you think this message is in error, you can fix this by" + " running `snakemake -j 1 docker_pull`".format(image) ) return -1 else: @@ -264,6 +264,7 @@ def docker_image_label(image, label): return output +@functools.lru_cache(None) def docker_imagespec_changed(image, dockerfile): """Check if the Dockerfile has changed @@ -273,6 +274,17 @@ def docker_imagespec_changed(image, dockerfile): If working with a github actions-built image, check if there is any diff between the Dockerfile and base/main """ + base_image = _docker_base(image) + if base_image is not None: + base_docker_path = os.path.join(IMAGES_DIR, base_image) + base_dockerfile = os.path.join(base_docker_path, "Dockerfile") + if docker_imagespec_changed(base_image, base_dockerfile): + print( + "{}: base image spec changed".format(image), + file=sys.stderr, + ) + return True + if not docker_image_exists(image): # will be downloaded from dockerhub build_type = "github_actions" diff --git a/workflow/workflow_utils.py b/workflow/workflow_utils.py index b0e7d0cb47..dbaca0e332 100644 --- a/workflow/workflow_utils.py +++ b/workflow/workflow_utils.py @@ -1,12 +1,27 @@ +import functools +import git +import openproblems +import pathlib + TASK_MIN_DATASETS = 1 TASK_MIN_METHODS = 3 TASK_MIN_METRICS = 1 +@functools.lru_cache() +def get_sha(): + repo = git.Repo(pathlib.Path(openproblems.__path__[0]).parent) + assert not repo.bare + return repo.head.commit.hexsha + + def task_is_incomplete(task): if len(task.DATASETS) < TASK_MIN_DATASETS: return True - if len(task.METHODS) < TASK_MIN_METHODS: + non_baseline_methods = [ + method for method in task.METHODS if not method.metadata["is_baseline"] + ] + if len(non_baseline_methods) < TASK_MIN_METHODS: return True if len(task.METRICS) < TASK_MIN_METRICS: return True
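As a closing illustration of the baseline scaling that normalize_scores in workflow/parse_nextflow.py applies before results are written: each raw score is shifted by the baseline methods' minimum and divided by their spread (falling back to all methods when fewer than two baselines exist), flipped to 1 - score for metrics with maximize=False, and NaN/Inf scaled values are zeroed by fix_values_scaled. The sketch below uses made-up numbers and is not repository code.

import numpy as np

# Raw values of one metric on one dataset: two baseline methods plus two real methods.
raw = {
    "random_baseline": 0.20,
    "perfect_baseline": 0.80,
    "method_a": 0.65,
    "method_b": 0.95,
}
baseline_methods = ["random_baseline", "perfect_baseline"]

baseline_scores = np.array([raw[name] for name in baseline_methods])
baseline_min = np.nanmin(baseline_scores)
baseline_range = np.nanmax(baseline_scores) - baseline_min

scaled = {
    name: (value - baseline_min) / (baseline_range if baseline_range != 0 else 1)
    for name, value in raw.items()
}
# scaled == {"random_baseline": 0.0, "perfect_baseline": 1.0,
#            "method_a": 0.75, "method_b": 1.25}
# Scores are anchored to the baselines rather than clipped, so a strong method can
# exceed 1. In results.json, each method's mean_score is the mean of its scaled
# scores across all metrics for that dataset.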