From 0ed1b06988fce28ab9bf59ee12df4f941f8d1e03 Mon Sep 17 00:00:00 2001
From: "Daniel E. Schaffer" <dschaffe@mit.edu>
Date: Sun, 19 Oct 2025 19:16:05 -0400
Subject: [PATCH 1/3] Split Scanorama

---
 .../config.vsh.yaml                           |  6 +-
 .../script.py                                 |  8 +--
 .../scanorama_integrate/config.vsh.yaml       | 42 +++++++++++++
 src/methods/scanorama_integrate/script.py     | 59 +++++++++++++++++++
 src/workflows/run_benchmark/config.vsh.yaml   |  3 +-
 src/workflows/run_benchmark/main.nf           |  3 +-
 6 files changed, 110 insertions(+), 11 deletions(-)
 rename src/methods/{scanorama => scanorama_correct}/config.vsh.yaml (94%)
 rename src/methods/{scanorama => scanorama_correct}/script.py (92%)
 create mode 100644 src/methods/scanorama_integrate/config.vsh.yaml
 create mode 100644 src/methods/scanorama_integrate/script.py

diff --git a/src/methods/scanorama/config.vsh.yaml b/src/methods/scanorama_correct/config.vsh.yaml
similarity index 94%
rename from src/methods/scanorama/config.vsh.yaml
rename to src/methods/scanorama_correct/config.vsh.yaml
index cb7c2f44..c346d96a 100644
--- a/src/methods/scanorama/config.vsh.yaml
+++ b/src/methods/scanorama_correct/config.vsh.yaml
@@ -1,6 +1,6 @@
 __merge__: /src/api/comp_method.yaml
-name: scanorama
-label: Scanorama
+name: scanorama_correct
+label: Scanorama-Correct
 summary: Efficient integration of heterogeneous single-cell transcriptomes using Scanorama
 description: |
   Scanorama enables batch-correction and integration of heterogeneous scRNA-seq datasets.
@@ -17,7 +17,7 @@ links:
   repository: https://github.com/brianhie/scanorama
   documentation: https://github.com/brianhie/scanorama#readme
 info:
-  method_types: [feature, embedding]
+  method_types: [feature]
   preferred_normalization: log_cp10k
 resources:
   - type: python_script
diff --git a/src/methods/scanorama/script.py b/src/methods/scanorama_correct/script.py
similarity index 92%
rename from src/methods/scanorama/script.py
rename to src/methods/scanorama_correct/script.py
index 2ddb91df..e0831869 100644
--- a/src/methods/scanorama/script.py
+++ b/src/methods/scanorama_correct/script.py
@@ -8,8 +8,7 @@
     'output': 'output.h5ad',
 }
 meta = {
-    'name': 'foo',
-    'config': 'bar'
+    'name': 'scanorama-correct',
 }
 ## VIASH END
 
@@ -57,7 +56,7 @@ def merge_adata(*adata_list, **kwargs):
 batch_categories = adata.obs['batch'].cat.categories
 for i in batch_categories:
     split.append(adata[adata.obs['batch'] == i].copy())
-corrected = scanorama.correct_scanpy(split, return_dimred=True)
+corrected = scanorama.correct_scanpy(split, return_dimred=False)
 corrected = merge_adata(*corrected, batch_key='batch', batch_categories=batch_categories, index_unique=None)
 
 print("Store output", flush=True)
@@ -71,9 +70,6 @@ def merge_adata(*adata_list, **kwargs):
     },
     layers={
         'corrected_counts': corrected.X,
-    },
-    obsm={
-        'X_emb': corrected.obsm["X_scanorama"],
     }
 )
 
diff --git a/src/methods/scanorama_integrate/config.vsh.yaml b/src/methods/scanorama_integrate/config.vsh.yaml
new file mode 100644
index 00000000..a53ab6c5
--- /dev/null
+++ b/src/methods/scanorama_integrate/config.vsh.yaml
@@ -0,0 +1,42 @@
+__merge__: /src/api/comp_method.yaml
+name: scanorama_integrate
+label: Scanorama-Integrate
+summary: Efficient integration of heterogeneous single-cell transcriptomes using Scanorama
+description: |
+  Scanorama enables batch-correction and integration of heterogeneous scRNA-seq datasets.
+  It is designed to be used in scRNA-seq pipelines downstream of noise-reduction methods,
+  including those for imputation and highly-variable gene filtering. The results from
+  Scanorama integration and batch correction can then be used as input to other tools
+  for scRNA-seq clustering, visualization, and analysis.
+references:
+  # Hie, B., Bryson, B. & Berger, B. Efficient integration of heterogeneous single-cell
+  # transcriptomes using Scanorama. Nat Biotechnol 37, 685–691 (2019).
+  # https://doi.org/10.1038/s41587-019-0113-3
+  doi: 10.1038/s41587-019-0113-3
+links:
+  repository: https://github.com/brianhie/scanorama
+  documentation: https://github.com/brianhie/scanorama#readme
+info:
+  method_types: [embedding]
+  preferred_normalization: log_cp10k
+arguments:
+  - name: --dimred
+    type: integer
+    default: 100
+    description: Embedding dimension
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi:
+          - scanorama
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [hightime, highmem, lowcpu]
diff --git a/src/methods/scanorama_integrate/script.py b/src/methods/scanorama_integrate/script.py
new file mode 100644
index 00000000..bbe6ca86
--- /dev/null
+++ b/src/methods/scanorama_integrate/script.py
@@ -0,0 +1,59 @@
+import sys
+import anndata as ad
+import scanorama
+import numpy as np
+
+## VIASH START
+par = {
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+    'output': 'output.h5ad',
+    'dimred': 100
+}
+meta = {
+    'name': 'scanorama-integrate',
+}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])
+from read_anndata_partial import read_anndata
+
+
+print('Read input', flush=True)
+adata = read_anndata(
+    par['input'],
+    X='layers/normalized',
+    obs='obs',
+    var='var',
+    uns='uns'
+)
+
+print('Run scanorama', flush=True)
+split = []
+batch_categories = adata.obs['batch'].cat.categories
+for b in batch_categories:
+    split.append(adata[adata.obs['batch'] == b].copy())
+scanorama.integrate_scanpy(split, dimred=par["dimred"])
+
+# Write each batch's embedding back to its cells' original rows; adapted from https://colab.research.google.com/drive/1CebA3Ow4jXITK0dW5el320KVTX_szhxG
+result = np.zeros((adata.shape[0], split[0].obsm["X_scanorama"].shape[1]))
+for i, b in enumerate(batch_categories):
+    result[adata.obs['batch'] == b] = split[i].obsm["X_scanorama"]
+
+
+print("Store output", flush=True)
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    uns={
+        'dataset_id': adata.uns['dataset_id'],
+        'normalization_id': adata.uns['normalization_id'],
+        'method_id': meta['name'],
+    },
+    obsm={
+        'X_emb': result
+    },
+    shape=adata.shape,
+)
+
+print("Write output to file", flush=True)
+output.write(par['output'], compression='gzip')
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 09905ad0..ba479227 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -99,7 +99,8 @@ dependencies:
   - name: methods/mnnpy
   - name: methods/pyliger
   - name: methods/scalex
-  - name: methods/scanorama
+  - name: methods/scanorama_correct
+  - name: methods/scanorama_integrate
   - name: methods/scanvi
   - name: methods/scgpt_finetuned
   - name: methods/scgpt_zeroshot
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 6196f749..541e95b7 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -27,7 +27,8 @@ methods = [
   mnnpy,
   pyliger,
   scalex,
-  scanorama,
+  scanorama_correct,
+  scanorama_integrate,
   scanvi,
   scgpt_finetuned.run(
     args: [model: file("s3://openproblems-work/cache/scGPT_human.zip")]

From c768daf5892b6c724166a78111e0dfa303b3eb6f Mon Sep 17 00:00:00 2001
From: "Daniel E. Schaffer" <dschaffe@mit.edu>
Date: Sun, 19 Oct 2025 19:26:39 -0400
Subject: [PATCH 2/3] Specify change

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 890c4eb7..4e4e55c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,12 @@
 * Update scPRINT to use latest stable version (PR #70)
 * Fix kbet dependencies to numpy<2 and scipy<=1.13 (PR #78).
 
+* Split Scanorama into two methods/scores
+    - Split scanorama into embedding (integrate) and count-correction (correct) modes, instead of running both together. 
+        This makes clear what the reported score(s) are describing, and also corrects the misleadingly low score that 
+        the combined method receives. The scores for each componenet are in line with their scores from v1, where the modes 
+        were seperated.  
+
 # task_batch_integration 2.0.0
 
 A major update to the OpenProblems framework, switching from a Python-based framework to a Viash + Nextflow-based framework. This update features the same concepts as the previous version, but with a new implementation that is more flexible, scalable, and maintainable.

From db5f0669e6be3feaa5c293551498d05978c84f73 Mon Sep 17 00:00:00 2001
From: "Daniel E. Schaffer" <dschaffe@mit.edu>
Date: Sun, 19 Oct 2025 19:28:41 -0400
Subject: [PATCH 3/3] typos

---
 CHANGELOG.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4e4e55c4..80aa7b38 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,10 +25,10 @@
 * Fix kbet dependencies to numpy<2 and scipy<=1.13 (PR #78).
 
 * Split Scanorama into two methods/scores
-    - Split scanorama into embedding (integrate) and count-correction (correct) modes, instead of running both together. 
+    - Split Scanorama into embedding (integrate) and count-correction (correct) modes, instead of running both together. 
         This makes clear what the reported score(s) are describing, and also corrects the misleadingly low score that 
-        the combined method receives. The scores for each componenet are in line with their scores from v1, where the modes 
-        were seperated.  
+        the combined method receives. The scores for each component are in line with their scores from v1, where the modes 
+        were separated.  
 
 # task_batch_integration 2.0.0