From 273a4be2669e135db8d31a9dcb8f0fc88a02be95 Mon Sep 17 00:00:00 2001
From: Habib Rehman <harehman@iu.edu>
Date: Sat, 13 Dec 2025 03:18:31 -0500
Subject: [PATCH 1/3] Added RCTD

---
 common                                        |  2 +-
 .../RCTD/config.vsh.yaml                      | 45 +++++++++++++
 .../RCTD/script.R                             | 67 +++++++++++++++++++
 3 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 src/methods_cell_type_annotation/RCTD/config.vsh.yaml
 create mode 100644 src/methods_cell_type_annotation/RCTD/script.R

diff --git a/common b/common
index 79b884b4..65e05af6 160000
--- a/common
+++ b/common
@@ -1 +1 @@
-Subproject commit 79b884b4c7fed300972d83a6ca025abb6116cbdc
+Subproject commit 65e05af68a11ee87853fcf7a3c6b579001f21abe
diff --git a/src/methods_cell_type_annotation/RCTD/config.vsh.yaml b/src/methods_cell_type_annotation/RCTD/config.vsh.yaml
new file mode 100644
index 00000000..b3b8d2d7
--- /dev/null
+++ b/src/methods_cell_type_annotation/RCTD/config.vsh.yaml
@@ -0,0 +1,45 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+
+name: rctd
+label: "RCTD"
+summary: "Annotate cells using the RCTD method"
+description: "Robust Cell Type Decomposition (RCTD) inputs a spatial transcriptomics dataset, which consists of a set of pixels, which are spatial locations that measure RNA counts across many genes. RCTD additionally uses a single cell RNA-seq (scRNA-seq) dataset, which is labeled for cell types. RCTD learns cell type profiles from the scRNA-seq dataset, and uses these to label the spatial transcriptomics pixels as cell types."
+links:
+  documentation: "https://github.com/dmcable/spacexr"
+  repository: "https://github.com/dmcable/spacexr"
+references:
+  doi: "10.1038/s41587-021-00830-w"
+
+resources:
+  - type: r_script
+    path: script.R
+
+engines:
+  - type: docker
+    image: openproblems/base_r:1
+    setup:
+      #- type: docker
+      #  run: |
+      #    apt-get update && apt-get install -y wget
+      - type: r
+        bioc: [anndataR, rhdf5, devtools]
+      #- type: r
+      #  bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
+      #  bioc_force_install: true
+      - type: docker
+        run: |
+          Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('dmcable/spacexr', build_vignettes = FALSE)"
+
+      # NOTE: the forced source re-install of SingleCellExperiment in the docker step above works around the bug described in:
+      # https://github.com/drighelli/SpatialExperiment/issues/171
+      # https://github.com/satijalab/seurat/issues/9889
+      # It triggers the correct re-install of SpatialExperiment and can probably be dropped again in the future.
+
+      # TODO: spacexr may be installable through the `github:` field of a `type: r` setup entry instead of devtools - confirm.
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ hightime, midcpu, highmem ]
\ No newline at end of file
diff --git a/src/methods_cell_type_annotation/RCTD/script.R b/src/methods_cell_type_annotation/RCTD/script.R
new file mode 100644
index 00000000..01ca6013
--- /dev/null
+++ b/src/methods_cell_type_annotation/RCTD/script.R
@@ -0,0 +1,67 @@
+library(spacexr)
+library(Matrix)
+library(SingleCellExperiment)
+library(anndataR)
+
+## VIASH START
+par <- list(
+  "input_spatial_normalized_counts" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
+  "input_scrnaseq_reference" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad",
+  "output" = "task_ist_preprocessing/tmp/spatial_types.h5ad"
+)
+
+meta <- list(
+  "cpus" = 4  # named with `=`: `'cpus': 4` is the sequence operator, and a trailing comma leaves an empty argument
+)
+
+## VIASH END
+
+# Load the spatial dataset from h5ad as a SingleCellExperiment
+sce <- read_h5ad(par$input_spatial_normalized_counts, as = "SingleCellExperiment")
+
+# Assemble the SpatialRNA puck: centroid coordinates named by cell_id plus the "counts" assay
+cell_meta <- colData(sce)
+coords <- data.frame(
+  centroid_x = cell_meta$centroid_x, centroid_y = cell_meta$centroid_y
+)
+rownames(coords) <- cell_meta$cell_id
+puck <- SpatialRNA(coords, assay(sce, "counts"))
+
+# Read the scRNA-seq reference as a SingleCellExperiment
+ref <- read_h5ad(par$input_scrnaseq_reference, as = "SingleCellExperiment")
+
+# Keep only reference cell types with at least 25 cells (presumably create.RCTD's CELL_MIN_INSTANCE default - confirm)
+celltype_counts <- table(colData(ref)$cell_type)
+valid_celltypes <- names(celltype_counts)[celltype_counts >= 25]
+filtered_ref <- ref[, colData(ref)$cell_type %in% valid_celltypes]
+ref_counts <- assay(filtered_ref, "counts")
+# Re-factor so the levels of the removed cell types are dropped
+colData(filtered_ref)$cell_type <- factor(colData(filtered_ref)$cell_type)
+cell_types <- colData(filtered_ref)$cell_type
+names(cell_types) <- colnames(ref_counts)
+reference <- Reference(ref_counts, cell_types, min_UMI = 0)
+
+# Use meta$cpus when it is set; otherwise run on a single core
+has_cpus <- "cpus" %in% names(meta) && !is.null(meta$cpus)
+cores <- if (has_cpus) meta$cpus else 1
+cat(sprintf("Number of cores: %s\n", cores))
+
+# Build the RCTD object and run it in doublet mode
+myRCTD <- create.RCTD(puck, reference, max_cores = cores)
+myRCTD <- run.RCTD(myRCTD, doublet_mode = "doublet")
+
+# Pull the RCTD results table and take each entry's first_type call
+results_df <- myRCTD@results$results_df
+spatial_cell_types <- results_df$first_type
+# Add a "None_sp" level and assign it wherever spot_class is "reject"
+levels(spatial_cell_types) <- c(levels(spatial_cell_types), "None_sp")
+spatial_cell_types[results_df$spot_class == "reject"] <- "None_sp"
+names(spatial_cell_types) <- rownames(results_df)
+
+# Store the annotations on the spatial object; cells missing from the RCTD results keep "None_sp"
+colData(sce)$cell_type <- "None_sp"
+colData(sce)[names(spatial_cell_types),"cell_type"] <- as.character(spatial_cell_types)
+
+# Write the final object to h5ad format
+dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
+write_h5ad(sce, par$output)

From ed321e962f69fa8ae0d06b57cf616b86c24110ad Mon Sep 17 00:00:00 2001
From: Habib Rehman <harehman@iu.edu>
Date: Sat, 13 Dec 2025 03:21:56 -0500
Subject: [PATCH 2/3] allowed for overwriting h5ad

---
 src/methods_cell_type_annotation/RCTD/script.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/methods_cell_type_annotation/RCTD/script.R b/src/methods_cell_type_annotation/RCTD/script.R
index 01ca6013..d07bb334 100644
--- a/src/methods_cell_type_annotation/RCTD/script.R
+++ b/src/methods_cell_type_annotation/RCTD/script.R
@@ -63,5 +63,6 @@ colData(sce)$cell_type <- "None_sp"
 colData(sce)[names(spatial_cell_types),"cell_type"] <- as.character(spatial_cell_types)
 
 # Write the final object to h5ad format
+# mode = "w" lets write_h5ad overwrite an existing file at par$output
 dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
-write_h5ad(sce, par$output)
+write_h5ad(sce, par$output, mode = "w")

From 6dc3aa8c0d659dcc536a48d92d94e8477eafd621 Mon Sep 17 00:00:00 2001
From: LouisK92 <louiskuemmerle@googlemail.com>
Date: Sat, 13 Dec 2025 13:14:09 +0100
Subject: [PATCH 3/3] Rename rctd dir and add method to workflow

---
 scripts/run_benchmark/run_full_local.sh                        | 1 +
 scripts/run_benchmark/run_full_seqeracloud.sh                  | 1 +
 scripts/run_benchmark/run_test_local.sh                        | 1 +
 scripts/run_benchmark/run_test_seqeracloud.sh                  | 1 +
 .../{RCTD => rctd}/config.vsh.yaml                             | 0
 src/methods_cell_type_annotation/{RCTD => rctd}/script.R       | 0
 src/workflows/run_benchmark/config.vsh.yaml                    | 3 ++-
 src/workflows/run_benchmark/main.nf                            | 3 ++-
 8 files changed, 8 insertions(+), 2 deletions(-)
 rename src/methods_cell_type_annotation/{RCTD => rctd}/config.vsh.yaml (100%)
 rename src/methods_cell_type_annotation/{RCTD => rctd}/script.R (100%)

diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
index f755d39c..1464a6df 100755
--- a/scripts/run_benchmark/run_full_local.sh
+++ b/scripts/run_benchmark/run_full_local.sh
@@ -60,6 +60,7 @@ celltype_annotation_methods:
   # - mapmycells
   # - tangram
   # - singler
+  # - rctd
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
index b7fa621a..d006f7e0 100755
--- a/scripts/run_benchmark/run_full_seqeracloud.sh
+++ b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -52,6 +52,7 @@ celltype_annotation_methods:
   - mapmycells
   - tangram
   - singler
+  - rctd
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
index 965110b2..686326b9 100755
--- a/scripts/run_benchmark/run_test_local.sh
+++ b/scripts/run_benchmark/run_test_local.sh
@@ -55,6 +55,7 @@ celltype_annotation_methods:
   # - mapmycells
   # - tangram
   # - singler
+  # - rctd
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
index 51ef729b..39ab8c5e 100755
--- a/scripts/run_benchmark/run_test_seqeracloud.sh
+++ b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -51,6 +51,7 @@ celltype_annotation_methods:
   - mapmycells
   - tangram
   - singler
+  - rctd
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/src/methods_cell_type_annotation/RCTD/config.vsh.yaml b/src/methods_cell_type_annotation/rctd/config.vsh.yaml
similarity index 100%
rename from src/methods_cell_type_annotation/RCTD/config.vsh.yaml
rename to src/methods_cell_type_annotation/rctd/config.vsh.yaml
diff --git a/src/methods_cell_type_annotation/RCTD/script.R b/src/methods_cell_type_annotation/rctd/script.R
similarity index 100%
rename from src/methods_cell_type_annotation/RCTD/script.R
rename to src/methods_cell_type_annotation/rctd/script.R
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 8edc98de..c25e39ce 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -98,7 +98,7 @@ argument_groups:
           A list of cell type annotation methods to run.
         type: string
         multiple: true
-        default: "ssam:tacco:moscot:mapmycells:tangram:singler"
+        default: "ssam:tacco:moscot:mapmycells:tangram:singler:rctd"
       - name: "--expression_correction_methods"
         description: |
           A list of expression correction methods to run.
@@ -171,6 +171,7 @@ dependencies:
   - name: methods_cell_type_annotation/mapmycells
   - name: methods_cell_type_annotation/tangram
   - name: methods_cell_type_annotation/singler
+  - name: methods_cell_type_annotation/rctd
   - name: methods_expression_correction/no_correction
   - name: methods_expression_correction/gene_efficiency_correction
   - name: methods_expression_correction/resolvi_correction
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index b3f15a9c..6783873d 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -377,7 +377,8 @@ workflow run_wf {
     moscot,
     mapmycells,
     tangram,
-    singler
+    singler,
+    rctd
   ]
   
   cta_ch = normalization_ch