diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
index dbbe4ae7..f755d39c 100755
--- a/scripts/run_benchmark/run_full_local.sh
+++ b/scripts/run_benchmark/run_full_local.sh
@@ -59,6 +59,7 @@ celltype_annotation_methods:
   # - moscot
   # - mapmycells
   # - tangram
+  # - singler
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
index 1686cfa8..b7fa621a 100755
--- a/scripts/run_benchmark/run_full_seqeracloud.sh
+++ b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -51,6 +51,7 @@ celltype_annotation_methods:
   - moscot
   - mapmycells
   - tangram
+  - singler
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
index 3ae9e6f7..965110b2 100755
--- a/scripts/run_benchmark/run_test_local.sh
+++ b/scripts/run_benchmark/run_test_local.sh
@@ -54,6 +54,7 @@ celltype_annotation_methods:
   # - moscot
   # - mapmycells
   # - tangram
+  # - singler
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
index eba4fc9c..51ef729b 100755
--- a/scripts/run_benchmark/run_test_seqeracloud.sh
+++ b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -50,6 +50,7 @@ celltype_annotation_methods:
   - moscot
   - mapmycells
   - tangram
+  - singler
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/src/methods_cell_type_annotation/singler/config.vsh.yaml b/src/methods_cell_type_annotation/singler/config.vsh.yaml
new file mode 100644
index 00000000..bb685efc
--- /dev/null
+++ b/src/methods_cell_type_annotation/singler/config.vsh.yaml
@@ -0,0 +1,38 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+
+name: singler
+label: "singler"
+summary: "Cell type annotations using single-cell reference with SingleR"
+description: "Cell type annotations using single-cell reference with SingleR"
+
+links:
+  documentation: "https://github.com/SingleR-inc/singler-py"
+  repository: "https://github.com/SingleR-inc/singler-py"
+references:
+  doi: "10.1038/s41590-018-0276-y"
+
+arguments:
+  - name: --labels_key
+    type: string
+    description: The key of the cell labels in the input data.
+    default: cell_labels
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi: [singler, singlecellexperiment]
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, midcpu, midmem ]
diff --git a/src/methods_cell_type_annotation/singler/script.py b/src/methods_cell_type_annotation/singler/script.py
new file mode 100644
index 00000000..c6d53928
--- /dev/null
+++ b/src/methods_cell_type_annotation/singler/script.py
@@ -0,0 +1,64 @@
+import anndata as ad
+import os
+import shutil
+
+import singlecellexperiment as sce
+import singler
+
+## VIASH START
+# The following code has been auto-generated by Viash.
+par = {
+  'input_spatial_normalized_counts': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+  'input_transcript_assignments': r'resources_test/task_ist_preprocessing/mouse_brain_combined/transcript_assignments.zarr',
+  'input_scrnaseq_reference': r'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+  'celltype_key': r'cell_type',
+  'output': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_cell_types.h5ad',
+  'labels_key': r'cell_labels'
+}
+meta = {
+  'name': r'singleR',
+  'functionality_name': r'singleR'
+}
+dep = {
+
+}
+
+## VIASH END
+
+# The spatial dataset is read twice: the SingleCellExperiment copy feeds
+# SingleR, the AnnData copy receives the predicted labels and is written out.
+sce_sp = sce.read_h5ad(par['input_spatial_normalized_counts'])
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+
+sce_ref = sce.read_h5ad(par['input_scrnaseq_reference'])
+
+# Gene names of the spatial dataset, passed to SingleR as test_features.
+features = [str(x) for x in sce_sp.row_data.row_names]
+
+# NOTE(review): the upstream example uses raw counts for the test data;
+# confirm that "counts" is the intended assay here.
+mat = sce_sp.assay("counts")
+# sorted_indices() puts the matrix in the form SingleR expects.
+mat = mat.sorted_indices()
+
+mat_ref = sce_ref.assay("normalized")
+mat_ref = mat_ref.sorted_indices()
+
+# Train the classifier on the scRNA-seq reference. The label column is taken
+# from par['celltype_key'] instead of a hard-coded "cell_type", so references
+# with a differently named annotation column are handled as well.
+built = singler.train_single(
+    ref_data=mat_ref,
+    ref_labels=sce_ref.get_column_data().column(par['celltype_key']),
+    ref_features=sce_ref.get_row_names(),
+    test_features=features,
+)
+
+# Annotate the spatial dataset
+output = singler.classify_single(mat, ref_prebuilt=built)
+
+adata_sp.obs["cell_type"] = output['best']
+
+# Write output
+print('Writing output', flush=True)
+adata_sp.write(par['output'])
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 1809744b..8edc98de 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -98,7 +98,7 @@ argument_groups:
           A list of cell type annotation methods to run.
         type: string
         multiple: true
-        default: "ssam:tacco:moscot:mapmycells:tangram"
+        default: "ssam:tacco:moscot:mapmycells:tangram:singler"
       - name: "--expression_correction_methods"
         description: |
           A list of expression correction methods to run.
@@ -170,6 +170,7 @@ dependencies:
   - name: methods_cell_type_annotation/moscot
   - name: methods_cell_type_annotation/mapmycells
   - name: methods_cell_type_annotation/tangram
+  - name: methods_cell_type_annotation/singler
   - name: methods_expression_correction/no_correction
   - name: methods_expression_correction/gene_efficiency_correction
   - name: methods_expression_correction/resolvi_correction
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index d1d78102..b3f15a9c 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -376,7 +376,8 @@ workflow run_wf {
     tacco,
     moscot,
     mapmycells,
-    tangram
+    tangram,
+    singler
   ]
 
   cta_ch = normalization_ch