diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh index 8a3c440e..dbbe4ae7 100755 --- a/scripts/run_benchmark/run_full_local.sh +++ b/scripts/run_benchmark/run_full_local.sh @@ -57,6 +57,8 @@ celltype_annotation_methods: - ssam # - tacco # - moscot + # - mapmycells + # - tangram expression_correction_methods: - no_correction # - gene_efficiency_correction diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh index 60217433..1686cfa8 100755 --- a/scripts/run_benchmark/run_full_seqeracloud.sh +++ b/scripts/run_benchmark/run_full_seqeracloud.sh @@ -50,6 +50,7 @@ celltype_annotation_methods: - tacco - moscot - mapmycells + - tangram expression_correction_methods: - no_correction - gene_efficiency_correction diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh index bec76841..3ae9e6f7 100755 --- a/scripts/run_benchmark/run_test_local.sh +++ b/scripts/run_benchmark/run_test_local.sh @@ -53,6 +53,7 @@ celltype_annotation_methods: # - tacco # - moscot # - mapmycells + # - tangram expression_correction_methods: - no_correction # - gene_efficiency_correction diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh index 460fe2e8..eba4fc9c 100755 --- a/scripts/run_benchmark/run_test_seqeracloud.sh +++ b/scripts/run_benchmark/run_test_seqeracloud.sh @@ -48,6 +48,8 @@ celltype_annotation_methods: - ssam - tacco - moscot + - mapmycells + - tangram expression_correction_methods: - no_correction - gene_efficiency_correction diff --git a/src/methods_cell_type_annotation/tangram/config.vsh.yaml b/src/methods_cell_type_annotation/tangram/config.vsh.yaml new file mode 100644 index 00000000..447a3617 --- /dev/null +++ b/src/methods_cell_type_annotation/tangram/config.vsh.yaml @@ -0,0 +1,43 @@ +__merge__: /src/api/comp_method_cell_type_annotation.yaml + +name: tangram +label: "Tangram" +summary: 
"Annotate cell types using Tangram" +description: "Annotate cell types using Tangram" +links: + documentation: "https://tangram-sc.readthedocs.io" + repository: "https://github.com/broadinstitute/Tangram" +references: + doi: "10.1038/s41592-021-01264-7" + +arguments: + - name: --mode + required: false + direction: input + type: string + default: "cells" + - name: --num_epochs + required: false + direction: input + type: integer + default: 1000 + +resources: + - type: python_script + path: script.py + +engines: + - type: docker + #image: openproblems/base_pytorch_nvidia:1 #NOTE: leads to dependency issues. + # TODO: could try some other base image with pytorch and cuda installed. + image: openproblems/base_python:1 + setup: + - type: python + pypi: [tangram-sc] + - type: native + +runners: + - type: executable + - type: nextflow + directives: + label: [ midtime, midcpu, midmem, gpu ] diff --git a/src/methods_cell_type_annotation/tangram/script.py b/src/methods_cell_type_annotation/tangram/script.py new file mode 100644 index 00000000..38f883c5 --- /dev/null +++ b/src/methods_cell_type_annotation/tangram/script.py @@ -0,0 +1,82 @@ +import anndata as ad +import tangram as tg +import torch + +## VIASH START +par = { + 'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad', + 'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad', + 'output': 'spatial_with_celltypes.h5ad', + 'celltype_key': 'cell_type', + 'mode': 'cells', + 'num_epochs': 1000, +} +meta = { + 'name': 'tangram', +} +## VIASH END + +# GPU check +if torch.cuda.is_available(): + device = "cuda:0" +else: + device = "cpu" + +# Optional parameter check: For this specific annotation method the par['input_spatial_normalized_counts'] and par['input_scrnaseq_reference'] are required +assert par['input_spatial_normalized_counts'] is not None, 'Spatial input is required for this annotation 
method.' +assert par['input_scrnaseq_reference'] is not None, 'Single cell input is required for this annotation method.' + +# Read input +adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts']) +adata_sc = ad.read_h5ad(par['input_scrnaseq_reference']) + +# use log1p normalized values +adata_sc.X = adata_sc.layers['normalized'] +adata_sp.X = adata_sp.layers['normalized'] + +adata_sp_orig = adata_sp.copy() + +# use all the genes from adata_sp as markers for tangram +markers = adata_sp.var_names.tolist() + +# Removes genes whose entries are all zero. Finds the intersection between adata_sc, adata_sp and the given marker gene list, +# and saves the intersected markers in both AnnData objects. Calculates density priors and saves them with adata_sp. +tg.pp_adatas(adata_sc=adata_sc, adata_sp=adata_sp, genes=markers) + +# Map single cell data (`adata_sc`) on spatial data (`adata_sp`). +# density_prior (str, ndarray or None): Spatial density of spots. When it is a string, the value can be 'rna_count_based' or +# 'uniform'; when it is an ndarray, shape = (number_spots,). +# Use 'uniform' if the spatial voxels are at single cell resolution (e.g. MERFISH). 'rna_count_based' assumes that +# cell density is proportional to the number of RNA molecules. 
+adata_map = tg.map_cells_to_space( + adata_sc=adata_sc, + adata_sp=adata_sp, + device=device, + mode=par['mode'], + num_epochs=par['num_epochs'], + density_prior='uniform' +) + +# Spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData +tg.project_cell_annotations( + adata_map = adata_map, + adata_sp = adata_sp, + annotation=par['celltype_key'] +) + +# Use original without extra layers generated from tangram +df = adata_sp.obsm['tangram_ct_pred'].copy() +adata_sp = adata_sp_orig.copy() + +# Set the cell type annotation +adata_sp.obs[par['celltype_key']] = df.idxmax(axis=1) + + +# # Normalize by row before setting the score +# normalized_df = df.div(df.sum(axis=1), axis=0) +# max_values = normalized_df.max(axis=1) +# adata_sp.obs['tangram_score'] = max_values +# adata_sp.obsm['ct_tangram_scores'] = normalized_df + +# Write output +adata_sp.write_h5ad(par['output']) \ No newline at end of file diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 29104273..1809744b 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -98,7 +98,7 @@ argument_groups: A list of cell type annotation methods to run. type: string multiple: true - default: "ssam:tacco:moscot:mapmycells" + default: "ssam:tacco:moscot:mapmycells:tangram" - name: "--expression_correction_methods" description: | A list of expression correction methods to run. 
@@ -169,6 +169,7 @@ dependencies: - name: methods_cell_type_annotation/tacco - name: methods_cell_type_annotation/moscot - name: methods_cell_type_annotation/mapmycells + - name: methods_cell_type_annotation/tangram - name: methods_expression_correction/no_correction - name: methods_expression_correction/gene_efficiency_correction - name: methods_expression_correction/resolvi_correction diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 9ec2c892..d1d78102 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -375,7 +375,8 @@ workflow run_wf { ssam, tacco, moscot, - mapmycells + mapmycells, + tangram ] cta_ch = normalization_ch