Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions src/methods_cell_type_annotation/singler/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
__merge__: /src/api/comp_method_cell_type_annotation.yaml

name: singler
label: "singler"
summary: "Cell type annotations using single-cell reference with SingleR"
description: "Cell type annotations using single-cell reference with SingleR"

links:
documentation: "https://github.com/openproblems-bio/task_ist_preprocessing"
repository: "https://github.com/openproblems-bio/task_ist_preprocessing"
references:
doi: "10.1038/s41590-018-0276-y"

arguments:
- name: --labels_key
type: string
description: The key of the cell labels in the input data.
default: cell_labels

resources:
- type: python_script
path: script.py

engines:
- type: docker
image: openproblems/base_python:1
setup:
- type: python
pypi: [singler]
__merge__:
- /src/base/setup_spatialdata_partial.yaml
- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ midtime, lowcpu, lowmem ]
69 changes: 69 additions & 0 deletions src/methods_cell_type_annotation/singler/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import anndata as ad
import os
import shutil

import singlecellexperiment as sce
import singler

## VIASH START
# The following code has been auto-generated by Viash.
par = {
'input_spatial_normalized_counts': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
'input_transcript_assignments': r'resources_test/task_ist_preprocessing/mouse_brain_combined/transcript_assignments.zarr',
'input_scrnaseq_reference': r'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
'celltype_key': r'cell_type',
'output': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_cell_types.h5ad',
'labels_key': r'cell_labels'
}
meta = {
'name': r'singleR',
'functionality_name': r'singleR',
'resources_dir': r'/private/tmp/viash_inject_singleR6465161111175259990',
'executable': r'/private/tmp/viash_inject_singleR6465161111175259990/singleR',
'config': r'/private/tmp/viash_inject_singleR6465161111175259990/.config.vsh.yaml',
'temp_dir': r'/var/folders/fq/ymt0vml175s4yvqxzbmlmpz80000gn/T/',
'cpus': int(r'123'),
'memory_b': int(r'123'),
'memory_kb': int(r'123'),
'memory_mb': int(r'123'),
'memory_gb': int(r'123'),
'memory_tb': int(r'123'),
'memory_pb': int(r'123'),
'memory_kib': int(r'123'),
'memory_mib': int(r'123'),
'memory_gib': int(r'123'),
'memory_tib': int(r'123'),
'memory_pib': int(r'123')
}
dep = {

}

## VIASH END
sce_h5ad = sce.read_h5ad(par['input_spatial_normalized_counts'])
adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])

sce_ref = sce.read_h5ad(par['input_scrnaseq_reference'])

features = [str(x) for x in sce_h5ad.row_data.row_names]

mat = sce_h5ad.assay("counts") ##example has raw, not sure
mat = mat.sorted_indices() ## magic line to make sure the matrix is in the right format for SingleR

mat_ref = sce_ref.assay("normalized")
mat_ref = mat_ref.sorted_indices() ## magic line to make sure the matrix is in the right format for SingleR

## create the reference from our sc data
built = singler.train_single(ref_data = mat_ref,
ref_labels = sce_ref.get_column_data().column("cell_type"),
ref_features = sce_ref.get_row_names(),
test_features = features,)

## annotate the dataset
output = singler.classify_single(mat, ref_prebuilt=built)

adata_sp.obs["cell_type"] = output['best']

# Write output
print('Writing output', flush=True)
adata_sp.write(par['output'])
81 changes: 81 additions & 0 deletions src/methods_transcript_assignment/fastreseg/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
__merge__: /src/api/comp_method_transcript_assignment.yaml

name: fastreseg
label: "fastReseg transcript assignment"
summary: "Spatial segmentation correction using fastReseg method"
description: |
fastReseg is an R package designed to enhance the precision of cell segmentation in spatial transcriptomics by
leveraging transcriptomic data to correct and refine initial image-based segmentation results.
links:
documentation: "https://github.com/openproblems-bio/task_ist_preprocessing"
repository: "https://github.com/openproblems-bio/task_ist_preprocessing"
references:
doi: "10.1038/s41598-025-08733-5"


arguments:
- name: --transcripts_key
type: string
default: "transcripts"
description: "Key for transcripts in the points layer"
- name: --coordinate_system
type: string
default: "global"
description: "Coordinate system for the transcripts"


resources:
- type: bash_script
path: orchestrator.sh
- type: python_script
path: input.py
- type: file
path: fastreseg.yml
dest: environment.yml
- type: r_script
path: script.R
- type: python_script
path: output.py

engines:
- type: docker
image: openproblems/base_python:1
setup:
- type: apt
packages:
- libv8-dev
- libudunits2-dev
- libabsl-dev
- gdal-bin
- libgdal-dev
- r-base
- type: r
packages:
- devtools
- terra
- Matrix
- dbscan
- igraph
- matrixStats
- codetools
- data.table
# Add other CRAN packages here
github:
# Add GitHub packages here if needed
- Nanostring-Biostats/FastReseg
#- type: python
# github:
# - openproblems-bio/core#subdirectory=packages/python/openproblems
__merge__:
- /src/base/setup_txsim_partial.yaml
- /src/base/setup_spatialdata_partial.yaml
- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ midtime, midcpu, midmem ]


##macOS workaround: export DOCKER_DEFAULT_PLATFORM=linux/amd64
13 changes: 13 additions & 0 deletions src/methods_transcript_assignment/fastreseg/fastreseg.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: fastreseg
channels:
- anaconda
- conda-forge
- defaults
dependencies:
- r-base=4.5.1
- r-devtools
- pip
- ipykernel
- jupyter_client
- r-irkernel
prefix: /opt/miniconda3/envs/fastreseg
116 changes: 116 additions & 0 deletions src/methods_transcript_assignment/fastreseg/input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import spatialdata as sd
from tifffile import imwrite
import sys
import numpy as np
import pandas as pd
import xarray as xr
import dask
import txsim as tx
import anndata as ad
import argparse

def parse_arguments():
parser = argparse.ArgumentParser(
description="Process input files and generate output files for FastReseg input"
)
parser.add_argument('input_path_ist', help='Path to the input file')
parser.add_argument('input_segmentation_path', help='Path to the input segmentation file')
parser.add_argument('input_sc_reference_path', help='Path to the input single-cell reference file')
parser.add_argument('output_path_counts', help='Path for the output TSV file')
parser.add_argument('output_path_transcripts', help='Path for the output TIF file')
parser.add_argument('output_path_cell_type', help='Output cell type specification')

return parser.parse_args()

### parsing arguments
args = parse_arguments()
print("args:")
print(args)
input_path = args.input_path_ist
input_segmentation_path = args.input_segmentation_path
input_sc_reference_path = args.input_sc_reference_path
print("path")
print(input_sc_reference_path)
output_path_counts = args.output_path_counts
output_path_transcripts = args.output_path_transcripts
output_path_cell_type = args.output_path_cell_type

## potential other parameters (TODO - make configurable)
um_per_pixel = 0.5
sc_celltype_key = 'cell_type'

### reading the data in
sdata = sd.read_zarr(input_path)

### reading in basic segmentation
sdata_segm = sd.read_zarr(input_segmentation_path)
segmentation_coord_systems = sd.transformations.get_transformation(sdata_segm["segmentation"], get_all=True).keys()

# In case of a translation transformation of the segmentation (e.g. crop of the data), we need to adjust the transcript coordinates
trans = sd.transformations.get_transformation(sdata_segm["segmentation"], get_all=True)['global'].inverse()

transcripts = sd.transform(sdata['transcripts'], to_coordinate_system='global')
transcripts = sd.transform(transcripts, trans, 'global')

print('Assigning transcripts to cell ids', flush=True)
y_coords = transcripts.y.compute().to_numpy(dtype=np.int64)
x_coords = transcripts.x.compute().to_numpy(dtype=np.int64)
if isinstance(sdata_segm["segmentation"], xr.DataTree):
label_image = sdata_segm["segmentation"]["scale0"].image.to_numpy()
else:
label_image = sdata_segm["segmentation"].to_numpy()
cell_id_dask_series = dask.dataframe.from_dask_array(
dask.array.from_array(
label_image[y_coords, x_coords], chunks=tuple(sdata['transcripts'].map_partitions(len).compute())
),
index=sdata['transcripts'].index
)
sdata['transcripts']["cell_id"] = cell_id_dask_series

### extracting transcript ids
print('Transforming transcripts coordinates', flush=True)
transcripts = sd.transform(sdata['transcripts'], to_coordinate_system='global')

transcripts_df = transcripts.compute()
transcripts_df.rename(columns = {'feature_name': 'target',
'transcript_id': 'UMI_transID', 'cell_id': 'UMI_cellID'}, inplace = True)

transcripts_df = transcripts_df.loc[:, ['target', 'x', 'y', 'z', 'UMI_transID', 'UMI_cellID']]
transcripts_df.to_csv(output_path_transcripts)


#### aggregating counts per transcript, based on
df = sdata['transcripts'].compute()
df.feature_name = df.feature_name.astype(str)

adata_sp = tx.preprocessing.generate_adata(df, cell_id_col='cell_id', gene_col='feature_name') #TODO: x and y refers to a specific coordinate system. Decide which space we want to use here. (probably should be handled in the previous assignment step)
adata_sp.layers['counts'] = adata_sp.layers['raw_counts']
del adata_sp.layers['raw_counts']
adata_sp.var["gene_name"] = adata_sp.var_names
print(adata_sp.var_names[1:10])

# currently the function also saves the transcripts in the adata object, but this is not necessary here
del adata_sp.uns['spots']
del adata_sp.uns['pct_noise']


count_df = pd.DataFrame(adata_sp.X.toarray(),
index=adata_sp.obs_names,
columns=adata_sp.var_names)
count_df.to_csv(output_path_counts)

#### run cell annotation with ssam
adata_sc = ad.read_h5ad(input_sc_reference_path)
adata_sc.X = adata_sc.layers["normalized"]
print(adata_sc.var_names[1:10])

shared_genes = [g for g in adata_sc.var_names if g in adata_sp.var_names]
adata_sp = adata_sp[:,shared_genes]

print('Annotating cell types', flush=True)
adata_sp = tx.preprocessing.run_ssam(
adata_sp, transcripts.compute(), adata_sc, um_p_px=um_per_pixel,
cell_id_col='cell_id', gene_col='feature_name', sc_ct_key=sc_celltype_key
)
cell_type_df = adata_sp.obs["ct_ssam"].astype(str)
cell_type_df.to_csv(output_path_cell_type, header=True)
Loading
Loading