openproblems-bio · dariarom94 · Oct 23, 2025 · Oct 28, 2025 · Oct 29, 2025 · Dec 11, 2025
diff --git a/src/methods_cell_type_annotation/singler/config.vsh.yaml b/src/methods_cell_type_annotation/singler/config.vsh.yaml
@@ -0,0 +1,38 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+
+name: singler
+label: "singler"
+summary: "Cell type annotations using single-cell reference with SingleR"
+description: "Cell type annotations using single-cell reference with SingleR"
+
+links:
+  documentation: "https://github.com/openproblems-bio/task_ist_preprocessing"
+  repository: "https://github.com/openproblems-bio/task_ist_preprocessing"
+references:
+  doi: "10.1038/s41590-018-0276-y"
+
+arguments:
+  - name: --labels_key
+    type: string
+    description: The key of the cell labels in the input data.
+    default: cell_labels
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+    - type: python
+      pypi: [singler]
+    __merge__: 
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, lowcpu, lowmem ]
diff --git a/src/methods_cell_type_annotation/singler/script.py b/src/methods_cell_type_annotation/singler/script.py
@@ -0,0 +1,69 @@
+import anndata as ad
+import os
+import shutil
+
+import singlecellexperiment as sce
+import singler
+
+## VIASH START
+# The following code has been auto-generated by Viash.
+par = {
+  'input_spatial_normalized_counts': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+  'input_transcript_assignments': r'resources_test/task_ist_preprocessing/mouse_brain_combined/transcript_assignments.zarr',
+  'input_scrnaseq_reference': r'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+  'celltype_key': r'cell_type',
+  'output': r'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_with_cell_types.h5ad',
+  'labels_key': r'cell_labels'
+}
+meta = {
+  'name': r'singleR',
+  'functionality_name': r'singleR',
+  'resources_dir': r'/private/tmp/viash_inject_singleR6465161111175259990',
+  'executable': r'/private/tmp/viash_inject_singleR6465161111175259990/singleR',
+  'config': r'/private/tmp/viash_inject_singleR6465161111175259990/.config.vsh.yaml',
+  'temp_dir': r'/var/folders/fq/ymt0vml175s4yvqxzbmlmpz80000gn/T/',
+  'cpus': int(r'123'),
+  'memory_b': int(r'123'),
+  'memory_kb': int(r'123'),
+  'memory_mb': int(r'123'),
+  'memory_gb': int(r'123'),
+  'memory_tb': int(r'123'),
+  'memory_pb': int(r'123'),
+  'memory_kib': int(r'123'),
+  'memory_mib': int(r'123'),
+  'memory_gib': int(r'123'),
+  'memory_tib': int(r'123'),
+  'memory_pib': int(r'123')
+}
+dep = {
+
+}
+
+## VIASH END
+sce_h5ad = sce.read_h5ad(par['input_spatial_normalized_counts'])
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+
+sce_ref = sce.read_h5ad(par['input_scrnaseq_reference'])
+
+features = [str(x) for x in sce_h5ad.row_data.row_names]
+
+mat = sce_h5ad.assay("counts") ##example has raw, not sure
+mat = mat.sorted_indices() ## magic line to make sure the matrix is in the right format for SingleR
+
+mat_ref = sce_ref.assay("normalized")
+mat_ref = mat_ref.sorted_indices() ## magic line to make sure the matrix is in the right format for SingleR
+
+## create the reference from our sc data
+built = singler.train_single(ref_data = mat_ref,   
+                             ref_labels = sce_ref.get_column_data().column("cell_type"),    
+                             ref_features = sce_ref.get_row_names(),    
+                             test_features = features,)
+
+## annotate the dataset
+output = singler.classify_single(mat, ref_prebuilt=built)
+
+adata_sp.obs["cell_type"] = output['best']
+
+# Write output
+print('Writing output', flush=True)
+adata_sp.write(par['output'])
diff --git a/src/methods_transcript_assignment/fastreseg/config.vsh.yaml b/src/methods_transcript_assignment/fastreseg/config.vsh.yaml
@@ -0,0 +1,81 @@
+__merge__: /src/api/comp_method_transcript_assignment.yaml
+
+name: fastreseg
+label: "fastReseg transcript assignment"
+summary: "Spatial segmentation correction using fastReseg method"
+description: |
+  fastReseg is an R package designed to enhance the precision of cell segmentation in spatial transcriptomics by 
+  leveraging transcriptomic data to correct and refine initial image-based segmentation results.
+links:
+  documentation: "https://github.com/openproblems-bio/task_ist_preprocessing"
+  repository: "https://github.com/openproblems-bio/task_ist_preprocessing"
+references:
+  doi: "10.1038/s41598-025-08733-5"
+
+
+arguments:
+  - name: --transcripts_key
+    type: string
+    default: "transcripts"
+    description: "Key for transcripts in the points layer"
+  - name: --coordinate_system
+    type: string
+    default: "global"
+    description: "Coordinate system for the transcripts"
+
+
+resources:
+  - type: bash_script
+    path: orchestrator.sh
+  - type: python_script
+    path: input.py
+  - type: file
+    path: fastreseg.yml
+    dest: environment.yml
+  - type: r_script
+    path: script.R
+  - type: python_script
+    path: output.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: apt
+        packages:
+          - libv8-dev
+          - libudunits2-dev
+          - libabsl-dev
+          - gdal-bin
+          - libgdal-dev
+          - r-base
+      - type: r
+        packages:
+          - devtools
+          - terra
+          - Matrix
+          - dbscan
+          - igraph
+          - matrixStats
+          - codetools
+          - data.table
+          # Add other CRAN packages here
+        github:
+          # Add GitHub packages here if needed
+          - Nanostring-Biostats/FastReseg
+      #- type: python
+      #  github:
+      #    - openproblems-bio/core#subdirectory=packages/python/openproblems
+    __merge__: 
+      - /src/base/setup_txsim_partial.yaml
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, midcpu, midmem ]
+
+
+##macOS workaround: export DOCKER_DEFAULT_PLATFORM=linux/amd64
diff --git a/src/methods_transcript_assignment/fastreseg/fastreseg.yml b/src/methods_transcript_assignment/fastreseg/fastreseg.yml
@@ -0,0 +1,13 @@
+name: fastreseg
+channels:
+  - anaconda
+  - conda-forge
+  - defaults
+dependencies:
+  - r-base=4.5.1
+  - r-devtools
+  - pip
+  - ipykernel
+  - jupyter_client
+  - r-irkernel
+prefix: /opt/miniconda3/envs/fastreseg
diff --git a/src/methods_transcript_assignment/fastreseg/input.py b/src/methods_transcript_assignment/fastreseg/input.py
@@ -0,0 +1,116 @@
+import spatialdata as sd
+from tifffile import imwrite  
+import sys
+import numpy as np
+import pandas as pd
+import xarray as xr
+import dask
+import txsim as tx
+import anndata as ad
+import argparse
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(
+        description="Process input files and generate output files for FastReseg input"
+    )
+    parser.add_argument('input_path_ist', help='Path to the input file')
+    parser.add_argument('input_segmentation_path', help='Path to the input segmentation file')
+    parser.add_argument('input_sc_reference_path', help='Path to the input single-cell reference file')
+    parser.add_argument('output_path_counts', help='Path for the output TSV file')
+    parser.add_argument('output_path_transcripts', help='Path for the output TIF file')
+    parser.add_argument('output_path_cell_type', help='Output cell type specification')
+
+    return parser.parse_args()
+
+### parsing arguments
+args = parse_arguments()
+print("args:")
+print(args)
+input_path = args.input_path_ist
+input_segmentation_path = args.input_segmentation_path
+input_sc_reference_path = args.input_sc_reference_path
+print("path")
+print(input_sc_reference_path)
+output_path_counts = args.output_path_counts
+output_path_transcripts = args.output_path_transcripts
+output_path_cell_type = args.output_path_cell_type
+
+## potential other parameters (TODO - make configurable)
+um_per_pixel = 0.5
+sc_celltype_key = 'cell_type'
+
+### reading the data in
+sdata = sd.read_zarr(input_path)
+
+### reading in basic segmentation
+sdata_segm = sd.read_zarr(input_segmentation_path)
+segmentation_coord_systems = sd.transformations.get_transformation(sdata_segm["segmentation"], get_all=True).keys()
+
+# In case of a translation transformation of the segmentation (e.g. crop of the data), we need to adjust the transcript coordinates
+trans = sd.transformations.get_transformation(sdata_segm["segmentation"], get_all=True)['global'].inverse()
+
+transcripts = sd.transform(sdata['transcripts'], to_coordinate_system='global')
+transcripts = sd.transform(transcripts, trans, 'global')
+
+print('Assigning transcripts to cell ids', flush=True)
+y_coords = transcripts.y.compute().to_numpy(dtype=np.int64)
+x_coords = transcripts.x.compute().to_numpy(dtype=np.int64)
+if isinstance(sdata_segm["segmentation"], xr.DataTree):
+    label_image = sdata_segm["segmentation"]["scale0"].image.to_numpy() 
+else:
+    label_image = sdata_segm["segmentation"].to_numpy()
+cell_id_dask_series = dask.dataframe.from_dask_array(
+    dask.array.from_array(
+        label_image[y_coords, x_coords], chunks=tuple(sdata['transcripts'].map_partitions(len).compute())
+    ), 
+    index=sdata['transcripts'].index
+)
+sdata['transcripts']["cell_id"] = cell_id_dask_series
+
+### extracting transcript ids
+print('Transforming transcripts coordinates', flush=True)
+transcripts = sd.transform(sdata['transcripts'], to_coordinate_system='global')
+
+transcripts_df = transcripts.compute()
+transcripts_df.rename(columns = {'feature_name': 'target', 
+'transcript_id': 'UMI_transID', 'cell_id': 'UMI_cellID'}, inplace = True)
+
+transcripts_df = transcripts_df.loc[:, ['target', 'x', 'y', 'z', 'UMI_transID', 'UMI_cellID']]
+transcripts_df.to_csv(output_path_transcripts)
+
+
+#### aggregating counts per transcript, based on 
+df = sdata['transcripts'].compute()
+df.feature_name = df.feature_name.astype(str)
+
+adata_sp = tx.preprocessing.generate_adata(df, cell_id_col='cell_id', gene_col='feature_name') #TODO: x and y refers to a specific coordinate system. Decide which space we want to use here. (probably should be handled in the previous assignment step)
+adata_sp.layers['counts'] = adata_sp.layers['raw_counts']
+del adata_sp.layers['raw_counts']
+adata_sp.var["gene_name"] = adata_sp.var_names
+print(adata_sp.var_names[1:10])
+
+# currently the function also saves the transcripts in the adata object, but this is not necessary here
+del adata_sp.uns['spots']
+del adata_sp.uns['pct_noise']
+
+
+count_df = pd.DataFrame(adata_sp.X.toarray(), 
+                       index=adata_sp.obs_names, 
+                       columns=adata_sp.var_names)
+count_df.to_csv(output_path_counts)
+
+#### run cell annotation with ssam
+adata_sc = ad.read_h5ad(input_sc_reference_path)
+adata_sc.X = adata_sc.layers["normalized"]
+print(adata_sc.var_names[1:10])
+
+shared_genes = [g for g in adata_sc.var_names if g in adata_sp.var_names]
+adata_sp = adata_sp[:,shared_genes] 
+
+print('Annotating cell types', flush=True)
+adata_sp = tx.preprocessing.run_ssam(
+    adata_sp, transcripts.compute(), adata_sc, um_p_px=um_per_pixel, 
+    cell_id_col='cell_id', gene_col='feature_name', sc_ct_key=sc_celltype_key
+)
+cell_type_df = adata_sp.obs["ct_ssam"].astype(str)
+cell_type_df.to_csv(output_path_cell_type, header=True)