From e5f9d07caf8cabd2a6c6ef687aec28ac0637e0a0 Mon Sep 17 00:00:00 2001 From: Olga Brovkina Date: Mon, 24 Mar 2025 16:19:44 +0100 Subject: [PATCH 1/4] Add Points2Regions module with clustering and plotting templates --- modules/local/points2regions/Dockerfile | 8 ++ modules/local/points2regions/environment.yml | 7 ++ modules/local/points2regions/main.nf | 42 ++++++++++ modules/local/points2regions/meta.yml | 69 ++++++++++++++++ .../templates/ficture_preprocess.py | 80 +++++++++++++++++++ .../templates/points2regions_cluster.py | 40 ++++++++++ .../templates/points2regions_plot.py | 44 ++++++++++ .../local/points2regions/tests/main.nf.test | 73 +++++++++++++++++ 8 files changed, 363 insertions(+) create mode 100644 modules/local/points2regions/Dockerfile create mode 100644 modules/local/points2regions/environment.yml create mode 100644 modules/local/points2regions/main.nf create mode 100644 modules/local/points2regions/meta.yml create mode 100644 modules/local/points2regions/templates/ficture_preprocess.py create mode 100644 modules/local/points2regions/templates/points2regions_cluster.py create mode 100644 modules/local/points2regions/templates/points2regions_plot.py create mode 100644 modules/local/points2regions/tests/main.nf.test diff --git a/modules/local/points2regions/Dockerfile b/modules/local/points2regions/Dockerfile new file mode 100644 index 0000000..abcf17a --- /dev/null +++ b/modules/local/points2regions/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.10-slim + +LABEL maintainer="Olga Brovkina " +LABEL description="Container for Points2Regions" + +RUN pip install points2regions + +ENTRYPOINT ["points2regions"] diff --git a/modules/local/points2regions/environment.yml b/modules/local/points2regions/environment.yml new file mode 100644 index 0000000..4b3c9d3 --- /dev/null +++ b/modules/local/points2regions/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "YOUR-TOOL-HERE" diff --git a/modules/local/points2regions/main.nf b/modules/local/points2regions/main.nf new file mode 100644 index 0000000..11e8174 --- /dev/null +++ b/modules/local/points2regions/main.nf @@ -0,0 +1,42 @@ +process POINTS2REGIONS_CLUSTER { + tag "$meta.id" + label 'points_cluster' + container "community.wave.seqera.io/library/pip_points2regions:9f5bb888586554a6" + + input: + tuple val(meta), path(transcripts) + val(smoothing) + val (num_clusters) + + output: + tuple val(meta), path("clustered_s${smoothing}.csv"), emit: clusters + + when: + task.ext.when == null || task.ext.when + + script: + + """ + python modules/local/points2regions/templates/points2regions_cluster.py \\ + --transcripts ${transcripts} \\ + --smoothing ${smoothing} \\ + --num_clusters ${num_clusters} + """ +} + +process POINTS2REGIONS_PLOT { + tag "$meta.id" + label 'points_visual' + input: + tuple val(meta), path(clusters) + val smoothing + + output: + path "cluster_plot.png" + path "versions.yml" + + script: + """ + python modules/local/points2regions/templates/points2regions_plot.py + """ +} diff --git a/modules/local/points2regions/meta.yml b/modules/local/points2regions/meta.yml new file mode 100644 index 0000000..80a19eb --- /dev/null +++ b/modules/local/points2regions/meta.yml @@ -0,0 +1,69 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "points2regions" +## TODO nf-core: Add a description of the 
module and list keywords +description: write your description here +keywords: + - sort + - example + - genomics +tools: + - "points2regions": + ## TODO nf-core: Add a description and other details for the software below + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: + identifier: + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + +## TODO nf-core: Add a description of all of the variables used as output +output: + - bam: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + ## TODO nf-core: Delete / customise this example output + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@brovolia" +maintainers: + - "@brovolia" diff --git a/modules/local/points2regions/templates/ficture_preprocess.py b/modules/local/points2regions/templates/ficture_preprocess.py new file mode 100644 index 0000000..37fb4f0 --- /dev/null +++ b/modules/local/points2regions/templates/ficture_preprocess.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +import sys +import os +import re +import logging +import pandas as pd +import gzip + +def format_xenium(): + + print("[START]") + + negctrl_regex = "BLANK\\|NegCon" + if ( "${params.negative_control_regex}" ): + negctrl_regex = "${params.negative_control_regex}" + + transripts = "${transcripts}" + features = "${features}" + + unit_info=['X','Y','gene','cell_id','overlaps_nucleus'] + oheader = unit_info + ['Count'] + + feature=pd.DataFrame() + xmin=sys.maxsize + xmax=0 + ymin=sys.maxsize + ymax=0 + + output = "processed_transcripts.tsv.gz" + feature_file = "feature.clean.tsv.gz" + min_phred_score = 15 + + with gzip.open(output, 'wt') as wf: + wf.write('\\t'.join(oheader) + '\\n') + + for chunk in pd.read_csv(transripts,header=0,chunksize=500000): + chunk = chunk.loc[(chunk.qv > min_phred_score)] + chunk.rename(columns = {'feature_name':'gene'}, inplace=True) + if negctrl_regex != '': + chunk = chunk[~chunk.gene.str.contains(negctrl_regex, flags=re.IGNORECASE, regex=True)] + chunk.rename(columns = {'x_location':'X', 'y_location':'Y'}, inplace=True) + chunk['Count'] = 1 + chunk[oheader].to_csv(output,sep='\\t',mode='a',index=False,header=False,float_format="%.2f") + logging.info(f"{chunk.shape[0]}") + feature = pd.concat([feature, chunk.groupby(by='gene').agg({'Count':"sum"}).reset_index()]) + x0 = chunk.X.min() + x1 = chunk.X.max() + y0 = chunk.Y.min() + y1 = chunk.Y.max() + xmin = min(int(xmin), int(x0)) + xmax = max(int(xmax), int(x1)) + ymin = min(int(ymin), int(y0)) + ymax = max(int(ymax), int(y1)) + + if os.path.exists(features): + feature_list = [] + with open(features, 'r') as ff: + for line in 
ff: + feature_list.append(line.strip('\\n')) + feature = feature.groupby(by='gene').agg({'Count':"sum"}).reset_index() + feature = feature[[x in feature_list for x in feature['gene']]] + feature.to_csv(feature_file,sep='\\t',index=False) + + f = os.path.join( os.path.dirname(output), "coordinate_minmax.tsv" ) + with open(f, 'w') as wf: + wf.write(f"xmin\\t{xmin}\\n") + wf.write(f"xmax\\t{xmax}\\n") + wf.write(f"ymin\\t{ymin}\\n") + wf.write(f"ymax\\t{ymax}\\n") + + # Output version information + with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(f'ficture_preprocess: v.1.0.0"\\n') + + print("[FINISH]") + +if __name__ == '__main__': + format_xenium() diff --git a/modules/local/points2regions/templates/points2regions_cluster.py b/modules/local/points2regions/templates/points2regions_cluster.py new file mode 100644 index 0000000..d6ecbd5 --- /dev/null +++ b/modules/local/points2regions/templates/points2regions_cluster.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +import sys +import os +import pandas as pd +from points2regions import Points2Regions # adjust if function is local + +def cluster_points2regions(): + print("[START]") + + input_csv = "${transcripts}" + output_csv = "clustered.csv" + smoothing = int("${smoothing}") + num_clusters = int("${num_clusters}") + + # Read input + data = pd.read_csv(input_csv) + + # Run clustering + mdl = Points2Regions( + data[['X', 'Y']], + data['gene'], + pixel_width=1, + pixel_smoothing=smoothing + ) + + data['clusters'] = mdl.fit_predict(num_clusters=num_clusters, output='marker') + + # Save result + data.to_csv(output_csv, index=False) + + # Write version info + with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(' points2regions_cluster: "v1.0.0"\\n') + + print("[FINISH]") + +if __name__ == "__main__": + cluster_points2regions() diff --git a/modules/local/points2regions/templates/points2regions_plot.py b/modules/local/points2regions/templates/points2regions_plot.py new file mode 100644 index 0000000..348ad13 --- /dev/null +++ b/modules/local/points2regions/templates/points2regions_plot.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +import pandas as pd +import matplotlib.pyplot as plt + +def plot_clusters(data, smoothing, output_file="cluster_plot.png"): + """ + Plot spatial clusters from Points2Regions output. + + Args: + data (pd.DataFrame): Data with 'X', 'Y', and 'Clusters' columns. + smoothing (int): Smoothing value used (for the plot title). + output_file (str): Path to save the plot image. 
+ """ + plt.figure(figsize=(6, 6)) + plt.scatter( + data['X'], + data['Y'], + c=data['Clusters'], + alpha=0.7, + s=0.5, + cmap='tab20' + ) + plt.title(f'Smoothing: {smoothing}') + plt.axis('off') + plt.axis('scaled') + plt.tight_layout() + plt.savefig(output_file, dpi=300) + +def main(): + input_csv = "${clustered}" + smoothing = int("${smoothing}") + output_file = "cluster_plot.png" + + data = pd.read_csv(input_csv) + plot_clusters(data, smoothing, output_file) + + # Save version info + with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(' points2regions_plot: "v1.0.0"\\n') + +if __name__ == "__main__": + main() diff --git a/modules/local/points2regions/tests/main.nf.test b/modules/local/points2regions/tests/main.nf.test new file mode 100644 index 0000000..e92cf93 --- /dev/null +++ b/modules/local/points2regions/tests/main.nf.test @@ -0,0 +1,73 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test points2regions +nextflow_process { + + name "Test Process POINTS2REGIONS" + script "../main.nf" + process "POINTS2REGIONS" + + tag "modules" + tag "modules_" + tag "points2regions" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. 
+ ) + } + + } + +} From e984336da6df1857529e5967b1514e1d2d9ecb82 Mon Sep 17 00:00:00 2001 From: Olga Brovkina Date: Mon, 24 Mar 2025 16:43:35 +0100 Subject: [PATCH 2/4] Updated Credits --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2a84b4d..e73c18c 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ We thank the following people for their extensive assistance in the development - Tobias Krause - Krešimir Beštak (kbestak) - Matthias Hörtenhuber (mashehu) +- Olga Brovkina (brovolia) ## Contributions and Support From 7ae08987f2b0c9eccdc923ae2f476dba10cc169e Mon Sep 17 00:00:00 2001 From: Olga Brovkina Date: Tue, 25 Mar 2025 18:26:56 +0100 Subject: [PATCH 3/4] added test for points2regions --- .nf-test.log | 50 +++++++ .../meta/mock.nf | 97 ++++++++++++++ .../meta/nextflow.log | 126 ++++++++++++++++++ .../meta/output_0.json | 1 + .../meta/output_1.json | 1 + .../meta/output_2.json | 1 + .../meta/output_clustered.json | 1 + .../meta/output_clustered_plot.json | 1 + .../meta/params.json | 1 + .../meta/std.err | 0 .../meta/std.out | 31 +++++ .../meta/trace.csv | 3 + .../meta/workflow.json | 1 + modules.json | 52 ++++++-- modules/local/points2regions/Dockerfile | 8 -- modules/local/points2regions/environment.yml | 10 +- modules/local/points2regions/main.nf | 33 ++--- modules/local/points2regions/meta.yml | 73 +++++----- .../templates/ficture_preprocess.py | 80 ----------- .../templates/points2regions_cluster.py | 65 ++++++--- .../templates/points2regions_plot.py | 44 ------ .../local/points2regions/tests/main.nf.test | 62 +++------ modules/local/scportrait/environment.yml | 7 + modules/local/scportrait/main.nf | 91 +++++++++++++ modules/local/scportrait/meta.yml | 69 ++++++++++ modules/local/scportrait/tests/main.nf.test | 73 ++++++++++ .../custom/dumpsoftwareversions/main.nf | 24 ++++ .../custom/dumpsoftwareversions/meta.yml | 34 +++++ .../templates/dumpsoftwareversions.py | 101 ++++++++++++++ modules/nf-core/unzip/environment.yml | 7 + modules/nf-core/unzip/main.nf | 49 +++++++ modules/nf-core/unzip/meta.yml | 46 +++++++ modules/nf-core/unzip/nextflow.config | 8 ++ modules/nf-core/unzip/tests/main.nf.test | 54 ++++++++ modules/nf-core/unzip/tests/main.nf.test.snap | 76 +++++++++++ modules/nf-core/unzip/tests/tags.yml | 2 + 36 files changed, 1109 insertions(+), 273 deletions(-) create mode 100644 .nf-test.log create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/mock.nf create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/nextflow.log create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_0.json create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_1.json create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_2.json create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered.json create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered_plot.json create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/params.json create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.err create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.out create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/trace.csv create mode 100644 .nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/workflow.json delete mode 100644 modules/local/points2regions/Dockerfile delete mode 100644 
modules/local/points2regions/templates/ficture_preprocess.py delete mode 100644 modules/local/points2regions/templates/points2regions_plot.py create mode 100644 modules/local/scportrait/environment.yml create mode 100644 modules/local/scportrait/main.nf create mode 100644 modules/local/scportrait/meta.yml create mode 100644 modules/local/scportrait/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/main.nf create mode 100644 modules/nf-core/custom/dumpsoftwareversions/meta.yml create mode 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py create mode 100644 modules/nf-core/unzip/environment.yml create mode 100644 modules/nf-core/unzip/main.nf create mode 100644 modules/nf-core/unzip/meta.yml create mode 100644 modules/nf-core/unzip/nextflow.config create mode 100644 modules/nf-core/unzip/tests/main.nf.test create mode 100644 modules/nf-core/unzip/tests/main.nf.test.snap create mode 100644 modules/nf-core/unzip/tests/tags.yml diff --git a/.nf-test.log b/.nf-test.log new file mode 100644 index 0000000..bbf8715 --- /dev/null +++ b/.nf-test.log @@ -0,0 +1,50 @@ +Mar-25 18:17:56.633 [main] INFO com.askimed.nf.test.App - nf-test 0.9.2 +Mar-25 18:17:56.639 [main] INFO com.askimed.nf.test.App - Arguments: [test, modules/local/points2regions/tests/main.nf.test, --profile, docker] +Mar-25 18:17:57.026 [main] INFO com.askimed.nf.test.App - Nextflow Version: 24.10.5 +Mar-25 18:17:57.028 [main] WARN com.askimed.nf.test.commands.RunTestsCommand - No nf-test config file found. +Mar-25 18:17:57.058 [main] INFO com.askimed.nf.test.lang.dependencies.DependencyResolver - Loaded 43 files from directory /Users/obrovkina/Documents/Projects/spatialxe in 0.029 sec +Mar-25 18:17:57.059 [main] INFO com.askimed.nf.test.lang.dependencies.DependencyResolver - Found 1 files containing tests. +Mar-25 18:17:57.059 [main] DEBUG com.askimed.nf.test.lang.dependencies.DependencyResolver - Found files: [/Users/obrovkina/Documents/Projects/spatialxe/modules/local/points2regions/tests/main.nf.test] +Mar-25 18:17:57.280 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Found 1 tests to execute. +Mar-25 18:17:57.280 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Started test plan +Mar-25 18:17:57.280 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Running testsuite 'Test Process POINTS2REGIONS_CLUSTER' from file '/Users/obrovkina/Documents/Projects/spatialxe/modules/local/points2regions/tests/main.nf.test'. +Mar-25 18:17:57.280 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Run test '9b856baf: cluster - with Xenium transcripts.csv'. type: com.askimed.nf.test.lang.process.ProcessTest +Mar-25 18:18:11.859 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Test '9b856baf: cluster - with Xenium transcripts.csv' finished. 
status: FAILED +org.codehaus.groovy.runtime.powerassert.PowerAssertionError: 2 of 2 assertions failed + at com.askimed.nf.test.lang.extensions.GlobalMethods.assertAll(GlobalMethods.java:48) + at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103) + at java.base/java.lang.reflect.Method.invoke(Method.java:580) + at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107) + at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323) + at org.codehaus.groovy.runtime.callsite.StaticMetaMethodSite.invoke(StaticMetaMethodSite.java:44) + at org.codehaus.groovy.runtime.callsite.StaticMetaMethodSite.callStatic(StaticMetaMethodSite.java:100) + at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCallStatic(CallSiteArray.java:55) + at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callStatic(AbstractCallSite.java:217) + at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callStatic(AbstractCallSite.java:240) + at main_nf$_run_closure1$_closure3$_closure7.doCall(main.nf.test:33) + at main_nf$_run_closure1$_closure3$_closure7.doCall(main.nf.test) + at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103) + at java.base/java.lang.reflect.Method.invoke(Method.java:580) + at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:107) + at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:323) + at org.codehaus.groovy.runtime.metaclass.ClosureMetaClass.invokeMethod(ClosureMetaClass.java:274) + at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1030) + at groovy.lang.Closure.call(Closure.java:427) + at groovy.lang.Closure.call(Closure.java:406) + at com.askimed.nf.test.lang.TestCode.execute(TestCode.java:16) + at com.askimed.nf.test.lang.process.ProcessTest.execute(ProcessTest.java:171) + at com.askimed.nf.test.core.TestExecutionEngine.execute(TestExecutionEngine.java:165) + at com.askimed.nf.test.commands.RunTestsCommand.execute(RunTestsCommand.java:298) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:43) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:18) + at picocli.CommandLine.executeUserObject(CommandLine.java:1953) + at picocli.CommandLine.access$1300(CommandLine.java:145) + at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) + at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) + at picocli.CommandLine.execute(CommandLine.java:2078) + at com.askimed.nf.test.App.run(App.java:39) + at com.askimed.nf.test.App.main(App.java:46) +Mar-25 18:18:11.860 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Testsuite 'Test Process POINTS2REGIONS_CLUSTER' finished. snapshot file: false, skipped tests: false, failed tests: true +Mar-25 18:18:11.860 [main] INFO com.askimed.nf.test.core.TestExecutionEngine - Executed 1 tests. 1 tests failed. Done! 
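The failure captured in this log (and in the committed nextflow.log and std.out further down) comes from a run in which the `script:` block of POINTS2REGIONS_CLUSTER still ended with a `cat <<-END_VERSIONS` heredoc placed after the `template` statement. A Nextflow `script:` block is plain Groovy, and the task command is whatever its last expression evaluates to, so a trailing string literal silently shadows the template; that is why the logs show only the heredoc being executed and `clustered_s20.csv` never being produced. A minimal sketch of the two mutually exclusive forms (process body trimmed; the CLI flags in the second variant are hypothetical and shown only for contrast):

    script:
    // EITHER render the bundled template, which must then create every
    // declared output itself, including versions.yml ...
    template 'points2regions_cluster.py'

    // ... OR return a single command string as the block's last expression:
    // script:
    // """
    // points2regions_cluster.py --transcripts ${transcripts}   // hypothetical CLI
    // """

The main.nf hunk later in this patch keeps only the template form.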
diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/mock.nf b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/mock.nf new file mode 100644 index 0000000..9af3400 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/mock.nf @@ -0,0 +1,97 @@ +import groovy.json.JsonGenerator +import groovy.json.JsonGenerator.Converter + +nextflow.enable.dsl=2 + +// comes from nf-test to store json files +params.nf_test_output = "" + +// include dependencies + +include { UNZIP } from '/Users/obrovkina/Documents/Projects/spatialxe/modules/nf-core/unzip/main.nf' + + +// include test process +include { POINTS2REGIONS_CLUSTER } from '/Users/obrovkina/Documents/Projects/spatialxe/modules/local/points2regions/tests/../main.nf' + +// define custom rules for JSON that will be generated. +def jsonOutput = + new JsonGenerator.Options() + .addConverter(Path) { value -> value.toAbsolutePath().toString() } // Custom converter for Path. Only filename + .build() + +def jsonWorkflowOutput = new JsonGenerator.Options().excludeNulls().build() + + +workflow { + + // run dependencies + + { + def input = [] + + input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)] + + UNZIP(*input) + } + + + // process mapping + def input = [] + + input[0] = Channel.of([ + [id: "test"], + ]).combine(UNZIP.out.unzipped_archive.map { it[1] } + "/transcripts.csv") + input[1] = 20 + input[2] = 5 + + //---- + + //run process + POINTS2REGIONS_CLUSTER(*input) + + if (POINTS2REGIONS_CLUSTER.output){ + + // consumes all named output channels and stores items in a json file + for (def name in POINTS2REGIONS_CLUSTER.out.getNames()) { + serializeChannel(name, POINTS2REGIONS_CLUSTER.out.getProperty(name), jsonOutput) + } + + // consumes all unnamed output channels and stores items in a json file + def array = POINTS2REGIONS_CLUSTER.out as Object[] + for (def i = 0; i < array.length ; i++) { + serializeChannel(i, array[i], jsonOutput) + } + + } + +} + +def serializeChannel(name, channel, jsonOutput) { + def _name = name + def list = [ ] + channel.subscribe( + onNext: { + list.add(it) + }, + onComplete: { + def map = new HashMap() + map[_name] = list + def filename = "${params.nf_test_output}/output_${_name}.json" + new File(filename).text = jsonOutput.toJson(map) + } + ) +} + + +workflow.onComplete { + + def result = [ + success: workflow.success, + exitStatus: workflow.exitStatus, + errorMessage: workflow.errorMessage, + errorReport: workflow.errorReport + ] + new File("${params.nf_test_output}/workflow.json").text = jsonWorkflowOutput.toJson(result) + +} diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/nextflow.log b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/nextflow.log new file mode 100644 index 0000000..07c8cd4 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/nextflow.log @@ -0,0 +1,126 @@ +Mar-25 18:17:57.928 [main] DEBUG nextflow.cli.Launcher - $> nextflow -quiet -log /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/nextflow.log run /Users/obrovkina/Documents/Projects/spatialxe/.nf-test-9b856baf5b74f66fed52051240a30667.nf -c /Users/obrovkina/Documents/Projects/spatialxe/nextflow.config -params-file /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/params.json -ansi-log false -profile docker -with-trace 
/Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/trace.csv -w /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work +Mar-25 18:17:57.958 [main] INFO nextflow.cli.CmdRun - N E X T F L O W ~ version 24.10.5 +Mar-25 18:17:57.969 [main] DEBUG nextflow.plugin.PluginsFacade - Setting up plugin manager > mode=prod; embedded=false; plugins-dir=/Users/obrovkina/.nextflow/plugins; core-plugins: nf-amazon@2.9.2,nf-azure@1.10.2,nf-cloudcache@0.4.2,nf-codecommit@0.2.2,nf-console@1.1.4,nf-google@1.15.4,nf-tower@1.9.3,nf-wave@1.7.4 +Mar-25 18:17:57.986 [main] INFO o.pf4j.DefaultPluginStatusProvider - Enabled plugins: [] +Mar-25 18:17:57.986 [main] INFO o.pf4j.DefaultPluginStatusProvider - Disabled plugins: [] +Mar-25 18:17:57.988 [main] INFO org.pf4j.DefaultPluginManager - PF4J version 3.12.0 in 'deployment' mode +Mar-25 18:17:57.994 [main] INFO org.pf4j.AbstractPluginManager - No plugins +Mar-25 18:17:58.005 [main] DEBUG nextflow.config.ConfigBuilder - Found config base: /Users/obrovkina/Documents/Projects/spatialxe/nextflow.config +Mar-25 18:17:58.007 [main] DEBUG nextflow.config.ConfigBuilder - User config file: /Users/obrovkina/Documents/Projects/spatialxe/nextflow.config +Mar-25 18:17:58.008 [main] DEBUG nextflow.config.ConfigBuilder - Parsing config file: /Users/obrovkina/Documents/Projects/spatialxe/nextflow.config +Mar-25 18:17:58.008 [main] DEBUG nextflow.config.ConfigBuilder - Parsing config file: /Users/obrovkina/Documents/Projects/spatialxe/nextflow.config +Mar-25 18:17:58.021 [main] DEBUG n.secret.LocalSecretsProvider - Secrets store: /Users/obrovkina/.nextflow/secrets/store.json +Mar-25 18:17:58.022 [main] DEBUG nextflow.secret.SecretsLoader - Discovered secrets providers: [nextflow.secret.LocalSecretsProvider@253c1256] - activable => nextflow.secret.LocalSecretsProvider@253c1256 +Mar-25 18:17:58.031 [main] DEBUG nextflow.config.ConfigBuilder - Applying config profile: `docker` +Mar-25 18:17:58.880 [main] DEBUG nextflow.config.ConfigBuilder - Applying config profile: `docker` +Mar-25 18:17:59.084 [main] DEBUG nextflow.config.ConfigBuilder - Available config profiles: [bih, cfc_dev, uzl_omics, ifb_core, embl_hd, denbi_qbic, alice, mjolnir_globe, uppmax, giga, incliva, ilifu, ki_luria, uge, icr_alma, rosalind_uge, lugh, mccleary, unibe_ibu, vai, czbiohub_aws, jax, roslin, ccga_med, tes, scw, unc_longleaf, tigem, tubingen_apg, google, apollo, ipop_up, vsc_calcua, pdc_kth, googlels, ceci_nic5, humantechnopole, stjude, daisybio, eddie, medair, biowulf, apptainer, bi, bigpurple, adcra, cedars, pawsey_setonix, vsc_kul_uhasselt, pawsey_nimbus, ucl_myriad, utd_ganymede, charliecloud, seattlechildrens, icr_davros, ceres, arm, munin, rosalind, hasta, cfc, uzh, shu_bmrc, ebi_codon_slurm, ebc, ccga_dx, crick, ku_sund_danhead, marvin, shifter, biohpc_gen, mana, mamba, york_viking, unc_lccc, wehi, awsbatch, wustl_htcf, arcc, ceci_dragon2, imperial, maestro, software_license, cannon, genotoul, nci_gadi, abims, janelia, nu_genomics, googlebatch, oist, sahmri, kaust, mpcdf, leicester, vsc_ugent, create, sage, cambridge, jex, podman, ebi_codon, cheaha, xanadu, nyu_hpc, test, marjorie, computerome, ucd_sonic, seg_globe, sanger, dkfz, bluebear, pasteur, einstein, ethz_euler, m3c, test_full, imb, ucl_cscluster, tuos_stanage, azurebatch, hki, seadragon, crukmi, csiro_petrichor, qmul_apocrita, wave, docker, engaging, gis, hypatia, psmn, eva, unity, cropdiversityhpc, nygc, fgcz, conda, crg, singularity, mpcdf_viper, pe2, 
self_hosted_runner, tufts, uw_hyak_pedslabs, debug, genouest, cbe, unsw_katana, gitpod, phoenix, seawulf, uod_hpc, fub_curta, uct_hpc, aws_tower, binac, fsu_draco] +Mar-25 18:17:59.100 [main] DEBUG nextflow.cli.CmdRun - Applied DSL=2 from script declaration +Mar-25 18:17:59.109 [main] INFO nextflow.cli.CmdRun - Launching `/Users/obrovkina/Documents/Projects/spatialxe/.nf-test-9b856baf5b74f66fed52051240a30667.nf` [shrivelled_shaw] DSL2 - revision: 7072840f61 +Mar-25 18:17:59.109 [main] DEBUG nextflow.plugin.PluginsFacade - Plugins declared=[nf-schema@2.3.0] +Mar-25 18:17:59.110 [main] DEBUG nextflow.plugin.PluginsFacade - Plugins default=[] +Mar-25 18:17:59.110 [main] DEBUG nextflow.plugin.PluginsFacade - Plugins resolved requirement=[nf-schema@2.3.0] +Mar-25 18:17:59.110 [main] DEBUG nextflow.plugin.PluginUpdater - Installing plugin nf-schema version: 2.3.0 +Mar-25 18:17:59.114 [main] INFO org.pf4j.AbstractPluginManager - Plugin 'nf-schema@2.3.0' resolved +Mar-25 18:17:59.114 [main] INFO org.pf4j.AbstractPluginManager - Start plugin 'nf-schema@2.3.0' +Mar-25 18:17:59.117 [main] DEBUG nextflow.plugin.BasePlugin - Plugin started nf-schema@2.3.0 +Mar-25 18:17:59.140 [main] DEBUG nextflow.Session - Session UUID: 4a378e5e-fed9-46b0-b811-2dcfdbaf0a82 +Mar-25 18:17:59.141 [main] DEBUG nextflow.Session - Run name: shrivelled_shaw +Mar-25 18:17:59.141 [main] DEBUG nextflow.Session - Executor pool size: 11 +Mar-25 18:17:59.144 [main] DEBUG nextflow.file.FilePorter - File porter settings maxRetries=3; maxTransfers=50; pollTimeout=null +Mar-25 18:17:59.146 [main] DEBUG nextflow.util.ThreadPoolBuilder - Creating thread pool 'FileTransfer' minSize=10; maxSize=33; workQueue=LinkedBlockingQueue[-1]; allowCoreThreadTimeout=false +Mar-25 18:17:59.305 [main] DEBUG nextflow.cli.CmdRun - + Version: 24.10.5 build 5935 + Created: 04-03-2025 17:55 UTC (18:55 CEST) + System: Mac OS X 14.1 + Runtime: Groovy 4.0.23 on OpenJDK 64-Bit Server VM 21+35 + Encoding: UTF-8 (UTF-8) + Process: 44855@IKMBs-MacBook-Pro.local [127.0.0.1] + CPUs: 11 - Mem: 36 GB (62.6 MB) - Swap: 1 GB (938.9 MB) +Mar-25 18:17:59.314 [main] DEBUG nextflow.Session - Work-dir: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work [Mac OS X] +Mar-25 18:17:59.314 [main] DEBUG nextflow.Session - Script base path does not exist or is not a directory: /Users/obrovkina/Documents/Projects/spatialxe/bin +Mar-25 18:17:59.322 [main] DEBUG nextflow.executor.ExecutorFactory - Extension executors providers=[] +Mar-25 18:17:59.326 [main] DEBUG nextflow.Session - Observer factory: DefaultObserverFactory +Mar-25 18:17:59.334 [main] DEBUG nextflow.Session - Observer factory: ValidationObserverFactory +Mar-25 18:17:59.361 [main] DEBUG nextflow.cache.CacheFactory - Using Nextflow cache factory: nextflow.cache.DefaultCacheFactory +Mar-25 18:17:59.365 [main] DEBUG nextflow.util.CustomThreadPool - Creating default thread pool > poolSize: 12; maxThreads: 1000 +Mar-25 18:17:59.397 [main] DEBUG nextflow.Session - Session start +Mar-25 18:17:59.398 [main] DEBUG nextflow.trace.TraceFileObserver - Workflow started -- trace file: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/trace.csv +Mar-25 18:17:59.507 [main] DEBUG nextflow.script.ScriptRunner - > Launching execution +Mar-25 18:17:59.665 [main] DEBUG nextflow.script.ProcessConfig - Config settings `withLabel:process_single` matches labels `process_single` for process with name UNZIP +Mar-25 18:17:59.676 [main] DEBUG 
nextflow.executor.ExecutorFactory - << taskConfig executor: null +Mar-25 18:17:59.677 [main] DEBUG nextflow.executor.ExecutorFactory - >> processorType: 'local' +Mar-25 18:17:59.679 [main] DEBUG nextflow.executor.Executor - [warm up] executor > local +Mar-25 18:17:59.681 [main] DEBUG n.processor.LocalPollingMonitor - Creating local task monitor for executor 'local' > cpus=11; memory=36 GB; capacity=11; pollInterval=100ms; dumpInterval=5m +Mar-25 18:17:59.681 [main] DEBUG n.processor.TaskPollingMonitor - >>> barrier register (monitor: local) +Mar-25 18:17:59.717 [main] DEBUG nextflow.executor.ExecutorFactory - << taskConfig executor: null +Mar-25 18:17:59.718 [main] DEBUG nextflow.executor.ExecutorFactory - >> processorType: 'local' +Mar-25 18:17:59.727 [main] DEBUG nextflow.Session - Config process names validation disabled as requested +Mar-25 18:17:59.727 [main] DEBUG nextflow.Session - Igniting dataflow network (3) +Mar-25 18:17:59.727 [main] DEBUG nextflow.processor.TaskProcessor - Starting process > UNZIP +Mar-25 18:17:59.728 [main] DEBUG nextflow.processor.TaskProcessor - Starting process > POINTS2REGIONS_CLUSTER +Mar-25 18:17:59.728 [main] DEBUG nextflow.script.ScriptRunner - Parsed script files: + Script_8b4b8d21490fbe79: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test-9b856baf5b74f66fed52051240a30667.nf + Script_2522f348ed934d86: /Users/obrovkina/Documents/Projects/spatialxe/modules/local/points2regions/tests/../main.nf + Script_8b560cf26a0f6f39: /Users/obrovkina/Documents/Projects/spatialxe/modules/nf-core/unzip/main.nf +Mar-25 18:17:59.728 [main] DEBUG nextflow.script.ScriptRunner - > Awaiting termination +Mar-25 18:17:59.728 [main] DEBUG nextflow.Session - Session await +Mar-25 18:17:59.813 [FileTransfer-1] DEBUG nextflow.file.FilePorter - Copying foreign file https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip to work dir: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/stage-4a378e5e-fed9-46b0-b811-2dcfdbaf0a82/86/59f872d28a00d8a57ea62dbc4c8ed0/Xenium_Prime_Mouse_Ileum_tiny_outs.zip +Mar-25 18:18:01.818 [Actor Thread 11] INFO nextflow.file.FilePorter - Staging foreign file: https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip +Mar-25 18:18:06.202 [Task submitter] DEBUG n.executor.local.LocalTaskHandler - Launch cmd line: /bin/bash -ue .command.run +Mar-25 18:18:06.204 [Task submitter] INFO nextflow.Session - [10/35c695] Submitted process > UNZIP (Xenium_Prime_Mouse_Ileum_tiny_outs.zip) +Mar-25 18:18:10.107 [Task monitor] DEBUG n.processor.TaskPollingMonitor - Task completed > TaskHandler[id: 1; name: UNZIP (Xenium_Prime_Mouse_Ileum_tiny_outs.zip); status: COMPLETED; exit: 0; error: -; workDir: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/10/35c695018a0ff6f10a50d77448fe17] +Mar-25 18:18:10.107 [Task monitor] DEBUG nextflow.util.ThreadPoolBuilder - Creating thread pool 'TaskFinalizer' minSize=10; maxSize=33; workQueue=LinkedBlockingQueue[-1]; allowCoreThreadTimeout=false +Mar-25 18:18:10.120 [TaskFinalizer-1] WARN nextflow.processor.PublishDir - Process `UNZIP` publishDir path contains a variable with a null value +Mar-25 18:18:10.121 [Actor Thread 11] DEBUG nextflow.util.HashBuilder - Unable to get file attributes file: 
/Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/10/35c695018a0ff6f10a50d77448fe17/Xenium_Prime_Mouse_Ileum_tiny_outs/transcripts.csv -- Cause: java.nio.file.NoSuchFileException: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/10/35c695018a0ff6f10a50d77448fe17/Xenium_Prime_Mouse_Ileum_tiny_outs/transcripts.csv +Mar-25 18:18:10.127 [TaskFinalizer-1] DEBUG nextflow.util.ThreadPoolBuilder - Creating thread pool 'PublishDir' minSize=10; maxSize=33; workQueue=LinkedBlockingQueue[-1]; allowCoreThreadTimeout=false +Mar-25 18:18:10.135 [Task submitter] DEBUG n.executor.local.LocalTaskHandler - Launch cmd line: /bin/bash -ue .command.run +Mar-25 18:18:10.135 [Task submitter] INFO nextflow.Session - [d8/128d4b] Submitted process > POINTS2REGIONS_CLUSTER (test) +Mar-25 18:18:11.157 [Task monitor] DEBUG n.processor.TaskPollingMonitor - Task completed > TaskHandler[id: 2; name: POINTS2REGIONS_CLUSTER (test); status: COMPLETED; exit: 0; error: -; workDir: /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/d8/128d4b427dc53ec692fd772d9fec52] +Mar-25 18:18:11.159 [TaskFinalizer-2] DEBUG nextflow.processor.TaskProcessor - Process `POINTS2REGIONS_CLUSTER (test)` is unable to find [UnixPath]: `/Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/d8/128d4b427dc53ec692fd772d9fec52/clustered_s20.csv` (pattern: `clustered_s20.csv`) +Mar-25 18:18:11.163 [TaskFinalizer-2] DEBUG nextflow.processor.TaskProcessor - Handling unexpected condition for + task: name=POINTS2REGIONS_CLUSTER (test); work-dir=/Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/d8/128d4b427dc53ec692fd772d9fec52 + error [nextflow.exception.MissingFileException]: Missing output file(s) `clustered_s20.csv` expected by process `POINTS2REGIONS_CLUSTER (test)` +Mar-25 18:18:11.189 [TaskFinalizer-2] ERROR nextflow.processor.TaskProcessor - Error executing process > 'POINTS2REGIONS_CLUSTER (test)' + +Caused by: + Missing output file(s) `clustered_s20.csv` expected by process `POINTS2REGIONS_CLUSTER (test)` + + +Command executed: + + cat <<-END_VERSIONS > versions.yml + "POINTS2REGIONS_CLUSTER": + ficture_preprocess: v.1.0.0 + END_VERSIONS + +Command exit status: + 0 + +Command output: + (empty) + +Command error: + WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested + +Work dir: + /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/d8/128d4b427dc53ec692fd772d9fec52 + +Container: + community.wave.seqera.io/library/pip_points2regions:9f5bb888586554a6 + +Tip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named `.command.sh` +Mar-25 18:18:11.190 [main] DEBUG nextflow.Session - Session await > all processes finished +Mar-25 18:18:11.191 [TaskFinalizer-2] INFO nextflow.Session - Execution cancelled -- Finishing pending tasks before exit +Mar-25 18:18:11.195 [Task monitor] DEBUG n.processor.TaskPollingMonitor - <<< barrier arrives (monitor: local) - terminating tasks monitor poll loop +Mar-25 18:18:11.195 [main] DEBUG nextflow.Session - Session await > all barriers passed +Mar-25 18:18:11.196 [main] DEBUG nextflow.util.ThreadPoolManager - Thread pool 'TaskFinalizer' shutdown completed (hard=false) +Mar-25 18:18:11.196 
[main] DEBUG nextflow.util.ThreadPoolManager - Thread pool 'PublishDir' shutdown completed (hard=false) +Mar-25 18:18:11.201 [main] DEBUG n.trace.WorkflowStatsObserver - Workflow completed > WorkflowStats[succeededCount=1; failedCount=1; ignoredCount=0; cachedCount=0; pendingCount=0; submittedCount=0; runningCount=0; retriesCount=0; abortedCount=0; succeedDuration=3s; failedDuration=72ms; cachedDuration=0ms;loadCpus=0; loadMemory=0; peakRunning=1; peakCpus=1; peakMemory=6 GB; ] +Mar-25 18:18:11.201 [main] DEBUG nextflow.trace.TraceFileObserver - Workflow completed -- saving trace file +Mar-25 18:18:11.201 [main] DEBUG nextflow.trace.ReportObserver - Workflow completed -- rendering execution report +Mar-25 18:18:11.749 [main] DEBUG nextflow.trace.TimelineObserver - Workflow completed -- rendering execution timeline +Mar-25 18:18:11.815 [main] DEBUG nextflow.cache.CacheDB - Closing CacheDB done +Mar-25 18:18:11.821 [main] INFO org.pf4j.AbstractPluginManager - Stop plugin 'nf-schema@2.3.0' +Mar-25 18:18:11.821 [main] DEBUG nextflow.plugin.BasePlugin - Plugin stopped nf-schema +Mar-25 18:18:11.821 [main] DEBUG nextflow.util.ThreadPoolManager - Thread pool 'FileTransfer' shutdown completed (hard=false) +Mar-25 18:18:11.821 [main] DEBUG nextflow.script.ScriptRunner - > Execution complete -- Goodbye diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_0.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_0.json new file mode 100644 index 0000000..713f21d --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_0.json @@ -0,0 +1 @@ +{"0":[]} \ No newline at end of file diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_1.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_1.json new file mode 100644 index 0000000..787e540 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_1.json @@ -0,0 +1 @@ +{"1":[]} \ No newline at end of file diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_2.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_2.json new file mode 100644 index 0000000..125581d --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_2.json @@ -0,0 +1 @@ +{"2":[]} \ No newline at end of file diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered.json new file mode 100644 index 0000000..76639e1 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered.json @@ -0,0 +1 @@ +{"clustered":[]} \ No newline at end of file diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered_plot.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered_plot.json new file mode 100644 index 0000000..b803b96 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/output_clustered_plot.json @@ -0,0 +1 @@ +{"clustered_plot":[]} \ No newline at end of file diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/params.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/params.json new file mode 100644 index 0000000..9cff60e --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/params.json @@ -0,0 +1 @@ +{"nf_test_output":"/Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta"} \ No newline at end of file diff --git 
a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.err b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.err new file mode 100644 index 0000000..e69de29 diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.out b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.out new file mode 100644 index 0000000..ec566c3 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/std.out @@ -0,0 +1,31 @@ +ERROR ~ Error executing process > 'POINTS2REGIONS_CLUSTER (test)' + +Caused by: + Missing output file(s) `clustered_s20.csv` expected by process `POINTS2REGIONS_CLUSTER (test)` + + +Command executed: + + cat <<-END_VERSIONS > versions.yml + "POINTS2REGIONS_CLUSTER": + ficture_preprocess: v.1.0.0 + END_VERSIONS + +Command exit status: + 0 + +Command output: + (empty) + +Command error: + WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested + +Work dir: + /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/d8/128d4b427dc53ec692fd772d9fec52 + +Container: + community.wave.seqera.io/library/pip_points2regions:9f5bb888586554a6 + +Tip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named `.command.sh` + + -- Check '/Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/nextflow.log' file for details diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/trace.csv b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/trace.csv new file mode 100644 index 0000000..0043f06 --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/trace.csv @@ -0,0 +1,3 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar +1 10/35c695 44922 UNZIP (Xenium_Prime_Mouse_Ileum_tiny_outs.zip) COMPLETED 0 2025-03-25 18:18:06.204 3.9s 3s 90.1% 34 MB 442.6 MB 49.2 MB 372.7 MB +2 d8/128d4b 44952 POINTS2REGIONS_CLUSTER (test) FAILED 0 2025-03-25 18:18:10.135 1s 72ms 125.7% 0 0 129.3 KB 4.3 KB diff --git a/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/workflow.json b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/workflow.json new file mode 100644 index 0000000..06d3ecb --- /dev/null +++ b/.nf-test/tests/9b856baf5b74f66fed52051240a30667/meta/workflow.json @@ -0,0 +1 @@ +{"success":false,"exitStatus":0,"errorMessage":"WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested","errorReport":"Error executing process > 'POINTS2REGIONS_CLUSTER (test)'\n\nCaused by:\n Missing output file(s) `clustered_s20.csv` expected by process `POINTS2REGIONS_CLUSTER (test)`\n\n\nCommand executed:\n\n cat <<-END_VERSIONS > versions.yml\n \"POINTS2REGIONS_CLUSTER\":\n ficture_preprocess: v.1.0.0\n END_VERSIONS\n\nCommand exit status:\n 0\n\nCommand output:\n (empty)\n\nCommand error:\n WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested\n\nWork dir:\n /Users/obrovkina/Documents/Projects/spatialxe/.nf-test/tests/9b856baf5b74f66fed52051240a30667/work/d8/128d4b427dc53ec692fd772d9fec52\n\nContainer:\n community.wave.seqera.io/library/pip_points2regions:9f5bb888586554a6\n\nTip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named 
`.command.sh`"}
\ No newline at end of file
diff --git a/modules.json b/modules.json
index 783abff..98e329d 100644
--- a/modules.json
+++ b/modules.json
@@ -8,32 +8,58 @@
         "cellpose": {
             "branch": "master",
             "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
-            "installed_by": ["modules"]
+            "installed_by": [
+                "modules"
+            ]
         },
         "custom/dumpsoftwareversions": {
             "branch": "master",
             "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-            "installed_by": ["modules"]
+            "installed_by": [
+                "modules"
+            ]
+        },
+        "fastqc": {
+            "branch": "master",
+            "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+            "installed_by": [
+                "modules"
+            ]
         },
         "multiqc": {
             "branch": "master",
             "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296",
-            "installed_by": ["modules"]
+            "installed_by": [
+                "modules"
+            ]
+        },
+        "unzip": {
+            "branch": "master",
+            "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+            "installed_by": [
+                "modules"
+            ]
         },
         "xeniumranger/import-segmentation": {
             "branch": "master",
             "git_sha": "b5e1891a88491d8731b5e68e22bd907726caec4a",
-            "installed_by": ["modules"]
+            "installed_by": [
+                "modules"
+            ]
         },
         "xeniumranger/relabel": {
             "branch": "master",
             "git_sha": "b5e1891a88491d8731b5e68e22bd907726caec4a",
-            "installed_by": ["modules"]
+            "installed_by": [
+                "modules"
+            ]
         },
         "xeniumranger/resegment": {
             "branch": "master",
             "git_sha": "b5e1891a88491d8731b5e68e22bd907726caec4a",
-            "installed_by": ["modules"]
+            "installed_by": [
+                "modules"
+            ]
         }
     }
 },
@@ -42,20 +68,26 @@
         "utils_nextflow_pipeline": {
             "branch": "master",
             "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b",
-            "installed_by": ["subworkflows"]
+            "installed_by": [
+                "subworkflows"
+            ]
         },
         "utils_nfcore_pipeline": {
             "branch": "master",
             "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a",
-            "installed_by": ["subworkflows"]
+            "installed_by": [
+                "subworkflows"
+            ]
         },
         "utils_nfschema_plugin": {
             "branch": "master",
             "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e",
-            "installed_by": ["subworkflows"]
+            "installed_by": [
+                "subworkflows"
+            ]
         }
     }
 }
 }
 }
-}
+}
\ No newline at end of file
diff --git a/modules/local/points2regions/Dockerfile b/modules/local/points2regions/Dockerfile
deleted file mode 100644
index abcf17a..0000000
--- a/modules/local/points2regions/Dockerfile
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM python:3.10-slim
-
-LABEL maintainer="Olga Brovkina "
-LABEL description="Container for Points2Regions"
-
-RUN pip install points2regions
-
-ENTRYPOINT ["points2regions"]
diff --git a/modules/local/points2regions/environment.yml b/modules/local/points2regions/environment.yml
index 4b3c9d3..81d3544 100644
--- a/modules/local/points2regions/environment.yml
+++ b/modules/local/points2regions/environment.yml
@@ -4,4 +4,9 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - "YOUR-TOOL-HERE"
+  - python=3.10
+  - matplotlib
+  - pandas
+  - pip
+  - pip:
+      - points2regions
diff --git a/modules/local/points2regions/main.nf b/modules/local/points2regions/main.nf
index 11e8174..655b9d5 100644
--- a/modules/local/points2regions/main.nf
+++ b/modules/local/points2regions/main.nf
@@ -6,37 +6,16 @@ process POINTS2REGIONS_CLUSTER {
     input:
     tuple val(meta), path(transcripts)
     val(smoothing)
-    val (num_clusters)
+    val(num_clusters)
 
     output:
-    tuple val(meta), path("clustered_s${smoothing}.csv"), emit: clusters
+    tuple val(meta), path("clustered_s${smoothing}.csv"), emit: clustered
+    tuple val(meta), path("cluster_plot_s${smoothing}.png"), emit: clustered_plot
+    path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-
-    """
-    python modules/local/points2regions/templates/points2regions_cluster.py \\
-    --transcripts ${transcripts} \\
-    --smoothing ${smoothing} \\
-    --num_clusters ${num_clusters}
-    """
-}
-
-process POINTS2REGIONS_PLOT {
-    tag "$meta.id"
-    label 'points_visual'
-    input:
-    tuple val(meta), path(clusters)
-    val smoothing
-
-    output:
-    path "cluster_plot.png"
-    path "versions.yml"
-
-    script:
-    """
-    python modules/local/points2regions/templates/points2regions_plot.py
-    """
+    template 'points2regions_cluster.py'
 }
diff --git a/modules/local/points2regions/meta.yml b/modules/local/points2regions/meta.yml
index 80a19eb..367a153 100644
--- a/modules/local/points2regions/meta.yml
+++ b/modules/local/points2regions/meta.yml
@@ -1,24 +1,20 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "points2regions"
-## TODO nf-core: Add a description of the module and list keywords
-description: write your description here
+description: Fast and efficient region discovery across multiple scales
 keywords:
-  - sort
-  - example
-  - genomics
+  - probabilistic segmentation
+  - clustering
+  - spatialomics
 tools:
   - "points2regions":
-      ## TODO nf-core: Add a description and other details for the software below
-      description: ""
-      homepage: ""
-      documentation: ""
-      tool_dev_url: ""
-      doi: ""
-      licence:
-      identifier:
+      description: "Tool for identifying regions with similar mRNA compositions. The Xenium platform is currently supported."
+      homepage: "https://github.com/wahlby-lab/Points2Regions/tree/main"
+      documentation: "https://github.com/wahlby-lab/Points2Regions/blob/main/README.md"
+      tool_dev_url: "https://github.com/wahlby-lab/Points2Regions"
+      doi: "10.1002/cyto.a.24884"
+      licence: ["MIT"]
 
@@ -26,37 +22,34 @@
-## TODO nf-core: Add a description of all of the variables used as input
 input:
   # Only when we have meta
   - - meta:
         type: map
         description: |
           Groovy Map containing sample information
           e.g. `[ id:'sample1', single_end:false ]`
-
-      ## TODO nf-core: Delete / customise this example input
-      - bam:
+    - transcripts:
         type: file
-        description: Sorted BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
-        ontologies:
-          - edam: "http://edamontology.org/format_25722"
-          - edam: "http://edamontology.org/format_2573"
-          - edam: "http://edamontology.org/format_3462"
+        description: |
+          File containing the transcript positions
+        pattern: "transcripts.csv.gz"
 
-## TODO nf-core: Add a description of all of the variables used as output
 output:
-  - bam:
-      #Only when we have meta
-      - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. `[ id:'sample1', single_end:false ]`
-      ## TODO nf-core: Delete / customise this example output
-      - "*.bam":
-          type: file
-          description: Sorted BAM/CRAM/SAM file
-          pattern: "*.{bam,cram,sam}"
-          ontologies:
-            - edam: "http://edamontology.org/format_25722"
-            - edam: "http://edamontology.org/format_2573"
-            - edam: "http://edamontology.org/format_3462"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing run information
+          e.g. `[ id:'run_id' ]`
+    - clustered:
+        type: file
+        description: |
+          CSV file with Points2Regions-assigned cluster labels.
+          The file is named `clustered_s<smoothing>.csv` and includes columns:
+          - X, Y: transcript coordinates
+          - gene: gene name
+          - clusters: assigned cluster number
+        pattern: "clustered_s*.csv"
+    - clustered_plot:
+        type: file
+        description: |
+          PNG plot visualizing the spatial clusters.
+          The file is named `cluster_plot_s<smoothing>.png` and shows a spatial
+          scatter plot of transcript positions colored by cluster ID.
+        pattern: "cluster_plot_s*.png"
 
   - versions:
       - "versions.yml":
           type: file
diff --git a/modules/local/points2regions/templates/ficture_preprocess.py b/modules/local/points2regions/templates/ficture_preprocess.py
deleted file mode 100644
index 37fb4f0..0000000
--- a/modules/local/points2regions/templates/ficture_preprocess.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import os
-import re
-import logging
-import pandas as pd
-import gzip
-
-def format_xenium():
-
-    print("[START]")
-
-    negctrl_regex = "BLANK\\|NegCon"
-    if ( "${params.negative_control_regex}" ):
-        negctrl_regex = "${params.negative_control_regex}"
-
-    transripts = "${transcripts}"
-    features = "${features}"
-
-    unit_info=['X','Y','gene','cell_id','overlaps_nucleus']
-    oheader = unit_info + ['Count']
-
-    feature=pd.DataFrame()
-    xmin=sys.maxsize
-    xmax=0
-    ymin=sys.maxsize
-    ymax=0
-
-    output = "processed_transcripts.tsv.gz"
-    feature_file = "feature.clean.tsv.gz"
-    min_phred_score = 15
-
-    with gzip.open(output, 'wt') as wf:
-        wf.write('\\t'.join(oheader) + '\\n')
-
-    for chunk in pd.read_csv(transripts,header=0,chunksize=500000):
-        chunk = chunk.loc[(chunk.qv > min_phred_score)]
-        chunk.rename(columns = {'feature_name':'gene'}, inplace=True)
-        if negctrl_regex != '':
-            chunk = chunk[~chunk.gene.str.contains(negctrl_regex, flags=re.IGNORECASE, regex=True)]
-        chunk.rename(columns = {'x_location':'X', 'y_location':'Y'}, inplace=True)
-        chunk['Count'] = 1
-        chunk[oheader].to_csv(output,sep='\\t',mode='a',index=False,header=False,float_format="%.2f")
-        logging.info(f"{chunk.shape[0]}")
-        feature = pd.concat([feature, chunk.groupby(by='gene').agg({'Count':"sum"}).reset_index()])
-        x0 = chunk.X.min()
-        x1 = chunk.X.max()
-        y0 = chunk.Y.min()
-        y1 = chunk.Y.max()
-        xmin = min(int(xmin), int(x0))
-        xmax = max(int(xmax), int(x1))
-        ymin = min(int(ymin), int(y0))
-        ymax = max(int(ymax), int(y1))
-
-    if os.path.exists(features):
-        feature_list = []
-        with open(features, 'r') as ff:
-            for line in ff:
-                feature_list.append(line.strip('\\n'))
-        feature = feature.groupby(by='gene').agg({'Count':"sum"}).reset_index()
-        feature = feature[[x in feature_list for x in feature['gene']]]
-        feature.to_csv(feature_file,sep='\\t',index=False)
-
-    f = os.path.join( os.path.dirname(output), "coordinate_minmax.tsv" )
-    with open(f, 'w') as wf:
-        wf.write(f"xmin\\t{xmin}\\n")
-        wf.write(f"xmax\\t{xmax}\\n")
-        wf.write(f"ymin\\t{ymin}\\n")
-        wf.write(f"ymax\\t{ymax}\\n")
-
-    # Output version information
-    with open("versions.yml", "w") as f:
-        f.write('"${task.process}":\\n')
-        f.write(f'ficture_preprocess: v.1.0.0"\\n')
-
-    print("[FINISH]")
-
-if __name__ == '__main__':
-    format_xenium()
diff --git a/modules/local/points2regions/templates/points2regions_cluster.py b/modules/local/points2regions/templates/points2regions_cluster.py
index d6ecbd5..039e451 100644
--- a/modules/local/points2regions/templates/points2regions_cluster.py
+++ b/modules/local/points2regions/templates/points2regions_cluster.py
@@ -3,38 +3,67 @@
 import sys
 import os
 import pandas as pd
-from points2regions import Points2Regions # adjust if function is local
+import matplotlib.pyplot as plt
+from points2regions import Points2Regions  # pip-installed points2regions package
 
-def cluster_points2regions():
-    print("[START]")
 
-    input_csv = "${transcripts}"
-    output_csv = "clustered.csv"
-    smoothing = int("${smoothing}")
-    num_clusters = int("${num_clusters}")
-
-    # Read input
-    data = pd.read_csv(input_csv)
-
-    # Run clustering
+def cluster_points2regions(data, smoothing, num_clusters):
     mdl = Points2Regions(
         data[['X', 'Y']],
         data['gene'],
         pixel_width=1,
         pixel_smoothing=smoothing
     )
-
     data['clusters'] = mdl.fit_predict(num_clusters=num_clusters, output='marker')
+    return data
+
+
+def plot_clusters(data, smoothing, output_file):
+    plt.figure(figsize=(6, 6))
+    plt.scatter(
+        data['X'],
+        data['Y'],
+        c=data['clusters'],
+        alpha=0.7,
+        s=0.5,
+        cmap='tab20'
+    )
+    plt.title(f'Smoothing: {smoothing}')
+    plt.axis('off')
+    plt.axis('scaled')
+    plt.tight_layout()
+    plt.savefig(output_file, dpi=300)
 
-    # Save result
-    data.to_csv(output_csv, index=False)
 
-    # Write version info
+def main():
+    # Inputs passed from Nextflow
+    input_csv = "${transcripts}"
+    smoothing = int("${smoothing}")
+    num_clusters = int("${num_clusters}")
+
+    # Outputs
+    clustered_csv = f"clustered_s{smoothing}.csv"
+    plot_file = f"cluster_plot_s{smoothing}.png"
+
+    print("[START] Reading transcripts and clustering...")
+    data = pd.read_csv(input_csv)
+    # raw Xenium exports name these columns feature_name/x_location/y_location
+    data = data.rename(columns={'feature_name': 'gene', 'x_location': 'X', 'y_location': 'Y'})
+    data = cluster_points2regions(data, smoothing, num_clusters)
+    data.to_csv(clustered_csv, index=False)
+    print(f"[CLUSTERING DONE] Saved to {clustered_csv}")
+
+    print("[START] Plotting clusters...")
+    plot_clusters(data, smoothing, plot_file)
+    print(f"[PLOT DONE] Saved to {plot_file}")
+
+    # Version file
     with open("versions.yml", "w") as f:
-        f.write('"${task.process}":\\n')
-        f.write('    points2regions_cluster: "v1.0.0"\\n')
+        f.write('"${task.process}":\n')
+        f.write('    points2regions: "v1.0.0"\n')
 
     print("[FINISH]")
 
+
 if __name__ == "__main__":
-    cluster_points2regions()
+    main()
diff --git a/modules/local/points2regions/templates/points2regions_plot.py b/modules/local/points2regions/templates/points2regions_plot.py
deleted file mode 100644
index 348ad13..0000000
--- a/modules/local/points2regions/templates/points2regions_plot.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-import pandas as pd
-import matplotlib.pyplot as plt
-
-def plot_clusters(data, smoothing, output_file="cluster_plot.png"):
-    """
-    Plot spatial clusters from Points2Regions output.
-
-    Args:
-        data (pd.DataFrame): Data with 'X', 'Y', and 'Clusters' columns.
-        smoothing (int): Smoothing value used (for the plot title).
-        output_file (str): Path to save the plot image. 
diff --git a/modules/local/points2regions/templates/points2regions_plot.py b/modules/local/points2regions/templates/points2regions_plot.py
deleted file mode 100644
index 348ad13..0000000
--- a/modules/local/points2regions/templates/points2regions_plot.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-
-import pandas as pd
-import matplotlib.pyplot as plt
-
-def plot_clusters(data, smoothing, output_file="cluster_plot.png"):
-    """
-    Plot spatial clusters from Points2Regions output.
-
-    Args:
-        data (pd.DataFrame): Data with 'X', 'Y', and 'Clusters' columns.
-        smoothing (int): Smoothing value used (for the plot title).
-        output_file (str): Path to save the plot image.
-    """
-    plt.figure(figsize=(6, 6))
-    plt.scatter(
-        data['X'],
-        data['Y'],
-        c=data['Clusters'],
-        alpha=0.7,
-        s=0.5,
-        cmap='tab20'
-    )
-    plt.title(f'Smoothing: {smoothing}')
-    plt.axis('off')
-    plt.axis('scaled')
-    plt.tight_layout()
-    plt.savefig(output_file, dpi=300)
-
-def main():
-    input_csv = "${clustered}"
-    smoothing = int("${smoothing}")
-    output_file = "cluster_plot.png"
-
-    data = pd.read_csv(input_csv)
-    plot_clusters(data, smoothing, output_file)
-
-    # Save version info
-    with open("versions.yml", "w") as f:
-        f.write('"${task.process}":\\n')
-        f.write('    points2regions_plot: "v1.0.0"\\n')
-
-if __name__ == "__main__":
-    main()
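
Note: the standalone plotting template above is deleted because its logic now lives in points2regions_cluster.py. If a figure ever needs to be regenerated from an already clustered CSV, a minimal sketch (column names follow the merged script; the input file name is hypothetical):

    import pandas as pd
    import matplotlib
    matplotlib.use("Agg")  # headless backend, as in the pipeline container
    import matplotlib.pyplot as plt

    data = pd.read_csv("clustered_s20.csv")
    plt.figure(figsize=(6, 6))
    plt.scatter(data['X'], data['Y'], c=data['clusters'], s=0.5, alpha=0.7, cmap='tab20')
    plt.axis('off')
    plt.axis('scaled')
    plt.savefig("cluster_plot_s20.png", dpi=300)
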
diff --git a/modules/local/points2regions/tests/main.nf.test b/modules/local/points2regions/tests/main.nf.test
index e92cf93..173f69d 100644
--- a/modules/local/points2regions/tests/main.nf.test
+++ b/modules/local/points2regions/tests/main.nf.test
@@ -1,61 +1,30 @@
-// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
-// nf-core modules test points2regions
 nextflow_process {
 
-    name "Test Process POINTS2REGIONS"
+    name "Test Process POINTS2REGIONS_CLUSTER"
     script "../main.nf"
-    process "POINTS2REGIONS"
+    process "POINTS2REGIONS_CLUSTER"
 
-    tag "modules"
-    tag "modules_"
-    tag "points2regions"
-
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
-    test("sarscov2 - bam") {
-
-        // TODO nf-core: If you are created a test for a chained module
-        // (the module requires running more than one process to generate the required output)
-        // add the 'setup' method here.
-        // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).
-
-        when {
+    setup {
+        run("UNZIP") {
+            script "modules/nf-core/unzip/main.nf"
             process {
                 """
-                // TODO nf-core: define inputs of the process here. Example:
-
-                input[0] = [
-                    [ id:'test', single_end:false ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
-                ]
+                input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)]
                 """
             }
         }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-                //TODO nf-core: Add all required assertions to verify the test output.
-                // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
-            )
-        }
-
-    }
+    }
 
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
-    test("sarscov2 - bam - stub") {
-
-        options "-stub"
+    test("cluster - with Xenium transcripts.csv") {
 
         when {
             process {
                 """
-                // TODO nf-core: define inputs of the process here. Example:
-
-                input[0] = [
-                    [ id:'test', single_end:false ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
-                ]
+                input[0] = Channel.of([
+                    [id: "test"],
+                ]).combine(UNZIP.out.unzipped_archive.map { it[1] + "/transcripts.csv" })
+                input[1] = 20
+                input[2] = 5
                 """
             }
         }
@@ -63,11 +32,10 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out).match() }
-                //TODO nf-core: Add all required assertions to verify the test output.
+                { assert file(process.out.clustered.get(0).get(1)).name == 'clustered_s20.csv' },
+                { assert file(process.out.clustered_plot.get(0).get(1)).name == 'cluster_plot_s20.png' },
+                { assert process.out.versions }
             )
         }
-
     }
-
 }
diff --git a/modules/local/scportrait/environment.yml b/modules/local/scportrait/environment.yml
new file mode 100644
index 0000000..4b3c9d3
--- /dev/null
+++ b/modules/local/scportrait/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "YOUR-TOOL-HERE"
diff --git a/modules/local/scportrait/main.nf b/modules/local/scportrait/main.nf
new file mode 100644
index 0000000..d5c8135
--- /dev/null
+++ b/modules/local/scportrait/main.nf
@@ -0,0 +1,91 @@
+// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
+//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
+//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
+//               https://nf-co.re/join
+// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
+//               All other parameters MUST be provided using the "task.ext" directive, see here:
+//               https://www.nextflow.io/docs/latest/process.html#ext
+//               where "task.ext" is a string.
+//               Any parameters that need to be evaluated in the context of a particular sample
+//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
+// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
+//               unless there is a run-time, storage advantage in implementing in this way
+//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
+//                 bwa mem | samtools view -B -T ref.fasta
+// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
+//               list (`[]`) instead of a file can be used to work around this issue.
+
+process SCPORTRAIT {
+    tag "$meta.id"
+    label 'process_portrait'
+
+    // TODO nf-core: List required Conda package(s).
+    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
+    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
+    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
+        'biocontainers/YOUR-TOOL-HERE' }"
+
+    input:
+    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
+    //               MUST be provided as an input via a Groovy Map called "meta".
+    //               This information may not be required in some instances e.g. indexing reference genome files:
+    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
+    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
+    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + scportrait: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: A stub section should mimic the execution of the original module as best as possible + // Have a look at the following examples: + // Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63 + // Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54 + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + scportrait: \$(samtools --version |& sed '1!d ; s/samtools //') + END_VERSIONS + """ +} diff --git a/modules/local/scportrait/meta.yml b/modules/local/scportrait/meta.yml new file mode 100644 index 0000000..260e8cf --- /dev/null +++ b/modules/local/scportrait/meta.yml @@ -0,0 +1,69 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "scportrait" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort + - example + - genomics +tools: + - "scportrait": + ## TODO nf-core: Add a description and other details for the software below + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: + identifier: + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + +## TODO nf-core: Add a description of all of the variables used as output +output: + - bam: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + ## TODO nf-core: Delete / customise this example output + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@brovolia" +maintainers: + - "@brovolia" diff --git a/modules/local/scportrait/tests/main.nf.test b/modules/local/scportrait/tests/main.nf.test new file mode 100644 index 0000000..c407477 --- /dev/null +++ b/modules/local/scportrait/tests/main.nf.test @@ -0,0 +1,73 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test scportrait +nextflow_process { + + name "Test Process SCPORTRAIT" + script "../main.nf" + process "SCPORTRAIT" + + tag "modules" + tag "modules_" + tag "scportrait" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. 
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
new file mode 100644
index 0000000..3df2176
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -0,0 +1,24 @@
+process CUSTOM_DUMPSOFTWAREVERSIONS {
+    label 'process_single'
+
+    // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
+    conda "bioconda::multiqc=1.13"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
+
+    input:
+    path versions
+
+    output:
+    path "software_versions.yml"    , emit: yml
+    path "software_versions_mqc.yml", emit: mqc_yml
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    template 'dumpsoftwareversions.py'
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
new file mode 100644
index 0000000..60b546a
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -0,0 +1,34 @@
+name: custom_dumpsoftwareversions
+description: Custom module used to dump software versions within the nf-core pipeline template
+keywords:
+  - custom
+  - version
+tools:
+  - custom:
+      description: Custom module used to dump software versions within the nf-core pipeline template
+      homepage: https://github.com/nf-core/tools
+      documentation: https://github.com/nf-core/tools
+      licence: ["MIT"]
+input:
+  - versions:
+      type: file
+      description: YML file containing software versions
+      pattern: "*.yml"
+
+output:
+  - yml:
+      type: file
+      description: Standard YML file containing software versions
+      pattern: "software_versions.yml"
+  - mqc_yml:
+      type: file
+      description: MultiQC custom content YML file containing software versions
+      pattern: "software_versions_mqc.yml"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
new file mode 100755
index 0000000..da03340
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
+import yaml
+import platform
+from textwrap import dedent
+
+
+def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
+    html = [
+        dedent(
+            """\\
+            <style>
+            #nf-core-versions tbody:nth-child(even) {
+                background-color: #f2f2f2;
+            }
+            </style>
+            <table class="table" style="width:100%" id="nf-core-versions">
+                <thead>
+                    <tr>
+                        <th> Process Name </th>
+                        <th> Software </th>
+                        <th> Version </th>
+                    </tr>
+                </thead>
+            """
+        )
+    ]
+    for process, tmp_versions in sorted(versions.items()):
+        html.append("<tbody>")
+        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+            html.append(
+                dedent(
+                    f"""\\
+                    <tr>
+                        <td><samp>{process if (i == 0) else ''}</samp></td>
+                        <td><samp>{tool}</samp></td>
+                        <td><samp>{version}</samp></td>
+                    </tr>
+                    """
+                )
+            )
+        html.append("</tbody>")
+    html.append("</table>")
+    return "\\n".join(html)
+
+
+def main():
+    """Load all version files and generate merged output."""
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }
+
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        module = process.split(":")[-1]
+        try:
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            versions_by_module[module] = process_versions
+
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }
+
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }
+
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)
+
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
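
Note: the aggregation in main() above collapses fully qualified Nextflow process names down to their trailing module name before rendering the MultiQC table. A self-contained sketch of that step, with invented sample data:

    import yaml

    # Invented example of merged versions.yml content.
    versions_by_process = {
        "NFCORE_PIPE:WORKFLOW:UNZIP": {"7za": "16.02"},
        "NFCORE_PIPE:WORKFLOW:POINTS2REGIONS_CLUSTER": {"points2regions": "v1.0.0"},
    }

    versions_by_module = {}
    for process, process_versions in versions_by_process.items():
        module = process.split(":")[-1]  # keep only the trailing module name
        versions_by_module.setdefault(module, process_versions)

    print(yaml.dump(versions_by_module, default_flow_style=False))
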
") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/unzip/environment.yml b/modules/nf-core/unzip/environment.yml new file mode 100644 index 0000000..2461589 --- /dev/null +++ b/modules/nf-core/unzip/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::p7zip=16.02 diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf new file mode 100644 index 0000000..a0c0210 --- /dev/null +++ b/modules/nf-core/unzip/main.nf @@ -0,0 +1,49 @@ +process UNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/p7zip:16.02' : + 'biocontainers/p7zip:16.02' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/"), emit: unzipped_archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + 7za \\ + x \\ + -o"${prefix}"/ \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. 
+    prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName)
+    """
+    mkdir "${prefix}"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml
new file mode 100644
index 0000000..426fccb
--- /dev/null
+++ b/modules/nf-core/unzip/meta.yml
@@ -0,0 +1,46 @@
+name: unzip
+description: Unzip ZIP archive files
+keywords:
+  - unzip
+  - decompression
+  - zip
+  - archiving
+tools:
+  - unzip:
+      description: p7zip is a quick port of 7z.exe and 7za.exe (command line version
+        of 7zip, see www.7-zip.org) for Unix.
+      homepage: https://sourceforge.net/projects/p7zip/
+      documentation: https://sourceforge.net/projects/p7zip/
+      tool_dev_url: "https://sourceforge.net/projects/p7zip"
+      licence: ["LGPL-2.1-or-later"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - archive:
+        type: file
+        description: ZIP file
+        pattern: "*.zip"
+output:
+  - unzipped_archive:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}/:
+          type: directory
+          description: Directory contents of the unzipped archive
+          pattern: "${archive.baseName}/"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@jfy133"
+maintainers:
+  - "@jfy133"
diff --git a/modules/nf-core/unzip/nextflow.config b/modules/nf-core/unzip/nextflow.config
new file mode 100644
index 0000000..1a91908
--- /dev/null
+++ b/modules/nf-core/unzip/nextflow.config
@@ -0,0 +1,8 @@
+process {
+    withName: UNZIP {
+        container = "quay.io/biocontainers/p7zip:16.02"
+        publishDir = [
+            enabled: false
+        ]
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/unzip/tests/main.nf.test b/modules/nf-core/unzip/tests/main.nf.test
new file mode 100644
index 0000000..238b68d
--- /dev/null
+++ b/modules/nf-core/unzip/tests/main.nf.test
@@ -0,0 +1,54 @@
+nextflow_process {
+
+    name "Test Process UNZIP"
+    script "../main.nf"
+    process "UNZIP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "unzip"
+
+    test("generic [tar] [tar_gz]") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'hello' ],
+                    file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("generic [tar] [tar_gz] stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'hello' ],
+                    file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/unzip/tests/main.nf.test.snap b/modules/nf-core/unzip/tests/main.nf.test.snap
new file mode 100644
index 0000000..cdd2ab1
--- /dev/null
+++ b/modules/nf-core/unzip/tests/main.nf.test.snap
@@ -0,0 +1,76 @@
+{
+    "generic [tar] [tar_gz] stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "hello"
+                        },
+                        [
+
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8"
+                ],
+                "unzipped_archive": [
+                    [
+                        {
+                            "id": "hello"
+                        },
+                        [
+
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8"
+                ]
+            }
+        ],
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:16:37.11550986" + }, + "generic [tar] [tar_gz]": { + "content": [ + { + "0": [ + [ + { + "id": "hello" + }, + [ + "hello.tar:md5,80c66db79a773bc87b3346035ff9593e" + ] + ] + ], + "1": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ], + "unzipped_archive": [ + [ + { + "id": "hello" + }, + [ + "hello.tar:md5,80c66db79a773bc87b3346035ff9593e" + ] + ] + ], + "versions": [ + "versions.yml:md5,52c55ce814e8bc9edc5a6c625ed794b8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-30T19:16:25.120242571" + } +} \ No newline at end of file diff --git a/modules/nf-core/unzip/tests/tags.yml b/modules/nf-core/unzip/tests/tags.yml new file mode 100644 index 0000000..7f5647e --- /dev/null +++ b/modules/nf-core/unzip/tests/tags.yml @@ -0,0 +1,2 @@ +unzip: + - "modules/nf-core/unzip/**" From 740a0eaedad72efd92bb1ff6f0aa9511d2a6e902 Mon Sep 17 00:00:00 2001 From: Olga Brovkina Date: Wed, 26 Mar 2025 14:01:36 +0100 Subject: [PATCH 4/4] Refactored scportrait module --- .gitignore | 1 + modules/local/points2regions/main.nf | 2 +- modules/local/scportrait/environment.yml | 12 ++- modules/local/scportrait/main.nf | 82 ++--------------- modules/local/scportrait/meta.yml | 84 ++++++++--------- .../scportrait/templates/scportrait_run.py | 92 +++++++++++++++++++ modules/local/scportrait/tests/main.nf.test | 54 +++-------- 7 files changed, 165 insertions(+), 162 deletions(-) create mode 100644 modules/local/scportrait/templates/scportrait_run.py diff --git a/.gitignore b/.gitignore index a42ce01..23b0c7d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc null/ +.nf-test* diff --git a/modules/local/points2regions/main.nf b/modules/local/points2regions/main.nf index 655b9d5..2186bc2 100644 --- a/modules/local/points2regions/main.nf +++ b/modules/local/points2regions/main.nf @@ -21,7 +21,7 @@ process POINTS2REGIONS_CLUSTER { """ cat <<-END_VERSIONS > versions.yml "${task.process}": - ficture_preprocess: v.1.0.0 + points2regions_cluster: v.1.0.0 END_VERSIONS """ } diff --git a/modules/local/scportrait/environment.yml b/modules/local/scportrait/environment.yml index 4b3c9d3..2fe7fdf 100644 --- a/modules/local/scportrait/environment.yml +++ b/modules/local/scportrait/environment.yml @@ -3,5 +3,15 @@ channels: - conda-forge - bioconda + - defaults + dependencies: - - "YOUR-TOOL-HERE" + - python=3.10 + - tifffile + - matplotlib + - pip + - pip: + - scportrait + +container: + - community.wave.seqera.io/library/pip_scportrait:e0651f1fbb601e73 diff --git a/modules/local/scportrait/main.nf b/modules/local/scportrait/main.nf index d5c8135..606b414 100644 --- a/modules/local/scportrait/main.nf +++ b/modules/local/scportrait/main.nf @@ -1,91 +1,27 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. 
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-//               unless there is a run-time, storage advantage in implementing in this way
-//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//                 bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-//               list (`[]`) instead of a file can be used to work around this issue.
-
 process SCPORTRAIT {
     tag "$meta.id"
-    label 'process_portrait'
-
-    // TODO nf-core: List required Conda package(s).
-    //               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-    //               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+    label 'sdata'
+    container "community.wave.seqera.io/library/pip_scportrait:e0651f1fbb601e73"
 
     input:
-    // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    tuple val(meta), path(bam)
+    tuple val(meta), path(sdata)
+    val(cell_id_identifier)  // pass an empty string when unused; Nextflow inputs cannot be optional
 
     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    tuple val(meta), path("*.bam"), emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path("**/*_masks.tif"), emit: sc_mask
+    tuple val(meta), path("**/*_cells.png"), emit: sc_images
+    path "versions.yml"                    , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
-    """
-    samtools \\
-        sort \\
-        $args \\
-        -@ $task.cpus \\
-        -o ${prefix}.bam \\
-        -T $prefix \\
-        $bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        scportrait: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
-    """
-
-    stub:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
-    """
-    touch ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        scportrait: \$(samtools --version |& sed '1!d ; s/samtools //')
-    END_VERSIONS
-    """
+    template 'scportrait_run.py'
 }
diff --git a/modules/local/scportrait/meta.yml b/modules/local/scportrait/meta.yml
index 260e8cf..42f6bfc 100644
--- a/modules/local/scportrait/meta.yml
+++ b/modules/local/scportrait/meta.yml
@@ -1,62 +1,56 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "scportrait"
-## TODO nf-core: Add a description of the module and list keywords
-description: write your description here
+description: Generation of single-cell representations from raw microscopy images
 keywords:
-  - sort
-  - example
-  - genomics
+  - image analysis
+  - spatial omics
+  - image representation
 tools:
   - "scportrait":
-      ## TODO nf-core: Add a description and other details for the software below
-      description: ""
-      homepage: ""
-      documentation: ""
-      tool_dev_url: ""
-      doi: ""
-      licence:
-      identifier:
+      description: "Computational framework to generate single cell datasets from raw microscopy images."
+      homepage: "https://github.com/MannLabs/scPortrait/tree/main"
+      documentation: "https://github.com/MannLabs/scPortrait/blob/main/README.md"
+      tool_dev_url: "https://github.com/MannLabs/scPortrait"
+      doi: "10.1101/2023.06.01.542416"
+      licence: "Apache-2.0"
 
-## TODO nf-core: Add a description of all of the variables used as input
 input:
-  # Only when we have meta
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. `[ id:'sample1', single_end:false ]`
-
-    ## TODO nf-core: Delete / customise this example input
-    - bam:
-        type: file
-        description: Sorted BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
-        ontologies:
-          - edam: "http://edamontology.org/format_25722"
-          - edam: "http://edamontology.org/format_2573"
-          - edam: "http://edamontology.org/format_3462"
-
-
-## TODO nf-core: Add a description of all of the variables used as output
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1' ]`
+
+  - sdata:
+      type: file
+      description: SpatialData object (Zarr or H5AD) with image and segmentation masks
+      pattern: "*.{zarr,h5ad}"
+
+  - cell_id_identifier:
+      type: string
+      description: Optional column name in the segmentation table that links to cell masks
+      optional: true
+
 output:
-  - bam:
-    #Only when we have meta
+  - sc_mask:
     - meta:
         type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. `[ id:'sample1', single_end:false ]`
-    ## TODO nf-core: Delete / customise this example output
-    - "*.bam":
+        description: Groovy Map containing sample information
+    - "*_masks.tif":
         type: file
-        description: Sorted BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
-        ontologies:
-          - edam: "http://edamontology.org/format_25722"
-          - edam: "http://edamontology.org/format_2573"
-          - edam: "http://edamontology.org/format_3462"
-
+        description: Segmentation mask visualization generated by scPortrait
+        pattern: "*_masks.tif"
+
+  - sc_images:
+    - meta:
+        type: map
+        description: Groovy Map containing sample information
+    - "*_cells.png":
+        type: file
+        description: Visualization of extracted single-cell images
+        pattern: "*_cells.png"
+
   - versions:
     - "versions.yml":
         type: file
diff --git a/modules/local/scportrait/templates/scportrait_run.py b/modules/local/scportrait/templates/scportrait_run.py
new file mode 100644
index 0000000..90fcc1e
--- /dev/null
+++ b/modules/local/scportrait/templates/scportrait_run.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+import os
+import matplotlib.pyplot as plt
+import spatialdata
+from scportrait.pipeline.project import Project
+from scportrait.pipeline.extraction.hdf5 import HDF5CellExtraction
+
+def run_scportrait(
+    sdata_path,
+    project_location="scportrait_output",
+    cell_id_identifier=None
+):
+    os.makedirs(project_location, exist_ok=True)
+
+    s = spatialdata.read_zarr(sdata_path)
+    image_names = list(s.images.keys())
+    labels = s.labels.keys()
+
+    cyto_key = "seg_all_cytosol"
+    nucleus_key = "seg_all_nucleus"
+
+    for image_name in image_names:
+        print(f"[INFO] Processing image: {image_name}")
+
+        use_cyto = cyto_key in labels
+        use_nucleus = nucleus_key in labels
+
+        if not use_cyto and not use_nucleus:
+            print(f"[WARN] Skipping {image_name}: no segmentation masks found.")
+            continue
+
+        # Prefer the cytosol mask when both masks are present
+        segmentation_mask_for_extraction = cyto_key if use_cyto else nucleus_key
+
+        sub_project_location = os.path.join(project_location, image_name)
+        os.makedirs(sub_project_location, exist_ok=True)
+
+        project = Project(
+            os.path.abspath(sub_project_location),
+            config_path={"segmentation_mask": segmentation_mask_for_extraction},
+            overwrite=True,
+            debug=False,
+            segmentation_f=None,
+            extraction_f=HDF5CellExtraction,
+            featurization_f=None,
+            selection_f=None
+        )
+
+        load_kwargs = dict(
+            sdata_path=sdata_path,
+            input_image_name=image_name,
+            cytosol_segmentation_name=cyto_key if use_cyto else None,
+            nucleus_segmentation_name=nucleus_key if use_nucleus else None,
+            overwrite=True,
+            keep_all=True,
+            remove_duplicates=True
+        )
+
+        if cell_id_identifier is not None:
+            load_kwargs["cell_id_identifier"] = cell_id_identifier
+
+        project.load_input_from_sdata(**load_kwargs)
+
+        project.extract()
+
+        fig1 = project.plot_segmentation_masks()
+        fig1.savefig(os.path.join(sub_project_location, f"{image_name}_masks.tif"))
+
+        fig2 = project.plot_single_cell_images()
+        fig2.savefig(os.path.join(sub_project_location, f"{image_name}_cells.png"))
+
+    # Write version info into the task work directory so Nextflow can collect it
+    with open("versions.yml", "w") as f:
+        f.write('"${task.process}":\\n')
+        f.write('    scportrait: "v1.0.0"\\n')
+
+
+if __name__ == "__main__":
+    # Inputs are interpolated by Nextflow when this script runs via `template`
+    sdata_path = "${sdata}"
+    cell_id_identifier = "${cell_id_identifier}" or None
+
+    run_scportrait(
+        sdata_path=sdata_path,
+        cell_id_identifier=cell_id_identifier
+    )
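
Note: the template's first step can be smoke-tested on its own. A minimal sketch, assuming the spatialdata package (which provides read_zarr) and a zarr store containing the two label keys the script looks for; the path and keys below are hypothetical:

    import spatialdata

    s = spatialdata.read_zarr("Xenium_Prime_Mouse_Ileum_tiny_outs.zarr")  # hypothetical local copy

    print("images:", list(s.images.keys()))
    print("labels:", list(s.labels.keys()))

    # Mirror of the template's mask preference: cytosol first, then nucleus.
    labels = s.labels.keys()
    mask = "seg_all_cytosol" if "seg_all_cytosol" in labels else "seg_all_nucleus"
    print("mask used for extraction:", mask)
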
diff --git a/modules/local/scportrait/tests/main.nf.test b/modules/local/scportrait/tests/main.nf.test
index c407477..e7508c1 100644
--- a/modules/local/scportrait/tests/main.nf.test
+++ b/modules/local/scportrait/tests/main.nf.test
@@ -1,5 +1,4 @@
-// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
-// nf-core modules test scportrait
+
 nextflow_process {
 
     name "Test Process SCPORTRAIT"
     script "../main.nf"
     process "SCPORTRAIT"
 
     tag "modules"
-    tag "modules_"
     tag "scportrait"
 
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
-    test("sarscov2 - bam") {
-
-        // TODO nf-core: If you are created a test for a chained module
-        // (the module requires running more than one process to generate the required output)
-        // add the 'setup' method here.
-        // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).
-
-        when {
+    setup {
+        run("UNZIP") {
+            script "modules/nf-core/unzip/main.nf"
             process {
                 """
-                // TODO nf-core: define inputs of the process here. Example:
-
-                input[0] = [
-                    [ id:'test', single_end:false ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
-                ]
+                input[0] = [[], file('https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip', checkIfExists: true)]
                 """
             }
         }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-                //TODO nf-core: Add all required assertions to verify the test output.
-                // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
-            )
-        }
-
-    }
+    }
 
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
-    test("sarscov2 - bam - stub") {
-
-        options "-stub"
+    test("scportrait - from spatialdata object") {
 
         when {
             process {
                 """
-                // TODO nf-core: define inputs of the process here. Example:
-
-                input[0] = [
-                    [ id:'test', single_end:false ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
-                ]
+                input[0] = Channel.of([
+                    [ id: "test" ]
+                ]).combine(UNZIP.out.unzipped_archive.map { it[1] + "/Xenium_Prime_Mouse_Ileum_tiny_outs.zarr" })
                 """
             }
         }
@@ -63,11 +34,10 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out).match() }
-                //TODO nf-core: Add all required assertions to verify the test output.
+                { assert process.out.sc_mask.any { it[1].toString().endsWith("_masks.tif") } },
+                { assert process.out.sc_images.any { it[1].toString().endsWith("_cells.png") } },
+                { assert process.out.versions }
             )
         }
-
     }
-
 }