diff --git a/modules/meta-schema.json b/modules/meta-schema.json new file mode 100644 index 0000000..ccad8f3 --- /dev/null +++ b/modules/meta-schema.json @@ -0,0 +1,279 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "title": "Meta yaml", + "description": "Validate the meta yaml file for an nf-core module", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the module" + }, + "description": { + "type": "string", + "description": "Description of the module" + }, + "keywords": { + "type": "array", + "description": "Keywords for the module", + "items": { + "type": "string", + "not": { + "const": "example" + } + }, + "uniqueItems": true, + "minItems": 3 + }, + "authors": { + "type": "array", + "description": "Authors of the module", + "items": { + "type": "string" + } + }, + "maintainers": { + "type": "array", + "description": "Maintainers of the module", + "items": { + "type": "string" + } + }, + "extra_args": { + "type": "array", + "description": "Extra arguments for the module", + "items": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "Description of the argument" + } + } + } + }, + "input": { + "type": "array", + "description": "Input channels for the module", + "items": { + "oneOf": [ + { + "type": "array", + "items": { + "type": "object", + "patternProperties": { + ".*": { + "$ref": "#/definitions/elementProperties" + } + } + } + }, + { + "type": "object", + "patternProperties": { + ".*": { + "$ref": "#/definitions/elementProperties" + } + } + } + ] + } + }, + "output": { + "type": "object", + "description": "Output channels for the module", + "patternProperties": { + ".*": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "patternProperties": { + ".*": { + "$ref": "#/definitions/elementProperties" + } + } + }, + { + "type": "array", + "items": { + "type": "object", + "patternProperties": { + ".*": { + "$ref": 
"#/definitions/elementProperties" + } + } + } + } + ] + } + } + } + }, + "tools": { + "type": "array", + "description": "Tools used by the module", + "items": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "Description of the output channel" + }, + "homepage": { + "type": "string", + "description": "Homepage of the tool", + "pattern": "^(http|https)://.*$" + }, + "documentation": { + "type": "string", + "description": "Documentation of the tool", + "pattern": "^(http|https|ftp)://.*$" + }, + "tool_dev_url": { + "type": "string", + "description": "URL of the development version of the tool's documentation", + "pattern": "^(http|https)://.*$" + }, + "doi": { + "description": "DOI of the tool", + "anyOf": [ + { + "type": "string", + "pattern": "^10\\.\\d{4,9}\\/[^,]+$" + }, + { + "type": "string", + "enum": [ + "no DOI available" + ] + } + ] + }, + "licence": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Licence of the tool", + "minItems": 1, + "uniqueItems": true, + "message": "Licence must be an array of one or more entries, e.g. 
[\"MIT\"]" + }, + "identifier": { + "description": "bio.tools identifier of the tool", + "anyOf": [ + { + "type": "string", + "pattern": "^biotools:.*$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + } + }, + "required": [ + "description" + ], + "anyOf": [ + { + "required": [ + "homepage" + ] + }, + { + "required": [ + "documentation" + ] + }, + { + "required": [ + "tool_dev_url" + ] + }, + { + "required": [ + "doi" + ] + } + ] + } + } + } + } + }, + "definitions": { + "elementProperties": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Type of the channel element", + "enum": [ + "map", + "file", + "directory", + "string", + "integer", + "float", + "boolean", + "list" + ] + }, + "description": { + "type": "string", + "description": "Description of the channel" + }, + "pattern": { + "type": "string", + "description": "Pattern of the channel, given in Java glob syntax" + }, + "enum": { + "type": "array", + "description": "List of allowed values for the channel", + "items": { + "type": [ + "string", + "number", + "boolean", + "array", + "object" + ] + }, + "uniqueItems": true + }, + "ontologies": { + "type": "array", + "description": "List of ontologies for the channel", + "uniqueItems": true, + "items": { + "type": "object", + "patternProperties": { + ".*": { + "type": "string", + "pattern": "^(http|https)://.*" + } + } + } + } + }, + "required": [ + "type", + "description" + ] + } + }, + "required": [ + "name", + "description", + "keywords", + "authors", + "output", + "tools" + ] +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/csv/concat/main.nf b/modules/sanger-cellgeni/csv/concat/main.nf new file mode 100644 index 0000000..546ce17 --- /dev/null +++ b/modules/sanger-cellgeni/csv/concat/main.nf @@ -0,0 +1,25 @@ +process CSV_CONCAT { + tag "Concatenating CSV files" + container 'docker://quay.io/cellgeni/metacells-python:latest' + + input: + tuple val(meta), path(csv_files, name: "input/*.csv") + + 
output: + tuple val(meta), path("*.csv"), emit: csv + tuple val(meta), path("*.json"), emit: json + path "versions.yml", emit: versions + + script: + def prefix = task.ext.prefix ?: "concatenated" + def args = task.ext.args ?: "" + """ + concat.py --input ${csv_files} --prefix "${prefix}" ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | awk '{print \$2}') + pandas: \$( python -c "import pandas; print(pandas.__version__)" ) + END_VERSIONS + """ +} diff --git a/modules/sanger-cellgeni/csv/concat/meta.yml b/modules/sanger-cellgeni/csv/concat/meta.yml new file mode 100644 index 0000000..dc27480 --- /dev/null +++ b/modules/sanger-cellgeni/csv/concat/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "csv_concat" +description: + "Module to concatenate multiple CSV files into a single CSV and JSON + output" +keywords: + - csv + - concatenate + - merge + - pandas + - json +tools: + - pandas: + description: Powerful data structures for data analysis, time series, and + statistics + homepage: https://pandas.pydata.org/ + documentation: https://pandas.pydata.org/docs/ + licence: ["BSD-3-Clause"] + identifier: "biotools:pandas" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - csv_files: + type: file + description: "Multiple CSV files to be concatenated" + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV +output: + csv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csv": + type: file + description: "Concatenated CSV file" + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.json": + type: file + description: "Concatenated data in JSON format" + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + versions: + - versions.yml: + type: file + description: "YAML file containing software versions used" + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@claptar" +maintainers: + - "@claptar" diff --git a/modules/sanger-cellgeni/csv/concat/module.config b/modules/sanger-cellgeni/csv/concat/module.config new file mode 100644 index 0000000..f624a68 --- /dev/null +++ b/modules/sanger-cellgeni/csv/concat/module.config @@ -0,0 +1,20 @@ +process { + withName: CSV_CONCAT { + ext.prefix = "metadata" + ext.args = { + [ + "--axis 'index'", + "--join 'outer'", + ].join(' ') + } + queue = 'normal' + cpus = 1 + memory = '2 GB' + publishDir = [ + mode: 'link', + path: 'results', + pattern: '*.{csv,json}', + overwrite: true, + ] + } +} diff --git a/modules/sanger-cellgeni/csv/concat/resources/usr/bin/concat.py b/modules/sanger-cellgeni/csv/concat/resources/usr/bin/concat.py new file mode 100755 index 0000000..0cc4cf5 --- /dev/null +++ b/modules/sanger-cellgeni/csv/concat/resources/usr/bin/concat.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +import argparse +import pandas as pd +import json + +def init_parser() -> argparse.ArgumentParser: + """ + Initialise argument parser for the script + """ + parser = argparse.ArgumentParser( + description="Concatenates .csv files and save the result in .csv and .json format" + ) + parser.add_argument( + "--input", + metavar="", + nargs="+", + type=str, + help="Specify a path to the .csv files to concatenate", + ) + parser.add_argument( + "--axis", + metavar="", + type=str, + default="index", + help="Axis to concatenate along ('columns' or 'index'; default: 'index')", + ) + parser.add_argument( + "--join", + metavar="", + type=str, + default="outer", + help="How to handle indexes on other axis 
(or axes). Options are 'inner' and 'outer' (default: 'outer')", + ) + parser.add_argument( + "--prefix", + metavar="", + type=str, + default="output", + help="Prefix for the output files (default: 'output')", + ) + return parser + + +def main(): + """ + Main function of the script + """ + # parse script arguments + parser = init_parser() + args = parser.parse_args() + + # read input files + csv_files = [pd.read_csv(f) for f in args.input] + + # concatenate .csv files + result = pd.concat(csv_files, axis=args.axis, join=args.join) + + # sort values and columns + result = result.reindex(sorted(result.columns), axis=1) + result = result.sort_values(by=result.columns.tolist()) + + # save result + result.to_csv(f"{args.prefix}.csv", index=False) + result.to_json(f"{args.prefix}.json", orient="records", lines=True, indent=4) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/modules/sanger-cellgeni/csv/concat/tests/main.nf.test b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test new file mode 100644 index 0000000..67278a0 --- /dev/null +++ b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test @@ -0,0 +1,45 @@ +nextflow_process { + name "Test Process: CSV_CONCAT" + script "../main.nf" + process "CSV_CONCAT" + + tag "modules" + tag "csv" + tag "csv/concat" + tag "modules_sangercellgeni" + + test("Concatenate multiple CSV files") { + tag "basic" + config "../module.config" + + when { + params { + test_data_base = "https://raw.githubusercontent.com/cellgeni/nf-upload2irods/4d31aff47e156c8256f990da525e68bb7bc134af/tests/data/" + } + + process { + """ + input[0] = [ + [id: "test_concat"], + [ + file(params.test_data_base + "csv/concat/file1.csv"), + file(params.test_data_base + "csv/concat/file2.csv"), + file(params.test_data_base + "csv/concat/file3.csv") + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert process.out.json }, + { assert path( 
process.out.csv.get(0).get(1) ).md5 == path( "tests/data/csv/concat/results.csv" ).md5 }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/csv/concat/tests/main.nf.test.snap b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test.snap new file mode 100644 index 0000000..df158e6 --- /dev/null +++ b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "Concatenate multiple CSV files": { + "content": [ + { + "0": [ + [ + { + "id": "test_concat" + }, + "metadata.csv:md5,969e0c9fee0ad4156eb2f6ac2e7ff815" + ] + ], + "1": [ + [ + { + "id": "test_concat" + }, + "metadata.json:md5,e9573a3c0f88048e12a5655a4bc80d5a" + ] + ], + "2": [ + "versions.yml:md5,efc3533460e957d1714d02d8dd758890" + ], + "csv": [ + [ + { + "id": "test_concat" + }, + "metadata.csv:md5,969e0c9fee0ad4156eb2f6ac2e7ff815" + ] + ], + "json": [ + [ + { + "id": "test_concat" + }, + "metadata.json:md5,e9573a3c0f88048e12a5655a4bc80d5a" + ] + ], + "versions": [ + "versions.yml:md5,efc3533460e957d1714d02d8dd758890" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-24T17:20:52.724355864" + } +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/main.nf b/modules/sanger-cellgeni/irods/aggregatemetadata/main.nf new file mode 100644 index 0000000..eaef33c --- /dev/null +++ b/modules/sanger-cellgeni/irods/aggregatemetadata/main.nf @@ -0,0 +1,39 @@ +process IRODS_AGGREGATEMETADATA { + tag "Aggregating metadata for ${meta.id}" + container 'docker://quay.io/cellgeni/metacells-python:latest' + + input: + tuple val(meta), path(irods_metadata, name: "input.csv") + + output: + tuple val(meta), path("metadata.csv"), emit: csv + tuple val(meta), path("metadata.json"), emit: json + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: '--dup-sep ";" --index_name "id"' + """ + aggregate_metadata.py \ + ${args} \ + 
--input input.csv \ + --id ${meta.id} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | awk '{print \$2}') + pandas: \$( python -c "import pandas; print(pandas.__version__)" ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--dup-sep "," --index_name "id"' + """ + touch metadata.csv metadata.json + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | awk '{print \$2}') + pandas: \$( python -c "import pandas; print(pandas.__version__)" ) + END_VERSIONS + """ +} diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/meta.yml b/modules/sanger-cellgeni/irods/aggregatemetadata/meta.yml new file mode 100644 index 0000000..43f0907 --- /dev/null +++ b/modules/sanger-cellgeni/irods/aggregatemetadata/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "irods_aggregatemetadata" +description: + "Module to aggregate iRODS metadata from CSV files with duplicate attribute + handling and output as CSV and JSON" +keywords: + - irods + - metadata + - csv + - json + - pandas +tools: + - pandas: + description: Powerful data structures for data analysis, time series, and + statistics + homepage: https://pandas.pydata.org/ + documentation: https://pandas.pydata.org/docs/ + licence: ["BSD-3-Clause"] + identifier: "biotools:pandas" + args_id: "$args" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - irods_metadata: + type: file + description: "CSV file containing iRODS metadata to be aggregated" + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV +output: + csv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - metadata.csv: + type: file + description: "CSV file containing aggregated iRODS metadata" + pattern: "metadata.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - metadata.json: + type: file + description: "JSON file containing aggregated iRODS metadata" + pattern: "metadata.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + versions: + - versions.yml: + type: file + description: "YAML file containing software versions used" + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@claptar" +maintainers: + - "@claptar" diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/module.config b/modules/sanger-cellgeni/irods/aggregatemetadata/module.config new file mode 100644 index 0000000..e1ce906 --- /dev/null +++ b/modules/sanger-cellgeni/irods/aggregatemetadata/module.config @@ -0,0 +1,13 @@ +process { + withName: IRODS_AGGREGATEMETADATA { + ext.args = { + [ + params.dup_meta_separator ? "--dup-sep '${params.dup_meta_separator}'" : "--dup-sep ';'", + params.metadata_index_name ? 
"--index_name '${params.metadata_index_name}'" : "--index_name 'irodspath'", + ].join(' ') + } + queue = 'small' + cpus = 1 + memory = 70.MB + } +} diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/resources/usr/bin/aggregate_metadata.py b/modules/sanger-cellgeni/irods/aggregatemetadata/resources/usr/bin/aggregate_metadata.py new file mode 100755 index 0000000..cb91af1 --- /dev/null +++ b/modules/sanger-cellgeni/irods/aggregatemetadata/resources/usr/bin/aggregate_metadata.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +import os +import json +import argparse +import pandas as pd +import json +import logging + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(levelname)s: %(message)s' +) + +def init_parser() -> argparse.ArgumentParser: + """ + Initialise argument parser for the script + """ + parser = argparse.ArgumentParser( + description="Aggregates metadata for duplicated attributes and saves in .csv and .json format" + ) + parser.add_argument( + "--input", + metavar="", + type=str, + help="Specify a path to the .csv file with iRODS metadata", + ) + parser.add_argument( + "--dup-sep", + metavar="", + type=str, + default=",", + help="Separator for duplicated metadata attributes (default: ',')", + ) + parser.add_argument( + "--index_name", + metavar="", + type=str, + default="id", + help="Name of the index column for the output .csv file (default: 'id')", + ) + parser.add_argument( + "--id", + metavar="", + type=str, + default=None, + help="Identifier to use for the index column in the output .csv file (default: None)", + ) + return parser + + +def main(): + """ + Main function of the script + """ + + # parse script arguments + parser = init_parser() + args = parser.parse_args() + + # Check if file exists and not empty + if not os.path.isfile(args.input) or os.path.getsize(args.input) == 0: + logging.warning(f"Input file '{args.input}' is empty or does not exist - creating empty output files") + + # Create a DataFrame with only 
index column + metadata = pd.DataFrame(index=[args.id] if args.id else None) + metadata.index.name = args.index_name + else: + # read input metadata file; dtype=str and keep_default_na=False keep every field as the literal text from the file, so numeric-looking or NA-like values can still be joined below + irods_metadata = pd.read_csv(args.input, header=None, names=["attribute", "value", "unit"], dtype=str, keep_default_na=False) + + # aggregate duplicated metadata attributes + metadata = pd.pivot_table(irods_metadata, values="value", columns="attribute", aggfunc=lambda x: args.dup_sep.join(x)) + metadata.index = [args.id] if args.id else metadata.index + metadata.index.name = args.index_name + + # save aggregated metadata + metadata.to_csv("metadata.csv", index=True if args.id else False) + metadata.to_json("metadata.json", orient="index" if args.id else "records", indent=4) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test new file mode 100644 index 0000000..383a1a6 --- /dev/null +++ b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + name "Test Process: IRODS_AGGREGATEMETADATA" + script "../main.nf" + process "IRODS_AGGREGATEMETADATA" + + tag "modules" + tag "irods" + tag "irods/aggregatemetadata" + tag "modules_sangercellgeni" + + setup { + run("IRODS_GETMETADATA", alias: "CRAM") { + script "../../getmetadata/main.nf" + config "../../getmetadata/module.config" + process { + """ + input[0] = [ [id: "cram"], "/seq/illumina/runs/41/41796/lane1/plex1/41796_1#1.cram" ] + """ + } + } + + run("IRODS_GETMETADATA", alias: "CELLRANGER_ARC_OUTPUT") { + script "../../getmetadata/main.nf" + config "../../getmetadata/module.config" + process { + """ + input[0] = [ [id: "cellranger_arc"], "/seq/illumina/cellranger-arc/cellranger-arc202_count_43c2d8dd1eaf98b635896165fd98ae3a" ] + """ + } + } + + run("IRODS_GETMETADATA", alias: "EMPTY") { + script "../../getmetadata/main.nf" + config "../../getmetadata/module.config" + process { + """ + 
input[0] = [ [id: "empty"], "/archive/cellgeni/multiome" ] + """ + } + } + } + + test("Cellranger ARC output metadata aggregation") { + tag "cellranger_arc" + config "../module.config" + + when { + process { + """ + input[0] = CELLRANGER_ARC_OUTPUT.out.csv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert process.out.json }, + { assert path(process.out.csv.get(0).get(1)).csv.rowCount == 1 }, + { assert snapshot(process.out).match() } + ) + } + } + + test(".cram file metadata aggregation") { + tag "cram" + config "../module.config" + + when { + process { + """ + input[0] = CRAM.out.csv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert process.out.json }, + { assert path(process.out.csv.get(0).get(1)).csv.rowCount == 1 }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Empty file output metadata aggregation") { + tag "empty" + config "../module.config" + + when { + process { + """ + input[0] = EMPTY.out.csv + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert process.out.json }, + { assert path(process.out.csv.get(0).get(1)).csv.rows[0] == ["irodspath": "empty"] }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test.snap b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test.snap new file mode 100644 index 0000000..bfb3a1d --- /dev/null +++ b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test.snap @@ -0,0 +1,149 @@ +{ + "Empty file output metadata aggregation": { + "content": [ + { + "0": [ + [ + { + "id": "empty" + }, + "metadata.csv:md5,cbac237f9f819fadf37c85efd0b4269e" + ] + ], + "1": [ + [ + { + "id": "empty" + }, + 
"metadata.json:md5,22e67cc3ae278cb47bca0058382d3330" + ] + ], + "2": [ + "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134" + ], + "csv": [ + [ + { + "id": "empty" + }, + "metadata.csv:md5,cbac237f9f819fadf37c85efd0b4269e" + ] + ], + "json": [ + [ + { + "id": "empty" + }, + "metadata.json:md5,22e67cc3ae278cb47bca0058382d3330" + ] + ], + "versions": [ + "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-24T16:32:25.841505619" + }, + ".cram file metadata aggregation": { + "content": [ + { + "0": [ + [ + { + "id": "cram" + }, + "metadata.csv:md5,6ea84f3640a255d1adf9578b6f0c35f4" + ] + ], + "1": [ + [ + { + "id": "cram" + }, + "metadata.json:md5,a5eac87d4b5894c5dacda14d5261cb3e" + ] + ], + "2": [ + "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134" + ], + "csv": [ + [ + { + "id": "cram" + }, + "metadata.csv:md5,6ea84f3640a255d1adf9578b6f0c35f4" + ] + ], + "json": [ + [ + { + "id": "cram" + }, + "metadata.json:md5,a5eac87d4b5894c5dacda14d5261cb3e" + ] + ], + "versions": [ + "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-24T16:40:51.463429041" + }, + "Cellranger ARC output metadata aggregation": { + "content": [ + { + "0": [ + [ + { + "id": "cellranger_arc" + }, + "metadata.csv:md5,22087d13d841154d0a548f161fd8e723" + ] + ], + "1": [ + [ + { + "id": "cellranger_arc" + }, + "metadata.json:md5,c6f62aeba959b75ab8e476fe6c18eb26" + ] + ], + "2": [ + "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134" + ], + "csv": [ + [ + { + "id": "cellranger_arc" + }, + "metadata.csv:md5,22087d13d841154d0a548f161fd8e723" + ] + ], + "json": [ + [ + { + "id": "cellranger_arc" + }, + "metadata.json:md5,c6f62aeba959b75ab8e476fe6c18eb26" + ] + ], + "versions": [ + "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + 
"timestamp": "2025-10-24T16:40:01.098156284" + } +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/attachmetadata/main.nf b/modules/sanger-cellgeni/irods/attachmetadata/main.nf new file mode 100644 index 0000000..4453b1d --- /dev/null +++ b/modules/sanger-cellgeni/irods/attachmetadata/main.nf @@ -0,0 +1,111 @@ +def metaToTsv(meta) { + def tsv_string = meta + .findAll { key, value -> key != 'id' && value } + .collectMany { key, value -> + value + .toString() + .split(/\s*,\s*/) + .collect { it.trim() } + .findAll { it } + .collect { v -> "${key}\t${v}" } + } + .join('\\n') + .stripIndent() + .replaceAll('"', '\\\\"') + // remove leading whitespace and escape quotes + return tsv_string +} + +process IRODS_ATTACHMETADATA { + tag "Attaching metadata for ${meta.id}" + + input: + tuple val(meta), val(irodspath) + + output: + path "versions.yml", emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def irodspath = irodspath.replaceFirst('/$', '') + def meta_tsv = metaToTsv(meta) + def delimiter = task.ext.delimiter ?: "" + """ + # Create tsv file with metadata + set -euo pipefail + echo -e "${meta_tsv}" > metadata.tsv + + # Check if irodspath exists + if ils -d "${irodspath}" | grep -q ':\$'; then + resource="-C" + elif ils -d "${irodspath}"; then + resource="-d" + else + echo "Error: iRODS path ${irodspath} does not exist." 
+ exit 1 + fi + + # Get existing metadata from iRODS + get_metadata.sh \$resource "${irodspath}" > existing_metadata.csv + + echo "Existing metadata for ${irodspath}:" + cat existing_metadata.csv + + # Remove existing metadata if specified + if [ "${task.ext.remove_existing_metadata}" == "true" ]; then + echo "Removing existing metadata for ${irodspath}" + imeta rmw \$resource "${irodspath}" % % || echo "No metadata to remove (this is OK)" + :> existing_metadata.csv # clear file + fi + + # Load metadata to iRODS + echo "Current metadata for ${irodspath}:" + get_metadata.sh \$resource "${irodspath}" + set +e + while IFS=\$'\\t' read -r key value; do + [[ -z "\$key" || -z "\$value" ]] && continue # skip empty lines + + # Check if value contains semicolon delimiter + if [[ -n "${delimiter}" && "\$value" == *"${delimiter}"* ]]; then + # Split by semicolon and process each value separately + IFS='${delimiter}' read -ra VALUES <<< "\$value" + for val in "\${VALUES[@]}"; do + val=\$(echo "\$val" | xargs) # trim whitespace + [[ -z "\$val" ]] && continue # skip empty values + + # Check if the key value pair already exists in iRODS metadata + if grep -qzP "\${key},\${val}" existing_metadata.csv; then + echo "[SKIP] \$key=\$val already present" + else + echo "Adding \$key=\$val to iRODS metadata" + imeta add \$resource "${irodspath}" "\$key" "\$val" + fi + done + else + # Process single value as before + if grep -qzP "\${key},\${value}" existing_metadata.csv; then + echo "[SKIP] \$key=\$value already present" + else + echo "Adding \$key=\$value to iRODS metadata" + imeta add \$resource "${irodspath}" "\$key" "\$value" + fi + fi + done < metadata.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + irods: \$(ienv | grep version | awk '{ print \$3 }') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def meta_tsv = metaToTsv(meta) + """ + echo -e "${meta_tsv}" > metadata.tsv + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 
irods: \$(ienv | grep version | awk '{ print \$3 }') + END_VERSIONS + """ +} diff --git a/modules/sanger-cellgeni/irods/attachmetadata/meta.yml b/modules/sanger-cellgeni/irods/attachmetadata/meta.yml new file mode 100644 index 0000000..0b3ae5d --- /dev/null +++ b/modules/sanger-cellgeni/irods/attachmetadata/meta.yml @@ -0,0 +1,42 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "irods_attachmetadata" +description: + "Module to attach metadata to iRODS collections and data objects with + support for duplicate value splitting" +keywords: + - irods + - metadata + - attach + - collections + - data objects +tools: + - irods: + description: Integrated Rule-Oriented Data System (iRODS) is open source + data management software for a cancer genome analysis workflow. + homepage: https://irods.org/ + documentation: https://irods.org/documentation/ + doi: 10.1186/s12859-018-2576-5 + licence: ["BSD-3-Clause"] + identifier: "biotools:iRODS" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information and metadata to attach + e.g. 
[ id:'test', author:'John Doe', study:'cancer_research' ] + - irodspath: + type: string + description: "iRODS collection or data object path to attach metadata to" + ontologies: [] +output: + versions: + - versions.yml: + type: file + description: "YAML file containing software versions used" + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@claptar" +maintainers: + - "@claptar" diff --git a/modules/sanger-cellgeni/irods/attachmetadata/module.config b/modules/sanger-cellgeni/irods/attachmetadata/module.config new file mode 100644 index 0000000..490f775 --- /dev/null +++ b/modules/sanger-cellgeni/irods/attachmetadata/module.config @@ -0,0 +1,11 @@ +process { + withName: IRODS_ATTACHMETADATA { + ext.remove_existing_metadata = false + ext.delimiter = ';' + maxForks = 5 + array = 500 + queue = 'transfer' + cpus = 1 + memory = '2 GB' + } +} diff --git a/modules/sanger-cellgeni/irods/attachmetadata/resources/usr/bin/get_metadata.sh b/modules/sanger-cellgeni/irods/attachmetadata/resources/usr/bin/get_metadata.sh new file mode 100755 index 0000000..dbf8fd9 --- /dev/null +++ b/modules/sanger-cellgeni/irods/attachmetadata/resources/usr/bin/get_metadata.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Print the AVU metadata of an iRODS data object (-d) or collection (-C) as "attribute,value,units" rows + +set -euo pipefail + +# Default to empty strings so a missing argument reaches the usage message instead of aborting under 'set -u' +resource_type=${1:-} +resource_path=${2:-} + +if [[ -z "$resource_type" || -z "$resource_path" ]]; then + echo "Usage: $0 {-d|-C} IRODS_PATH" >&2 + exit 1 +fi + +imeta ls "$resource_type" "$resource_path" | \ + grep -v "^AVUs defined for" | \ + sed -e 's/^attribute: //' \ + -e 's/^value: //' \ + -e 's/^units: //' | \ + awk ' + NR%4==1 { attr=$0 } + NR%4==2 { val=$0 } + NR%4==3 { unit=$0 } + NR%4==0 { printf "%s,%s,%s\n", attr, val, unit; attr=""; val=""; unit="" } + END { if (attr != "") printf "%s,%s,%s\n", attr, val, unit } + ' \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/getmetadata/main.nf b/modules/sanger-cellgeni/irods/getmetadata/main.nf new file mode 100644 index 0000000..73b630c --- /dev/null +++ 
b/modules/sanger-cellgeni/irods/getmetadata/main.nf @@ -0,0 +1,51 @@ +process IRODS_GETMETADATA { + tag "Getting metadata for ${irodspath}" + + input: + tuple val(meta), val(irodspath) + + output: + tuple val(meta), path("irods_metadata.csv"), emit: csv + path "versions.yml", emit: versions + + script: + """ + set -euo pipefail + + # Check if irodspath exists + if ils -d "${irodspath}" | grep -q ':\$'; then + resource="-C" + elif ils -d "${irodspath}"; then + resource="-d" + else + echo "Error: iRODS path ${irodspath} does not exist." + exit 1 + fi + + # Get metadata from iRODS: keep only the lines starting with an attribute/value/units label, then emit one quoted CSV row per three lines + imeta ls \$resource "${irodspath}" \ + | (grep -E '^(attribute|value|units):' || true) \ + | sed -e 's/^attribute: //' -e 's/^value: //' -e 's/^units: //' \ + | sed -e "s/\\\"/'/g" \ + | awk 'NR%3!=0 {printf "\\\"%s\\\",", \$0} NR%3==0 {printf "\\\"%s\\\"\\n", \$0}' > irods_metadata.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + irods: \$(ienv | grep version | awk '{ print \$3 }') + awk: \$(awk --version | head -n1) + sed: \$(sed --version | head -n1 | awk '{ print \$4 }') + END_VERSIONS + """ + + stub: + """ + touch irods_metadata.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + irods: \$(ienv | grep version | awk '{ print \$3 }') + awk: \$(awk --version | head -n1) + sed: \$(sed --version | head -n1 | awk '{ print \$4 }') + END_VERSIONS + """ +} diff --git a/modules/sanger-cellgeni/irods/getmetadata/meta.yml b/modules/sanger-cellgeni/irods/getmetadata/meta.yml new file mode 100644 index 0000000..6282771 --- /dev/null +++ b/modules/sanger-cellgeni/irods/getmetadata/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "irods_getmetadata" +description: + "Module to get metadata from iRODS collections and data objects and + output as CSV" +keywords: + - irods + - metadata + - csv +tools: + - irods: + description: Integrated Rule-Oriented Data System (iRODS) is open 
source + data management software for a cancer genome analysis workflow. + homepage: https://irods.org/ + documentation: https://irods.org/documentation/ + doi: 10.1186/s12859-018-2576-5 + license: ["BSD-3-Clause"] + identifier: "biotools:iRODS" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - irodspath: + type: string + description: "iRODS collection or data object path to get metadata from" + ontologies: [] +output: + csv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - irods_metadata.csv: + type: file + description: "CSV file containing iRODS metadata" + pattern: "irods_metadata.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + versions: + - versions.yml: + type: file + description: "YAML file containing software versions used" + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@claptar" +maintainers: + - "@claptar" diff --git a/modules/sanger-cellgeni/irods/getmetadata/module.config b/modules/sanger-cellgeni/irods/getmetadata/module.config new file mode 100644 index 0000000..9fc53fa --- /dev/null +++ b/modules/sanger-cellgeni/irods/getmetadata/module.config @@ -0,0 +1,10 @@ +process { + withName: IRODS_GETMETADATA { + maxForks = 20 + array = 1000 + queue = 'small' + cpus = 1 + memory = 70.MB + time = 10.m + } +} diff --git a/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test new file mode 100644 index 0000000..996d1a3 --- /dev/null +++ b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test @@ -0,0 +1,98 @@ +nextflow_process { + name "Test Process: IRODS_GETMETADATA" + script "../main.nf" + config "../module.config" + process "IRODS_GETMETADATA" + + tag "modules" + tag "irods" + tag "irods/getmetadata" + tag "modules_sangercellgeni" + + 
test("Get data object metadata") { + tag "data_object" + + when { + process { + """ + input[0] = [ [id: "data_object_1"], "/seq/illumina/runs/41/41796/lane1/plex1/41796_1#1.cram" ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert snapshot(process.out).match() }, + { assert path(process.out.csv.get(0).get(1)).csv.columnCount == 3 } + ) + } + } + + test("Get collection metadata") { + tag "collection" + + when { + process { + """ + input[0] = [ [id: "collection_1"], "/seq/illumina/cellranger-arc/cellranger-arc202_count_43c2d8dd1eaf98b635896165fd98ae3a" ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert snapshot(process.out).match() }, + { assert path(process.out.csv.get(0).get(1)).csv.columnCount == 3 } + ) + } + } + + test("Get linked collection metadata") { + tag "collection" + + when { + process { + """ + input[0] = [ [id: "linked_collection_1"], "/archive/cellgeni/multiome/internal/cellranger_arc/6439/WS_wEMB13400246WS_wEMB13400220" ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert snapshot(process.out).match() }, + { assert path(process.out.csv.get(0).get(1)).csv.columnCount == 3 } + ) + } + } + + test("Get empty metadata") { + tag "empty_metadata" + + when { + process { + """ + input[0] = [ [id: "emptymeta_collection_1"], "/archive/cellgeni/multiome" ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.versions }, + { assert process.out.csv }, + { assert snapshot(process.out).match() }, + ) + } + } +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test.snap b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test.snap new file mode 100644 index 0000000..3434b74 --- /dev/null +++ 
b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Get data object metadata": { + "content": [ + { + "0": [ + [ + { + "id": "data_object_1" + }, + "irods_metadata.csv:md5,197a5b8db4e7c8ae4783731992d6c6ac" + ] + ], + "1": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ], + "csv": [ + [ + { + "id": "data_object_1" + }, + "irods_metadata.csv:md5,197a5b8db4e7c8ae4783731992d6c6ac" + ] + ], + "versions": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-27T13:32:47.27054006" + }, + "Get linked collection metadata": { + "content": [ + { + "0": [ + [ + { + "id": "linked_collection_1" + }, + "irods_metadata.csv:md5,ce97a73816135627adc03848746fb881" + ] + ], + "1": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ], + "csv": [ + [ + { + "id": "linked_collection_1" + }, + "irods_metadata.csv:md5,ce97a73816135627adc03848746fb881" + ] + ], + "versions": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-27T13:33:24.914320709" + }, + "Get empty metadata": { + "content": [ + { + "0": [ + [ + { + "id": "emptymeta_collection_1" + }, + "irods_metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ], + "csv": [ + [ + { + "id": "emptymeta_collection_1" + }, + "irods_metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-27T13:33:43.503718805" + }, + "Get collection metadata": { + "content": [ + { + "0": [ + [ + { + "id": "collection_1" + }, + "irods_metadata.csv:md5,fc54181e42872f3b1725271798b36dd4" + ] + ], + "1": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ], + 
"csv": [ + [ + { + "id": "collection_1" + }, + "irods_metadata.csv:md5,fc54181e42872f3b1725271798b36dd4" + ] + ], + "versions": [ + "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.4" + }, + "timestamp": "2025-10-27T13:33:06.176407056" + } +} \ No newline at end of file diff --git a/modules/sanger-cellgeni/irods/storefile/main.nf b/modules/sanger-cellgeni/irods/storefile/main.nf new file mode 100644 index 0000000..11bd2cd --- /dev/null +++ b/modules/sanger-cellgeni/irods/storefile/main.nf @@ -0,0 +1,63 @@ +process IRODS_STOREFILE { + tag "Loading ${irodspath}" + + input: + tuple val(meta), path(file), val(irodspath) + + output: + tuple val(meta), val(irodspath), env('md5'), env('irods_md5'), emit: md5 + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: "-KV -f -X restart.txt --retries 10 --acl 'read public#archive'" + def prefix = task.ext.prefix ?: "${meta.id}" + """ + module load cellgen/irods + + # calculate MD5 + md5=\$(md5sum "${file}" | awk '{print \$1}') + + # create iRODS directory if it doesn't exist + irodsdir=\$(dirname "${irodspath}") + imkdir -p "\$irodsdir" + + # Load file to iRODS + echo "Loading ${file} to iRODS at ${irodspath}" + iput ${args} \ + -N ${task.cpus} \ + --metadata="md5;\${md5};;" \ + "${file}" "${irodspath}" + + # Calculate iRODS md5 + sleep 1 # wait for iRODS to do it's thing + irods_md5=\$(ichksum "${irodspath}" | awk '{print \$NF}') + + # Compare iRODS md5 with local md5 + if [ "\$md5" != "\$irods_md5" ]; then + echo "MD5 mismatch for ${file}: local \$md5, iRODS \$irods_md5" + exit 1 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + irods: \$(ienv | grep version | awk '{ print \$3 }') + awk: \$(awk --version | head -n1) + md5sum: \$(md5sum --version | head -n1 | awk '{ print \$4 }') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: "-K -f -X restart.txt --retries 10" + def prefix = task.ext.prefix ?: 
"${meta.id}" + """ + # calculate MD5 + md5=\$(md5sum "${file}" | awk '{print \$1}') + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + irods: \$(ienv | grep version | awk '{ print \$3 }') + awk: \$(awk --version | head -n1) + md5sum: \$(md5sum --version | head -n1 | awk '{ print \$4 }') + END_VERSIONS + """ +} diff --git a/modules/sanger-cellgeni/irods/storefile/meta.yml b/modules/sanger-cellgeni/irods/storefile/meta.yml new file mode 100644 index 0000000..7ffaba4 --- /dev/null +++ b/modules/sanger-cellgeni/irods/storefile/meta.yml @@ -0,0 +1,65 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "irods_storefile" +description: + "Module to upload files to iRODS with MD5 checksum verification and metadata + attachment" +keywords: + - irods + - upload + - storage + - checksum +tools: + - irods: + description: Integrated Rule-Oriented Data System (iRODS) is open source + data management software for a cancer genome analysis workflow. + homepage: https://irods.org/ + documentation: https://irods.org/documentation/ + doi: 10.1186/s12859-018-2576-5 + license: BSD-3-Clause + identifier: "biotools:iRODS" + args_id: "$args" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - file: + type: file + description: "File to be uploaded to iRODS" + pattern: "*" + ontologies: [] + - irodspath: + type: string + description: "Target iRODS path where the file should be stored" + ontologies: [] +output: + md5: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - irodspath: + type: string + description: "iRODS path where the file was stored" + ontologies: [] + - md5: + type: string + description: "MD5 checksum of the local file" + ontologies: [] + - irods_md5: + type: string + description: "MD5 checksum of the file stored in iRODS" + ontologies: [] + versions: + - versions.yml: + type: file + description: "YAML file containing software versions used" + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@claptar" +maintainers: + - "@claptar" diff --git a/modules/sanger-cellgeni/irods/storefile/module.config b/modules/sanger-cellgeni/irods/storefile/module.config new file mode 100644 index 0000000..f37e9d7 --- /dev/null +++ b/modules/sanger-cellgeni/irods/storefile/module.config @@ -0,0 +1,10 @@ +process { + withName: 'IRODS_STOREFILE' { + ext.args = "-KV -f -X restart.txt --retries 10 --acl 'read public#archive'" + maxForks = 20 + array = 1000 + queue = 'transfer' + cpus = 2 + memory = '4 GB' + } +} \ No newline at end of file diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..03c3bfd --- /dev/null +++ b/nf-test.config @@ -0,0 +1,9 @@ +config { + testsDir "." 
+ workDir ".nf-test" + configFile "tests/config/nf-test.config" + + plugins { + load "nft-csv@0.1.0" + } +} \ No newline at end of file diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config new file mode 100644 index 0000000..b6e7510 --- /dev/null +++ b/tests/config/nf-test.config @@ -0,0 +1,35 @@ +nextflow.enable.moduleBinaries = true + +params { + // test params + test_data_base = "${projectDir}/tests/data/" + + // process params + output_dir = "results" +} + +process { + cpus = 2 + memory = '4.GB' +} + +executor { + name = 'lsf' + perJobMemLimit = true +} + +profiles { + singularity { + singularity.enabled = true + singularity.autoMounts = true + singularity.runOptions = '-B /lustre,/nfs' + singularity.cacheDir = '/nfs/cellgeni/singularity/images/' + } + docker { + docker.enabled = true + } +} + +docker.registry = 'quay.io' +singularity.registry = 'quay.io' +cleanup = false \ No newline at end of file