diff --git a/modules/meta-schema.json b/modules/meta-schema.json
new file mode 100644
index 0000000..ccad8f3
--- /dev/null
+++ b/modules/meta-schema.json
@@ -0,0 +1,279 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "title": "Meta yaml",
+    "description": "Validate the meta yaml file for an nf-core module",
+    "type": "object",
+    "properties": {
+        "name": {
+            "type": "string",
+            "description": "Name of the module"
+        },
+        "description": {
+            "type": "string",
+            "description": "Description of the module"
+        },
+        "keywords": {
+            "type": "array",
+            "description": "Keywords for the module",
+            "items": {
+                "type": "string",
+                "not": {
+                    "const": "example"
+                }
+            },
+            "uniqueItems": true,
+            "minItems": 3
+        },
+        "authors": {
+            "type": "array",
+            "description": "Authors of the module",
+            "items": {
+                "type": "string"
+            }
+        },
+        "maintainers": {
+            "type": "array",
+            "description": "Maintainers of the module",
+            "items": {
+                "type": "string"
+            }
+        },
+        "extra_args": {
+            "type": "array",
+            "description": "Extra arguments for the module",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "description": {
+                        "type": "string",
+                        "description": "Description of the argument"
+                    }
+                }
+            }
+        },
+        "input": {
+            "type": "array",
+            "description": "Input channels for the module",
+            "items": {
+                "oneOf": [
+                    {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "patternProperties": {
+                                ".*": {
+                                    "$ref": "#/definitions/elementProperties"
+                                }
+                            }
+                        }
+                    },
+                    {
+                        "type": "object",
+                        "patternProperties": {
+                            ".*": {
+                                "$ref": "#/definitions/elementProperties"
+                            }
+                        }
+                    }
+                ]
+            }
+        },
+        "output": {
+            "type": "object",
+            "description": "Output channels for the module",
+            "patternProperties": {
+                ".*": {
+                    "type": "array",
+                    "items": {
+                        "oneOf": [
+                            {
+                                "type": "object",
+                                "patternProperties": {
+                                    ".*": {
+                                        "$ref": "#/definitions/elementProperties"
+                                    }
+                                }
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "patternProperties": {
+                                        ".*": {
+                                            "$ref": "#/definitions/elementProperties"
+                                        }
+                                    }
+                                }
+                            }
+                        ]
+                    }
+                }
+            }
+        },
+        "tools": {
+            "type": "array",
+            "description": "Tools used by the module",
+            "items": {
+                "type": "object",
+                "patternProperties": {
+                    ".*": {
+                        "type": "object",
+                        "properties": {
+                            "description": {
+                                "type": "string",
+                                "description": "Description of the tool"
+                            },
+                            "homepage": {
+                                "type": "string",
+                                "description": "Homepage of the tool",
+                                "pattern": "^(http|https)://.*$"
+                            },
+                            "documentation": {
+                                "type": "string",
+                                "description": "Documentation of the tool",
+                                "pattern": "^(http|https|ftp)://.*$"
+                            },
+                            "tool_dev_url": {
+                                "type": "string",
+                                "description": "URL of the development version of the tool's documentation",
+                                "pattern": "^(http|https)://.*$"
+                            },
+                            "doi": {
+                                "description": "DOI of the tool",
+                                "anyOf": [
+                                    {
+                                        "type": "string",
+                                        "pattern": "^10\\.\\d{4,9}\\/[^,]+$"
+                                    },
+                                    {
+                                        "type": "string",
+                                        "enum": [
+                                            "no DOI available"
+                                        ]
+                                    }
+                                ]
+                            },
+                            "licence": {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                },
+                                "description": "Licence of the tool",
+                                "minItems": 1,
+                                "uniqueItems": true,
+                                "message": "Licence must be an array of one or more entries, e.g. [\"MIT\"]"
+                            },
+                            "identifier": {
+                                "description": "bio.tools identifier of the tool",
+                                "anyOf": [
+                                    {
+                                        "type": "string",
+                                        "pattern": "^biotools:.*$"
+                                    },
+                                    {
+                                        "type": "string",
+                                        "maxLength": 0
+                                    }
+                                ]
+                            }
+                        },
+                        "required": [
+                            "description"
+                        ],
+                        "anyOf": [
+                            {
+                                "required": [
+                                    "homepage"
+                                ]
+                            },
+                            {
+                                "required": [
+                                    "documentation"
+                                ]
+                            },
+                            {
+                                "required": [
+                                    "tool_dev_url"
+                                ]
+                            },
+                            {
+                                "required": [
+                                    "doi"
+                                ]
+                            }
+                        ]
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "elementProperties": {
+            "type": "object",
+            "properties": {
+                "type": {
+                    "type": "string",
+                    "description": "Type of the channel element",
+                    "enum": [
+                        "map",
+                        "file",
+                        "directory",
+                        "string",
+                        "integer",
+                        "float",
+                        "boolean",
+                        "list"
+                    ]
+                },
+                "description": {
+                    "type": "string",
+                    "description": "Description of the channel"
+                },
+                "pattern": {
+                    "type": "string",
+                    "description": "Pattern of the channel, given in Java glob syntax"
+                },
+                "enum": {
+                    "type": "array",
+                    "description": "List of allowed values for the channel",
+                    "items": {
+                        "type": [
+                            "string",
+                            "number",
+                            "boolean",
+                            "array",
+                            "object"
+                        ]
+                    },
+                    "uniqueItems": true
+                },
+                "ontologies": {
+                    "type": "array",
+                    "description": "List of ontologies for the channel",
+                    "uniqueItems": true,
+                    "items": {
+                        "type": "object",
+                        "patternProperties": {
+                            ".*": {
+                                "type": "string",
+                                "pattern": "^(http|https)://.*"
+                            }
+                        }
+                    }
+                }
+            },
+            "required": [
+                "type",
+                "description"
+            ]
+        }
+    },
+    "required": [
+        "name",
+        "description",
+        "keywords",
+        "authors",
+        "output",
+        "tools"
+    ]
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/csv/concat/main.nf b/modules/sanger-cellgeni/csv/concat/main.nf
new file mode 100644
index 0000000..546ce17
--- /dev/null
+++ b/modules/sanger-cellgeni/csv/concat/main.nf
@@ -0,0 +1,25 @@
+process CSV_CONCAT {
+    // Runs concat.py on the staged CSV files and emits the combined table as CSV and JSON,
+    // together with a versions.yml recording the python and pandas versions.
+    tag "Concatenating CSV files"
+    container 'docker://quay.io/cellgeni/metacells-python:latest'
+
+    input:
+    // Inputs are staged as input/*.csv, i.e. in a sub-directory of the task directory
+    tuple val(meta), path(csv_files, name: "input/*.csv")
+
+    output:
+    tuple val(meta), path("*.csv"), emit: csv
+    tuple val(meta), path("*.json"), emit: json
+    path "versions.yml", emit: versions
+
+    script:
+    // ext.prefix sets the output file prefix; ext.args is appended to the concat.py call as-is
+    def prefix = task.ext.prefix ?: "concatenated"
+    def args = task.ext.args ?: ""
+    """
+    concat.py --input ${csv_files} --prefix "${prefix}" ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | awk '{print \$2}')
+        pandas: \$( python -c "import pandas; print(pandas.__version__)" )
+    END_VERSIONS
+    """
+
+    stub:
+    // Dry-run variant: creates empty files under the names concat.py writes (<prefix>.csv / <prefix>.json)
+    def prefix = task.ext.prefix ?: "concatenated"
+    """
+    touch "${prefix}.csv" "${prefix}.json"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | awk '{print \$2}')
+        pandas: \$( python -c "import pandas; print(pandas.__version__)" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/sanger-cellgeni/csv/concat/meta.yml b/modules/sanger-cellgeni/csv/concat/meta.yml
new file mode 100644
index 0000000..dc27480
--- /dev/null
+++ b/modules/sanger-cellgeni/csv/concat/meta.yml
@@ -0,0 +1,67 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "csv_concat"
+description:
+  "Module to concatenate multiple CSV files into a single CSV and JSON
+  output"
+keywords:
+  - csv
+  - concatenate
+  - merge
+  - pandas
+  - json
+tools:
+  - pandas:
+      description: Powerful data structures for data analysis, time series, and
+        statistics
+      homepage: https://pandas.pydata.org/
+      documentation: https://pandas.pydata.org/docs/
+      licence: ["BSD-3-Clause"]
+      identifier: "biotools:pandas"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - csv_files:
+        type: file
+        description: "Multiple CSV files to be concatenated"
+        pattern: "*.csv"
+        ontologies:
+          - edam: http://edamontology.org/format_3752 # CSV
+output:
+  csv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.csv":
+          type: file
+          description: "Concatenated CSV file"
+          pattern: "*.csv"
+          ontologies:
+            - edam: http://edamontology.org/format_3752 # CSV
+  json:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.json":
+          type: file
+          description: "Concatenated data in JSON format"
+          pattern: "*.json"
+          ontologies:
+            - edam: http://edamontology.org/format_3464 # JSON
+  versions:
+    - versions.yml:
+        type: file
+        description: "YAML file containing software versions used"
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@claptar"
+maintainers:
+  - "@claptar"
diff --git a/modules/sanger-cellgeni/csv/concat/module.config b/modules/sanger-cellgeni/csv/concat/module.config
new file mode 100644
index 0000000..f624a68
--- /dev/null
+++ b/modules/sanger-cellgeni/csv/concat/module.config
@@ -0,0 +1,20 @@
+// Process-level settings applied to CSV_CONCAT only.
+process {
+    withName: CSV_CONCAT {
+        // Passed to concat.py as --prefix, so outputs are metadata.csv / metadata.json
+        ext.prefix = "metadata"
+        // Extra concat.py options, joined into one space-separated string
+        ext.args   = {
+            [
+                "--axis 'index'",
+                "--join 'outer'",
+            ].join(' ')
+        }
+        queue      = 'normal'
+        cpus       = 1
+        memory     = '2 GB'
+        // Publish only the CSV/JSON outputs (versions.yml does not match the pattern)
+        publishDir = [
+            mode: 'link',
+            path: 'results',
+            pattern: '*.{csv,json}',
+            overwrite: true,
+        ]
+    }
+}
diff --git a/modules/sanger-cellgeni/csv/concat/resources/usr/bin/concat.py b/modules/sanger-cellgeni/csv/concat/resources/usr/bin/concat.py
new file mode 100755
index 0000000..0cc4cf5
--- /dev/null
+++ b/modules/sanger-cellgeni/csv/concat/resources/usr/bin/concat.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+import argparse
+import pandas as pd
+import json
+
+def init_parser() -> argparse.ArgumentParser:
+    """
+    Initialise argument parser for the script
+    """
+    parser = argparse.ArgumentParser(
+        description="Concatenates .csv files and save the result in .csv and .json format"
+    )
+    parser.add_argument(
+        "--input",
+        metavar="<file>",
+        nargs="+",
+        type=str,
+        help="Specify a path to the .csv files to concatenate",
+    )
+    parser.add_argument(
+        "--axis",
+        metavar="<str>",
+        type=str,
+        default="index",
+        help="Axis to concatenate along ('columns' or 'index'; default: 'index')",
+    )
+    parser.add_argument(
+        "--join",
+        metavar="<str>",
+        type=str,
+        default="outer",
+        help="How to handle indexes on other axis (or axes). Options are 'inner' and 'outer' (default: 'outer')",
+    )
+    parser.add_argument(
+        "--prefix",
+        metavar="<str>",
+        type=str,
+        default="output",
+        help="Prefix for the output files (default: 'output')",
+    )
+    return parser
+
+
+def main():
+    """
+    Main function of the script
+    """
+    # parse script arguments
+    parser = init_parser()
+    args = parser.parse_args()
+
+    # read input files
+    csv_files = [pd.read_csv(f) for f in args.input]
+
+    # concatenate .csv files
+    result = pd.concat(csv_files, axis=args.axis, join=args.join)
+
+    # sort values and columns
+    result = result.reindex(sorted(result.columns), axis=1)
+    result = result.sort_values(by=result.columns.tolist())
+
+    # save result
+    result.to_csv(f"{args.prefix}.csv", index=False)
+    result.to_json(f"{args.prefix}.json", orient="records", lines=True, indent=4)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/csv/concat/tests/main.nf.test b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test
new file mode 100644
index 0000000..67278a0
--- /dev/null
+++ b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test
@@ -0,0 +1,45 @@
+// nf-test suite for the CSV_CONCAT process defined in ../main.nf
+nextflow_process {
+    name "Test Process: CSV_CONCAT"
+    script "../main.nf"
+    process "CSV_CONCAT"
+
+    tag "modules"
+    tag "csv"
+    tag "csv/concat"
+    tag "modules_sangercellgeni"
+
+    // Feeds three remote CSV files to the process, using the settings from ../module.config
+    test("Concatenate multiple CSV files") {
+        tag "basic"
+        config "../module.config"
+
+        when {
+            params {
+                // Test data is fetched from a fixed commit of the nf-upload2irods repository
+                test_data_base = "https://raw.githubusercontent.com/cellgeni/nf-upload2irods/4d31aff47e156c8256f990da525e68bb7bc134af/tests/data/"
+            }
+
+            process {
+                """
+                input[0] = [
+                    [id: "test_concat"],
+                    [
+                        file(params.test_data_base + "csv/concat/file1.csv"),
+                        file(params.test_data_base + "csv/concat/file2.csv"),
+                        file(params.test_data_base + "csv/concat/file3.csv")
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            // Checks: the process succeeds, all three channels emit, the CSV matches the
+            // reference file by md5, and the full output matches the stored snapshot.
+            // NOTE(review): the reference path is relative - presumably resolved from the
+            // directory nf-test is launched in; confirm.
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert process.out.json },
+                { assert path( process.out.csv.get(0).get(1) ).md5 == path( "tests/data/csv/concat/results.csv" ).md5 },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/csv/concat/tests/main.nf.test.snap b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test.snap
new file mode 100644
index 0000000..df158e6
--- /dev/null
+++ b/modules/sanger-cellgeni/csv/concat/tests/main.nf.test.snap
@@ -0,0 +1,51 @@
+{
+    "Concatenate multiple CSV files": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_concat"
+                        },
+                        "metadata.csv:md5,969e0c9fee0ad4156eb2f6ac2e7ff815"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test_concat"
+                        },
+                        "metadata.json:md5,e9573a3c0f88048e12a5655a4bc80d5a"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,efc3533460e957d1714d02d8dd758890"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "test_concat"
+                        },
+                        "metadata.csv:md5,969e0c9fee0ad4156eb2f6ac2e7ff815"
+                    ]
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "test_concat"
+                        },
+                        "metadata.json:md5,e9573a3c0f88048e12a5655a4bc80d5a"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,efc3533460e957d1714d02d8dd758890"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-24T17:20:52.724355864"
+    }
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/main.nf b/modules/sanger-cellgeni/irods/aggregatemetadata/main.nf
new file mode 100644
index 0000000..eaef33c
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/aggregatemetadata/main.nf
@@ -0,0 +1,39 @@
+process IRODS_AGGREGATEMETADATA {
+    // Runs aggregate_metadata.py on one iRODS metadata CSV and emits metadata.csv and
+    // metadata.json, together with a versions.yml recording the python and pandas versions.
+    tag "Aggregating metadata for ${meta.id}"
+    container 'docker://quay.io/cellgeni/metacells-python:latest'
+
+    input:
+    // The metadata file is always staged under the fixed name input.csv
+    tuple val(meta), path(irods_metadata, name: "input.csv")
+
+    output:
+    tuple val(meta), path("metadata.csv"), emit: csv
+    tuple val(meta), path("metadata.json"), emit: json
+    path "versions.yml", emit: versions
+
+    script:
+    // Fallback options, used only when no ext.args is configured for this process
+    def args = task.ext.args ?: '--dup-sep ";" --index_name "id"'
+    // meta.id is quoted so an id containing spaces or shell metacharacters stays one argument
+    """
+    aggregate_metadata.py \
+        ${args} \
+        --input input.csv \
+        --id "${meta.id}"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | awk '{print \$2}')
+        pandas: \$( python -c "import pandas; print(pandas.__version__)" )
+    END_VERSIONS
+    """
+
+    stub:
+    // Dry-run variant: creates empty outputs without calling aggregate_metadata.py
+    """
+    touch metadata.csv metadata.json
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | awk '{print \$2}')
+        pandas: \$( python -c "import pandas; print(pandas.__version__)" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/meta.yml b/modules/sanger-cellgeni/irods/aggregatemetadata/meta.yml
new file mode 100644
index 0000000..43f0907
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/aggregatemetadata/meta.yml
@@ -0,0 +1,68 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "irods_aggregatemetadata"
+description:
+  "Module to aggregate iRODS metadata from CSV files with duplicate attribute
+  handling and output as CSV and JSON"
+keywords:
+  - irods
+  - metadata
+  - csv
+  - json
+  - pandas
+tools:
+  - pandas:
+      description: Powerful data structures for data analysis, time series, and
+        statistics
+      homepage: https://pandas.pydata.org/
+      documentation: https://pandas.pydata.org/docs/
+      licence: ["BSD-3-Clause"]
+      identifier: "biotools:pandas"
+      args_id: "$args"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - irods_metadata:
+        type: file
+        description: "CSV file containing iRODS metadata to be aggregated"
+        pattern: "*.csv"
+        ontologies:
+          - edam: http://edamontology.org/format_3752 # CSV
+output:
+  csv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - metadata.csv:
+          type: file
+          description: "CSV file containing aggregated iRODS metadata"
+          pattern: "metadata.csv"
+          ontologies:
+            - edam: http://edamontology.org/format_3752 # CSV
+  json:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - metadata.json:
+          type: file
+          description: "JSON file containing aggregated iRODS metadata"
+          pattern: "metadata.json"
+          ontologies:
+            - edam: http://edamontology.org/format_3464 # JSON
+  versions:
+    - versions.yml:
+        type: file
+        description: "YAML file containing software versions used"
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@claptar"
+maintainers:
+  - "@claptar"
diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/module.config b/modules/sanger-cellgeni/irods/aggregatemetadata/module.config
new file mode 100644
index 0000000..e1ce906
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/aggregatemetadata/module.config
@@ -0,0 +1,13 @@
+// Process-level settings applied to IRODS_AGGREGATEMETADATA only.
+process {
+    withName: IRODS_AGGREGATEMETADATA {
+        // Build the aggregate_metadata.py options from pipeline params; when a param is
+        // unset (or otherwise falsy) fall back to ';' as the duplicate separator and
+        // 'irodspath' as the index column name
+        ext.args = {
+            [
+                params.dup_meta_separator ? "--dup-sep '${params.dup_meta_separator}'" : "--dup-sep ';'",
+                params.metadata_index_name ? "--index_name '${params.metadata_index_name}'" : "--index_name 'irodspath'",
+            ].join(' ')
+        }
+        queue    = 'small'
+        cpus     = 1
+        memory   = 70.MB
+    }
+}
diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/resources/usr/bin/aggregate_metadata.py b/modules/sanger-cellgeni/irods/aggregatemetadata/resources/usr/bin/aggregate_metadata.py
new file mode 100755
index 0000000..cb91af1
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/aggregatemetadata/resources/usr/bin/aggregate_metadata.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+import os
+import json
+import argparse
+import pandas as pd
+import json
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(levelname)s: %(message)s'
+)
+
+def init_parser() -> argparse.ArgumentParser:
+    """
+    Initialise argument parser for the script
+    """
+    parser = argparse.ArgumentParser(
+        description="Aggregates metadata for duplicated attributes and saves in .csv and .json format"
+    )
+    parser.add_argument(
+        "--input",
+        metavar="<file>",
+        type=str,
+        help="Specify a path to the .csv file with iRODS metadata",
+    )
+    parser.add_argument(
+        "--dup-sep",
+        metavar="<str>",
+        type=str,
+        default=",",
+        help="Separator for duplicated metadata attributes (default: ',')",
+    )
+    parser.add_argument(
+        "--index_name",
+        metavar="<str>",
+        type=str,
+        default="id",
+        help="Name of the index column for the output .csv file (default: 'id')",
+    )
+    parser.add_argument(
+        "--id",
+        metavar="<str>",
+        type=str,
+        default=None,
+        help="Identifier to use for the index column in the output .csv file (default: None)",
+    )
+    return parser
+
+
+def main():
+    """
+    Main function of the script
+    """
+    
+    # parse script arguments
+    parser = init_parser()
+    args = parser.parse_args()
+
+    # Check if file exists and not empty
+    if not os.path.isfile(args.input) or os.path.getsize(args.input) == 0:
+        logging.warning(f"Input file '{args.input}' is empty or does not exist - creating empty output files")
+        
+        # Create a DataFrame with only index column
+        metadata = pd.DataFrame(index=[args.id] if args.id else None)
+        metadata.index.name = args.index_name
+    else:
+        # read input metadata file
+        irods_metadata = pd.read_csv(args.input, header=None, names=["attribute", "value", "unit"])
+
+        # aggregate duplicated metadata attributes
+        metadata = pd.pivot_table(irods_metadata, values="value", columns="attribute", aggfunc=lambda x: args.dup_sep.join(x))
+        metadata.index = [args.id] if args.id else metadata.index
+        metadata.index.name = args.index_name
+
+    # save aggregated metadata
+    metadata.to_csv("metadata.csv", index=True if args.id else False)
+    metadata.to_json("metadata.json", orient="index" if args.id else "records", indent=4)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test
new file mode 100644
index 0000000..383a1a6
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test
@@ -0,0 +1,114 @@
+// nf-test suite for the IRODS_AGGREGATEMETADATA process defined in ../main.nf
+nextflow_process {
+    name "Test Process: IRODS_AGGREGATEMETADATA"
+    script "../main.nf"
+    process "IRODS_AGGREGATEMETADATA"
+
+    tag "modules"
+    tag "irods"
+    tag "irods/aggregatemetadata"
+    tag "modules_sangercellgeni"
+
+    // Runs IRODS_GETMETADATA three times under different aliases; each test below takes
+    // the csv output of one alias as its input.
+    // NOTE(review): presumably requires access to the iRODS paths listed here - confirm.
+    setup {
+        run("IRODS_GETMETADATA", alias: "CRAM") {
+            script "../../getmetadata/main.nf"
+            config "../../getmetadata/module.config"
+            process {
+                """
+                input[0] = [ [id: "cram"], "/seq/illumina/runs/41/41796/lane1/plex1/41796_1#1.cram" ]
+                """
+            }
+        }
+
+        run("IRODS_GETMETADATA", alias: "CELLRANGER_ARC_OUTPUT") {
+            script "../../getmetadata/main.nf"
+            config "../../getmetadata/module.config"
+            process {
+                """
+                input[0] = [ [id: "cellranger_arc"], "/seq/illumina/cellranger-arc/cellranger-arc202_count_43c2d8dd1eaf98b635896165fd98ae3a" ]
+                """
+            }
+        }
+
+        run("IRODS_GETMETADATA", alias: "EMPTY") {
+            script "../../getmetadata/main.nf"
+            config "../../getmetadata/module.config"
+            process {
+                """
+                input[0] = [ [id: "empty"], "/archive/cellgeni/multiome" ]
+                """
+            }
+        }
+    }
+
+    // Metadata of the CELLRANGER_ARC_OUTPUT alias: expects a single-row CSV
+    test("Cellranger ARC output metadata aggregation") {
+        tag "cellranger_arc"
+        config "../module.config"
+
+        when {
+            process {
+                """
+                input[0] = CELLRANGER_ARC_OUTPUT.out.csv
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert process.out.json },
+                { assert path(process.out.csv.get(0).get(1)).csv.rowCount == 1 },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    // Metadata of the CRAM alias: expects a single-row CSV
+    test(".cram file metadata aggregation") {
+        tag "cram"
+        config "../module.config"
+
+        when {
+            process {
+                """
+                input[0] = CRAM.out.csv
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert process.out.json },
+                { assert path(process.out.csv.get(0).get(1)).csv.rowCount == 1 },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    // Metadata of the EMPTY alias: expects a CSV whose only column is the index column
+    // ("irodspath", the default index name set in ../module.config) holding the sample id
+    test("Empty file output metadata aggregation") {
+        tag "empty"
+        config "../module.config"
+
+        when {
+            process {
+                """
+                input[0] = EMPTY.out.csv
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert process.out.json },
+                { assert path(process.out.csv.get(0).get(1)).csv.rows[0] == ["irodspath": "empty"] },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test.snap b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test.snap
new file mode 100644
index 0000000..bfb3a1d
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/aggregatemetadata/tests/main.nf.test.snap
@@ -0,0 +1,149 @@
+{
+    "Empty file output metadata aggregation": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "empty"
+                        },
+                        "metadata.csv:md5,cbac237f9f819fadf37c85efd0b4269e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "empty"
+                        },
+                        "metadata.json:md5,22e67cc3ae278cb47bca0058382d3330"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "empty"
+                        },
+                        "metadata.csv:md5,cbac237f9f819fadf37c85efd0b4269e"
+                    ]
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "empty"
+                        },
+                        "metadata.json:md5,22e67cc3ae278cb47bca0058382d3330"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-24T16:32:25.841505619"
+    },
+    ".cram file metadata aggregation": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "cram"
+                        },
+                        "metadata.csv:md5,6ea84f3640a255d1adf9578b6f0c35f4"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "cram"
+                        },
+                        "metadata.json:md5,a5eac87d4b5894c5dacda14d5261cb3e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "cram"
+                        },
+                        "metadata.csv:md5,6ea84f3640a255d1adf9578b6f0c35f4"
+                    ]
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "cram"
+                        },
+                        "metadata.json:md5,a5eac87d4b5894c5dacda14d5261cb3e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-24T16:40:51.463429041"
+    },
+    "Cellranger ARC output metadata aggregation": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "cellranger_arc"
+                        },
+                        "metadata.csv:md5,22087d13d841154d0a548f161fd8e723"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "cellranger_arc"
+                        },
+                        "metadata.json:md5,c6f62aeba959b75ab8e476fe6c18eb26"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "cellranger_arc"
+                        },
+                        "metadata.csv:md5,22087d13d841154d0a548f161fd8e723"
+                    ]
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "cellranger_arc"
+                        },
+                        "metadata.json:md5,c6f62aeba959b75ab8e476fe6c18eb26"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,c7179ef11646eed9f4262bc31b48d134"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-24T16:40:01.098156284"
+    }
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/attachmetadata/main.nf b/modules/sanger-cellgeni/irods/attachmetadata/main.nf
new file mode 100644
index 0000000..4453b1d
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/attachmetadata/main.nf
@@ -0,0 +1,139 @@
+/**
+ * Render a meta map as the payload that IRODS_ATTACHMETADATA writes to metadata.tsv.
+ *
+ * Every entry except `id` (and entries whose value is falsy) yields one
+ * "key<TAB>value" record per comma-separated value. Records are joined with a
+ * literal backslash-n sequence that `echo -e` later expands into newlines.
+ * Backslashes, double quotes, dollar signs and backticks are escaped so that the
+ * text survives a double-quoted bash string and `echo -e` verbatim instead of
+ * being expanded or executed by the shell.
+ *
+ * @param meta map of metadata attributes (the `id` entry is skipped)
+ * @return single-line string intended for: echo -e "<result>" > metadata.tsv
+ */
+def metaToTsv(meta) {
+    // Backslashes go first: four of them collapse to one after bash and `echo -e`.
+    def shellEscape = { String text ->
+        text
+            .replace('\\', '\\\\\\\\')
+            .replace('"', '\\"')
+            .replace('$', '\\$')
+            .replace('`', '\\`')
+    }
+    def tsv_string = meta
+        .findAll { key, value -> key != 'id' && value }
+        .collectMany { key, value ->
+            value
+                .toString()
+                .split(/\s*,\s*/)
+                .collect { it.trim() }
+                .findAll { it }
+                .collect { v -> "${shellEscape(key.toString())}\t${shellEscape(v)}" }
+        }
+        .join('\\n')
+        .stripIndent()
+    return tsv_string
+}
+
+// Attach the entries of `meta` as AVU metadata to one iRODS collection or data object.
+// Options read from task.ext: `delimiter` (extra value separator, default "") and
+// `remove_existing_metadata` (when it renders as "true", existing AVUs are removed first).
+process IRODS_ATTACHMETADATA {
+    tag "Attaching metadata for ${meta.id}"
+
+    input:
+    tuple val(meta), val(irodspath)
+
+    output:
+    path "versions.yml", emit: versions
+
+    script:
+    // Distinct name: avoids re-declaring the `irodspath` input; one trailing slash is dropped.
+    def irods_target = irodspath.replaceFirst('/$', '')
+    def meta_tsv = metaToTsv(meta)
+    def delimiter = task.ext.delimiter ?: ""
+    """
+    set -euo pipefail
+
+    # Create tsv file with metadata
+    echo -e "${meta_tsv}" > metadata.tsv
+
+    # Check if irodspath exists
+    if ils -d "${irods_target}" | grep -q ':\$'; then
+        resource="-C"
+    elif ils -d "${irods_target}"; then
+        resource="-d"
+    else
+        echo "Error: iRODS path ${irods_target} does not exist." >&2
+        exit 1
+    fi
+
+    # Get existing metadata from iRODS
+    get_metadata.sh \$resource "${irods_target}" > existing_metadata.csv
+
+    echo "Existing metadata for ${irods_target}:"
+    cat existing_metadata.csv
+
+    # Remove existing metadata if specified
+    if [ "${task.ext.remove_existing_metadata}" == "true" ]; then
+        echo "Removing existing metadata for ${irods_target}"
+        imeta rmw \$resource "${irods_target}" % % || echo "No metadata to remove (this is OK)"
+        :> existing_metadata.csv # clear file
+    fi
+
+    # Succeeds when existing_metadata.csv has a line starting with "<key>,<value>,".
+    # Literal prefix comparison: no regex metacharacters, no partial-value matches.
+    avu_exists() {
+        AVU_PREFIX="\$1,\$2," awk 'index(\$0, ENVIRON["AVU_PREFIX"]) == 1 { found = 1; exit } END { exit !found }' existing_metadata.csv
+    }
+
+    # Add one key/value pair unless it is already present; a failed add is reported
+    # on stderr but does not abort the remaining pairs.
+    add_avu() {
+        if avu_exists "\$1" "\$2"; then
+            echo "[SKIP] \$1=\$2 already present"
+        else
+            echo "Adding \$1=\$2 to iRODS metadata"
+            imeta add \$resource "${irods_target}" "\$1" "\$2" || echo "[WARN] Failed to add \$1=\$2" >&2
+        fi
+    }
+
+    # Load metadata to iRODS
+    echo "Current metadata for ${irods_target}:"
+    get_metadata.sh \$resource "${irods_target}"
+    set +e
+    while IFS=\$'\\t' read -r key value; do
+        [[ -z "\$key" || -z "\$value" ]] && continue  # skip empty lines
+
+        # Check if value contains the configured delimiter
+        if [[ -n "${delimiter}" && "\$value" == *"${delimiter}"* ]]; then
+            # Split on the delimiter and process each value separately
+            IFS='${delimiter}' read -ra VALUES <<< "\$value"
+            for val in "\${VALUES[@]}"; do
+                # Trim surrounding whitespace in pure bash (xargs rejects unmatched quotes)
+                val="\${val#"\${val%%[![:space:]]*}"}"
+                val="\${val%"\${val##*[![:space:]]}"}"
+                [[ -z "\$val" ]] && continue  # skip empty values
+                add_avu "\$key" "\$val"
+            done
+        else
+            add_avu "\$key" "\$value"
+        fi
+    done < metadata.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        irods: \$(ienv | grep version | awk '{ print \$3 }')
+    END_VERSIONS
+    """
+
+    stub:
+    def meta_tsv = metaToTsv(meta)
+    """
+    echo -e "${meta_tsv}" > metadata.tsv
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        irods: \$(ienv | grep version | awk '{ print \$3 }')
+    END_VERSIONS
+    """
+}
diff --git a/modules/sanger-cellgeni/irods/attachmetadata/meta.yml b/modules/sanger-cellgeni/irods/attachmetadata/meta.yml
new file mode 100644
index 0000000..0b3ae5d
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/attachmetadata/meta.yml
@@ -0,0 +1,42 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "irods_attachmetadata"
+description:
+  "Module to attach metadata to iRODS collections and data objects with
+  support for duplicate value splitting"
+keywords:
+  - irods
+  - metadata
+  - attach
+  - collections
+  - data objects
+tools:
+  - irods:
+      description: Integrated Rule-Oriented Data System (iRODS) is open source
+        data management software for a cancer genome analysis workflow.
+      homepage: https://irods.org/
+      documentation: https://irods.org/documentation/
+      doi: 10.1186/s12859-018-2576-5
+      license: ["BSD-3-Clause"]
+      identifier: "biotools:iRODS"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information and metadata to attach
+          e.g. [ id:'test', author:'John Doe', study:'cancer_research' ]
+    - irodspath:
+        type: string
+        description: "iRODS collection or data object path to attach metadata to"
+        ontologies: []
+output:
+  versions:
+    - versions.yml:
+        type: file
+        description: "YAML file containing software versions used"
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@claptar"
+maintainers:
+  - "@claptar"
diff --git a/modules/sanger-cellgeni/irods/attachmetadata/module.config b/modules/sanger-cellgeni/irods/attachmetadata/module.config
new file mode 100644
index 0000000..490f775
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/attachmetadata/module.config
@@ -0,0 +1,11 @@
+process {
+    withName: IRODS_ATTACHMETADATA {
+        ext.remove_existing_metadata = false // main.nf removes all existing AVUs first only when this renders as "true"
+        ext.delimiter                = ';'   // values containing this delimiter are split and attached one by one
+        maxForks                     = 5
+        array                        = 500
+        queue                        = 'transfer'
+        cpus                         = 1
+        memory                       = '2 GB'
+    }
+}
diff --git a/modules/sanger-cellgeni/irods/attachmetadata/resources/usr/bin/get_metadata.sh b/modules/sanger-cellgeni/irods/attachmetadata/resources/usr/bin/get_metadata.sh
new file mode 100755
index 0000000..dbf8fd9
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/attachmetadata/resources/usr/bin/get_metadata.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+resource_type=$1
+resource_path=$2
+
+if [[ -z "$resource_type" || -z "$resource_path" ]]; then
+    echo "Usage: $0 <resource_type> <resource_path>"
+    exit 1
+fi
+
+imeta ls $resource_type "$resource_path" | \
+    grep -v "AVUs" | \
+    sed -e 's/^attribute: //' \
+        -e 's/^value: //' \
+        -e 's/^units: //' | \
+    awk '
+    NR%4==1 { attr=$0 }
+    NR%4==2 { val=$0 }
+    NR%4==3 { unit=$0 }
+    NR%4==0 { printf "%s,%s,%s\n", attr, val, unit; attr=""; val=""; unit="" }
+    END { if (attr != "") printf "%s,%s,%s\n", attr, val, unit }
+    '
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/getmetadata/main.nf b/modules/sanger-cellgeni/irods/getmetadata/main.nf
new file mode 100644
index 0000000..73b630c
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/getmetadata/main.nf
@@ -0,0 +1,56 @@
+// Export the metadata that `imeta ls` reports for one iRODS collection or data
+// object as irods_metadata.csv: three double-quoted fields per line
+// (attribute, value, units). Double quotes inside a field become single quotes.
+process IRODS_GETMETADATA {
+    tag "Getting metadata for ${irodspath}"
+
+    input:
+    tuple val(meta), val(irodspath)
+
+    output:
+    tuple val(meta), path("irods_metadata.csv"), emit: csv
+    path "versions.yml", emit: versions
+
+    script:
+    """
+    set -euo pipefail
+
+    # Check if irodspath exists
+    if ils -d "${irodspath}" | grep -q ':\$'; then
+        resource="-C"
+    elif ils -d "${irodspath}"; then
+        resource="-d"
+    else
+        echo "Error: iRODS path ${irodspath} does not exist." >&2
+        exit 1
+    fi
+
+    # Get metadata from iRODS. The path is quoted so that spaces or glob characters
+    # cannot split or expand it, and only lines that start with a field label are
+    # kept; "|| true" lets an object without metadata produce an empty CSV.
+    imeta ls \$resource "${irodspath}" \
+        | (grep -E '^(attribute|value|units):' || true) \
+        | sed -e 's/^attribute: //' -e 's/^value: //' -e 's/^units: //' \
+        | sed -e "s/\\\"/'/g" \
+        | awk 'NR%3!=0 {printf "\\\"%s\\\",", \$0} NR%3==0 {printf "\\\"%s\\\"\\n", \$0}' > irods_metadata.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        irods: \$(ienv | grep version | awk '{ print \$3 }')
+        awk: \$(awk --version | head -n1)
+        sed: \$(sed --version | head -n1 | awk '{ print \$4 }')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch irods_metadata.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        irods: \$(ienv | grep version | awk '{ print \$3 }')
+        awk: \$(awk --version | head -n1)
+        sed: \$(sed --version | head -n1 | awk '{ print \$4 }')
+    END_VERSIONS
+    """
+}
diff --git a/modules/sanger-cellgeni/irods/getmetadata/meta.yml b/modules/sanger-cellgeni/irods/getmetadata/meta.yml
new file mode 100644
index 0000000..6282771
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/getmetadata/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "irods_getmetadata"
+description:
+  "Module to get metadata from iRODS collections and data objects and
+  output it as CSV"
+keywords:
+  - irods
+  - metadata
+  - csv
+tools:
+  - irods:
+      description: Integrated Rule-Oriented Data System (iRODS) is open source
+        data management software for a cancer genome analysis workflow.
+      homepage: https://irods.org/
+      documentation: https://irods.org/documentation/
+      doi: 10.1186/s12859-018-2576-5
+      license: ["BSD-3-Clause"]
+      identifier: "biotools:iRODS"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - irodspath:
+        type: string
+        description: "iRODS collection or data object path to get metadata from"
+        ontologies: []
+output:
+  csv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - irods_metadata.csv:
+          type: file
+          description: "CSV file containing iRODS metadata"
+          pattern: "irods_metadata.csv"
+          ontologies:
+            - edam: http://edamontology.org/format_3752 # CSV
+  versions:
+    - versions.yml:
+        type: file
+        description: "YAML file containing software versions used"
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@claptar"
+maintainers:
+  - "@claptar"
diff --git a/modules/sanger-cellgeni/irods/getmetadata/module.config b/modules/sanger-cellgeni/irods/getmetadata/module.config
new file mode 100644
index 0000000..9fc53fa
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/getmetadata/module.config
@@ -0,0 +1,10 @@
+process {
+    withName: IRODS_GETMETADATA {
+        maxForks = 20
+        array    = 1000
+        queue    = 'small'
+        cpus     = 1
+        memory   = 70.MB
+        time     = 10.m
+    }
+}
diff --git a/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test
new file mode 100644
index 0000000..996d1a3
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test
@@ -0,0 +1,98 @@
+nextflow_process { // nf-test suite for IRODS_GETMETADATA; every test passes one [meta, iRODS path] tuple
+    name "Test Process: IRODS_GETMETADATA"
+    script "../main.nf"
+    config "../module.config"
+    process "IRODS_GETMETADATA"
+
+    tag "modules"
+    tag "irods"
+    tag "irods/getmetadata"
+    tag "modules_sangercellgeni"
+
+    test("Get data object metadata") { // input path ends in .cram, i.e. a single data object
+        tag "data_object"
+
+        when {
+            process {
+                """
+                input[0] = [ [id: "data_object_1"], "/seq/illumina/runs/41/41796/lane1/plex1/41796_1#1.cram" ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert snapshot(process.out).match() }, // compares all outputs with the stored snapshot
+                { assert path(process.out.csv.get(0).get(1)).csv.columnCount == 3 } // get(0).get(1): file of the first [meta, file] emission
+            )
+        }
+    }
+
+    test("Get collection metadata") { // NOTE(review): hard-coded iRODS path, presumably needs access to that zone - confirm
+        tag "collection"
+
+        when {
+            process {
+                """
+                input[0] = [ [id: "collection_1"], "/seq/illumina/cellranger-arc/cellranger-arc202_count_43c2d8dd1eaf98b635896165fd98ae3a" ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert snapshot(process.out).match() }, // compares all outputs with the stored snapshot
+                { assert path(process.out.csv.get(0).get(1)).csv.columnCount == 3 } // parsed CSV must have three columns
+            )
+        }
+    }
+
+    test("Get linked collection metadata") { // same assertions as the collection test, different path
+        tag "collection"
+
+        when {
+            process {
+                """
+                input[0] = [ [id: "linked_collection_1"], "/archive/cellgeni/multiome/internal/cellranger_arc/6439/WS_wEMB13400246WS_wEMB13400220" ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv },
+                { assert snapshot(process.out).match() }, // compares all outputs with the stored snapshot
+                { assert path(process.out.csv.get(0).get(1)).csv.columnCount == 3 } // parsed CSV must have three columns
+            )
+        }
+    }
+
+    test("Get empty metadata") { // no column-count assertion here, unlike the other tests
+        tag "empty_metadata"
+
+        when {
+            process {
+                """
+                input[0] = [ [id: "emptymeta_collection_1"], "/archive/cellgeni/multiome" ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.versions },
+                { assert process.out.csv }, // the csv channel must still emit a tuple
+                { assert snapshot(process.out).match() },
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test.snap b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test.snap
new file mode 100644
index 0000000..3434b74
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/getmetadata/tests/main.nf.test.snap
@@ -0,0 +1,134 @@
+{
+    "Get data object metadata": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "data_object_1"
+                        },
+                        "irods_metadata.csv:md5,197a5b8db4e7c8ae4783731992d6c6ac"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "data_object_1"
+                        },
+                        "irods_metadata.csv:md5,197a5b8db4e7c8ae4783731992d6c6ac"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-27T13:32:47.27054006"
+    },
+    "Get linked collection metadata": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "linked_collection_1"
+                        },
+                        "irods_metadata.csv:md5,ce97a73816135627adc03848746fb881"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "linked_collection_1"
+                        },
+                        "irods_metadata.csv:md5,ce97a73816135627adc03848746fb881"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-27T13:33:24.914320709"
+    },
+    "Get empty metadata": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "emptymeta_collection_1"
+                        },
+                        "irods_metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "emptymeta_collection_1"
+                        },
+                        "irods_metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-27T13:33:43.503718805"
+    },
+    "Get collection metadata": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "collection_1"
+                        },
+                        "irods_metadata.csv:md5,fc54181e42872f3b1725271798b36dd4"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ],
+                "csv": [
+                    [
+                        {
+                            "id": "collection_1"
+                        },
+                        "irods_metadata.csv:md5,fc54181e42872f3b1725271798b36dd4"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,aa918ef363c12d21e86ccdf876b0cb74"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.4"
+        },
+        "timestamp": "2025-10-27T13:33:06.176407056"
+    }
+}
\ No newline at end of file
diff --git a/modules/sanger-cellgeni/irods/storefile/main.nf b/modules/sanger-cellgeni/irods/storefile/main.nf
new file mode 100644
index 0000000..11bd2cd
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/storefile/main.nf
@@ -0,0 +1,67 @@
+// Upload one local file to iRODS with `iput`, record its MD5 as metadata, and fail
+// unless the checksum reported by `ichksum` equals the locally computed MD5.
+// Emits [meta, irodspath, md5, irods_md5] on `md5`.
+process IRODS_STOREFILE {
+    tag "Loading ${irodspath}"
+
+    input:
+    tuple val(meta), path(file), val(irodspath)
+
+    output:
+    tuple val(meta), val(irodspath), env('md5'), env('irods_md5'), emit: md5
+    path "versions.yml", emit: versions
+
+    script:
+    // Fallback iput options, used only when ext.args is not set.
+    def args = task.ext.args ?: "-KV -f -X restart.txt --retries 10 --acl 'read public#archive'"
+    """
+    module load cellgen/irods
+
+    # calculate MD5
+    md5=\$(md5sum "${file}" | awk '{print \$1}')
+
+    # create iRODS directory if it doesn't exist
+    irodsdir=\$(dirname "${irodspath}")
+    imkdir -p "\$irodsdir"
+
+    # Load file to iRODS
+    echo "Loading ${file} to iRODS at ${irodspath}"
+    iput ${args} \
+        -N ${task.cpus} \
+        --metadata="md5;\${md5};;" \
+        "${file}" "${irodspath}"
+
+    # Calculate iRODS md5
+    sleep 1 # wait for iRODS to do its thing
+    irods_md5=\$(ichksum "${irodspath}" | awk '{print \$NF}')
+
+    # Compare iRODS md5 with local md5
+    if [ "\$md5" != "\$irods_md5" ]; then
+        echo "MD5 mismatch for ${file}: local \$md5, iRODS \$irods_md5" >&2
+        exit 1
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        irods: \$(ienv | grep version | awk '{ print \$3 }')
+        awk: \$(awk --version | head -n1)
+        md5sum: \$(md5sum --version | head -n1 | awk '{ print \$4 }')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    # calculate MD5
+    md5=\$(md5sum "${file}" | awk '{print \$1}')
+    # Nothing is uploaded in a stub run; mirror the local value so that the
+    # declared env('irods_md5') output is defined.
+    irods_md5="\$md5"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        irods: \$(ienv | grep version | awk '{ print \$3 }')
+        awk: \$(awk --version | head -n1)
+        md5sum: \$(md5sum --version | head -n1 | awk '{ print \$4 }')
+    END_VERSIONS
+    """
+}
diff --git a/modules/sanger-cellgeni/irods/storefile/meta.yml b/modules/sanger-cellgeni/irods/storefile/meta.yml
new file mode 100644
index 0000000..7ffaba4
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/storefile/meta.yml
@@ -0,0 +1,65 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "irods_storefile"
+description:
+  "Module to upload files to iRODS with MD5 checksum verification and metadata
+  attachment"
+keywords:
+  - irods
+  - upload
+  - storage
+  - checksum
+tools:
+  - irods:
+      description: Integrated Rule-Oriented Data System (iRODS) is open source
+        data management software for a cancer genome analysis workflow.
+      homepage: https://irods.org/
+      documentation: https://irods.org/documentation/
+      doi: 10.1186/s12859-018-2576-5
+      license: ["BSD-3-Clause"]
+      identifier: "biotools:iRODS"
+      args_id: "$args"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - file:
+        type: file
+        description: "File to be uploaded to iRODS"
+        pattern: "*"
+        ontologies: []
+    - irodspath:
+        type: string
+        description: "Target iRODS path where the file should be stored"
+        ontologies: []
+output:
+  md5:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - irodspath:
+          type: string
+          description: "iRODS path where the file was stored"
+          ontologies: []
+      - md5:
+          type: string
+          description: "MD5 checksum of the local file"
+          ontologies: []
+      - irods_md5:
+          type: string
+          description: "MD5 checksum of the file stored in iRODS"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: "YAML file containing software versions used"
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@claptar"
+maintainers:
+  - "@claptar"
diff --git a/modules/sanger-cellgeni/irods/storefile/module.config b/modules/sanger-cellgeni/irods/storefile/module.config
new file mode 100644
index 0000000..f37e9d7
--- /dev/null
+++ b/modules/sanger-cellgeni/irods/storefile/module.config
@@ -0,0 +1,10 @@
+process {
+    withName: 'IRODS_STOREFILE' {
+        ext.args = "-KV -f -X restart.txt --retries 10 --acl 'read public#archive'" // passed to iput; same value as the fallback in main.nf
+        maxForks = 20
+        array    = 1000
+        queue    = 'transfer'
+        cpus     = 2
+        memory   = '4 GB'
+    }
+}
\ No newline at end of file
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 0000000..03c3bfd
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,9 @@
+config {
+    testsDir "."
+    workDir ".nf-test"
+    configFile "tests/config/nf-test.config"
+
+    plugins {
+        load "nft-csv@0.1.0"
+    }
+}
\ No newline at end of file
diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config
new file mode 100644
index 0000000..b6e7510
--- /dev/null
+++ b/tests/config/nf-test.config
@@ -0,0 +1,35 @@
+nextflow.enable.moduleBinaries = true
+
+params {
+    // test params
+    test_data_base           = "${projectDir}/tests/data/"
+
+    // process params
+    output_dir               = "results"
+}
+
+process {
+    cpus   = 2
+    memory = '4.GB'
+}
+
+executor {
+    name           = 'lsf'
+    perJobMemLimit = true
+}
+
+profiles {
+    singularity {
+        singularity.enabled    = true
+        singularity.autoMounts = true
+        singularity.runOptions = '-B /lustre,/nfs'
+        singularity.cacheDir   = '/nfs/cellgeni/singularity/images/'
+    }
+    docker {
+        docker.enabled    = true
+    }
+}
+
+docker.registry      = 'quay.io'
+singularity.registry = 'quay.io'
+cleanup              = false
\ No newline at end of file