From 288c043473af9116336bb9f2842f62edc8b707e4 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Mon, 18 Aug 2025 17:05:53 +0200
Subject: [PATCH 1/6] introduce new component

---
 .../process_task_results/config.vsh.yaml      | 120 +++++++++---------
 src/reporting/process_task_results/main.nf    |  44 +++++--
 2 files changed, 94 insertions(+), 70 deletions(-)

diff --git a/src/reporting/process_task_results/config.vsh.yaml b/src/reporting/process_task_results/config.vsh.yaml
index e1703bf52..677cb5e85 100644
--- a/src/reporting/process_task_results/config.vsh.yaml
+++ b/src/reporting/process_task_results/config.vsh.yaml
@@ -44,9 +44,66 @@ argument_groups:
         description: Nextflow execution trace file
         example: resources_test/openproblems/task_results_v4/raw/trace.txt
 
+  - name: Dataset filtering
+    description: |
+      Use these arguments to filter datasets by name. By default, all datasets are
+      run. If `--datasets_include` is defined, only those datasets are run. If
+      `--datasets_exclude` is defined, all datasets except those specified are run.
+      These arguments are mutually exclusive, so only `--datasets_include` OR
+      `--datasets_exclude` can be set but not both.
+    arguments:
+      - name: "--datasets_include"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to include. If specified, only these datasets will be run.
+      - name: "--datasets_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to exclude. If specified, all datasets except the ones listed will be run.
+
+  - name: Method filtering
+    description: |
+      Use these arguments to filter methods by name. By default, all methods are
+      run. If `--methods_include` is defined, only those methods are run. If
+      `--methods_exclude` is defined, all methods except those specified are run.
+      These arguments are mutually exclusive, so only `--methods_include` OR
+      `--methods_exclude` can be set but not both.
+    arguments:
+      - name: "--methods_include"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to include. If specified, only these methods will be run.
+      - name: "--methods_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to exclude. If specified, all methods except the ones listed will be run.
+
+  - name: Metric filtering
+    description: |
+      Use these arguments to filter metrics by name. By default, all metrics are
+      run. If `--metrics_include` is defined, only those metrics are run. If
+      `--metrics_exclude` is defined, all metrics except those specified are run.
+      These arguments are mutually exclusive, so only `--metrics_include` OR
+      `--metrics_exclude` can be set but not both.
+    arguments:
+      - name: "--metrics_include"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to include. If specified, only these metrics will be run.
+      - name: "--metrics_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to exclude. If specified, all metrics except the ones listed will be run.
+
   - name: Outputs
     arguments:
-      - name: "--output_combined"
+      - name: "--output_data"
         type: file
         required: true
         direction: output
@@ -65,66 +122,6 @@ argument_groups:
         info:
           format:
             type: html
-      - name: "--output_task_info"
-        type: file
-        required: true
-        direction: output
-        description: Task info JSON file
-        default: task_info.json
-        info:
-          format:
-            type: json
-            schema: /common/schemas/results_v4/task_info.json
-      - name: "--output_dataset_info"
-        type: file
-        required: true
-        direction: output
-        description: Dataset info JSON file
-        default: dataset_info.json
-        info:
-          format:
-            type: json
-            schema: /common/schemas/results_v4/dataset_info.json
-      - name: "--output_method_info"
-        type: file
-        required: true
-        direction: output
-        description: Method info JSON file
-        default: method_info.json
-        info:
-          format:
-            type: json
-            schema: /common/schemas/results_v4/method_info.json
-      - name: "--output_metric_info"
-        type: file
-        required: true
-        direction: output
-        description: Metric info JSON file
-        default: metric_info.json
-        info:
-          format:
-            type: json
-            schema: /common/schemas/results_v4/metric_info.json
-      - name: "--output_results"
-        type: file
-        required: true
-        direction: output
-        description: Results JSON file
-        default: results.json
-        info:
-          format:
-            type: json
-            schema: /common/schemas/results_v4/results.json
-      - name: "--output_quality_control"
-        type: file
-        required: true
-        direction: output
-        description: Quality control JSON file
-        default: quality_control.json
-        info:
-          format:
-            type: json
-            schema: /common/schemas/results_v4/quality_control.json
 
 resources:
   - type: nextflow_script
@@ -137,6 +134,7 @@ dependencies:
   - name: reporting/get_metric_info
   - name: reporting/get_dataset_info
   - name: reporting/get_task_info
+  - name: reporting/filter_results
   - name: reporting/generate_qc
   - name: reporting/combine_output
   - name: reporting/render_report
diff --git a/src/reporting/process_task_results/main.nf b/src/reporting/process_task_results/main.nf
index 1fc64f389..97f5b1220 100644
--- a/src/reporting/process_task_results/main.nf
+++ b/src/reporting/process_task_results/main.nf
@@ -52,13 +52,45 @@ workflow run_wf {
         "input_trace": "input_trace",
         "input_dataset_info": "output_dataset",
         "input_method_info": "output_method",
-        "input_metric_info": "output_metric"
+        "input_metric_info": "output_metric",
+        "datasets_include": "datasets_include",
+        "datasets_exclude": "datasets_exclude",
+        "methods_include": "methods_include",
+        "methods_exclude": "methods_exclude",
+        "metrics_include": "metrics_include",
+        "metrics_exclude": "metrics_exclude"
       ],
       toState: [
         "output_results": "output"
       ]
     )
 
+    | filter_results.run(
+      runIf: { id, state ->
+        // Only run filtering if there are include/exclude lists defined
+        return state.datasets_exclude || state.methods_exclude || state.metrics_exclude ||
+          state.datasets_include || state.methods_include || state.metrics_include
+      },
+      fromState: [
+        "input_dataset_info": "output_dataset",
+        "input_method_info": "output_method",
+        "input_metric_info": "output_metric",
+        "input_results": "output_results",
+        "datasets_include": "datasets_include",
+        "datasets_exclude": "datasets_exclude",
+        "methods_include": "methods_include",
+        "methods_exclude": "methods_exclude",
+        "metrics_include": "metrics_include",
+        "metrics_exclude": "metrics_exclude"
+      ],
+      toState: [
+        "output_dataset": "output_dataset_info",
+        "output_method": "output_method_info",
+        "output_metric": "output_metric_info",
+        "output_results": "output_results"
+      ]
+    )
+
     | generate_qc.run(
       fromState: [
         "input_task_info": "output_task",
@@ -90,14 +122,8 @@ workflow run_wf {
     )
 
     | setState([
-      "output_combined": "output_combined",
-      "output_report": "output_report",
-      "output_task_info": "output_task",
-      "output_dataset_info": "output_dataset",
-      "output_method_info": "output_method",
-      "output_metric_info": "output_metric",
-      "output_results": "output_results",
-      "output_quality_control": "output_qc"
+      "output_data": "output_combined",
+      "output_report": "output_report"
     ])
 
   emit:

From ffd0a733a31bb3455b60c6bac825c012b8ff6304 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Mon, 18 Aug 2025 22:36:49 +0200
Subject: [PATCH 2/6] implement component

---
 src/reporting/filter_results/config.vsh.yaml | 162 ++++++++++
 src/reporting/filter_results/script.py       | 298 +++++++++++++++++++
 2 files changed, 460 insertions(+)
 create mode 100644 src/reporting/filter_results/config.vsh.yaml
 create mode 100644 src/reporting/filter_results/script.py

diff --git a/src/reporting/filter_results/config.vsh.yaml b/src/reporting/filter_results/config.vsh.yaml
new file mode 100644
index 000000000..09e22ba7a
--- /dev/null
+++ b/src/reporting/filter_results/config.vsh.yaml
@@ -0,0 +1,162 @@
+name: filter_results
+namespace: reporting
+description: Filter dataset, method, metric info and results based on include/exclude criteria
+
+argument_groups:
+  - name: Inputs
+    arguments:
+    - name: --input_dataset_info
+      type: file
+      description: JSON file containing dataset information
+      required: true
+      example: resources_test/openproblems/task_results_v4/processed/dataset_info.json
+
+    - name: --input_method_info
+      type: file
+      description: JSON file containing method information
+      required: true
+      example: resources_test/openproblems/task_results_v4/processed/method_info.json
+
+    - name: --input_metric_info
+      type: file
+      description: JSON file containing metric information
+      required: true
+      example: resources_test/openproblems/task_results_v4/processed/metric_info.json
+
+    - name: --input_results
+      type: file
+      description: JSON file containing results
+      required: true
+      example: resources_test/openproblems/task_results_v4/processed/results.json
+
+  - name: Dataset filtering
+    description: |
+      Use these arguments to filter datasets by name. By default, all datasets are
+      included. If `--datasets_include` is defined, only those datasets are included. If
+      `--datasets_exclude` is defined, all datasets except those specified are included.
+      These arguments are mutually exclusive, so only `--datasets_include` OR
+      `--datasets_exclude` can be set but not both.
+    arguments:
+      - name: "--datasets_include"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to include. If specified, only these datasets will be included.
+      - name: "--datasets_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to exclude. If specified, all datasets except the ones listed will be included.
+
+  - name: Method filtering
+    description: |
+      Use these arguments to filter methods by name. By default, all methods are
+      included. If `--methods_include` is defined, only those methods are included. If
+      `--methods_exclude` is defined, all methods except those specified are included.
+      These arguments are mutually exclusive, so only `--methods_include` OR
+      `--methods_exclude` can be set but not both.
+    arguments:
+      - name: "--methods_include"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to include. If specified, only these methods will be included.
+      - name: "--methods_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to exclude. If specified, all methods except the ones listed will be included.
+
+  - name: Metric filtering
+    description: |
+      Use these arguments to filter metrics by name. By default, all metrics are
+      included. If `--metrics_include` is defined, only those metrics are included. If
+      `--metrics_exclude` is defined, all metrics except those specified are included.
+      These arguments are mutually exclusive, so only `--metrics_include` OR
+      `--metrics_exclude` can be set but not both.
+    arguments:
+      - name: "--metrics_include"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to include. If specified, only these metrics will be included.
+      - name: "--metrics_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to exclude. If specified, all metrics except the ones listed will be included.
+
+  - name: Outputs
+    arguments:
+    - name: --output_dataset_info
+      type: file
+      direction: output
+      default: filtered_dataset_info.json
+      description: Filtered dataset info JSON file
+      info:
+        format:
+          type: json
+          schema: /common/schemas/results_v4/dataset_info.json
+      example: resources_test/openproblems/task_results_v4/processed/filtered_dataset_info.json
+
+    - name: --output_method_info
+      type: file
+      direction: output
+      default: filtered_method_info.json
+      description: Filtered method info JSON file
+      info:
+        format:
+          type: json
+          schema: /common/schemas/results_v4/method_info.json
+      example: resources_test/openproblems/task_results_v4/processed/filtered_method_info.json
+
+    - name: --output_metric_info
+      type: file
+      direction: output
+      default: filtered_metric_info.json
+      description: Filtered metric info JSON file
+      info:
+        format:
+          type: json
+          schema: /common/schemas/results_v4/metric_info.json
+      example: resources_test/openproblems/task_results_v4/processed/filtered_metric_info.json
+
+    - name: --output_results
+      type: file
+      direction: output
+      default: filtered_results.json
+      description: Filtered results JSON file
+      info:
+        format:
+          type: json
+          schema: /common/schemas/results_v4/results.json
+      example: resources_test/openproblems/task_results_v4/processed/filtered_results.json
+
+resources:
+  - type: python_script
+    path: script.py
+  - path: /common/schemas
+    dest: schemas
+
+test_resources:
+  - type: python_script
+    path: /common/component_tests/run_and_check_output.py
+  - path: /resources_test/openproblems/task_results_v4
+    dest: resources_test/openproblems/task_results_v4
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: apt
+        packages:
+        - nodejs
+        - npm
+      - type: docker
+        run: npm install -g ajv-cli
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowmem, lowtime, lowcpu]
diff --git a/src/reporting/filter_results/script.py b/src/reporting/filter_results/script.py
new file mode 100644
index 000000000..4c8d410e4
--- /dev/null
+++ b/src/reporting/filter_results/script.py
@@ -0,0 +1,298 @@
+
+## VIASH START
+par = {
+    "input_dataset_info": "resources_test/openproblems/task_results_v4/processed/dataset_info.json",
+    "input_method_info": "resources_test/openproblems/task_results_v4/processed/method_info.json",
+    "input_metric_info": "resources_test/openproblems/task_results_v4/processed/metric_info.json",
+    "input_results": "resources_test/openproblems/task_results_v4/processed/results.json",
+    "output_dataset_info": "resources_test/openproblems/task_results_v4/processed/filtered_dataset_info.json",
+    "output_method_info": "resources_test/openproblems/task_results_v4/processed/filtered_method_info.json",
+    "output_metric_info": "resources_test/openproblems/task_results_v4/processed/filtered_metric_info.json",
+    "output_results": "resources_test/openproblems/task_results_v4/processed/filtered_results.json",
+    "datasets_exclude": ["cellxgene_census/tabula_sapiens", "cellxgene_census/mouse_pancreas_atlas"],
+    "datasets_include": None,
+    "methods_exclude": None,
+    "methods_include": None,
+    "metrics_exclude": None,
+    "metrics_include": None
+}
+meta = {
+    "resources_dir": "target/executable/reporting/filter_results"
+}
+## VIASH END
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+
+
+def validate_filtering_args():
+    """Validate that include/exclude arguments are mutually exclusive."""
+    if par["datasets_include"] and par["datasets_exclude"]:
+        raise ValueError("Cannot specify both --datasets_include and --datasets_exclude")
+    
+    if par["methods_include"] and par["methods_exclude"]:
+        raise ValueError("Cannot specify both --methods_include and --methods_exclude")
+    
+    if par["metrics_include"] and par["metrics_exclude"]:
+        raise ValueError("Cannot specify both --metrics_include and --metrics_exclude")
+
+
+def apply_name_filter(
+    data_list: List[Dict[str, Any]], 
+    include_list: Optional[List[str]] = None,
+    exclude_list: Optional[List[str]] = None,
+    item_type: str = "item"
+) -> List[Dict[str, Any]]:
+    """Apply filtering to a list based on name field."""
+    if not data_list:
+        return data_list
+    
+    original_count = len(data_list)
+    item_names = [item["name"] for item in data_list]
+    
+    if include_list:
+        items_to_include = set(item_names) & set(include_list)
+        if not items_to_include:
+            print(f"Warning: None of the specified {item_type}s to include were found in the data", 
+                  file=sys.stderr)
+            return []
+        
+        missing_items = set(include_list) - set(item_names)
+        if missing_items:
+            print(f"Warning: The following {item_type}s specified in include list were not found: " +
+                  ", ".join(missing_items), file=sys.stderr)
+        
+        filtered_data = [item for item in data_list if item["name"] in items_to_include]
+        print(f">>> Included {len(filtered_data)} out of {original_count} {item_type}s")
+        return filtered_data
+        
+    elif exclude_list:
+        items_to_exclude = set(item_names) & set(exclude_list)
+        
+        missing_items = set(exclude_list) - set(item_names)
+        if missing_items:
+            print(f"Warning: The following {item_type}s specified in exclude list were not found: " +
+                  ", ".join(missing_items), file=sys.stderr)
+        
+        filtered_data = [item for item in data_list if item["name"] not in items_to_exclude]
+        print(f">>> Excluded {len(items_to_exclude)} {item_type}s, keeping {len(filtered_data)} out of {original_count} {item_type}s")
+        return filtered_data
+    
+    # No filtering applied
+    return data_list
+
+
+def filter_results_data(
+    results_data: List[Dict[str, Any]],
+    dataset_names: List[str],
+    method_names: List[str], 
+    metric_names: List[str]
+) -> List[Dict[str, Any]]:
+    """Filter results based on dataset, method, and metric filters."""
+    if not results_data:
+        return results_data
+    
+    original_count = len(results_data)
+    
+    # Filter result entries based on dataset_name, method_name, and metric_names
+    filtered_results = []
+    for result in results_data:
+        dataset_keep = result["dataset_name"] in dataset_names
+        method_keep = result["method_name"] in method_names
+
+        # Check whether this result should be kept
+        if dataset_keep and method_keep:
+            filtered_result = result.copy()
+            
+            filtered_metrics = [
+                (i, name)
+                for i, name in enumerate(result["metric_names"])
+                if name in metric_names
+            ]
+            
+            # store metric names
+            filtered_result["metric_names"] = [name for _, name in filtered_metrics]
+            
+            # store metric values
+            filtered_result["metric_values"] = [result["metric_values"][i] for i, _ in filtered_metrics]
+            
+            # store metric components
+            new_metric_components = []
+            for component in result.get("metric_components", []):
+                new_component = component.copy()
+                new_component["metric_names"] = [name for name in component["metric_names"] if name in metric_names]
+                
+                # if metric_names are not empty
+                if new_component["metric_names"]:
+                    new_metric_components.append(new_component)
+            filtered_result["metric_components"] = new_metric_components
+
+            filtered_results.append(filtered_result)
+    
+    print(f">>> Filtered results: keeping {len(filtered_results)} out of {original_count} result entries")
+    return filtered_results
+
+
+def validate_json_against_schema(json_file: str, schema_file: str, name: str) -> tuple[bool, str]:
+    """Validate a JSON file against its schema using ajv-cli.
+    
+    Returns:
+        tuple[bool, str]: (is_valid, error_message)
+    """
+    try:
+        cmd = [
+            "ajv", "validate",
+            "--spec", "draft2020",
+            "-s", schema_file,
+            "-r", str(Path(meta["resources_dir"]) / "schemas" / "results_v4" / "core.json"),
+            "-d", json_file
+        ]
+        
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        
+        if result.returncode == 0:
+            print(f"✓ {name} validation passed")
+            return True, ""
+        else:
+            error_msg = ""
+            if result.stderr:
+                error_msg += f"stderr: {result.stderr.strip()}"
+            if result.stdout:
+                error_msg += f"\nstdout: {result.stdout.strip()}"
+            if not error_msg:
+                error_msg = "Unknown validation error"
+            
+            return False, error_msg
+            
+    except FileNotFoundError:
+        return False, "ajv-cli not found. Cannot validate schema"
+
+
+print("====== Filter results ======")
+
+# Validation
+print("\n>>> Validating arguments...")
+validate_filtering_args()
+
+# Read input files
+print("\n>>> Reading input files...")
+
+print(f'Reading dataset info from "{par["input_dataset_info"]}"...')
+with open(par["input_dataset_info"], "r") as f:
+    dataset_info = json.load(f)
+
+print(f'Reading method info from "{par["input_method_info"]}"...')
+with open(par["input_method_info"], "r") as f:
+    method_info = json.load(f)
+
+print(f'Reading metric info from "{par["input_metric_info"]}"...')
+with open(par["input_metric_info"], "r") as f:
+    metric_info = json.load(f)
+
+print(f'Reading results from "{par["input_results"]}"...')
+with open(par["input_results"], "r") as f:
+    results = json.load(f)
+
+# Apply filters
+print("\n>>> Applying filters...")
+
+print("Filtering datasets...")
+filtered_dataset_info = apply_name_filter(
+    dataset_info,
+    par["datasets_include"],
+    par["datasets_exclude"],
+    "dataset"
+)
+
+print("Filtering methods...")
+filtered_method_info = apply_name_filter(
+    method_info,
+    par["methods_include"],
+    par["methods_exclude"],
+    "method"
+)
+
+print("Filtering metrics...")
+filtered_metric_info = apply_name_filter(
+    metric_info,
+    par["metrics_include"],
+    par["metrics_exclude"],
+    "metric"
+)
+
+# Get names for results filtering
+filtered_dataset_names = [item["name"] for item in filtered_dataset_info]
+filtered_method_names = [item["name"] for item in filtered_method_info]
+filtered_metric_names = [item["name"] for item in filtered_metric_info]
+
+print("Filtering results...")
+filtered_results = filter_results_data(
+    results,
+    filtered_dataset_names,
+    filtered_method_names,
+    filtered_metric_names
+)
+
+# Write and validate output files
+print("\n>>> Writing and validating output files...")
+results_schemas_dir = Path(meta["resources_dir"]) / "schemas" / "results_v4"
+
+validation_files = [
+    {
+        "data": filtered_dataset_info,
+        "schema": "dataset_info.json",
+        "file": par["output_dataset_info"],
+        "name": "dataset info"
+    },
+    {
+        "data": filtered_method_info,
+        "schema": "method_info.json",
+        "file": par["output_method_info"],
+        "name": "method info"
+    },
+    {
+        "data": filtered_metric_info,
+        "schema": "metric_info.json",
+        "file": par["output_metric_info"],
+        "name": "metric info"
+    },
+    {
+        "data": filtered_results,
+        "schema": "results.json",
+        "file": par["output_results"],
+        "name": "results"
+    }
+]
+
+all_valid = True
+for validation in validation_files:
+    print(f'Writing {validation["name"]} to "{validation["file"]}"...')
+    with open(validation["file"], "w") as f:
+        json.dump(validation["data"], f, indent=2, ensure_ascii=False)
+
+    print(f'Validating {validation["name"]}...')
+    schema_file = str(results_schemas_dir / validation["schema"])
+    is_valid, error_msg = validate_json_against_schema(
+        validation["file"], 
+        schema_file, 
+        validation["name"]
+    )
+    if not is_valid:
+        print(f'✗ {validation["name"]} validation failed')
+        print(f"Validation error: {error_msg}", file=sys.stderr)
+        all_valid = False
+
+if not all_valid:
+    raise RuntimeError("One or more output files do not conform to their schemas")
+
+# Summary
+print("\n>>> Summary of filtering results:")
+print(f"Datasets: {len(filtered_dataset_info)} (from {len(dataset_info)})")
+print(f"Methods: {len(filtered_method_info)} (from {len(method_info)})")
+print(f"Metrics: {len(filtered_metric_info)} (from {len(metric_info)})")
+print(f"Results: {len(filtered_results)} (from {len(results)})")
+
+print("\n>>> Done!")

From 06cf1800bef500859300df0608a48c906d8cd94a Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Mon, 18 Aug 2025 22:38:56 +0200
Subject: [PATCH 3/6] format code

---
 src/reporting/filter_results/script.py | 163 +++++++++++++------------
 1 file changed, 88 insertions(+), 75 deletions(-)

diff --git a/src/reporting/filter_results/script.py b/src/reporting/filter_results/script.py
index 4c8d410e4..1d267a001 100644
--- a/src/reporting/filter_results/script.py
+++ b/src/reporting/filter_results/script.py
@@ -1,4 +1,3 @@
-
 ## VIASH START
 par = {
     "input_dataset_info": "resources_test/openproblems/task_results_v4/processed/dataset_info.json",
@@ -9,16 +8,17 @@
     "output_method_info": "resources_test/openproblems/task_results_v4/processed/filtered_method_info.json",
     "output_metric_info": "resources_test/openproblems/task_results_v4/processed/filtered_metric_info.json",
     "output_results": "resources_test/openproblems/task_results_v4/processed/filtered_results.json",
-    "datasets_exclude": ["cellxgene_census/tabula_sapiens", "cellxgene_census/mouse_pancreas_atlas"],
+    "datasets_exclude": [
+        "cellxgene_census/tabula_sapiens",
+        "cellxgene_census/mouse_pancreas_atlas",
+    ],
     "datasets_include": None,
     "methods_exclude": None,
     "methods_include": None,
     "metrics_exclude": None,
-    "metrics_include": None
-}
-meta = {
-    "resources_dir": "target/executable/reporting/filter_results"
+    "metrics_include": None,
 }
+meta = {"resources_dir": "target/executable/reporting/filter_results"}
 ## VIASH END
 
 import json
@@ -31,56 +31,70 @@
 def validate_filtering_args():
     """Validate that include/exclude arguments are mutually exclusive."""
     if par["datasets_include"] and par["datasets_exclude"]:
-        raise ValueError("Cannot specify both --datasets_include and --datasets_exclude")
-    
+        raise ValueError(
+            "Cannot specify both --datasets_include and --datasets_exclude"
+        )
+
     if par["methods_include"] and par["methods_exclude"]:
         raise ValueError("Cannot specify both --methods_include and --methods_exclude")
-    
+
     if par["metrics_include"] and par["metrics_exclude"]:
         raise ValueError("Cannot specify both --metrics_include and --metrics_exclude")
 
 
 def apply_name_filter(
-    data_list: List[Dict[str, Any]], 
+    data_list: List[Dict[str, Any]],
     include_list: Optional[List[str]] = None,
     exclude_list: Optional[List[str]] = None,
-    item_type: str = "item"
+    item_type: str = "item",
 ) -> List[Dict[str, Any]]:
     """Apply filtering to a list based on name field."""
     if not data_list:
         return data_list
-    
+
     original_count = len(data_list)
     item_names = [item["name"] for item in data_list]
-    
+
     if include_list:
         items_to_include = set(item_names) & set(include_list)
         if not items_to_include:
-            print(f"Warning: None of the specified {item_type}s to include were found in the data", 
-                  file=sys.stderr)
+            print(
+                f"Warning: None of the specified {item_type}s to include were found in the data",
+                file=sys.stderr,
+            )
             return []
-        
+
         missing_items = set(include_list) - set(item_names)
         if missing_items:
-            print(f"Warning: The following {item_type}s specified in include list were not found: " +
-                  ", ".join(missing_items), file=sys.stderr)
-        
+            print(
+                f"Warning: The following {item_type}s specified in include list were not found: "
+                + ", ".join(missing_items),
+                file=sys.stderr,
+            )
+
         filtered_data = [item for item in data_list if item["name"] in items_to_include]
         print(f">>> Included {len(filtered_data)} out of {original_count} {item_type}s")
         return filtered_data
-        
+
     elif exclude_list:
         items_to_exclude = set(item_names) & set(exclude_list)
-        
+
         missing_items = set(exclude_list) - set(item_names)
         if missing_items:
-            print(f"Warning: The following {item_type}s specified in exclude list were not found: " +
-                  ", ".join(missing_items), file=sys.stderr)
-        
-        filtered_data = [item for item in data_list if item["name"] not in items_to_exclude]
-        print(f">>> Excluded {len(items_to_exclude)} {item_type}s, keeping {len(filtered_data)} out of {original_count} {item_type}s")
+            print(
+                f"Warning: The following {item_type}s specified in exclude list were not found: "
+                + ", ".join(missing_items),
+                file=sys.stderr,
+            )
+
+        filtered_data = [
+            item for item in data_list if item["name"] not in items_to_exclude
+        ]
+        print(
+            f">>> Excluded {len(items_to_exclude)} {item_type}s, keeping {len(filtered_data)} out of {original_count} {item_type}s"
+        )
         return filtered_data
-    
+
     # No filtering applied
     return data_list
 
@@ -88,15 +102,15 @@ def apply_name_filter(
 def filter_results_data(
     results_data: List[Dict[str, Any]],
     dataset_names: List[str],
-    method_names: List[str], 
-    metric_names: List[str]
+    method_names: List[str],
+    metric_names: List[str],
 ) -> List[Dict[str, Any]]:
     """Filter results based on dataset, method, and metric filters."""
     if not results_data:
         return results_data
-    
+
     original_count = len(results_data)
-    
+
     # Filter result entries based on dataset_name, method_name, and metric_names
     filtered_results = []
     for result in results_data:
@@ -106,53 +120,66 @@ def filter_results_data(
         # Check whether this result should be kept
         if dataset_keep and method_keep:
             filtered_result = result.copy()
-            
+
             filtered_metrics = [
                 (i, name)
                 for i, name in enumerate(result["metric_names"])
                 if name in metric_names
             ]
-            
+
             # store metric names
             filtered_result["metric_names"] = [name for _, name in filtered_metrics]
-            
+
             # store metric values
-            filtered_result["metric_values"] = [result["metric_values"][i] for i, _ in filtered_metrics]
-            
+            filtered_result["metric_values"] = [
+                result["metric_values"][i] for i, _ in filtered_metrics
+            ]
+
             # store metric components
             new_metric_components = []
             for component in result.get("metric_components", []):
                 new_component = component.copy()
-                new_component["metric_names"] = [name for name in component["metric_names"] if name in metric_names]
-                
+                new_component["metric_names"] = [
+                    name for name in component["metric_names"] if name in metric_names
+                ]
+
                 # if metric_names are not empty
                 if new_component["metric_names"]:
                     new_metric_components.append(new_component)
             filtered_result["metric_components"] = new_metric_components
 
             filtered_results.append(filtered_result)
-    
-    print(f">>> Filtered results: keeping {len(filtered_results)} out of {original_count} result entries")
+
+    print(
+        f">>> Filtered results: keeping {len(filtered_results)} out of {original_count} result entries"
+    )
     return filtered_results
 
 
-def validate_json_against_schema(json_file: str, schema_file: str, name: str) -> tuple[bool, str]:
+def validate_json_against_schema(
+    json_file: str, schema_file: str, name: str
+) -> tuple[bool, str]:
     """Validate a JSON file against its schema using ajv-cli.
-    
+
     Returns:
         tuple[bool, str]: (is_valid, error_message)
     """
     try:
         cmd = [
-            "ajv", "validate",
-            "--spec", "draft2020",
-            "-s", schema_file,
-            "-r", str(Path(meta["resources_dir"]) / "schemas" / "results_v4" / "core.json"),
-            "-d", json_file
+            "ajv",
+            "validate",
+            "--spec",
+            "draft2020",
+            "-s",
+            schema_file,
+            "-r",
+            str(Path(meta["resources_dir"]) / "schemas" / "results_v4" / "core.json"),
+            "-d",
+            json_file,
         ]
-        
+
         result = subprocess.run(cmd, capture_output=True, text=True)
-        
+
         if result.returncode == 0:
             print(f"✓ {name} validation passed")
             return True, ""
@@ -164,9 +191,9 @@ def validate_json_against_schema(json_file: str, schema_file: str, name: str) ->
                 error_msg += f"\nstdout: {result.stdout.strip()}"
             if not error_msg:
                 error_msg = "Unknown validation error"
-            
+
             return False, error_msg
-            
+
     except FileNotFoundError:
         return False, "ajv-cli not found. Cannot validate schema"
 
@@ -201,26 +228,17 @@ def validate_json_against_schema(json_file: str, schema_file: str, name: str) ->
 
 print("Filtering datasets...")
 filtered_dataset_info = apply_name_filter(
-    dataset_info,
-    par["datasets_include"],
-    par["datasets_exclude"],
-    "dataset"
+    dataset_info, par["datasets_include"], par["datasets_exclude"], "dataset"
 )
 
 print("Filtering methods...")
 filtered_method_info = apply_name_filter(
-    method_info,
-    par["methods_include"],
-    par["methods_exclude"],
-    "method"
+    method_info, par["methods_include"], par["methods_exclude"], "method"
 )
 
 print("Filtering metrics...")
 filtered_metric_info = apply_name_filter(
-    metric_info,
-    par["metrics_include"],
-    par["metrics_exclude"],
-    "metric"
+    metric_info, par["metrics_include"], par["metrics_exclude"], "metric"
 )
 
 # Get names for results filtering
@@ -230,10 +248,7 @@ def validate_json_against_schema(json_file: str, schema_file: str, name: str) ->
 
 print("Filtering results...")
 filtered_results = filter_results_data(
-    results,
-    filtered_dataset_names,
-    filtered_method_names,
-    filtered_metric_names
+    results, filtered_dataset_names, filtered_method_names, filtered_metric_names
 )
 
 # Write and validate output files
@@ -245,26 +260,26 @@ def validate_json_against_schema(json_file: str, schema_file: str, name: str) ->
         "data": filtered_dataset_info,
         "schema": "dataset_info.json",
         "file": par["output_dataset_info"],
-        "name": "dataset info"
+        "name": "dataset info",
     },
     {
         "data": filtered_method_info,
         "schema": "method_info.json",
         "file": par["output_method_info"],
-        "name": "method info"
+        "name": "method info",
     },
     {
         "data": filtered_metric_info,
         "schema": "metric_info.json",
         "file": par["output_metric_info"],
-        "name": "metric info"
+        "name": "metric info",
     },
     {
         "data": filtered_results,
         "schema": "results.json",
         "file": par["output_results"],
-        "name": "results"
-    }
+        "name": "results",
+    },
 ]
 
 all_valid = True
@@ -276,9 +291,7 @@ def validate_json_against_schema(json_file: str, schema_file: str, name: str) ->
     print(f'Validating {validation["name"]}...')
     schema_file = str(results_schemas_dir / validation["schema"])
     is_valid, error_msg = validate_json_against_schema(
-        validation["file"], 
-        schema_file, 
-        validation["name"]
+        validation["file"], schema_file, validation["name"]
     )
     if not is_valid:
         print(f'✗ {validation["name"]} validation failed')

From 564400d75a34b01fb879e7a24bdcd41a3120b39b Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 Aug 2025 17:10:41 +0200
Subject: [PATCH 4/6] Update src/reporting/process_task_results/main.nf

---
 src/reporting/process_task_results/main.nf | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/reporting/process_task_results/main.nf b/src/reporting/process_task_results/main.nf
index 97f5b1220..f02ec5658 100644
--- a/src/reporting/process_task_results/main.nf
+++ b/src/reporting/process_task_results/main.nf
@@ -52,13 +52,7 @@ workflow run_wf {
         "input_trace": "input_trace",
         "input_dataset_info": "output_dataset",
         "input_method_info": "output_method",
-        "input_metric_info": "output_metric",
-        "datasets_include": "datasets_include",
-        "datasets_exclude": "datasets_exclude",
-        "methods_include": "methods_include",
-        "methods_exclude": "methods_exclude",
-        "metrics_include": "metrics_include",
-        "metrics_exclude": "metrics_exclude"
+        "input_metric_info": "output_metric"
       ],
       toState: [
         "output_results": "output"

From 58d9a5473966ffc8d899ff5d59d5f2395a3e91c3 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 Aug 2025 17:12:09 +0200
Subject: [PATCH 5/6] Apply suggestions from code review

Co-authored-by: Luke Zappia <lazappi@users.noreply.github.com>
---
 src/reporting/filter_results/script.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/reporting/filter_results/script.py b/src/reporting/filter_results/script.py
index 1d267a001..952e5fab7 100644
--- a/src/reporting/filter_results/script.py
+++ b/src/reporting/filter_results/script.py
@@ -73,7 +73,7 @@ def apply_name_filter(
             )
 
         filtered_data = [item for item in data_list if item["name"] in items_to_include]
-        print(f">>> Included {len(filtered_data)} out of {original_count} {item_type}s")
+        print(f"Included {len(filtered_data)} out of {original_count} {item_type}s")
         return filtered_data
 
     elif exclude_list:
@@ -91,7 +91,7 @@ def apply_name_filter(
             item for item in data_list if item["name"] not in items_to_exclude
         ]
         print(
-            f">>> Excluded {len(items_to_exclude)} {item_type}s, keeping {len(filtered_data)} out of {original_count} {item_type}s"
+            f"Excluded {len(items_to_exclude)} {item_type}s, keeping {len(filtered_data)} out of {original_count} {item_type}s"
         )
         return filtered_data
 
@@ -151,7 +151,7 @@ def filter_results_data(
             filtered_results.append(filtered_result)
 
     print(
-        f">>> Filtered results: keeping {len(filtered_results)} out of {original_count} result entries"
+        f"Filtered results: keeping {len(filtered_results)} out of {original_count} result entries"
     )
     return filtered_results
 
@@ -295,7 +295,7 @@ def validate_json_against_schema(
     )
     if not is_valid:
         print(f'✗ {validation["name"]} validation failed')
-        print(f"Validation error: {error_msg}", file=sys.stderr)
+        print(f"Validation error: {error_msg}")
         all_valid = False
 
 if not all_valid:

From 87827474e4bd2a6415639ce61a28eab8a3c3cace Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 19 Aug 2025 17:27:02 +0200
Subject: [PATCH 6/6] add back previously removed arguments

---
 .../process_task_results/config.vsh.yaml      | 62 ++++++++++++++++++-
 src/reporting/process_task_results/main.nf    | 10 ++-
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/src/reporting/process_task_results/config.vsh.yaml b/src/reporting/process_task_results/config.vsh.yaml
index 677cb5e85..c3dcd3699 100644
--- a/src/reporting/process_task_results/config.vsh.yaml
+++ b/src/reporting/process_task_results/config.vsh.yaml
@@ -103,7 +103,7 @@ argument_groups:
 
   - name: Outputs
     arguments:
-      - name: "--output_data"
+      - name: "--output_combined"
         type: file
         required: true
         direction: output
@@ -122,6 +122,66 @@ argument_groups:
         info:
           format:
             type: html
+      - name: "--output_task_info"
+        type: file
+        required: true
+        direction: output
+        description: Task info JSON file
+        default: task_info.json
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/task_info.json
+      - name: "--output_dataset_info"
+        type: file
+        required: true
+        direction: output
+        description: Dataset info JSON file
+        default: dataset_info.json
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/dataset_info.json
+      - name: "--output_method_info"
+        type: file
+        required: true
+        direction: output
+        description: Method info JSON file
+        default: method_info.json
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/method_info.json
+      - name: "--output_metric_info"
+        type: file
+        required: true
+        direction: output
+        description: Metric info JSON file
+        default: metric_info.json
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/metric_info.json
+      - name: "--output_results"
+        type: file
+        required: true
+        direction: output
+        description: Results JSON file
+        default: results.json
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/results.json
+      - name: "--output_quality_control"
+        type: file
+        required: true
+        direction: output
+        description: Quality control JSON file
+        default: quality_control.json
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/quality_control.json
 
 resources:
   - type: nextflow_script
diff --git a/src/reporting/process_task_results/main.nf b/src/reporting/process_task_results/main.nf
index f02ec5658..059960d65 100644
--- a/src/reporting/process_task_results/main.nf
+++ b/src/reporting/process_task_results/main.nf
@@ -116,8 +116,14 @@ workflow run_wf {
     )
 
     | setState([
-      "output_data": "output_combined",
-      "output_report": "output_report"
+      "output_combined": "output_combined",
+      "output_report": "output_report",
+      "output_task_info": "output_task",
+      "output_dataset_info": "output_dataset",
+      "output_method_info": "output_method",
+      "output_metric_info": "output_metric",
+      "output_results": "output_results",
+      "output_quality_control": "output_qc"
     ])
 
   emit: