Merged
Commits
87 commits
e734440
Add combine_output reporting component
lazappi Feb 3, 2025
45abf32
Update process_task_results to combine outputs
lazappi Feb 3, 2025
37042df
Update get_task_info to new schema
lazappi Feb 5, 2025
a747b9b
Update get_method_info to new schema
lazappi Feb 5, 2025
e3439c9
Update get_metric_info to new schema
lazappi Feb 5, 2025
fdbdff8
Update get_dataset_info to new schema
lazappi Feb 6, 2025
00be327
Update get_results to new schema
lazappi Feb 10, 2025
d318832
Add metric resources to results component/schema
lazappi Jul 17, 2025
d5d1b8f
Update generate_qc to match new schema
lazappi Jul 17, 2025
11d6d9d
Update combine_output to new schema
lazappi Jul 18, 2025
6a91673
Update viash version and reference
lazappi Jul 18, 2025
5f70ab9
Update process_results workflow to new components
lazappi Jul 18, 2025
99a4a50
Add render_report component
lazappi Jul 28, 2025
264790e
Add render_report to process results workflow
lazappi Jul 28, 2025
272bba8
Handle missing values in generate_qc()
lazappi Jul 29, 2025
8ab4f60
Handle missing controls in results report
lazappi Jul 29, 2025
d8fdfd9
update common submodule
rcannood Jul 29, 2025
15a2d18
Merge remote-tracking branch 'origin/main' into feature/no-ref/update…
rcannood Jul 29, 2025
e9e3ef1
Merge remote-tracking branch 'origin' into feature/no-ref/update-proc…
lazappi Jul 29, 2025
cbf0e50
Merge branch 'feature/no-ref/update-process-results' of github.com:op…
lazappi Jul 29, 2025
c1198e5
Strip quotes from descriptions/summaries
lazappi Jul 30, 2025
a7b9c6d
Add roles to author details
lazappi Jul 30, 2025
19ecc9d
Add QC check for number of successful controls
lazappi Jul 30, 2025
dc75604
Handle missing exit codes in report
lazappi Jul 30, 2025
382a35d
Add schema validation to process_results workflow
lazappi Jul 30, 2025
caad282
Fix combine_output image version
lazappi Jul 31, 2025
4b3b585
Handle alternative field names in get_dataset_info
lazappi Jul 31, 2025
8d6a370
Handle v1 slots in get_method_info
lazappi Jul 31, 2025
186133f
Handle null author fields in report
lazappi Jul 31, 2025
8ba2b4c
Add missing information in control QC checks
lazappi Aug 1, 2025
cd2eef7
Handle old doc URL location in get_method_info
lazappi Aug 1, 2025
c44f13e
Prefix component additional info in get_metric_info
lazappi Aug 1, 2025
bca05d1
Cleanup removed argument in get_results
lazappi Aug 1, 2025
22361be
Fix test script for generate_qc
lazappi Aug 1, 2025
14861b4
Add authors to datasets, methods, metrics
lazappi Aug 4, 2025
2f7943a
schemas were moved to the common_resources repo
rcannood Aug 5, 2025
de9e0b2
fix schema paths
rcannood Aug 5, 2025
01acc49
set common submodule to different branch for testing
rcannood Aug 5, 2025
a40114e
Fix resource
rcannood Aug 5, 2025
3098da9
fix schema paths in the script
rcannood Aug 5, 2025
7044804
authors and references were moved into core
rcannood Aug 5, 2025
05a33b2
add a params placeholder for ease of use
rcannood Aug 5, 2025
5ab5c3f
show number of passed checks as well
rcannood Aug 5, 2025
8d14bc8
fix result schema path
rcannood Aug 5, 2025
3fbbbf6
Add bibliography file
lazappi Aug 6, 2025
5276ba0
Add shared util functions
lazappi Aug 6, 2025
072addf
Use shared functions for authors and references
lazappi Aug 6, 2025
664eeb5
update submodule (#934)
rcannood Aug 6, 2025
ada1876
Add scripts/create_resources/task_results_v4
lazappi Aug 6, 2025
55b9e23
Update main reference
lazappi Aug 6, 2025
2c4ce88
Use temporary directory in render-report
lazappi Aug 6, 2025
6572871
Style reporting R scripts
lazappi Aug 6, 2025
18cd13d
add auto wf
rcannood Aug 6, 2025
52d7f17
add script to reprocess task results
rcannood Aug 6, 2025
2239144
Handle missing scaled scores in generate_qc
lazappi Aug 6, 2025
9c1cd26
Set unknown error in get_results
lazappi Aug 6, 2025
5a18f35
fix script
rcannood Aug 6, 2025
ab06258
Handle missing fields in old task info
lazappi Aug 7, 2025
0f223ba
Handle missing additional info in authors field
lazappi Aug 7, 2025
0e55c4c
Fix typo in get_references_list()
lazappi Aug 7, 2025
8623331
Handle missing summary/label in get_task_info
lazappi Aug 7, 2025
f3381a0
Handle minimal dataset info in old results
lazappi Aug 7, 2025
e23473b
Handle missing file size in get_dataset_info
lazappi Aug 7, 2025
51e5fa6
Handle empty string in get_references_list()
lazappi Aug 7, 2025
7ce4cc8
Handle method info stored in functionality field
lazappi Aug 7, 2025
2df3b82
Move get_additional_info() to shared functions
lazappi Aug 7, 2025
6667812
Handle missing maximize in get_metric_info
lazappi Aug 7, 2025
73dbf12
Handle missing metric values in get_results
lazappi Aug 7, 2025
bb87f2e
Properly handle workflow component in get_results
lazappi Aug 7, 2025
732a2f2
Handle metrics stored in functionality field
lazappi Aug 7, 2025
26189c7
Give better error when dataset IDs don't map
lazappi Aug 8, 2025
0634e62
Remove duplicate datasets in get_dataset_info
lazappi Aug 8, 2025
eaa1f17
Handle infinite values in generate_qc
lazappi Aug 8, 2025
ad5b6b7
Add check that any valid scores are found
lazappi Aug 8, 2025
a427571
Adjust dataset process mapping
lazappi Aug 8, 2025
fe3f2e1
Handle source_urls in render_report
lazappi Aug 8, 2025
7d39c12
Fix additional info in get_method_info
lazappi Aug 8, 2025
96e8e4e
Handle missing file size/date in report
lazappi Aug 8, 2025
8031e40
Use regex to match DOI references
lazappi Aug 8, 2025
072a6a3
Fix empty scores check in report
lazappi Aug 8, 2025
583fdcf
Fix DOI regex
lazappi Aug 8, 2025
7e7c11a
Handle DOIs without text citations
lazappi Aug 12, 2025
efa00bb
Merge remote-tracking branch 'origin/feature/no-ref/update-process-re…
lazappi Aug 12, 2025
c0dd768
Warn about missing values for succeeded metrics
lazappi Aug 12, 2025
ca4bb8c
Improve controls check in report
lazappi Aug 13, 2025
6ba4f49
update submodule
rcannood Aug 18, 2025
e8119ce
Add results filtering (#935)
rcannood Aug 19, 2025
6 changes: 3 additions & 3 deletions _viash.yaml
@@ -11,9 +11,9 @@ keywords: [openproblems, benchmarking, single-cell omics]
 references:
   doi:
     # Malte Luecken, Scott Gigante, Daniel Burkhardt, Robrecht Cannoodt, et al.
-    # Defining and benchmarking open problems in single-cell analysis,
-    # 03 April 2024, PREPRINT (Version 1) available at Research Square [https://doi.org/10.21203/rs.3.rs-4181617/v1]
-    - 10.21203/rs.3.rs-4181617/v1
+    # Defining and benchmarking open problems in single-cell analysis.
+    # Nat Biotechnol 43, 1035–1040 (2025).
+    - 10.1038/s41587-025-02694-w

 links:
   issue_tracker: https://github.com/openproblems-bio/openproblems/issues
83 changes: 83 additions & 0 deletions scripts/create_resources/reprocess_task_results_v4.sh
@@ -0,0 +1,83 @@
#!/bin/bash

# get the root of the repository
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

set -e

OUT_DIR="resources"

echo ">>> Fetching raw results..."
aws s3 sync --profile op \
  s3://openproblems-data/resources/ \
  "$OUT_DIR/" \
  --exclude "*" \
  --include "**/results/run_*/*" \
  --delete

echo ">>> Patch state.yaml files..."
# fix state.yaml id and output_trace
python <<HERE
import os
import re
import glob

def update_state_file(file_path, new_id):
    with open(file_path, 'r') as file:
        content = file.read()

    # if output_trace is missing, add it
    if 'output_trace:' not in content:
        content += "\noutput_trace: !file trace.txt\n"

    # replace the id with the value of the glob ** pattern
    content = re.sub(r'id: .+', f'id: {new_id}/processed', content)

    with open(file_path, 'w') as file:
        file.write(content)

# find all state.yaml files
state_files = glob.glob('resources/**/state.yaml', recursive=True)
for state_file in state_files:
    # extract the id from the path
    match = re.search(r'resources/(.+?)/state\.yaml', state_file)
    if match:
        new_id = match.group(1)
        update_state_file(state_file, new_id)
        print(f"Updated {state_file} with id: {new_id}")
    else:
        print(f"Could not extract id from {state_file}, skipping.")
HERE

echo ">>> Creating params.yaml..."
cat > /tmp/params.yaml << HERE
input_states: resources/*/results/run_*/state.yaml
rename_keys: 'input_task_info:output_task_info;input_dataset_info:output_dataset_info;input_method_configs:output_method_configs;input_metric_configs:output_metric_configs;input_scores:output_scores;input_trace:output_trace'
output_state: '\$id/state.yaml'
settings: '{"output_combined": "\$id/output_combined.json", "output_report": "\$id/output_report.html", "output_task_info": "\$id/output_task_info.json", "output_dataset_info": "\$id/output_dataset_info.json", "output_method_info": "\$id/output_method_info.json", "output_metric_info": "\$id/output_metric_info.json", "output_results": "\$id/output_results.json", "output_scores": "\$id/output_quality_control.json"}'
publish_dir: "$OUT_DIR"
HERE

echo ">>> Processing results..."
nextflow run target/nextflow/reporting/process_task_results/main.nf \
  -profile docker \
  -params-file /tmp/params.yaml \
  -c common/nextflow_helpers/labels_ci.config \
  -entry auto \
  -resume

# find all files in $OUT_DIR with the pattern output_report.html
echo ">>> List reports..."
find "$OUT_DIR" -name "output_report.html"

# echo ">>> Uploading processed results to S3..."
# aws s3 sync --profile op \
# "resources_test/openproblems/task_results_v4/" \
# "s3://openproblems-data/resources_test/openproblems/task_results_v4/" \
# --delete --dryrun

# echo
# echo ">>> Done!"
40 changes: 40 additions & 0 deletions scripts/create_resources/task_results_v4.sh
@@ -0,0 +1,40 @@
#!/bin/bash

# get the root of the repository
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

set -e

OUT_DIR="resources_test/openproblems/task_results_v4"

echo ">>> Fetching raw results..."
aws s3 sync --profile op \
  s3://openproblems-data/resources/task_batch_integration/results/run_2025-01-23_18-03-16/ \
  "$OUT_DIR/raw/" \
  --delete

echo
echo ">>> Processing results..."
if [ -d "$OUT_DIR/processed" ]; then rm -Rf $OUT_DIR/processed; fi
nextflow run target/nextflow/reporting/process_task_results/main.nf \
  -profile docker \
  --input_task_info $OUT_DIR/raw/task_info.yaml \
  --input_dataset_info $OUT_DIR/raw/dataset_uns.yaml \
  --input_method_configs $OUT_DIR/raw/method_configs.yaml \
  --input_metric_configs $OUT_DIR/raw/metric_configs.yaml \
  --input_scores $OUT_DIR/raw/score_uns.yaml \
  --input_trace $OUT_DIR/raw/trace.txt \
  --output_state state.yaml \
  --publishDir $OUT_DIR/processed

echo ">>> Uploading processed results to S3..."
aws s3 sync --profile op \
  "resources_test/openproblems/task_results_v4/" \
  "s3://openproblems-data/resources_test/openproblems/task_results_v4/" \
  --delete --dryrun

echo
echo ">>> Done!"
102 changes: 102 additions & 0 deletions src/reporting/combine_output/config.vsh.yaml
@@ -0,0 +1,102 @@
name: combine_output
namespace: reporting
description: Combine task outputs into a single JSON

argument_groups:
  - name: Inputs
    arguments:
      - name: --input_task_info
        type: file
        description: Task info file
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/task_info.json
        required: true
        example: resources_test/openproblems/task_results_v4/processed/task_info.json
      - name: --input_dataset_info
        type: file
        description: Dataset info file
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/dataset_info.json
        required: true
        example: resources_test/openproblems/task_results_v4/processed/dataset_info.json
      - name: --input_method_info
        type: file
        description: Method info file
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/method_info.json
        required: true
        example: resources_test/openproblems/task_results_v4/processed/method_info.json
      - name: --input_metric_info
        type: file
        description: Metric info file
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/metric_info.json
        required: true
        example: resources_test/openproblems/task_results_v4/processed/metric_info.json
      - name: --input_results
        type: file
        description: Results file
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/results.json
        required: true
        example: resources_test/openproblems/task_results_v4/processed/results.json
      - name: --input_quality_control
        type: file
        description: Quality control file
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/quality_control.json
        required: true
        example: resources_test/openproblems/task_results_v4/processed/quality_control.json

  - name: Outputs
    arguments:
      - name: --output
        type: file
        direction: output
        description: Combined output JSON
        default: combined_output.json
        info:
          format:
            type: json
            schema: /common/schemas/results_v4/combined_output.json

resources:
  - type: r_script
    path: script.R
  - path: /common/schemas
    dest: schemas

test_resources:
  - type: python_script
    path: /common/component_tests/run_and_check_output.py
  - path: /resources_test/openproblems/task_results_v4
    dest: resources_test/openproblems/task_results_v4

engines:
  - type: docker
    image: openproblems/base_r:1
    setup:
      - type: apt
        packages:
          - nodejs
          - npm
      - type: docker
        run: npm install -g ajv-cli

runners:
  - type: executable
  - type: nextflow
    directives:
      label: [lowmem, lowtime, lowcpu]
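
Note: for a quick local check, the component defined above can be run directly with viash, pointing the arguments at the test resources listed in its example: fields (a sketch; the output filename is an arbitrary assumption):

viash run src/reporting/combine_output/config.vsh.yaml -- \
  --input_task_info resources_test/openproblems/task_results_v4/processed/task_info.json \
  --input_dataset_info resources_test/openproblems/task_results_v4/processed/dataset_info.json \
  --input_method_info resources_test/openproblems/task_results_v4/processed/method_info.json \
  --input_metric_info resources_test/openproblems/task_results_v4/processed/metric_info.json \
  --input_results resources_test/openproblems/task_results_v4/processed/results.json \
  --input_quality_control resources_test/openproblems/task_results_v4/processed/quality_control.json \
  --output combined_output.json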
105 changes: 105 additions & 0 deletions src/reporting/combine_output/script.R
@@ -0,0 +1,105 @@
## VIASH START
processed_dir <- "resources_test/openproblems/task_results_v4/processed"

par <- list(
  # Inputs
  input_task_info = paste0(processed_dir, "/task_info.json"),
  input_quality_control = paste0(processed_dir, "/quality_control.json"),
  input_metric_info = paste0(processed_dir, "/metric_info.json"),
  input_method_info = paste0(processed_dir, "/method_info.json"),
  input_dataset_info = paste0(processed_dir, "/dataset_info.json"),
  input_results = paste0(processed_dir, "/results.json"),
  # Outputs
  output = "task_results.json"
)
## VIASH END

################################################################################
# MAIN SCRIPT
################################################################################

cat("====== Combine output ======\n")

cat("\n>>> Reading input files...\n")
cat("Reading task info from '", par$input_task_info, "'...\n", sep = "")
task_info <- jsonlite::read_json(par$input_task_info)

cat(
  "Reading quality control from '",
  par$input_quality_control,
  "'...\n",
  sep = ""
)
quality_control <- jsonlite::read_json(par$input_quality_control)

cat("Reading metric info from '", par$input_metric_info, "'...\n", sep = "")
metric_info <- jsonlite::read_json(par$input_metric_info)

cat("Reading method info from '", par$input_method_info, "'...\n", sep = "")
method_info <- jsonlite::read_json(par$input_method_info)

cat("Reading dataset info from '", par$input_dataset_info, "'...\n", sep = "")
dataset_info <- jsonlite::read_json(par$input_dataset_info)

cat("Reading results from '", par$input_results, "'...\n", sep = "")
results <- jsonlite::read_json(par$input_results)

cat("\n>>> Combining outputs...\n")
# Create combined output according to task_results.json
combined_output <- list(
  task_info = task_info,
  dataset_info = dataset_info,
  method_info = method_info,
  metric_info = metric_info,
  results = results,
  quality_control = quality_control
)

cat("\n>>> Writing output file...\n")
cat("Writing combined output to '", par$output, "'...\n", sep = "")
jsonlite::write_json(
  combined_output,
  par$output,
  pretty = TRUE,
  null = "null",
  na = "null",
  auto_unbox = TRUE
)

cat("\n>>> Validating output against schema...\n")
results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4")
ajv_args <- paste(
  "validate",
  "--spec draft2020",
  "-s",
  file.path(results_schemas, "combined_output.json"),
  "-r",
  file.path(results_schemas, "task_info.json"),
  "-r",
  file.path(results_schemas, "dataset_info.json"),
  "-r",
  file.path(results_schemas, "method_info.json"),
  "-r",
  file.path(results_schemas, "metric_info.json"),
  "-r",
  file.path(results_schemas, "results.json"),
  "-r",
  file.path(results_schemas, "quality_control.json"),
  "-r",
  file.path(results_schemas, "core.json"),
  "-d",
  par$output
)

cat("Running validation command:", "ajv", ajv_args, "\n")
cat("Output:\n")
validation_result <- system2("ajv", ajv_args)

if (validation_result == 0) {
  cat("JSON validation passed successfully!\n")
} else {
  cat("JSON validation failed!\n")
  stop("Output JSON does not conform to schema")
}

cat("\n>>> Done!\n")