From b66481197f18c1d067ea3d7038d720f8f586c51a Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 18 Aug 2025 08:01:33 +0200 Subject: [PATCH 1/4] add report script --- scripts/run_benchmark/render_report_local.sh | 47 ++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100755 scripts/run_benchmark/render_report_local.sh diff --git a/scripts/run_benchmark/render_report_local.sh b/scripts/run_benchmark/render_report_local.sh new file mode 100755 index 00000000..710ab308 --- /dev/null +++ b/scripts/run_benchmark/render_report_local.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# fail on error +set -e + +# ensure we're in the root of the repo +REPO_ROOT=$(git rev-parse --show-toplevel) +cd "$REPO_ROOT" + +# set input and output directories +TASK=task_batch_integration +BASE_DIR="s3://openproblems-data/resources/$TASK/results" +OUTPUT_DIR="output/report" + +# find subdir in bucket with latest date +DATE=$(aws s3 ls $BASE_DIR --recursive | awk '{print $4}' | grep 'task_info.yaml' | sort -r | head -n 1 | sed 's#.*/run_\(.*\)/[^/]*$#\1#') + +INPUT_DIR="$BASE_DIR/run_$DATE" +TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//') + +echo "Processing $DATE -> $OUTPUT_DIR" + +# start the run +# TODO: switch to `-r build/main` once PR openproblems-bio/openproblems#919 is merged +nextflow run openproblems-bio/openproblems \ + -r build/feature/no-ref/update-process-results \ + -main-script target/nextflow/reporting/process_task_results/main.nf \ + -profile docker \ + -resume \ + -latest \ + -c common/nextflow_helpers/labels_ci.config \ + --id "$TASK/run_$DATE" \ + --input_scores "$INPUT_DIR/score_uns.yaml" \ + --input_dataset_info "$INPUT_DIR/dataset_uns.yaml" \ + --input_method_configs "$INPUT_DIR/method_configs.yaml" \ + --input_metric_configs "$INPUT_DIR/metric_configs.yaml" \ + --input_trace "$INPUT_DIR/trace.txt" \ + --input_task_info "$INPUT_DIR/task_info.yaml" \ + --output_state '$id/state.yaml' \ + --output_combined '$id/combined_output.json' \ + --output_report '$id/report.html' \ + --output_dataset_info '$id/dataset_info.json' \ + --output_method_info '$id/method_info.json' \ + --output_metric_info '$id/metric_info.json' \ + --output_results '$id/results.json' \ + --output_quality_control '$id/quality_control.json' \ + --publish_dir "$OUTPUT_DIR" From 2de70d9f14bb5416c1878c353b0d42c6224269fc Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 19 Aug 2025 17:59:07 +0200 Subject: [PATCH 2/4] update to main branch --- scripts/run_benchmark/render_report_local.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/run_benchmark/render_report_local.sh b/scripts/run_benchmark/render_report_local.sh index 710ab308..1f5975df 100755 --- a/scripts/run_benchmark/render_report_local.sh +++ b/scripts/run_benchmark/render_report_local.sh @@ -21,9 +21,8 @@ TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//') echo "Processing $DATE -> $OUTPUT_DIR" # start the run -# TODO: switch to `-r build/main` once PR openproblems-bio/openproblems#919 is merged nextflow run openproblems-bio/openproblems \ - -r build/feature/no-ref/update-process-results \ + -r build/main \ -main-script target/nextflow/reporting/process_task_results/main.nf \ -profile docker \ -resume \ From cb57d8e1362ca01a05f47311ef42918bafe35714 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 20 Aug 2025 12:14:06 +0200 Subject: [PATCH 3/4] update script --- scripts/run_benchmark/render_report_local.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/run_benchmark/render_report_local.sh b/scripts/run_benchmark/render_report_local.sh index 1f5975df..335b4b95 100755 --- a/scripts/run_benchmark/render_report_local.sh +++ b/scripts/run_benchmark/render_report_local.sh @@ -21,6 +21,12 @@ TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//') echo "Processing $DATE -> $OUTPUT_DIR" # start the run +extra_filters=() +# extra_filters=( +# --datasets_exclude "cellxgene_census/hypomap;cellxgene_census/mouse_pancreas_atlas" +# --metrics_exclude "hvg_overlap" +# ) + nextflow run openproblems-bio/openproblems \ -r build/main \ -main-script target/nextflow/reporting/process_task_results/main.nf \ @@ -43,4 +49,6 @@ nextflow run openproblems-bio/openproblems \ --output_metric_info '$id/metric_info.json' \ --output_results '$id/results.json' \ --output_quality_control '$id/quality_control.json' \ - --publish_dir "$OUTPUT_DIR" + --publish_dir "$OUTPUT_DIR" \ + "${extra_filters[@]}" + From f2a2bc4d5a0c897a76dfb1b737f9ccf6b08b0448 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 29 Sep 2025 13:41:32 +0200 Subject: [PATCH 4/4] switch to new results rendering script --- common | 2 +- scripts/render_report.sh | 6 +++ scripts/run_benchmark/render_report_local.sh | 54 -------------------- 3 files changed, 7 insertions(+), 55 deletions(-) create mode 100644 scripts/render_report.sh delete mode 100755 scripts/run_benchmark/render_report_local.sh diff --git a/common b/common index b60eda08..67da19a3 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit b60eda085e9cd505ec169fa30cc7e919e8563ad2 +Subproject commit 67da19a36ae56ea068804d15ccadec88a06da920 diff --git a/scripts/render_report.sh b/scripts/render_report.sh new file mode 100644 index 00000000..f1a652b3 --- /dev/null +++ b/scripts/render_report.sh @@ -0,0 +1,6 @@ + +#!/bin/bash + +set -e + +common/scripts/render_results_report "$@" diff --git a/scripts/run_benchmark/render_report_local.sh b/scripts/run_benchmark/render_report_local.sh deleted file mode 100755 index 335b4b95..00000000 --- a/scripts/run_benchmark/render_report_local.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -# fail on error -set -e - -# ensure we're in the root of the repo -REPO_ROOT=$(git rev-parse --show-toplevel) -cd "$REPO_ROOT" - -# set input and output directories -TASK=task_batch_integration -BASE_DIR="s3://openproblems-data/resources/$TASK/results" -OUTPUT_DIR="output/report" - -# find subdir in bucket with latest date -DATE=$(aws s3 ls $BASE_DIR --recursive | awk '{print $4}' | grep 'task_info.yaml' | sort -r | head -n 1 | sed 's#.*/run_\(.*\)/[^/]*$#\1#') - -INPUT_DIR="$BASE_DIR/run_$DATE" -TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//') - -echo "Processing $DATE -> $OUTPUT_DIR" - -# start the run -extra_filters=() -# extra_filters=( -# --datasets_exclude "cellxgene_census/hypomap;cellxgene_census/mouse_pancreas_atlas" -# --metrics_exclude "hvg_overlap" -# ) - -nextflow run openproblems-bio/openproblems \ - -r build/main \ - -main-script target/nextflow/reporting/process_task_results/main.nf \ - -profile docker \ - -resume \ - -latest \ - -c common/nextflow_helpers/labels_ci.config \ - --id "$TASK/run_$DATE" \ - --input_scores "$INPUT_DIR/score_uns.yaml" \ - --input_dataset_info "$INPUT_DIR/dataset_uns.yaml" \ - --input_method_configs "$INPUT_DIR/method_configs.yaml" \ - --input_metric_configs "$INPUT_DIR/metric_configs.yaml" \ - --input_trace "$INPUT_DIR/trace.txt" \ - --input_task_info "$INPUT_DIR/task_info.yaml" \ - --output_state '$id/state.yaml' \ - --output_combined '$id/combined_output.json' \ - --output_report '$id/report.html' \ - --output_dataset_info '$id/dataset_info.json' \ - --output_method_info '$id/method_info.json' \ - --output_metric_info '$id/metric_info.json' \ - --output_results '$id/results.json' \ - --output_quality_control '$id/quality_control.json' \ - --publish_dir "$OUTPUT_DIR" \ - "${extra_filters[@]}" -