Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
configfile: "config/config.yml"

# rules for CRISPR comparisons
include: "rules/utils.smk"
include: "rules/crispr_comparison.smk"


# perform all comparisons listed in config.yml
rule all:
input:
Expand Down
6 changes: 3 additions & 3 deletions workflow/rules/crispr_comparison.smk
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ rule mergePredictionsWithExperiment:
log: "results/{comparison}/logs/mergePredictionsWithExperiment.log"
conda: "../envs/r_crispr_comparison.yml"
resources:
mem_mb = 32000
mem_mb = determine_mem_mb
script:
"../../workflow/scripts/mergePredictionsWithExperiment.R"

Expand All @@ -80,7 +80,7 @@ rule annotateEnhFeatures:
"results/{comparison}/expt_pred_merged_annot.txt.gz"
conda: "../envs/r_crispr_comparison.yml"
resources:
mem_mb = 32000
mem_mb = determine_mem_mb
script:
"../../workflow/scripts/annotateMergedData.R"

Expand All @@ -99,7 +99,7 @@ rule comparePredictionsToExperiment:
include_col = lambda wildcards: get_optional_parameter(wildcards, "include_col", None)
conda: "../envs/r_crispr_comparison.yml"
resources:
mem_mb = 32000,
mem_mb = determine_mem_mb,
runtime = "6h"
script:
"../../workflow/scripts/comparePredictionsToExperiment.Rmd"
Expand Down
10 changes: 10 additions & 0 deletions workflow/rules/utils.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import math

# Upper bound on the memory any single job may request, in MB.
MAX_MEM_MB = 250 * 1000  # 250GB


def determine_mem_mb(wildcards, input, attempt, min_gb=8):
    """Memory resource calculator for snakemake rules.

    Intended to be passed as a callable resource (``mem_mb = determine_mem_mb``).

    Args:
        wildcards: Wildcards of the job; not used, but part of the signature.
        input: Input files of the job. Must expose ``size_mb`` (total input
            size in MB) and render its file paths via ``str()``.
        attempt: 1-based attempt counter; the request doubles on each retry.
        min_gb: Lower bound of the request for the first attempt, in GB.

    Returns:
        int: Memory to request in MB, rounded up to a whole MB and never
        larger than ``MAX_MEM_MB``.
    """
    input_size_mb = input.size_mb
    if ".gz" in str(input):
        # Assume gzip compressed the file by at most 8x. The factor is applied
        # to the total input size as soon as any input path contains ".gz".
        input_size_mb *= 8
    attempt_multiplier = 2 ** (attempt - 1)  # Double memory for each retry
    mem_to_use_mb = attempt_multiplier * max(4 * input_size_mb, min_gb * 1000)
    # File sizes are fractional, so round up: the resource must be an integer
    # and rounding down could under-provision by a fraction of an MB.
    return min(math.ceil(mem_to_use_mb), MAX_MEM_MB)
6 changes: 6 additions & 0 deletions workflow/scripts/crisprComparisonLoadInputData.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ load_encode_pred_file <- function(file, showProgress) {
# load predictions and remove optional "#" in header row
pred <- fread(file)
colnames(pred)[[1]] <- sub("^#", "", colnames(pred)[[1]])

if ("PredictionCellType" %in% colnames(pred)) {
pred <- pred %>% rename(CellType = PredictionCellType)
}

pred <- pred %>% mutate(name = paste0(chr, ":", start, "-", end))

return(pred)

Expand Down