diff --git a/DIMS/HMDBparts.R b/DIMS/HMDBparts.R index 232028e..8c2234f 100755 --- a/DIMS/HMDBparts.R +++ b/DIMS/HMDBparts.R @@ -1,11 +1,9 @@ -# adapted from hmdb_parts.R - # define parameters cmd_args <- commandArgs(trailingOnly = TRUE) db_file <- cmd_args[1] breaks_file <- cmd_args[2] -standard_run <- cmd_args[4] +standard_run <- cmd_args[3] # load file with binning breaks load(breaks_file) @@ -15,8 +13,8 @@ max_mz <- round(breaks_fwhm[length(breaks_fwhm)]) # In case of a standard run use external HMDB parts # m/z is approximately 70 to 600: set limits between 68-71 for min and 599-610 for max if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz < 610) { - # skip generating HMDB parts - hmdb_parts_path <- cmd_args[3] + # skip generating HMDB parts; copy them from hmdb_parts_path + hmdb_parts_path <- cmd_args[4] # find all files containing hmdb in file name hmdb_parts <- list.files(hmdb_parts_path, pattern = "hmdb") for (hmdb_file in hmdb_parts) { @@ -25,8 +23,24 @@ if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz < } else { # generate HMDB parts in case of non-standard mz range load(db_file) - ppm <- as.numeric(cmd_args[5]) + # determine segments of m/z for HMDB parts; smaller parts for m/z < 100 + mz_segments <- c() + segment_start <- min_mz + segment_end <- min_mz + 5 + while (segment_end < max_mz) { + if (segment_start < 100) { + segment_size = 5 + } else { + segment_size = 10 + } + mz_segments <- c(mz_segments, segment_start) + segment_start <- segment_start + segment_size + segment_end <- segment_end + segment_size + } + #last segment + mz_segments <- c(mz_segments, max_mz) + scanmodes <- c("positive", "negative") for (scanmode in scanmodes) { if (scanmode == "negative") { @@ -38,73 +52,20 @@ if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz < } # filter mass range meassured - hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= breaks_fwhm[1] & - hmdb_add_iso[ , column_label] <= breaks_fwhm[length(breaks_fwhm)]), ] + hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= min_mz & + hmdb_add_iso[ , column_label] <= max_mz), ] # sort on mass - outlist <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),] - nr_rows <- dim(outlist)[1] - - # maximum number of rows per file - sub <- 20000 - end <- 0 - last_line <- sub - check <- 0 - outlist_part <- NULL + sorted_hmdb_add_iso <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),] + nr_rows <- dim(sorted_hmdb_add_iso)[1] # create parts and save to file - if (nr_rows < sub) { - outlist_part <- outlist - save(outlist_part, file = paste0(scanmode, "_hmdb.1.RData")) - } else if (nr_rows >= sub & (floor(nr_rows / sub) - 1) >= 2) { - for (i in 2:floor(nr_rows / sub) - 1) { - start <- -(sub - 1) + i * sub - end <- i * sub - - if (i > 1){ - outlist_i = outlist[c(start:end),] - nr_moved = 0 - # Use ppm to replace border to avoid cut within peak group - while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 / - as.numeric(outlist_i[1, column_label]) < ppm) { - outlist_part <- rbind(outlist_part, outlist_i[1, ]) - outlist_i <- outlist_i[-1, ] - nr_moved <- nr_moved + 1 - } - - save(outlist_part, file = paste(scanmode, "_", paste("hmdb", i-1, "RData", sep = "."), sep = "")) - check <- check + dim(outlist_part)[1] - - outlist_part <- outlist_i - last_line <- dim(outlist_part)[1] - - } else { - outlist_part <- outlist[c(start:end),] - } - } - - start <- end + 1 - end <- nr_rows - outlist_i <- outlist[c(start:end), ] - nr_moved <- 0 - - if (!is.null(outlist_part)) { - # Calculate ppm and replace border, avoid cut within peak group - while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 / - as.numeric(outlist_i[1, column_label]) < ppm) { - outlist_part <- rbind(outlist_part, outlist_i[1, ]) - outlist_i <- outlist_i[-1, ] - nr_moved <- nr_moved + 1 - } - - save(outlist_part, file = paste0(scanmode, "_hmdb.", i, ".RData")) - check <- check + dim(outlist_part)[1] - } - - outlist_part <- outlist_i - save(outlist_part, file = paste0(scanmode, "_hmdb.", i + 1, ".RData")) - check <- check + dim(outlist_part)[1] + for (mz_part_index in 1:(length(mz_segments) - 1)) { + mz_start <- mz_segments[mz_part_index] + mz_end <- mz_segments[mz_part_index + 1] + outlist_part <- sorted_hmdb_add_iso[sorted_hmdb_add_iso[ , column_label] > mz_start & + sorted_hmdb_add_iso[ , column_label] <= mz_end, ] + save(outlist_part, file = paste0(scanmode, "_hmdb.", mz_part_index, ".RData")) } } } - diff --git a/DIMS/HMDBparts.nf b/DIMS/HMDBparts.nf index 5f19f75..760b28d 100644 --- a/DIMS/HMDBparts.nf +++ b/DIMS/HMDBparts.nf @@ -13,6 +13,6 @@ process HMDBparts { script: """ - Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.hmdb_parts_files $params.standard_run $params.ppm + Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.standard_run $params.hmdb_parts_files """ }