diff --git a/DIMS/AssignToBins.R b/DIMS/AssignToBins.R
index 49d4528a..8b31af7e 100644
--- a/DIMS/AssignToBins.R
+++ b/DIMS/AssignToBins.R
@@ -6,11 +6,13 @@ cmd_args <- commandArgs(trailingOnly = TRUE)
 
 mzml_filepath <- cmd_args[1]
 breaks_filepath <- cmd_args[2]
-resol <- as.numeric(cmd_args[3])
+trim_parameters_filepath <- cmd_args[3]
+resol <- as.numeric(cmd_args[4])
 
 # load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
-# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
 load(breaks_filepath)
+# load trim parameters file: contains trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
+load(trim_parameters_filepath)
 
 # get sample name
 techrep_name <- sub("\\..*$", "", basename(mzml_filepath))
diff --git a/DIMS/AssignToBins.nf b/DIMS/AssignToBins.nf
index d0a7098b..d3bc79ae 100644
--- a/DIMS/AssignToBins.nf
+++ b/DIMS/AssignToBins.nf
@@ -5,7 +5,7 @@ process AssignToBins {
     shell = ['/bin/bash', '-euo', 'pipefail']
 
     input:
-       tuple(val(file_id), path(mzML_file), path(breaks_file))
+       tuple(val(file_id), path(mzML_file), path(breaks_file), path(trim_params_file))
 
     output:
        path("${file_id}.RData"), emit: rdata_file
@@ -13,7 +13,7 @@ process AssignToBins {
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $params.resolution
+        Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $trim_params_file $params.resolution
         """
 }
 
diff --git a/DIMS/AveragePeaks.R b/DIMS/AveragePeaks.R
new file mode 100644
index 00000000..7114e3c4
--- /dev/null
+++ b/DIMS/AveragePeaks.R
@@ -0,0 +1,37 @@
+library(dplyr)
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+sample_name <- cmd_args[1]
+techreps <- cmd_args[2]
+scanmode <- cmd_args[3]
+preprocessing_scripts_dir <- cmd_args[4]
+tech_reps <- strsplit(techreps, ";")[[1]]
+
+# load in function scripts
+source(paste0(preprocessing_scripts_dir, "average_peaks_functions.R"))
+
+# Initialize per sample
+peaklist_allrepl <- NULL
+nr_repl_persample <- 0
+averaged_peaks <- matrix(0, nrow = 0, ncol = 6) 
+colnames(averaged_peaks) <- c("samplenr", "mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")
+
+# load RData files of technical replicates belonging to biological sample
+for (file_nr in 1:length(tech_reps)) {
+  tech_repl_file <- paste0(tech_reps[file_nr], "_", scanmode, ".RData")
+  tech_repl <- get(load(tech_repl_file))
+  # combine data for all technical replicates
+  peaklist_allrepl <- rbind(peaklist_allrepl, tech_repl)
+}
+# sort on mass
+peaklist_allrepl_df <- as.data.frame(peaklist_allrepl)
+peaklist_allrepl_df$mzmed.pkt <- as.numeric(peaklist_allrepl_df$mzmed.pkt) 
+peaklist_allrepl_df$height.pkt <- as.numeric(peaklist_allrepl_df$height.pkt) 
+peaklist_allrepl_sorted <- peaklist_allrepl_df %>% arrange(mzmed.pkt)
+
+# average over technical replicates
+averaged_peaks <- average_peaks_per_sample(peaklist_allrepl_sorted, sample_name)
+save(averaged_peaks, file = paste0("AvgPeaks_", sample_name, "_", scanmode, ".RData"))
+
diff --git a/DIMS/AveragePeaks.nf b/DIMS/AveragePeaks.nf
new file mode 100644
index 00000000..f50bd874
--- /dev/null
+++ b/DIMS/AveragePeaks.nf
@@ -0,0 +1,18 @@
+process AveragePeaks {
+    tag "DIMS AveragePeaks"
+    label 'AveragePeaks'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(rdata_files)
+       tuple val(sample_id), val(tech_reps), val(scanmode)
+
+    output:
+       path 'AvgPeaks_*.RData'
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/AveragePeaks.R $sample_id $tech_reps $scanmode $params.preprocessing_scripts_dir
+        """
+}
diff --git a/DIMS/AverageTechReplicates.nf b/DIMS/AverageTechReplicates.nf
deleted file mode 100644
index f03d553c..00000000
--- a/DIMS/AverageTechReplicates.nf
+++ /dev/null
@@ -1,36 +0,0 @@
-process AverageTechReplicates {
-    tag "DIMS AverageTechReplicates"
-    label 'AverageTechReplicates'
-    container = 'docker://umcugenbioinf/dims:1.3'
-    shell = ['/bin/bash', '-euo', 'pipefail']
-
-    input:
-       path(rdata_file)
-       path(tic_txt_files)
-       path(init_file)
-       val(nr_replicates)
-       val(analysis_id)
-       val(matrix)
-       path(highest_mz_file)
-       path(breaks_file)
-
-    output:
-       path('*_repl_pattern.RData'), emit: pattern_files
-       path('*_avg.RData'), emit: binned_files
-       path('miss_infusions_negative.txt')
-       path('miss_infusions_positive.txt')
-       path('*_TICplots.pdf'), emit: tic_plots_pdf
-
-    script:
-        """
-        Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R $init_file \
-                                                                      $params.nr_replicates \
-                                                                      $analysis_id \
-                                                                      $matrix \
-                                                                      $highest_mz_file \
-                                                                      $breaks_file \
-                                                                      $params.threshold_tics
-        """
-}
-
-
diff --git a/DIMS/CollectAveraged.R b/DIMS/CollectAveraged.R
new file mode 100755
index 00000000..e6466d9e
--- /dev/null
+++ b/DIMS/CollectAveraged.R
@@ -0,0 +1,18 @@
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+scripts_dir <- cmd_args[1]
+
+# for each scan mode, collect all averaged peak lists per biological sample
+scanmodes <- c("positive", "negative")
+for (scanmode in scanmodes) {
+  # get list of files
+  filled_files <- list.files("./", full.names = TRUE, pattern = paste0(scanmode, ".RData"))
+  # load files and combine into one object
+  outlist_total <- NULL
+  for (file_nr in 1:length(filled_files)) {
+    peaklist_averaged <- get(load(filled_files[file_nr]))
+    outlist_total <- rbind(outlist_total, peaklist_averaged)
+  }
+  save(outlist_total, file = paste0("AvgPeaks_", scanmode, ".RData"))
+}
diff --git a/DIMS/CollectAveraged.nf b/DIMS/CollectAveraged.nf
new file mode 100644
index 00000000..fc65bf21
--- /dev/null
+++ b/DIMS/CollectAveraged.nf
@@ -0,0 +1,17 @@
+process CollectAveraged {
+    tag "DIMS CollectAveraged"
+    label 'CollectAveraged'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(averaged_files)
+
+    output:
+       path('AvgPeaks*.RData'), emit: averaged_peaks
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/CollectAveraged.R
+        """
+}
diff --git a/DIMS/AverageTechReplicates.R b/DIMS/EvaluateTics.R
similarity index 55%
rename from DIMS/AverageTechReplicates.R
rename to DIMS/EvaluateTics.R
index 1c727d10..528d6f4b 100644
--- a/DIMS/AverageTechReplicates.R
+++ b/DIMS/EvaluateTics.R
@@ -1,5 +1,3 @@
-# adapted from 3-AverageTechReplicates.R
-
 # load packages
 library("ggplot2")
 library("gridExtra")
@@ -13,39 +11,14 @@ run_name <- cmd_args[3]
 dims_matrix <- cmd_args[4]
 highest_mz_file <- cmd_args[5]
 highest_mz <- get(load(highest_mz_file))
-breaks_filepath <- cmd_args[6]
-thresh2remove <- as.numeric(cmd_args[7])
-
-remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
-  # collect list of samples to remove from replication pattern
-  remove_from_group <- NULL
-  for (sample_nr in 1:length(repl_pattern)){
-    repl_pattern_1sample <- repl_pattern[[sample_nr]]
-    remove <- NULL
-    for (file_nr in 1:length(repl_pattern_1sample)) {
-      if (repl_pattern_1sample[file_nr] %in% bad_samples) {
-        remove <- c(remove, file_nr)
-      }
-    }
-    if (length(remove) == nr_replicates) {
-      remove_from_group <- c(remove_from_group, sample_nr)
-    }
-    if (!is.null(remove)) {
-      repl_pattern[[sample_nr]] <- repl_pattern[[sample_nr]][-remove]
-    }
-  }
-  if (length(remove_from_group) != 0) {
-    repl_pattern <- repl_pattern[-remove_from_group]
-  }
-  return(list("pattern" = repl_pattern))
-}
+trim_params_filepath <- cmd_args[6]
+thresh2remove <- 1000000000
 
 # load init_file: contains repl_pattern
 load(init_file)
 
-# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
-# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
-load(breaks_filepath)
+# load trim_params_file: contains trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
+load(trim_params_filepath)
 
 # lower the threshold for non Plasma matrices
 if (dims_matrix != "Plasma") {
@@ -59,78 +32,32 @@ if (highest_mz > 700) {
   thresh2remove <- 1000000
 }
 
+# find out which technical replicates are below the threshold
+remove_tech_reps <- find_bad_replicates(repl_pattern, thresh2remove)
+print(remove_tech_reps)
 
-# remove technical replicates which are below the threshold
-remove_neg <- NULL
-remove_pos <- NULL
-cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
-for (sample_nr in 1:length(repl_pattern)) {
-  tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
-  tech_reps_array_pos <- NULL
-  tech_reps_array_neg <- NULL
-  sum_neg <- 0
-  sum_pos <- 0
-  nr_pos <- 0
-  nr_neg <- 0
-  for (file_nr in 1:length(tech_reps)) {
-    load(paste(tech_reps[file_nr], ".RData", sep = ""))
-    cat("\n\nParsing", tech_reps[file_nr])
-    # negative scanmode
-    cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
-    if (sum(peak_list$neg[, 1]) < thresh2remove) {
-      cat(" ... Removed")
-      remove_neg <- c(remove_neg, tech_reps[file_nr])
-    } else {
-      nr_neg <- nr_neg + 1
-      sum_neg <- sum_neg + peak_list$neg
-    }
-    tech_reps_array_neg <- cbind(tech_reps_array_neg, peak_list$neg)
-    # positive scanmode
-    cat("\n\tPositive peak_list sum", sum(peak_list$pos[, 1]))
-    if (sum(peak_list$pos[, 1]) < thresh2remove) {
-      cat(" ... Removed")
-      remove_pos <- c(remove_pos, tech_reps[file_nr])
-    } else {
-      nr_pos <- nr_pos + 1
-      sum_pos <- sum_pos + peak_list$pos
-    }
-    tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
-  }
-  # save to file  
-  if (nr_neg != 0) {
-    sum_neg[, 1] <- sum_neg[, 1] / nr_neg
-    colnames(sum_neg) <- names(repl_pattern)[sample_nr]
-    save(sum_neg, file = paste0(names(repl_pattern)[sample_nr], "_neg_avg.RData"))
-  }
-  if (nr_pos != 0) {
-    sum_pos[, 1] <- sum_pos[, 1] / nr_pos
-    colnames(sum_pos) <- names(repl_pattern)[sample_nr]
-    save(sum_pos, file = paste0(names(repl_pattern)[sample_nr], "_pos_avg.RData"))
-  }
-}
-
-pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
-repl_pattern_filtered <- pattern_list$pattern
+# negative scan mode
+remove_neg <- remove_tech_reps$neg
+repl_pattern_filtered <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
 save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
-write.table(
-  remove_neg, 
-  file = "miss_infusions_negative.txt", 
-  row.names = FALSE, 
-  col.names = FALSE, 
-  sep = "\t"
-)
 
-pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
-repl_pattern_filtered <- pattern_list$pattern
+# positive scan mode
+remove_pos <- remove_tech_reps$pos
+repl_pattern_filtered <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
 save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
-write.table(
-  remove_pos, 
-  file = "miss_infusions_positive.txt", 
-  row.names = FALSE, 
-  col.names = FALSE, 
-  sep = "\t"
+
+# get an overview of suitable technical replicates for both scan modes
+allsamples_techreps_neg <- get_overview_tech_reps(repl_pattern_filtered, "negative")
+allsamples_techreps_pos <- get_overview_tech_reps(repl_pattern_filtered, "positive")
+allsamples_techreps_both_scanmodes <- rbind(allsamples_techreps_pos, allsamples_techreps_neg)
+write.table(allsamples_techreps_both_scanmodes,
+            file = "replicates_per_sample.txt",
+            col.names = FALSE,
+            row.names = FALSE,
+            sep = ","
 )
 
+
 ## generate TIC plots
 # get all txt files
 tic_files <- list.files("./", full.names = TRUE, pattern = "*TIC.txt")
@@ -207,3 +134,5 @@ tic_plot_pdf <- marrangeGrob(
 # save to file
 ggsave(filename = paste0(run_name, "_TICplots.pdf"),
        tic_plot_pdf, width = 21, height = 29.7, units = "cm")
+
+
diff --git a/DIMS/EvaluateTics.nf b/DIMS/EvaluateTics.nf
new file mode 100644
index 00000000..fdf7f713
--- /dev/null
+++ b/DIMS/EvaluateTics.nf
@@ -0,0 +1,33 @@
+process EvaluateTics {
+    tag "DIMS EvaluateTics"
+    label 'EvaluateTics'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(rdata_file)
+       path(tic_txt_files)
+       path(init_file)
+       val(analysis_id)
+       path(highest_mz_file)
+       path(trim_params_file)
+
+    output:
+       path('*_repl_pattern.RData'),      emit: pattern_files
+       path('replicates_per_sample.txt'), emit: sample_techreps
+       path('miss_infusions_negative.txt')
+       path('miss_infusions_positive.txt')
+       path('*_TICplots.pdf'),            emit: tic_plots_pdf
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/EvaluateTics.R $init_file \
+                                                             $params.nr_replicates \
+                                                             $analysis_id \
+                                                             $params.matrix \
+                                                             $highest_mz_file \
+                                                             $trim_params_file
+        """
+}
+
+
diff --git a/DIMS/GenerateBreaks.R b/DIMS/GenerateBreaks.R
index d07fd203..007d5ab7 100644
--- a/DIMS/GenerateBreaks.R
+++ b/DIMS/GenerateBreaks.R
@@ -1,5 +1,3 @@
-## adapted from 1-generateBreaksFwhm.HPC.R ##
-
 # load required package
 suppressPackageStartupMessages(library("xcms"))
 
@@ -57,5 +55,6 @@ for (i in 1:nr_segments) {
 }
 
 # generate output file
-save(breaks_fwhm, breaks_fwhm_avg, trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "breaks.fwhm.RData")
+save(breaks_fwhm, breaks_fwhm_avg, file = "breaks.fwhm.RData")
+save(trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "trim_params.RData")
 save(high_mz, file = "highest_mz.RData")
diff --git a/DIMS/GenerateBreaks.nf b/DIMS/GenerateBreaks.nf
index af617a8d..c486010d 100644
--- a/DIMS/GenerateBreaks.nf
+++ b/DIMS/GenerateBreaks.nf
@@ -10,6 +10,7 @@ process GenerateBreaks {
 
     output:
        path('breaks.fwhm.RData'), emit: breaks
+       path('trim_params.RData'), emit: trim_params
        path('highest_mz.RData'), emit: highest_mz
 
     script:
diff --git a/DIMS/MakeInit.R b/DIMS/MakeInit.R
index 44d49962..9ff22623 100644
--- a/DIMS/MakeInit.R
+++ b/DIMS/MakeInit.R
@@ -1,5 +1,3 @@
-## adapted from makeInit in old pipeline
-
 # define parameters
 args <- commandArgs(trailingOnly = TRUE)
 
@@ -15,12 +13,12 @@ group_names_unique <- unique(group_names)
 # generate the replication pattern
 repl_pattern <- c()
 for (sample_group in 1:nr_sample_groups) {
-  tmp <- c()
+  replicates_persample <- c()
   for (repl in nr_replicates:1) {
     index <- ((sample_group * nr_replicates) - repl) + 1
-    tmp <- c(tmp, sample_names[index])
+    replicates_persample <- c(replicates_persample, sample_names[index])
   }
-  repl_pattern <- c(repl_pattern, list(tmp))
+  repl_pattern <- c(repl_pattern, list(replicates_persample))
 }
 
 names(repl_pattern) <- group_names_unique
diff --git a/DIMS/PeakFinding.R b/DIMS/PeakFinding.R
index 697f8f7e..ab62a880 100644
--- a/DIMS/PeakFinding.R
+++ b/DIMS/PeakFinding.R
@@ -1,53 +1,59 @@
-## adapted from 4-peakFinding.R
+library(dplyr)
 
 # define parameters
 cmd_args <- commandArgs(trailingOnly = TRUE)
 
-sample_file <- cmd_args[1]
-breaks_file <- cmd_args[2]
-resol <- as.numeric(cmd_args[3])
-scripts_dir <- cmd_args[4]
-thresh <- 2000
-outdir <- "./"
-
-# load in function scripts
-source(paste0(scripts_dir, "do_peakfinding.R"))
-source(paste0(scripts_dir, "check_overlap.R"))
-source(paste0(scripts_dir, "search_mzrange.R"))
-source(paste0(scripts_dir, "fit_optim.R"))
-source(paste0(scripts_dir, "fit_gaussian.R"))
-source(paste0(scripts_dir, "fit_init.R"))
-source(paste0(scripts_dir, "get_fwhm.R"))
-source(paste0(scripts_dir, "get_stdev.R"))
-source(paste0(scripts_dir, "optimize_gaussfit.R"))
-source(paste0(scripts_dir, "fit_peaks.R"))
-source(paste0(scripts_dir, "fit_gaussians.R"))
-source(paste0(scripts_dir, "estimate_area.R"))
-source(paste0(scripts_dir, "get_fit_quality.R"))
-source(paste0(scripts_dir, "check_overlap.R"))
-source(paste0(scripts_dir, "sum_curves.R"))
-source(paste0(scripts_dir, "within_ppm.R"))
-
-load(breaks_file)
-
-# Load output of AverageTechReplicates for a sample
-sample_avgtechrepl <- get(load(sample_file))
-if (grepl("_pos", sample_file)) {
-  scanmode <- "positive"
-} else if (grepl("_neg", sample_file)) {
-  scanmode <- "negative"
-}
+replicate_rdatafile <- cmd_args[1]
+resol <- as.numeric(cmd_args[2])
+preprocessing_scripts_dir <- cmd_args[3]
+# use fixed theshold between noise and signal for peak
+peak_thresh <- 2000 
+
+# source functions script
+source(paste0(preprocessing_scripts_dir, "peak_finding_functions.R"))
+
+# Load output of AssignToBins (peak_list) for a technical replicate
+load(replicate_rdatafile)
+techrepl_name <- colnames(peak_list$pos)[1]
+
+# load list of technical replicates per sample that passed threshold filter
+techreps_passed <- read.table("replicates_per_sample.txt", sep=",")
 
 # Initialize
 options(digits = 16)
-# Number used to calculate area under Gaussian curve
-int_factor <- 1 * 10^5 
-# Initial value used to estimate scaling parameter
-scale <- 2
-width <- 1024
-height <- 768
 
-# run the findPeaks function
+# do peak finding
+scanmodes <- c("positive", "negative")
+for (scanmode in scanmodes) {
+  # get intensities for scan mode
+  if (scanmode == "positive") {
+    ints_perscanmode <- peak_list$pos
+  } else if (scanmode == "negative") {
+    ints_perscanmode <- peak_list$neg
+  }
+ 
+  # check whether technical replicate has passed threshold filter for this scanmode
+  techreps_scanmode <- techreps_passed[grep(scanmode, techreps_passed[, 3]), ]
+  # if techrep is ok, it will be found. If not, skip this techrep.
+  if (length(grep(techrepl_name, techreps_scanmode)) == 0) {
+    break
+  }
+
+  # put mz and intensities into dataframe
+  ints_fullrange <- as.data.frame(cbind(mz = as.numeric(rownames(ints_perscanmode)), 
+                                        int = as.numeric(ints_perscanmode)))
+
+  # look for m/z range for all peaks
+  regions_of_interest <- search_regions_of_interest(ints_fullrange)
+ 
+  # fit Gaussian curve and calculate integrated area under curve
+  integrated_peak_df <- integrate_peaks(ints_fullrange, regions_of_interest, resol, peak_thresh)
+ 
+  # add sample name to dataframe
+  integrated_peak_df <- as.data.frame(cbind(samplenr = techrepl_name, integrated_peak_df))
+ 
+  # save output to file
+  save(integrated_peak_df, file = paste0(techrepl_name, "_", scanmode, ".RData"))
+  write.table(integrated_peak_df, file = paste0(techrepl_name, "_", scanmode, ".txt"), sep = "\t", row.names = FALSE)
+}
 
-# do_peakfinding(sample_avgtechrepl, breaks_fwhm, int_factor, scale, resol, outdir, scanmode, FALSE, thresh, width, height)
-do_peakfinding(sample_avgtechrepl, int_factor, scale, resol, outdir, scanmode, FALSE, thresh, width, height)
diff --git a/DIMS/PeakFinding.nf b/DIMS/PeakFinding.nf
index 0097d128..1d02e505 100644
--- a/DIMS/PeakFinding.nf
+++ b/DIMS/PeakFinding.nf
@@ -5,13 +5,15 @@ process PeakFinding {
     shell = ['/bin/bash', '-euo', 'pipefail']
 
     input:
-       tuple(path(rdata_file), path(breaks_file))
+       path(rdata_file)
+       each path(sample_techreps)
 
     output:
-       path '*tive.RData'
+       path '*tive.RData', emit: peaklist_rdata, optional: true
+       path '*tive.txt', optional: true
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/PeakFinding.R $rdata_file $breaks_file $params.resolution $params.scripts_dir
+        Rscript ${baseDir}/CustomModules/DIMS/PeakFinding.R $rdata_file $params.resolution $params.preprocessing_scripts_dir
         """
 }
diff --git a/DIMS/PeakGrouping.R b/DIMS/PeakGrouping.R
index 4435b033..e0a5c828 100644
--- a/DIMS/PeakGrouping.R
+++ b/DIMS/PeakGrouping.R
@@ -34,7 +34,7 @@ if (grepl("negative", basename(hmdb_part_file))) {
 batch_number <- strsplit(basename(hmdb_part_file), ".", fixed = TRUE)[[1]][2]
 
 # load file with spectrum peaks
-spec_peaks_file <- paste0("SpectrumPeaks_", scanmode, ".RData")
+spec_peaks_file <- paste0("AvgPeaks_", scanmode, ".RData")
 load(spec_peaks_file)
 outlist_df <- as.data.frame(outlist_total)
 outlist_df$mzmed.pkt <- as.numeric(outlist_df$mzmed.pkt)
diff --git a/DIMS/PeakGrouping.nf b/DIMS/PeakGrouping.nf
index 0ecb18c1..6cc4ddb0 100644
--- a/DIMS/PeakGrouping.nf
+++ b/DIMS/PeakGrouping.nf
@@ -6,7 +6,8 @@ process PeakGrouping {
 
     input:
        path(hmdbpart_file)
-       each path(spectrumpeak_file)
+       path(averagedpeaks_file)
+       each path(pattern_file)
 
     output:
        path '*_identified.RData', emit: grouped_identified
diff --git a/DIMS/preprocessing/average_peaks_functions.R b/DIMS/preprocessing/average_peaks_functions.R
new file mode 100644
index 00000000..beed29bb
--- /dev/null
+++ b/DIMS/preprocessing/average_peaks_functions.R
@@ -0,0 +1,41 @@
+average_peaks_per_sample <- function(peaklist_allrepl_sorted, sample_name) {
+  #' Average the intensity of peaks that occur in different technical replicates of a biological sample
+  #'
+  #' @param peaklist_allrepl_sorted: Dataframe with peaks sorted on median m/z (float)
+  #'
+  #' @return averaged_peaks: matrix of averaged peaks (float)
+  
+  # initialize
+  averaged_peaks <- peaklist_allrepl_sorted[0, ]
+  # set ppm as fixed value, not the same ppm as in peak grouping
+  ppm_peak <- 2
+
+  while (nrow(peaklist_allrepl_sorted) > 1) {
+    # store row numbers
+    peaklist_allrepl_sorted$rownr <- 1:nrow(peaklist_allrepl_sorted)
+    # find the peaks in the dataset with corresponding m/z plus or minus tolerance
+    reference_mass <- peaklist_allrepl_sorted$mzmed.pkt[1]
+    mz_tolerance <- (reference_mass * ppm_peak) / 10^6
+    minmz_ref <- reference_mass - mz_tolerance
+    maxmz_ref <- reference_mass + mz_tolerance
+    select_peak_indices <- which((peaklist_allrepl_sorted$mzmed.pkt > minmz_ref) & (peaklist_allrepl_sorted$mzmed.pkt < maxmz_ref))
+    select_peaks <- peaklist_allrepl_sorted[select_peak_indices, ]
+    nrsamples <- length(select_peak_indices)
+    # put averaged intensities into a new row
+    averaged_1peak <- matrix(0, nrow = 1, ncol = 6) 
+    colnames(averaged_1peak) <- c("samplenr", "mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")
+    # calculate m/z values for peak group
+    averaged_1peak[1, "mzmed.pkt"] <- mean(select_peaks$mzmed.pkt)
+    averaged_1peak[1, "mzmin.pkt"] <- min(select_peaks$mzmed.pkt)
+    averaged_1peak[1, "mzmax.pkt"] <- max(select_peaks$mzmed.pkt)
+    averaged_1peak[1, "fq"] <- nrsamples
+    averaged_1peak[1, "height.pkt"] <- mean(select_peaks$height.pkt)
+    # remove rownr column and append to averaged_peaks
+    peaklist_allrepl_sorted <- peaklist_allrepl_sorted[-select_peaks$rownr, ]
+    averaged_peaks <- rbind(averaged_peaks, averaged_1peak)
+  }
+  # add sample name to first column
+  averaged_peaks[ , "samplenr"] <- sample_name
+  
+  return(averaged_peaks)
+}
diff --git a/DIMS/preprocessing/evaluate_tics_functions.R b/DIMS/preprocessing/evaluate_tics_functions.R
new file mode 100644
index 00000000..318ce2b6
--- /dev/null
+++ b/DIMS/preprocessing/evaluate_tics_functions.R
@@ -0,0 +1,102 @@
+# EvaluateTics functions
+find_bad_replicates <- function(repl_pattern, thresh2remove) {
+  #' Find technical replicates with a total intensity below a threshold
+  #'
+  #' @param repl_pattern: List of samples with corresponding technical replicates (strings)
+  #' @param thresh2remove: Threshold value for acceptance or rejection of total intensity (integer)
+  #'
+  #' @return remove_tech_reps: Array of rejected technical replicates (strings)
+  
+  remove_pos <- NULL
+  remove_neg <- NULL
+  cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
+  for (sample_nr in 1:length(repl_pattern)) {
+    tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
+    for (file_nr in 1:length(tech_reps)) {
+      load(paste0(tech_reps[file_nr], ".RData"))
+      cat("\n\nParsing", tech_reps[file_nr])
+      # positive scan mode: determine whether sum of intensities is above threshold
+      cat("\n\tPositive peak_list sum", sum(peak_list$pos[, 1]))
+      if (sum(peak_list$pos[, 1]) < thresh2remove) {
+        cat(" ... Removed")
+        remove_pos <- c(remove_pos, tech_reps[file_nr])
+      }
+      # negative scan mode: determine whether sum of intensities is above threshold
+      cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
+      if (sum(peak_list$neg[, 1]) < thresh2remove) {
+        cat(" ... Removed")
+        remove_neg <- c(remove_neg, tech_reps[file_nr])
+      }
+    }
+  }
+  cat("\n")
+  # write information on miss_infusions for both scan modes
+  write.table(remove_pos,
+              file = paste0("miss_infusions_positive.txt"),
+              row.names = FALSE,
+              col.names = FALSE,
+              sep = "\t"
+  )
+  write.table(remove_neg,
+              file = paste0("miss_infusions_negative.txt"),
+              row.names = FALSE,
+              col.names = FALSE,
+              sep = "\t"
+  )
+  
+  # combine removed technical replicates from pos and neg
+  remove_tech_reps <- list(pos = remove_pos, neg = remove_neg)
+  return(remove_tech_reps)
+}
+
+remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
+  #' Remove technical replicates with insufficient quality from a biological sample
+  #'
+  #' @param bad_samples: Array of technical replicates of insufficient quality (strings)
+  #' @param repl_pattern: List of samples with corresponding technical replicates (strings)
+  #' @param nr_replicates: Number of technical replicates per biological sample (integer)
+  #'
+  #' @return repl_pattern_filtered: list of technical replicates of sufficient quality (strings)
+  
+  # collect list of samples to remove from replication pattern
+  remove_from_group <- NULL
+  for (sample_nr in 1:length(repl_pattern)){
+    repl_pattern_1sample <- repl_pattern[[sample_nr]]
+    remove <- NULL
+    for (file_nr in 1:length(repl_pattern_1sample)) {
+      if (repl_pattern_1sample[file_nr] %in% bad_samples) {
+        remove <- c(remove, file_nr)
+      }
+    }
+    if (length(remove) == nr_replicates) {
+      remove_from_group <- c(remove_from_group, sample_nr)
+    }
+    if (!is.null(remove)) {
+      repl_pattern[[sample_nr]] <- repl_pattern[[sample_nr]][-remove]
+    }
+  }
+  if (length(remove_from_group) != 0) {
+    repl_pattern_filtered <- repl_pattern[-remove_from_group]
+  } else {
+    repl_pattern_filtered <- repl_pattern
+  }
+  return(repl_pattern_filtered)
+}
+
+get_overview_tech_reps <- function(repl_pattern_filtered, scanmode) {
+  #' Create an overview of technical replicates with sufficient quality from a biological sample
+  #'
+  #' @param repl_pattern_filtered: List of samples with corresponding technical replicates (strings)
+  #' @param scanmode: Scan mode "positive" or "negative" (string)
+  #'
+  #' @return allsamples_techreps_scanmode: Matrix of technical replicates of sufficient quality (strings)
+  
+  allsamples_techreps_scanmode <- matrix("", ncol = 3, nrow = length(repl_pattern_filtered))
+  for (sample_nr in 1:length(repl_pattern_filtered)) {
+    allsamples_techreps_scanmode[sample_nr, 1] <- names(repl_pattern_filtered)[sample_nr]
+    allsamples_techreps_scanmode[sample_nr, 2] <- paste0(repl_pattern_filtered[[sample_nr]], collapse = ";")
+  }
+  allsamples_techreps_scanmode[, 3] <- scanmode
+  return(allsamples_techreps_scanmode)
+}
+
diff --git a/DIMS/preprocessing/peak_finding_functions.R b/DIMS/preprocessing/peak_finding_functions.R
new file mode 100644
index 00000000..8539c03c
--- /dev/null
+++ b/DIMS/preprocessing/peak_finding_functions.R
@@ -0,0 +1,246 @@
+# functions for peak finding
+
+search_regions_of_interest <- function(ints_fullrange) {
+  #' Divide the full m/z range into regions of interest with indices
+  #'
+  #' @param ints_fullrange: Matrix with m/z values and intensities (float)
+  #'
+  #' @return regions_of_interest: matrix of m/z regions of interest (integer)
+
+  # find indices where intensity is not equal to zero
+  nonzero_positions <- as.vector(which(ints_fullrange$int != 0))
+
+  # find regions of interest (look for consecutive numbers)
+  regions_of_interest_consec <- seqToIntervals(nonzero_positions)
+  # add length of regions of interest
+  regions_of_interest_diff <- regions_of_interest_consec[, 2] - regions_of_interest_consec[, 1] + 1
+  regions_of_interest_length <- cbind(regions_of_interest_consec, length = regions_of_interest_diff)
+  # remove short lengths; a peak should have at least 3 data points
+  if (any(regions_of_interest_length[, "length"] < 3)) {
+    regions_of_interest_gte3 <- regions_of_interest_length[-which(regions_of_interest_length[, "length"] < 3), ]
+  } else {
+    regions_of_interest_gte3 <- regions_of_interest_length
+  }
+  # test for length of roi (region of interest). If length is greater than 11, break up into separate rois
+  remove_roi_index <- c()
+  new_rois_all <- regions_of_interest_gte3[0, ]
+  for (roi_nr in 1:nrow(regions_of_interest_gte3)) {
+    if (regions_of_interest_gte3[roi_nr, "length"] > 11) {
+      roi <- ints_fullrange[(regions_of_interest_gte3[roi_nr, "from"]:regions_of_interest_gte3[roi_nr, "to"]), ]
+      roi_intrange <- as.numeric(roi$int)
+      # look for local minima that separate the peaks
+      local_min_positions <- which(diff(sign(diff(roi_intrange))) == 2) + 1
+      if (length(local_min_positions) > 0) {
+        remove_roi_index <- c(remove_roi_index, roi_nr)
+        # find new indices for rois after splitting
+        start_pos <- regions_of_interest_gte3[roi_nr, "from"]
+        new_rois <- data.frame(from = 0, to = 0, length = 0)
+        new_rois_splitroi <- regions_of_interest_gte3[0, ]
+        for (local_min_index in 1:length(local_min_positions)) {
+          new_rois[, 1] <- start_pos
+          new_rois[, 2] <- start_pos + local_min_positions[local_min_index]
+          new_rois[, 3] <- new_rois[, 2] - new_rois[, 1] + 1
+          new_rois_splitroi <- rbind(new_rois_splitroi, new_rois)
+          start_pos <- new_rois[, 2]
+        }
+        # intensities after last local minimum
+        new_rois[, 1] <- start_pos
+        new_rois[, 2] <- regions_of_interest_gte3[roi_nr, "to"]
+        new_rois[, 3] <- new_rois[, 2] - new_rois[, 1] + 1
+        new_rois_splitroi <- rbind(new_rois_splitroi, new_rois)
+        # append
+        new_rois_all <- rbind(new_rois_all, new_rois_splitroi)
+      } else {
+        # if there are no local minima, all intensities belong to one peak.
+        remove_roi_index <- c(remove_roi_index, roi_nr)
+        # look for maximum and take 5 intensities to the left and right
+        max_index <- which(roi_intrange == max(roi_intrange))
+        max_pos <- as.numeric(rownames(roi)[max_index])
+        new_roi <- data.frame(from = 0, to = 0, length = 0)
+        new_roi[, 1] <- max_pos - 5
+        new_roi[, 2] <- max_pos + 5
+        new_roi[, 3] <- new_roi[, 2] - new_roi[, 1] + 1
+        # append
+        new_rois_all <- rbind(new_rois_all, new_roi)
+      }
+    }
+  }
+
+  # remove rois that have been split into chunks or shortened
+  if (length(remove_roi_index) > 0) {
+    regions_of_interest_minus_short <- regions_of_interest_gte3[-remove_roi_index, ]
+  } else {
+    regions_of_interest_minus_short <- regions_of_interest_gte3
+  }
+  # combine remaining rois with info on chunks
+  regions_of_interest_split <- rbind(regions_of_interest_minus_short, new_rois_all)
+  # remove regions of interest with short lengths again
+  if (any(regions_of_interest_split[, "length"] < 3)) {
+    regions_of_interest_final <- regions_of_interest_split[-which(regions_of_interest_split[, "length"] < 3), ]
+  } else {
+    regions_of_interest_final <- regions_of_interest_split
+  }
+
+  # sort on first index
+  if (nrow(regions_of_interest_final) > 1){
+    regions_of_interest_sorted <- regions_of_interest_final %>% dplyr::arrange(from)
+  } else {
+    regions_of_interest_sorted <- regions_of_interest_final
+  }
+
+  return(regions_of_interest_sorted)
+}
+
+integrate_peaks <- function(ints_fullrange, regions_of_interest, resol, peak_thresh) {
+  #' Fit Gaussian peak for each region of interest and integrate area under the curve
+  #'
+  #' @param ints_fullrange: Named list of intensities (float)
+  #' @param regions_of_interest: Named list of intensities (float)
+  #' @param resol: Value for resolution (integer)
+  #' @param peak_thresh: Value for noise level threshold (integer) # NOT USED YET
+  #'
+  #' @return allpeaks_values: matrix of integrated peaks
+
+  # initialize dataframe to store results for all peaks
+  allpeaks_values <- matrix(0, nrow = nrow(regions_of_interest), ncol = 5)
+  colnames(allpeaks_values) <- c("mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")
+
+  for (roi_nr in 1:nrow(regions_of_interest)) {
+    # find m/z values and intensities corresponding to the region of interest
+    index_range <- regions_of_interest[roi_nr, "from"] : regions_of_interest[roi_nr, "to"]
+    roi_mzrange <- as.numeric(ints_fullrange$mz[index_range])
+    roi_intrange <- as.numeric(ints_fullrange$int[index_range])
+    # find m/z value for maximum of peak (mu in Gaussian function)
+    weighted_mzmean <- weighted.mean(roi_mzrange, roi_intrange)
+    # find expected peak width at this m/z value
+    fwhm_mzmed <- get_fwhm(weighted_mzmean, resol)
+    # calculate sigma for Gaussian curve (https://en.wikipedia.org/wiki/Full_width_at_half_maximum)
+    sigma_peak <- fwhm_mzmed / 2.355
+    # find scale factor for Gaussian. Initial estimate is maximum intensity in roi
+    # divided by 2 for better correlation with intensities from old PeakFinding method
+    scale_factor <- max(roi_intrange) / 2
+    # fit Gaussian peak. Find intensities according to normal distribution
+    normal_ints <- scale_factor * gaussfunc(roi_mzrange, weighted_mzmean, sigma_peak)
+    # sum intensities in roi
+    sum_ints <- sum(roi_intrange)
+    sum_gauss <- sum(normal_ints)
+    # calculate quality of fit
+    quality_fit <- 1 - sum(abs(normal_ints - roi_intrange)) / sum_ints
+    # put all values into dataframe
+    allpeaks_values[roi_nr, "mzmed.pkt"] <- weighted_mzmean
+    allpeaks_values[roi_nr, "fq"] <- quality_fit
+    allpeaks_values[roi_nr, "mzmax.pkt"] <- max(roi_mzrange)
+    allpeaks_values[roi_nr, "mzmin.pkt"] <- min(roi_mzrange)
+    allpeaks_values[roi_nr, "height.pkt"] <- sum_gauss
+  }
+
+  # remove peaks with height = 0, look for NA or NaN
+  remove_na <- which(is.na(as.numeric(allpeaks_values[, "height.pkt"])))
+  if (length(remove_na) > 0) {
+    allpeaks_values <- allpeaks_values[-remove_na, ]
+  }
+
+  return(allpeaks_values)
+}
+
+# in the next version of the docker image, package R.utils will be included
+seqToIntervals <- function(idx) {
+  #' Find consecutive stretches of numbers
+  #' function seqToIntervals copied from R.utils library
+  #' see https://rdrr.io/cran/R.utils/src/R/seqToIntervals.R
+  #'
+  #' @param idx: Sequence of indices (integers)
+  #'
+  #' @return res: Matrix of start and end positions of consecutive numbers (matrix)
+
+  # Clean up sequence
+  idx <- as.integer(idx)
+  idx <- unique(idx)
+  idx <- sort(idx)
+
+  len_idx <- length(idx)
+  if (len_idx == 0L) {
+    res <- matrix(NA_integer_, nrow = 0L, ncol = 2L)
+    colnames(res) <- c("from", "to")
+    return(res)
+  }
+
+  # Identify end points of intervals
+  diff_idx <- diff(idx)
+  diff_idx <- (diff_idx > 1)
+  diff_idx <- which(diff_idx)
+  nr_intervals <- length(diff_idx) + 1
+
+  # Allocate return matrix
+  res <- matrix(NA_integer_, nrow = nr_intervals, ncol = 2L)
+  colnames(res) <- c("from", "to")
+
+  from_value <- idx[1]
+  to_value <- from_value - 1
+  last_value <- from_value
+
+  count <- 1
+  for (running_index in seq_along(idx)) {
+    value <- idx[running_index]
+    if (value - last_value > 1) {
+      to_value <- last_value
+      res[count, ] <- c(from_value, to_value)
+      from_value <- value
+      count <- count + 1
+    }
+    last_value <- value
+  }
+
+  if (to_value < from_value) {
+    to_value <- last_value
+    res[count, ] <- c(from_value, to_value)
+  }
+
+  return(res)
+}
+
+get_fwhm <- function(query_mass, resol) {
+  #' Calculate fwhm (full width at half maximum intensity) for a peak
+  #'
+  #' @param query_mass: Value for mass (float)
+  #' @param resol: Value for resolution (integer)
+  #'
+  #' @return fwhm: Value for full width at half maximum (float)
+
+  # set aberrant values of query_mass to default value of 200
+  if (is.na(query_mass)) {
+    query_mass <- 200
+  }
+  if (query_mass < 0) {
+    query_mass <- 200
+  }
+  # calculate resolution at given m/z value
+  resol_mz <- resol * (1 / sqrt(2) ^ (log2(query_mass / 200)))
+  # calculate full width at half maximum
+  fwhm <- query_mass / resol_mz
+
+  return(fwhm)
+}
+
+
+# from https://rdrr.io/cran/rvmethod/src/R/gaussfit.R
+#' Gaussian Function from package rvmethod
+#'
+#' This function returns the unnormalized (height of 1.0) Gaussian curve with a
+#' given center and spread.
+#'
+#' @param x the vector of values at which to evaluate the Gaussian
+#' @param mu the center of the Gaussian
+#' @param sigma the spread of the Gaussian (must be greater than 0)
+#' @return vector of values of the Gaussian
+#' @examples x = seq(-4, 4, length.out = 100)
+#' y = gaussfunc(x, 0, 1)
+#' plot(x, y)
+#'
+#' @import stats
+#'
+#' @export
+gaussfunc <- function(x, mu, sigma) {
+  return(exp(-((x - mu) ^ 2) / (2 * (sigma ^ 2))))
+}
+
diff --git a/DIMS/tests/testthat/fixtures/test_evaluate_tics_file1.RData b/DIMS/tests/testthat/fixtures/test_evaluate_tics_file1.RData
new file mode 100644
index 00000000..6d9f40d8
Binary files /dev/null and b/DIMS/tests/testthat/fixtures/test_evaluate_tics_file1.RData differ
diff --git a/DIMS/tests/testthat/fixtures/test_evaluate_tics_file2.RData b/DIMS/tests/testthat/fixtures/test_evaluate_tics_file2.RData
new file mode 100644
index 00000000..77d650f9
Binary files /dev/null and b/DIMS/tests/testthat/fixtures/test_evaluate_tics_file2.RData differ
diff --git a/DIMS/tests/testthat/fixtures/test_evaluate_tics_file3.RData b/DIMS/tests/testthat/fixtures/test_evaluate_tics_file3.RData
new file mode 100644
index 00000000..21fd5bca
Binary files /dev/null and b/DIMS/tests/testthat/fixtures/test_evaluate_tics_file3.RData differ
diff --git a/DIMS/tests/testthat/test_average_peaks.R b/DIMS/tests/testthat/test_average_peaks.R
new file mode 100644
index 00000000..0a8d2345
--- /dev/null
+++ b/DIMS/tests/testthat/test_average_peaks.R
@@ -0,0 +1,30 @@
+# unit test for AveragePeaks
+
+# source all functions for PeakGrouping
+source("../../preprocessing/average_peaks_functions.R")
+
+# test find_peak_groups
+testthat::test_that("peaks are correctly averaged", {
+  # create peak list to test on:
+  samplenrs <- rep(c("repl_001", "repl_002", "repl_003"), 3)
+  test_peaklist_sorted_num <- matrix(0, nrow = 9, ncol = 5)
+  colnames(test_peaklist_sorted_num) <- c("mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")
+  test_peaklist_sorted_num[, 1] <- 300 + (1:9) / 10000
+  test_peaklist_sorted_num[, 5] <- 100 * (9:1)
+  test_peaklist_sorted <- as.data.frame(cbind(samplenr = samplenrs, test_peaklist_sorted_num))
+  test_peaklist_sorted[, 2] <- as.numeric(test_peaklist_sorted[, 2])
+  test_peaklist_sorted[, 6] <- as.numeric(test_peaklist_sorted[, 6])
+  test_sample_name <- "P001"
+  test_empty_peaklist <- test_peaklist_sorted[0, ]
+
+  # test that first peak is correctly averaged
+  expect_equal(as.numeric(average_peaks_per_sample(test_peaklist_sorted, test_sample_name)[1, 6]), 600, tolerance = 0.001, TRUE)
+  # test number of rows
+  expect_equal(nrow(average_peaks_per_sample(test_peaklist_sorted, test_sample_name)), 2)
+  # test column names
+  expect_equal(colnames(average_peaks_per_sample(test_peaklist_sorted, test_sample_name)),
+               c("samplenr", "mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt"), TRUE)
+  # test what happens when peak list is empty
+  expect_error(average_peaks_per_sample(test_empty_peaklist, test_sample_name))
+})
+
diff --git a/DIMS/tests/testthat/test_evaluate_tics.R b/DIMS/tests/testthat/test_evaluate_tics.R
new file mode 100644
index 00000000..b474a81b
--- /dev/null
+++ b/DIMS/tests/testthat/test_evaluate_tics.R
@@ -0,0 +1,66 @@
+# unit test for EvaluateTics
+
+# source all functions for PeakGrouping
+source("../../preprocessing/evaluate_tics_functions.R")
+
+# test find_bad_replicates
+testthat::test_that("TICS are correctly accepted or rejected", {
+  # It's necessary to copy/symlink the files to the current location for the combine_sum_adducts_parts function
+  # local: setwd("~/Development/DIMS_refactor_PeakFinding_codereview/CustomModules/DIMS/tests/testthat")
+  test_files <- list.files("fixtures/", "test_evaluate_tics", full.names = TRUE)
+  file.symlink(file.path(test_files), getwd())
+  
+  # create replication pattern to test on:
+  technical_replicates <- paste0("test_evaluate_tics_file", 1:3)
+  test_repl_pattern <- list(technical_replicates)
+  names(test_repl_pattern) <- "sample1"
+  test_thresh2remove <- 10^9
+  
+  # test that output has two entries
+  expect_equal(length(find_bad_replicates(test_repl_pattern, test_thresh2remove)), 2)
+  # test that first technical replicate is removed in positive scan mode
+  expect_equal(find_bad_replicates(test_repl_pattern, test_thresh2remove)$pos, "test_evaluate_tics_file1", TRUE)
+  # test that third technical replicate is removed in negative scan mode
+  expect_equal(find_bad_replicates(test_repl_pattern, test_thresh2remove)$neg, "test_evaluate_tics_file3", TRUE)
+  # test that output files are generated
+  expect_equal(sum(grepl("miss_infusions_", list.files("./"))), 2)
+  
+  # Remove symlinked files
+  files_remove <- list.files("./", "SummedAdducts_test.RData", full.names = TRUE)
+  file.remove(files_remove)
+  
+})
+
+# test remove_from_repl_pattern
+testthat::test_that("technical replicates are correctly removed from replication pattern", {
+  # create replication pattern to test on:
+  technical_replicates <- paste0("test_evaluate_tics_file", 1:3)
+  test_repl_pattern <- list(technical_replicates)
+  names(test_repl_pattern) <- "sample1"
+  test_bad_samples <- "test_evaluate_tics_file2"
+  test_nr_replicates <- 3
+  
+  # test that the output contains 1 sample
+  expect_equal(length(remove_from_repl_pattern(test_bad_samples, test_repl_pattern, test_nr_replicates)), 1)
+  # test that the output for the sample contains 2 technical replicates
+  expect_equal(length(remove_from_repl_pattern(test_bad_samples, test_repl_pattern, test_nr_replicates)$sample1), 2)
+  # test that the second technical replicate has been removed
+  expect_false(unique(grepl(test_bad_samples, remove_from_repl_pattern(test_bad_samples, test_repl_pattern, test_nr_replicates)$sample1)))
+})
+
+# test get_overview_tech_reps
+testthat::test_that("overview of technical replicates is correctly created", {
+  # create replication pattern to test on:
+  technical_replicates <- paste0("test_evaluate_tics_file", 1:3)
+  test_repl_pattern_filtered <- list(technical_replicates)
+  names(test_repl_pattern_filtered) <- "sample1"
+  test_scanmode <- "positive"
+
+  # test that overview is correctly created
+  expect_equal(get_overview_tech_reps(test_repl_pattern_filtered, test_scanmode)[ ,1], "sample1")
+  expect_equal(get_overview_tech_reps(test_repl_pattern_filtered, test_scanmode)[ ,3], "positive")
+  expect_true(get_overview_tech_reps(test_repl_pattern_filtered, test_scanmode)[ ,2] == 
+               paste0(technical_replicates, collapse = ";"), TRUE)
+  
+})
+
diff --git a/DIMS/tests/testthat/test_peak_finding_functions.R b/DIMS/tests/testthat/test_peak_finding_functions.R
new file mode 100644
index 00000000..bc7f9d86
--- /dev/null
+++ b/DIMS/tests/testthat/test_peak_finding_functions.R
@@ -0,0 +1,87 @@
+# unit tests for PeakFinding functions:
+
+source("../../preprocessing/peak_finding_functions.R")
+
+# test search_regions_of_interest function:
+test_that("regions of interest are correctly found for a single peak", {
+  # create intensities to test on
+  test_stdev <- 0.00006
+  test_mass_vector <- rep(70, 10) + 1:10 * test_stdev
+  test_ints_range <- dnorm(test_mass_vector, mean = 70.0003, sd = test_stdev)
+  test_ints_fullrange <- as.data.frame(cbind(mz = test_mass_vector, 
+                                             int = test_ints_range))
+  
+  # expected output
+  expected_output <- matrix(c(1, 10, 10), nrow = 1, ncol = 3L)
+  colnames(expected_output) <- c("from", "to", "length")
+  rownames(expected_output) <- "to"
+
+  # tests
+  expect_type(search_regions_of_interest(test_ints_fullrange), "double")
+  expect_equal(nrow(search_regions_of_interest(test_ints_fullrange)), 1)
+  expect_equal(search_regions_of_interest(test_ints_fullrange), expected_output, tolerance = 0.000001, TRUE)
+})
+test_that("regions of interest are correctly found for two peaks", {
+  # create intensities to test on
+  test_stdev <- 0.00006
+  test_mass_vector <- rep(70, 20) + 1:20 * test_stdev
+  test_ints_range <- dnorm(test_mass_vector, mean = 70.0003, sd = test_stdev) + 
+                     dnorm(test_mass_vector, mean = 70.0005, sd = test_stdev)
+  test_ints_fullrange <- as.data.frame(cbind(mz = test_mass_vector, 
+                                             int = test_ints_range))
+  
+  # expected output
+  expected_output <- as.data.frame(matrix(c(1, 8, 8, 20, 8, 13), nrow = 2, ncol = 3))
+  colnames(expected_output) <- c("from", "to", "length")
+  rownames(expected_output) <- as.character(c(1, 2))
+  
+  # tests
+  expect_type(search_regions_of_interest(test_ints_fullrange), "list")
+  expect_equal(nrow(search_regions_of_interest(test_ints_fullrange)), 2)
+  expect_equal(search_regions_of_interest(test_ints_fullrange)[, "length"], as.numeric(expected_output[, "length"]))
+  expect_equal(search_regions_of_interest(test_ints_fullrange), expected_output, check.attributes = FALSE)
+})
+
+# test integrate_peaks function:
+test_that("peaks are correctly integrated", {
+  # create peak info to test on:
+  test_stdev <- 0.00006
+  test_mass_vector <- rep(70, 20) + 1:20 * test_stdev
+  test_ints_range <- dnorm(test_mass_vector, mean = 70.0003, sd = test_stdev) + 
+                     dnorm(test_mass_vector, mean = 70.0005, sd = test_stdev)
+  test_ints_fullrange <- as.data.frame(cbind(mz = test_mass_vector, 
+                                             int = test_ints_range))
+  
+  # create regions of interest to test on:
+  test_regions_of_interest <- as.data.frame(matrix(c(1, 8, 8, 20, 8, 13), nrow = 2, ncol = 3L))
+  colnames(test_regions_of_interest) <- c("from", "to", "length")
+  rownames(test_regions_of_interest) <- as.character(c(1, 2))
+  
+  # set other parameters
+  test_resol <- 140000
+  test_peak_thresh <- 2000
+  
+  # expected output
+  expected_output <- as.data.frame(matrix(c(70.000357, 70.000521, 0.5148094, 0.3016569, 70.00006, 70.00048, 
+                                            70.00048, 70.00120, 15526.826791, 11950.90570), nrow = 2, ncol = 5))
+  colnames(expected_output) <- c("mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")
+  
+  expect_type(integrate_peaks(test_ints_fullrange, test_regions_of_interest, test_resol, test_peak_thresh), "double")
+  expect_equal(nrow(integrate_peaks(test_ints_fullrange, test_regions_of_interest, test_resol, test_peak_thresh)), 2)
+  expect_equal(integrate_peaks(test_ints_fullrange, test_regions_of_interest, test_resol, test_peak_thresh)[, "mzmed.pkt"], 
+               expected_output[, "mzmed.pkt"], tolerance = 0.0001)
+  expect_equal(integrate_peaks(test_ints_fullrange, test_regions_of_interest, test_resol, test_peak_thresh)[, "height.pkt"], 
+               expected_output[, "height.pkt"], tolerance = 0.0001)
+})
+
+# test get_fwhm function
+test_that("fwhm is correctly calculated", {
+  # create peak info to test on:
+  test_mass_vector <- rep(70.00938, 5) + 1:5 * 0.00006
+  test_mz_max <- max(test_mass_vector)
+  test_resol <- 140000
+
+  expect_type(get_fwhm(test_mz_max, test_resol), "double")
+  expect_equal(get_fwhm(test_mz_max, test_resol), 0.000295865, tolerance = 0.000001, TRUE)
+})
+