From 14290f1c72c75f4a36cbd3cbdf0994be6661283f Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 11:30:17 +0000 Subject: [PATCH 01/15] Add template, test and notebook --- preprocessing/shuffling/shuffle_labels.r | 48 ++++++++++ preprocessing/shuffling/test_labels.tsv | 11 +++ preprocessing/shuffling/testing.ipynb | 117 +++++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 preprocessing/shuffling/shuffle_labels.r create mode 100644 preprocessing/shuffling/test_labels.tsv create mode 100644 preprocessing/shuffling/testing.ipynb diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r new file mode 100644 index 00000000..299d0bda --- /dev/null +++ b/preprocessing/shuffling/shuffle_labels.r @@ -0,0 +1,48 @@ +#!/usr/bin/env Rscript + +# Author_and_contribution: Niklas Mueller-Boetticher; created template +# Author_and_contribution: Kim Vucinic; modified template and created script + +suppressPackageStartupMessages(library(optparse)) + +# Arguments +option_list <- list( + make_option( + c("-l", "--labels"), + type = "character", default = NULL, + help = "Labels from domain clustering. Path to labels (as tsv)." + ), + make_option( + c("--seed"), + type = "integer", default = NULL, + help = "Seed to use for random operations." + ), + make_option( + c("-o", "--out_file"), + type = "character", default = NULL, + help = "Output file." + ) +) + +# Description +description <- "Shuffling labels..." + +opt_parser <- OptionParser( + usage = description, + option_list = option_list +) +opt <- parse_args(opt_parser) + +# Use these filepaths as input +label_file <- opt$labels +seed <- opt$seed + +## Your code goes here + + + +## Write output +outfile <- file(opt$out_file) +dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE) + +write.table(df_shuffled, outfile, sep = "\t", col.names = NA, quote = "FALSE") \ No newline at end of file diff --git a/preprocessing/shuffling/test_labels.tsv b/preprocessing/shuffling/test_labels.tsv new file mode 100644 index 00000000..9f185410 --- /dev/null +++ b/preprocessing/shuffling/test_labels.tsv @@ -0,0 +1,11 @@ + label +ID1 Domain1 +ID2 Domain1 +ID3 Domain2 +ID4 Domain3 +ID5 Domain1 +ID6 Domain2 +ID7 Domain2 +ID8 Domain1 +ID9 Domain3 +ID10 Domain4 \ No newline at end of file diff --git a/preprocessing/shuffling/testing.ipynb b/preprocessing/shuffling/testing.ipynb new file mode 100644 index 00000000..99aeb098 --- /dev/null +++ b/preprocessing/shuffling/testing.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ba391971-26a5-4f9e-86fe-a0237746c4e8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\n", + "\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\t\n", + "\n", + "
A data.frame: 10 × 1
label
<chr>
ID1Domain1
ID2Domain1
ID3Domain2
ID4Domain3
ID5Domain1
ID6Domain2
ID7Domain2
ID8Domain1
ID9Domain3
ID10Domain4
\n" + ], + "text/latex": [ + "A data.frame: 10 × 1\n", + "\\begin{tabular}{r|l}\n", + " & label\\\\\n", + " & \\\\\n", + "\\hline\n", + "\tID1 & Domain1\\\\\n", + "\tID2 & Domain1\\\\\n", + "\tID3 & Domain2\\\\\n", + "\tID4 & Domain3\\\\\n", + "\tID5 & Domain1\\\\\n", + "\tID6 & Domain2\\\\\n", + "\tID7 & Domain2\\\\\n", + "\tID8 & Domain1\\\\\n", + "\tID9 & Domain3\\\\\n", + "\tID10 & Domain4\\\\\n", + "\\end{tabular}\n" + ], + "text/markdown": [ + "\n", + "A data.frame: 10 × 1\n", + "\n", + "| | label <chr> |\n", + "|---|---|\n", + "| ID1 | Domain1 |\n", + "| ID2 | Domain1 |\n", + "| ID3 | Domain2 |\n", + "| ID4 | Domain3 |\n", + "| ID5 | Domain1 |\n", + "| ID6 | Domain2 |\n", + "| ID7 | Domain2 |\n", + "| ID8 | Domain1 |\n", + "| ID9 | Domain3 |\n", + "| ID10 | Domain4 |\n", + "\n" + ], + "text/plain": [ + " label \n", + "ID1 Domain1\n", + "ID2 Domain1\n", + "ID3 Domain2\n", + "ID4 Domain3\n", + "ID5 Domain1\n", + "ID6 Domain2\n", + "ID7 Domain2\n", + "ID8 Domain1\n", + "ID9 Domain3\n", + "ID10 Domain4" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df <- read.delim(\"test_labels.tsv\", sep = \"\\t\", row.names = 1)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a010c80d-2992-4bca-8c45-f5c1f17e4417", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "4.3.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d4de939c8aab34bac50c37cc229707f9e34055c3 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 11:30:55 +0000 Subject: [PATCH 02/15] Ignore Python notebooks --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..44fcac0f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# Notebook Checkpoints +.ipynb_checkpoints/ From e5204c4bc76fdd5152f1ffded85182e25abb8d91 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 11:57:29 +0000 Subject: [PATCH 03/15] Shuffle labels script --- preprocessing/shuffling/shuffle_labels.r | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r index 299d0bda..4fed2969 100644 --- a/preprocessing/shuffling/shuffle_labels.r +++ b/preprocessing/shuffling/shuffle_labels.r @@ -35,14 +35,23 @@ opt <- parse_args(opt_parser) # Use these filepaths as input label_file <- opt$labels + + +# Seed seed <- opt$seed +set.seed(seed) ## Your code goes here +if (!("label" %in% colnames(df))){ + stop("Label column not present in the file. Check your file.") +} - +# Randomize labels +df_randomized <- data.frame(label = sample(df$label)) +rownames(df_randomized) <- rownames(df) ## Write output outfile <- file(opt$out_file) dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE) -write.table(df_shuffled, outfile, sep = "\t", col.names = NA, quote = "FALSE") \ No newline at end of file +write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = "FALSE") \ No newline at end of file From c7956858a7971d57851faf0bbf88cc5f08a19503 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 12:00:49 +0000 Subject: [PATCH 04/15] Ignore test files and notebook --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 44fcac0f..f9f47d82 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ # Notebook Checkpoints .ipynb_checkpoints/ + +# Test files and notebooks +preprocessing/shuffling/test* From 43c65e182c77d966eb6d3efa89621ff703b3e3b5 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 12:07:43 +0000 Subject: [PATCH 05/15] Add environment --- preprocessing/shuffling/shuffle_labels.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 preprocessing/shuffling/shuffle_labels.yml diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling/shuffle_labels.yml new file mode 100644 index 00000000..ece484d4 --- /dev/null +++ b/preprocessing/shuffling/shuffle_labels.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - defaults +dependencies: + - R==4.3.1 \ No newline at end of file From 4491e1b0790830a23f4a4a2a8d5958450313fcbc Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 12:15:54 +0000 Subject: [PATCH 06/15] Load file --- preprocessing/shuffling/shuffle_labels.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r index 4fed2969..40190434 100644 --- a/preprocessing/shuffling/shuffle_labels.r +++ b/preprocessing/shuffling/shuffle_labels.r @@ -36,12 +36,12 @@ opt <- parse_args(opt_parser) # Use these filepaths as input label_file <- opt$labels - # Seed seed <- opt$seed set.seed(seed) ## Your code goes here +df <- read.delim(label_file, sep = "\t", row.names = 1) if (!("label" %in% colnames(df))){ stop("Label column not present in the file. Check your file.") } From ba36d8df7b75c1693370004c1ccf563e3422b2dd Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 12:37:36 +0000 Subject: [PATCH 07/15] Adds shuffle coordinates script --- preprocessing/shuffling/shuffle_coordinates.r | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 preprocessing/shuffling/shuffle_coordinates.r diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling/shuffle_coordinates.r new file mode 100644 index 00000000..f5deb188 --- /dev/null +++ b/preprocessing/shuffling/shuffle_coordinates.r @@ -0,0 +1,58 @@ +#!/usr/bin/env Rscript + +# Author_and_contribution: Niklas Mueller-Boetticher; created template +# Author_and_contribution: Kim Vucinic; modified template and created script + +suppressPackageStartupMessages(library(optparse)) + +# Arguments +option_list <- list( + make_option( + c("-c", "--coordinates"), + type = "character", default = NULL, + help = "Path to coordinates (as tsv)." + ), + make_option( + c("--seed"), + type = "integer", default = NULL, + help = "Seed to use for random operations." + ), + make_option( + c("-o", "--out_file"), + type = "character", default = NULL, + help = "Output file." + ) +) + +# Description +description <- "Shuffling coordinates in coordinates.tsv" + +opt_parser <- OptionParser( + usage = description, + option_list = option_list +) +opt <- parse_args(opt_parser) + +# Use these filepaths as input +coord_file <- opt$coordinates + +# Seed +seed <- opt$seed +set.seed(seed) + +## Your code goes here +df <- read.delim(coord_file, sep = "\t", row.names = 1) +if (any(!(c("x", "y") %in% colnames(df)))){ + stop("X and y coordinates are not present in the file. Check your file.") +} + +# Randomize IDs, but keep the same order of IDs (not really necessary) +df_order <- rownames(df) +rownames(df) <- sample(rownames(df)) +df_final <- df[order(match(rownames(df), df_order)),] + +## Write output +outfile <- file(opt$out_file) +dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE) + +write.table(df_final, outfile, sep = "\t", col.names = NA, quote = "FALSE") \ No newline at end of file From 0a87c9cb7d2913b8d97c18af9b964b42ef074548 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 12:38:54 +0000 Subject: [PATCH 08/15] Add environment for shuffling coordinates --- preprocessing/shuffling/shuffle_coordinates.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 preprocessing/shuffling/shuffle_coordinates.yml diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling/shuffle_coordinates.yml new file mode 100644 index 00000000..ece484d4 --- /dev/null +++ b/preprocessing/shuffling/shuffle_coordinates.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - defaults +dependencies: + - R==4.3.1 \ No newline at end of file From 9019b760f7b03431ece762a682c18839a165c34a Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 12:42:14 +0000 Subject: [PATCH 09/15] Remove tests from branch --- preprocessing/shuffling/test_labels.tsv | 11 --- preprocessing/shuffling/testing.ipynb | 117 ------------------------ 2 files changed, 128 deletions(-) delete mode 100644 preprocessing/shuffling/test_labels.tsv delete mode 100644 preprocessing/shuffling/testing.ipynb diff --git a/preprocessing/shuffling/test_labels.tsv b/preprocessing/shuffling/test_labels.tsv deleted file mode 100644 index 9f185410..00000000 --- a/preprocessing/shuffling/test_labels.tsv +++ /dev/null @@ -1,11 +0,0 @@ - label -ID1 Domain1 -ID2 Domain1 -ID3 Domain2 -ID4 Domain3 -ID5 Domain1 -ID6 Domain2 -ID7 Domain2 -ID8 Domain1 -ID9 Domain3 -ID10 Domain4 \ No newline at end of file diff --git a/preprocessing/shuffling/testing.ipynb b/preprocessing/shuffling/testing.ipynb deleted file mode 100644 index 99aeb098..00000000 --- a/preprocessing/shuffling/testing.ipynb +++ /dev/null @@ -1,117 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "ba391971-26a5-4f9e-86fe-a0237746c4e8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\t\n", - "\t\n", - "\n", - "\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\t\n", - "\n", - "
A data.frame: 10 × 1
label
<chr>
ID1Domain1
ID2Domain1
ID3Domain2
ID4Domain3
ID5Domain1
ID6Domain2
ID7Domain2
ID8Domain1
ID9Domain3
ID10Domain4
\n" - ], - "text/latex": [ - "A data.frame: 10 × 1\n", - "\\begin{tabular}{r|l}\n", - " & label\\\\\n", - " & \\\\\n", - "\\hline\n", - "\tID1 & Domain1\\\\\n", - "\tID2 & Domain1\\\\\n", - "\tID3 & Domain2\\\\\n", - "\tID4 & Domain3\\\\\n", - "\tID5 & Domain1\\\\\n", - "\tID6 & Domain2\\\\\n", - "\tID7 & Domain2\\\\\n", - "\tID8 & Domain1\\\\\n", - "\tID9 & Domain3\\\\\n", - "\tID10 & Domain4\\\\\n", - "\\end{tabular}\n" - ], - "text/markdown": [ - "\n", - "A data.frame: 10 × 1\n", - "\n", - "| | label <chr> |\n", - "|---|---|\n", - "| ID1 | Domain1 |\n", - "| ID2 | Domain1 |\n", - "| ID3 | Domain2 |\n", - "| ID4 | Domain3 |\n", - "| ID5 | Domain1 |\n", - "| ID6 | Domain2 |\n", - "| ID7 | Domain2 |\n", - "| ID8 | Domain1 |\n", - "| ID9 | Domain3 |\n", - "| ID10 | Domain4 |\n", - "\n" - ], - "text/plain": [ - " label \n", - "ID1 Domain1\n", - "ID2 Domain1\n", - "ID3 Domain2\n", - "ID4 Domain3\n", - "ID5 Domain1\n", - "ID6 Domain2\n", - "ID7 Domain2\n", - "ID8 Domain1\n", - "ID9 Domain3\n", - "ID10 Domain4" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df <- read.delim(\"test_labels.tsv\", sep = \"\\t\", row.names = 1)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a010c80d-2992-4bca-8c45-f5c1f17e4417", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "R", - "language": "R", - "name": "ir" - }, - "language_info": { - "codemirror_mode": "r", - "file_extension": ".r", - "mimetype": "text/x-r-source", - "name": "R", - "pygments_lexer": "r", - "version": "4.3.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 496aedf489ae133a3f554c0706875c679da74279 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 15:09:02 +0000 Subject: [PATCH 10/15] Update environments - add optparse package --- preprocessing/shuffling/shuffle_coordinates.yml | 3 ++- preprocessing/shuffling/shuffle_labels.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling/shuffle_coordinates.yml index ece484d4..dae4376d 100644 --- a/preprocessing/shuffling/shuffle_coordinates.yml +++ b/preprocessing/shuffling/shuffle_coordinates.yml @@ -2,4 +2,5 @@ channels: - conda-forge - defaults dependencies: - - R==4.3.1 \ No newline at end of file + - r-base==4.3.1 + - r-optparse=1.7.3 \ No newline at end of file diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling/shuffle_labels.yml index ece484d4..dae4376d 100644 --- a/preprocessing/shuffling/shuffle_labels.yml +++ b/preprocessing/shuffling/shuffle_labels.yml @@ -2,4 +2,5 @@ channels: - conda-forge - defaults dependencies: - - R==4.3.1 \ No newline at end of file + - r-base==4.3.1 + - r-optparse=1.7.3 \ No newline at end of file From f99a4729ca0d505e8442283e1fefe1e9994f88b4 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Tue, 12 Dec 2023 15:09:52 +0000 Subject: [PATCH 11/15] Fixes outputs --- preprocessing/shuffling/shuffle_coordinates.r | 4 +--- preprocessing/shuffling/shuffle_labels.r | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) mode change 100644 => 100755 preprocessing/shuffling/shuffle_coordinates.r mode change 100644 => 100755 preprocessing/shuffling/shuffle_labels.r diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling/shuffle_coordinates.r old mode 100644 new mode 100755 index f5deb188..18c0922d --- a/preprocessing/shuffling/shuffle_coordinates.r +++ b/preprocessing/shuffling/shuffle_coordinates.r @@ -53,6 +53,4 @@ df_final <- df[order(match(rownames(df), df_order)),] ## Write output outfile <- file(opt$out_file) -dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE) - -write.table(df_final, outfile, sep = "\t", col.names = NA, quote = "FALSE") \ No newline at end of file +write.table(df_final, outfile, sep = "\t", col.names = NA, quote = FALSE) \ No newline at end of file diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r old mode 100644 new mode 100755 index 40190434..cfcf8393 --- a/preprocessing/shuffling/shuffle_labels.r +++ b/preprocessing/shuffling/shuffle_labels.r @@ -52,6 +52,4 @@ rownames(df_randomized) <- rownames(df) ## Write output outfile <- file(opt$out_file) -dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE) - -write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = "FALSE") \ No newline at end of file +write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = FALSE) \ No newline at end of file From ab2b8e5d8acdeca0840e3941e992b90e1503ad13 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Wed, 13 Dec 2023 14:15:57 +0000 Subject: [PATCH 12/15] Remove .gitignore directory --- .gitignore | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index f9f47d82..00000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -# Notebook Checkpoints -.ipynb_checkpoints/ - -# Test files and notebooks -preprocessing/shuffling/test* From 083da0fecb425fde08458ccbc8a685bfe3e54308 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Thu, 14 Dec 2023 14:12:49 +0000 Subject: [PATCH 13/15] separate folders --- .../{shuffling => shuffling_coordinates}/shuffle_coordinates.r | 0 .../{shuffling => shuffling_coordinates}/shuffle_coordinates.yml | 0 preprocessing/{shuffling => shuffling_labels}/shuffle_labels.r | 0 preprocessing/{shuffling => shuffling_labels}/shuffle_labels.yml | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename preprocessing/{shuffling => shuffling_coordinates}/shuffle_coordinates.r (100%) mode change 100755 => 100644 rename preprocessing/{shuffling => shuffling_coordinates}/shuffle_coordinates.yml (100%) rename preprocessing/{shuffling => shuffling_labels}/shuffle_labels.r (100%) rename preprocessing/{shuffling => shuffling_labels}/shuffle_labels.yml (100%) diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling_coordinates/shuffle_coordinates.r old mode 100755 new mode 100644 similarity index 100% rename from preprocessing/shuffling/shuffle_coordinates.r rename to preprocessing/shuffling_coordinates/shuffle_coordinates.r diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml similarity index 100% rename from preprocessing/shuffling/shuffle_coordinates.yml rename to preprocessing/shuffling_coordinates/shuffle_coordinates.yml diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling_labels/shuffle_labels.r similarity index 100% rename from preprocessing/shuffling/shuffle_labels.r rename to preprocessing/shuffling_labels/shuffle_labels.r diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling_labels/shuffle_labels.yml similarity index 100% rename from preprocessing/shuffling/shuffle_labels.yml rename to preprocessing/shuffling_labels/shuffle_labels.yml From c84a2d4387ded3a9b2ad4ad30dcd401526791218 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Thu, 14 Dec 2023 14:14:53 +0000 Subject: [PATCH 14/15] Removes defaults from environments --- preprocessing/shuffling_coordinates/shuffle_coordinates.yml | 1 - preprocessing/shuffling_labels/shuffle_labels.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/preprocessing/shuffling_coordinates/shuffle_coordinates.yml b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml index dae4376d..4e2066e6 100644 --- a/preprocessing/shuffling_coordinates/shuffle_coordinates.yml +++ b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml @@ -1,6 +1,5 @@ channels: - conda-forge - - defaults dependencies: - r-base==4.3.1 - r-optparse=1.7.3 \ No newline at end of file diff --git a/preprocessing/shuffling_labels/shuffle_labels.yml b/preprocessing/shuffling_labels/shuffle_labels.yml index dae4376d..4e2066e6 100644 --- a/preprocessing/shuffling_labels/shuffle_labels.yml +++ b/preprocessing/shuffling_labels/shuffle_labels.yml @@ -1,6 +1,5 @@ channels: - conda-forge - - defaults dependencies: - r-base==4.3.1 - r-optparse=1.7.3 \ No newline at end of file From 843f2368b8e5d339c99c9a185ebf393230527081 Mon Sep 17 00:00:00 2001 From: Kim Vucinic Date: Thu, 14 Dec 2023 14:39:13 +0000 Subject: [PATCH 15/15] Shuffles only label column --- preprocessing/shuffling_labels/shuffle_labels.r | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/preprocessing/shuffling_labels/shuffle_labels.r b/preprocessing/shuffling_labels/shuffle_labels.r index cfcf8393..7269be6d 100755 --- a/preprocessing/shuffling_labels/shuffle_labels.r +++ b/preprocessing/shuffling_labels/shuffle_labels.r @@ -47,9 +47,8 @@ if (!("label" %in% colnames(df))){ } # Randomize labels -df_randomized <- data.frame(label = sample(df$label)) -rownames(df_randomized) <- rownames(df) +df$label <- sample(df$label) ## Write output outfile <- file(opt$out_file) -write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = FALSE) \ No newline at end of file +write.table(df, outfile, sep = "\t", col.names = NA, quote = FALSE) \ No newline at end of file