From 14290f1c72c75f4a36cbd3cbdf0994be6661283f Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 11:30:17 +0000
Subject: [PATCH 01/15] Add template, test and notebook

---
 preprocessing/shuffling/shuffle_labels.r |  48 ++++++++++
 preprocessing/shuffling/test_labels.tsv  |  11 +++
 preprocessing/shuffling/testing.ipynb    | 117 +++++++++++++++++++++++
 3 files changed, 176 insertions(+)
 create mode 100644 preprocessing/shuffling/shuffle_labels.r
 create mode 100644 preprocessing/shuffling/test_labels.tsv
 create mode 100644 preprocessing/shuffling/testing.ipynb
diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r
new file mode 100644
index 00000000..299d0bda
--- /dev/null
+++ b/preprocessing/shuffling/shuffle_labels.r
@@ -0,0 +1,48 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kim Vucinic; modified template and created script
+
+suppressPackageStartupMessages(library(optparse))
+
+# Arguments
+option_list <- list(
+  make_option(
+    c("-l", "--labels"),
+    type = "character", default = NULL,
+    help = "Labels from domain clustering. Path to labels (as tsv)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+# Description
+description <- "Shuffling labels..."
+
+opt_parser <- OptionParser(
+  usage = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Use these filepaths as input
+label_file <- opt$labels
+seed <- opt$seed
+
+## Your code goes here
+
+
+
+## Write output
+outfile <- file(opt$out_file)
+dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
+
+write.table(df_shuffled, outfile, sep = "\t", col.names = NA, quote = "FALSE")
\ No newline at end of file
diff --git a/preprocessing/shuffling/test_labels.tsv b/preprocessing/shuffling/test_labels.tsv
new file mode 100644
index 00000000..9f185410
--- /dev/null
+++ b/preprocessing/shuffling/test_labels.tsv
@@ -0,0 +1,11 @@
+	label
+ID1	Domain1
+ID2	Domain1
+ID3	Domain2
+ID4	Domain3
+ID5	Domain1
+ID6	Domain2
+ID7	Domain2
+ID8	Domain1
+ID9	Domain3
+ID10	Domain4
\ No newline at end of file
diff --git a/preprocessing/shuffling/testing.ipynb b/preprocessing/shuffling/testing.ipynb
new file mode 100644
index 00000000..99aeb098
--- /dev/null
+++ b/preprocessing/shuffling/testing.ipynb
@@ -0,0 +1,117 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ba391971-26a5-4f9e-86fe-a0237746c4e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table class=\"dataframe\">\n",
+       "<caption>A data.frame: 10 × 1</caption>\n",
+       "<thead>\n",
+       "\t<tr><th></th><th scope=col>label</th></tr>\n",
+       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th></tr>\n",
+       "</thead>\n",
+       "<tbody>\n",
+       "\t<tr><th scope=row>ID1</th><td>Domain1</td></tr>\n",
+       "\t<tr><th scope=row>ID2</th><td>Domain1</td></tr>\n",
+       "\t<tr><th scope=row>ID3</th><td>Domain2</td></tr>\n",
+       "\t<tr><th scope=row>ID4</th><td>Domain3</td></tr>\n",
+       "\t<tr><th scope=row>ID5</th><td>Domain1</td></tr>\n",
+       "\t<tr><th scope=row>ID6</th><td>Domain2</td></tr>\n",
+       "\t<tr><th scope=row>ID7</th><td>Domain2</td></tr>\n",
+       "\t<tr><th scope=row>ID8</th><td>Domain1</td></tr>\n",
+       "\t<tr><th scope=row>ID9</th><td>Domain3</td></tr>\n",
+       "\t<tr><th scope=row>ID10</th><td>Domain4</td></tr>\n",
+       "</tbody>\n",
+       "</table>\n"
+      ],
+      "text/latex": [
+       "A data.frame: 10 × 1\n",
+       "\\begin{tabular}{r|l}\n",
+       "  & label\\\\\n",
+       "  & <chr>\\\\\n",
+       "\\hline\n",
+       "\tID1 & Domain1\\\\\n",
+       "\tID2 & Domain1\\\\\n",
+       "\tID3 & Domain2\\\\\n",
+       "\tID4 & Domain3\\\\\n",
+       "\tID5 & Domain1\\\\\n",
+       "\tID6 & Domain2\\\\\n",
+       "\tID7 & Domain2\\\\\n",
+       "\tID8 & Domain1\\\\\n",
+       "\tID9 & Domain3\\\\\n",
+       "\tID10 & Domain4\\\\\n",
+       "\\end{tabular}\n"
+      ],
+      "text/markdown": [
+       "\n",
+       "A data.frame: 10 × 1\n",
+       "\n",
+       "| <!--/--> | label &lt;chr&gt; |\n",
+       "|---|---|\n",
+       "| ID1 | Domain1 |\n",
+       "| ID2 | Domain1 |\n",
+       "| ID3 | Domain2 |\n",
+       "| ID4 | Domain3 |\n",
+       "| ID5 | Domain1 |\n",
+       "| ID6 | Domain2 |\n",
+       "| ID7 | Domain2 |\n",
+       "| ID8 | Domain1 |\n",
+       "| ID9 | Domain3 |\n",
+       "| ID10 | Domain4 |\n",
+       "\n"
+      ],
+      "text/plain": [
+       "     label  \n",
+       "ID1  Domain1\n",
+       "ID2  Domain1\n",
+       "ID3  Domain2\n",
+       "ID4  Domain3\n",
+       "ID5  Domain1\n",
+       "ID6  Domain2\n",
+       "ID7  Domain2\n",
+       "ID8  Domain1\n",
+       "ID9  Domain3\n",
+       "ID10 Domain4"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "df <- read.delim(\"test_labels.tsv\", sep = \"\\t\", row.names = 1)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a010c80d-2992-4bca-8c45-f5c1f17e4417",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "R",
+   "language": "R",
+   "name": "ir"
+  },
+  "language_info": {
+   "codemirror_mode": "r",
+   "file_extension": ".r",
+   "mimetype": "text/x-r-source",
+   "name": "R",
+   "pygments_lexer": "r",
+   "version": "4.3.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From d4de939c8aab34bac50c37cc229707f9e34055c3 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 11:30:55 +0000
Subject: [PATCH 02/15] Ignore Jupyter notebook checkpoints

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..44fcac0f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Notebook Checkpoints
+.ipynb_checkpoints/

From e5204c4bc76fdd5152f1ffded85182e25abb8d91 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 11:57:29 +0000
Subject: [PATCH 03/15] Shuffle labels script

---
 preprocessing/shuffling/shuffle_labels.r | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r
index 299d0bda..4fed2969 100644
--- a/preprocessing/shuffling/shuffle_labels.r
+++ b/preprocessing/shuffling/shuffle_labels.r
@@ -35,14 +35,23 @@ opt <- parse_args(opt_parser)
 
 # Use these filepaths as input
 label_file <- opt$labels
+
+
+# Seed
 seed <- opt$seed
+set.seed(seed)
 
 ## Your code goes here
+if (!("label" %in% colnames(df))){
+     stop("Label column not present in the file. Check your file.")
+}
 
-
+# Randomize labels
+df_randomized <- data.frame(label = sample(df$label))
+rownames(df_randomized) <- rownames(df)
 
 ## Write output
 outfile <- file(opt$out_file)
 dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
 
-write.table(df_shuffled, outfile, sep = "\t", col.names = NA, quote = "FALSE")
\ No newline at end of file
+write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = "FALSE")
\ No newline at end of file

From c7956858a7971d57851faf0bbf88cc5f08a19503 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 12:00:49 +0000
Subject: [PATCH 04/15] Ignore test files and notebook

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 44fcac0f..f9f47d82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 # Notebook Checkpoints
 .ipynb_checkpoints/
+
+# Test files and notebooks
+preprocessing/shuffling/test*

From 43c65e182c77d966eb6d3efa89621ff703b3e3b5 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 12:07:43 +0000
Subject: [PATCH 05/15] Add environment

---
 preprocessing/shuffling/shuffle_labels.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 preprocessing/shuffling/shuffle_labels.yml

diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling/shuffle_labels.yml
new file mode 100644
index 00000000..ece484d4
--- /dev/null
+++ b/preprocessing/shuffling/shuffle_labels.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - R==4.3.1
\ No newline at end of file

From 4491e1b0790830a23f4a4a2a8d5958450313fcbc Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 12:15:54 +0000
Subject: [PATCH 06/15] Load file

---
 preprocessing/shuffling/shuffle_labels.r | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r
index 4fed2969..40190434 100644
--- a/preprocessing/shuffling/shuffle_labels.r
+++ b/preprocessing/shuffling/shuffle_labels.r
@@ -36,12 +36,12 @@ opt <- parse_args(opt_parser)
 # Use these filepaths as input
 label_file <- opt$labels
 
-
 # Seed
 seed <- opt$seed
 set.seed(seed)
 
 ## Your code goes here
+df <- read.delim(label_file, sep = "\t", row.names = 1)
 if (!("label" %in% colnames(df))){
      stop("Label column not present in the file. Check your file.")
 }

From ba36d8df7b75c1693370004c1ccf563e3422b2dd Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 12:37:36 +0000
Subject: [PATCH 07/15] Adds shuffle coordinates script

---
 preprocessing/shuffling/shuffle_coordinates.r | 58 +++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 preprocessing/shuffling/shuffle_coordinates.r

diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling/shuffle_coordinates.r
new file mode 100644
index 00000000..f5deb188
--- /dev/null
+++ b/preprocessing/shuffling/shuffle_coordinates.r
@@ -0,0 +1,58 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kim Vucinic; modified template and created script
+
+suppressPackageStartupMessages(library(optparse))
+
+# Arguments
+option_list <- list(
+  make_option(
+    c("-c", "--coordinates"),
+    type = "character", default = NULL,
+    help = "Path to coordinates (as tsv)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+# Description
+description <- "Shuffling coordinates in coordinates.tsv"
+
+opt_parser <- OptionParser(
+  usage = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Use these filepaths as input
+coord_file <- opt$coordinates
+
+# Seed
+seed <- opt$seed
+set.seed(seed)
+
+## Your code goes here
+df <- read.delim(coord_file, sep = "\t", row.names = 1)
+if (any(!(c("x", "y") %in% colnames(df)))){
+     stop("X and y coordinates are not present in the file. Check your file.")
+}
+
+# Reassign the IDs to rows at random, then sort rows back into the original ID order (the re-sorting is cosmetic)
+df_order <- rownames(df)
+rownames(df) <- sample(rownames(df))
+df_final <- df[order(match(rownames(df), df_order)),]
+
+## Write output
+outfile <- file(opt$out_file)
+dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
+
+write.table(df_final, outfile, sep = "\t", col.names = NA, quote = "FALSE")
\ No newline at end of file

From 0a87c9cb7d2913b8d97c18af9b964b42ef074548 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 12:38:54 +0000
Subject: [PATCH 08/15] Add environment for shuffling coordinates

---
 preprocessing/shuffling/shuffle_coordinates.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 preprocessing/shuffling/shuffle_coordinates.yml

diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling/shuffle_coordinates.yml
new file mode 100644
index 00000000..ece484d4
--- /dev/null
+++ b/preprocessing/shuffling/shuffle_coordinates.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - R==4.3.1
\ No newline at end of file

From 9019b760f7b03431ece762a682c18839a165c34a Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 12:42:14 +0000
Subject: [PATCH 09/15] Remove tests from branch

---
 preprocessing/shuffling/test_labels.tsv |  11 ---
 preprocessing/shuffling/testing.ipynb   | 117 ------------------------
 2 files changed, 128 deletions(-)
 delete mode 100644 preprocessing/shuffling/test_labels.tsv
 delete mode 100644 preprocessing/shuffling/testing.ipynb

diff --git a/preprocessing/shuffling/test_labels.tsv b/preprocessing/shuffling/test_labels.tsv
deleted file mode 100644
index 9f185410..00000000
--- a/preprocessing/shuffling/test_labels.tsv
+++ /dev/null
@@ -1,11 +0,0 @@
-	label
-ID1	Domain1
-ID2	Domain1
-ID3	Domain2
-ID4	Domain3
-ID5	Domain1
-ID6	Domain2
-ID7	Domain2
-ID8	Domain1
-ID9	Domain3
-ID10	Domain4
\ No newline at end of file
diff --git a/preprocessing/shuffling/testing.ipynb b/preprocessing/shuffling/testing.ipynb
deleted file mode 100644
index 99aeb098..00000000
--- a/preprocessing/shuffling/testing.ipynb
+++ /dev/null
@@ -1,117 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "ba391971-26a5-4f9e-86fe-a0237746c4e8",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<table class=\"dataframe\">\n",
-       "<caption>A data.frame: 10 × 1</caption>\n",
-       "<thead>\n",
-       "\t<tr><th></th><th scope=col>label</th></tr>\n",
-       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th></tr>\n",
-       "</thead>\n",
-       "<tbody>\n",
-       "\t<tr><th scope=row>ID1</th><td>Domain1</td></tr>\n",
-       "\t<tr><th scope=row>ID2</th><td>Domain1</td></tr>\n",
-       "\t<tr><th scope=row>ID3</th><td>Domain2</td></tr>\n",
-       "\t<tr><th scope=row>ID4</th><td>Domain3</td></tr>\n",
-       "\t<tr><th scope=row>ID5</th><td>Domain1</td></tr>\n",
-       "\t<tr><th scope=row>ID6</th><td>Domain2</td></tr>\n",
-       "\t<tr><th scope=row>ID7</th><td>Domain2</td></tr>\n",
-       "\t<tr><th scope=row>ID8</th><td>Domain1</td></tr>\n",
-       "\t<tr><th scope=row>ID9</th><td>Domain3</td></tr>\n",
-       "\t<tr><th scope=row>ID10</th><td>Domain4</td></tr>\n",
-       "</tbody>\n",
-       "</table>\n"
-      ],
-      "text/latex": [
-       "A data.frame: 10 × 1\n",
-       "\\begin{tabular}{r|l}\n",
-       "  & label\\\\\n",
-       "  & <chr>\\\\\n",
-       "\\hline\n",
-       "\tID1 & Domain1\\\\\n",
-       "\tID2 & Domain1\\\\\n",
-       "\tID3 & Domain2\\\\\n",
-       "\tID4 & Domain3\\\\\n",
-       "\tID5 & Domain1\\\\\n",
-       "\tID6 & Domain2\\\\\n",
-       "\tID7 & Domain2\\\\\n",
-       "\tID8 & Domain1\\\\\n",
-       "\tID9 & Domain3\\\\\n",
-       "\tID10 & Domain4\\\\\n",
-       "\\end{tabular}\n"
-      ],
-      "text/markdown": [
-       "\n",
-       "A data.frame: 10 × 1\n",
-       "\n",
-       "| <!--/--> | label &lt;chr&gt; |\n",
-       "|---|---|\n",
-       "| ID1 | Domain1 |\n",
-       "| ID2 | Domain1 |\n",
-       "| ID3 | Domain2 |\n",
-       "| ID4 | Domain3 |\n",
-       "| ID5 | Domain1 |\n",
-       "| ID6 | Domain2 |\n",
-       "| ID7 | Domain2 |\n",
-       "| ID8 | Domain1 |\n",
-       "| ID9 | Domain3 |\n",
-       "| ID10 | Domain4 |\n",
-       "\n"
-      ],
-      "text/plain": [
-       "     label  \n",
-       "ID1  Domain1\n",
-       "ID2  Domain1\n",
-       "ID3  Domain2\n",
-       "ID4  Domain3\n",
-       "ID5  Domain1\n",
-       "ID6  Domain2\n",
-       "ID7  Domain2\n",
-       "ID8  Domain1\n",
-       "ID9  Domain3\n",
-       "ID10 Domain4"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "df <- read.delim(\"test_labels.tsv\", sep = \"\\t\", row.names = 1)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a010c80d-2992-4bca-8c45-f5c1f17e4417",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "R",
-   "language": "R",
-   "name": "ir"
-  },
-  "language_info": {
-   "codemirror_mode": "r",
-   "file_extension": ".r",
-   "mimetype": "text/x-r-source",
-   "name": "R",
-   "pygments_lexer": "r",
-   "version": "4.3.1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 496aedf489ae133a3f554c0706875c679da74279 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 15:09:02 +0000
Subject: [PATCH 10/15] Update environments - add optparse package

---
 preprocessing/shuffling/shuffle_coordinates.yml | 3 ++-
 preprocessing/shuffling/shuffle_labels.yml      | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling/shuffle_coordinates.yml
index ece484d4..dae4376d 100644
--- a/preprocessing/shuffling/shuffle_coordinates.yml
+++ b/preprocessing/shuffling/shuffle_coordinates.yml
@@ -2,4 +2,5 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - R==4.3.1
\ No newline at end of file
+  - r-base==4.3.1
+  - r-optparse=1.7.3
\ No newline at end of file
diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling/shuffle_labels.yml
index ece484d4..dae4376d 100644
--- a/preprocessing/shuffling/shuffle_labels.yml
+++ b/preprocessing/shuffling/shuffle_labels.yml
@@ -2,4 +2,5 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - R==4.3.1
\ No newline at end of file
+  - r-base==4.3.1
+  - r-optparse=1.7.3
\ No newline at end of file

From f99a4729ca0d505e8442283e1fefe1e9994f88b4 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Tue, 12 Dec 2023 15:09:52 +0000
Subject: [PATCH 11/15] Fix output writing (drop dir.create, use quote = FALSE)

---
 preprocessing/shuffling/shuffle_coordinates.r | 4 +---
 preprocessing/shuffling/shuffle_labels.r      | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)
 mode change 100644 => 100755 preprocessing/shuffling/shuffle_coordinates.r
 mode change 100644 => 100755 preprocessing/shuffling/shuffle_labels.r

diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling/shuffle_coordinates.r
old mode 100644
new mode 100755
index f5deb188..18c0922d
--- a/preprocessing/shuffling/shuffle_coordinates.r
+++ b/preprocessing/shuffling/shuffle_coordinates.r
@@ -53,6 +53,4 @@ df_final <- df[order(match(rownames(df), df_order)),]
 
 ## Write output
 outfile <- file(opt$out_file)
-dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
-
-write.table(df_final, outfile, sep = "\t", col.names = NA, quote = "FALSE")
\ No newline at end of file
+write.table(df_final, outfile, sep = "\t", col.names = NA, quote = FALSE)
\ No newline at end of file
diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r
old mode 100644
new mode 100755
index 40190434..cfcf8393
--- a/preprocessing/shuffling/shuffle_labels.r
+++ b/preprocessing/shuffling/shuffle_labels.r
@@ -52,6 +52,4 @@ rownames(df_randomized) <- rownames(df)
 
 ## Write output
 outfile <- file(opt$out_file)
-dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
-
-write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = "FALSE")
\ No newline at end of file
+write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = FALSE)
\ No newline at end of file

From ab2b8e5d8acdeca0840e3941e992b90e1503ad13 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Wed, 13 Dec 2023 14:15:57 +0000
Subject: [PATCH 12/15] Remove .gitignore file

---
 .gitignore | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index f9f47d82..00000000
--- a/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-# Notebook Checkpoints
-.ipynb_checkpoints/
-
-# Test files and notebooks
-preprocessing/shuffling/test*

From 083da0fecb425fde08458ccbc8a685bfe3e54308 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Thu, 14 Dec 2023 14:12:49 +0000
Subject: [PATCH 13/15] Move shuffling scripts into separate folders

---
 .../{shuffling => shuffling_coordinates}/shuffle_coordinates.r    | 0
 .../{shuffling => shuffling_coordinates}/shuffle_coordinates.yml  | 0
 preprocessing/{shuffling => shuffling_labels}/shuffle_labels.r    | 0
 preprocessing/{shuffling => shuffling_labels}/shuffle_labels.yml  | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename preprocessing/{shuffling => shuffling_coordinates}/shuffle_coordinates.r (100%)
 mode change 100755 => 100644
 rename preprocessing/{shuffling => shuffling_coordinates}/shuffle_coordinates.yml (100%)
 rename preprocessing/{shuffling => shuffling_labels}/shuffle_labels.r (100%)
 rename preprocessing/{shuffling => shuffling_labels}/shuffle_labels.yml (100%)

diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling_coordinates/shuffle_coordinates.r
old mode 100755
new mode 100644
similarity index 100%
rename from preprocessing/shuffling/shuffle_coordinates.r
rename to preprocessing/shuffling_coordinates/shuffle_coordinates.r
diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml
similarity index 100%
rename from preprocessing/shuffling/shuffle_coordinates.yml
rename to preprocessing/shuffling_coordinates/shuffle_coordinates.yml
diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling_labels/shuffle_labels.r
similarity index 100%
rename from preprocessing/shuffling/shuffle_labels.r
rename to preprocessing/shuffling_labels/shuffle_labels.r
diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling_labels/shuffle_labels.yml
similarity index 100%
rename from preprocessing/shuffling/shuffle_labels.yml
rename to preprocessing/shuffling_labels/shuffle_labels.yml

From c84a2d4387ded3a9b2ad4ad30dcd401526791218 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Thu, 14 Dec 2023 14:14:53 +0000
Subject: [PATCH 14/15] Remove defaults channel from environments

---
 preprocessing/shuffling_coordinates/shuffle_coordinates.yml | 1 -
 preprocessing/shuffling_labels/shuffle_labels.yml           | 1 -
 2 files changed, 2 deletions(-)

diff --git a/preprocessing/shuffling_coordinates/shuffle_coordinates.yml b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml
index dae4376d..4e2066e6 100644
--- a/preprocessing/shuffling_coordinates/shuffle_coordinates.yml
+++ b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml
@@ -1,6 +1,5 @@
 channels:
   - conda-forge
-  - defaults
 dependencies:
   - r-base==4.3.1
   - r-optparse=1.7.3
\ No newline at end of file
diff --git a/preprocessing/shuffling_labels/shuffle_labels.yml b/preprocessing/shuffling_labels/shuffle_labels.yml
index dae4376d..4e2066e6 100644
--- a/preprocessing/shuffling_labels/shuffle_labels.yml
+++ b/preprocessing/shuffling_labels/shuffle_labels.yml
@@ -1,6 +1,5 @@
 channels:
   - conda-forge
-  - defaults
 dependencies:
   - r-base==4.3.1
   - r-optparse=1.7.3
\ No newline at end of file

From 843f2368b8e5d339c99c9a185ebf393230527081 Mon Sep 17 00:00:00 2001
From: Kim Vucinic <kim.vucinic@img.cas.cz>
Date: Thu, 14 Dec 2023 14:39:13 +0000
Subject: [PATCH 15/15] Shuffles only label column

---
 preprocessing/shuffling_labels/shuffle_labels.r | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/preprocessing/shuffling_labels/shuffle_labels.r b/preprocessing/shuffling_labels/shuffle_labels.r
index cfcf8393..7269be6d 100755
--- a/preprocessing/shuffling_labels/shuffle_labels.r
+++ b/preprocessing/shuffling_labels/shuffle_labels.r
@@ -47,9 +47,8 @@ if (!("label" %in% colnames(df))){
 }
 
 # Randomize labels
-df_randomized <- data.frame(label = sample(df$label))
-rownames(df_randomized) <- rownames(df)
+df$label <- sample(df$label)
 
 ## Write output
 outfile <- file(opt$out_file)
-write.table(df_randomized, outfile, sep = "\t", col.names = NA, quote = FALSE)
\ No newline at end of file
+write.table(df, outfile, sep = "\t", col.names = NA, quote = FALSE)
\ No newline at end of file

	label
	<chr>
ID1	Domain1
ID2	Domain1
ID3	Domain2
ID4	Domain3
ID5	Domain1
ID6	Domain2
ID7	Domain2
ID8	Domain1
ID9	Domain3
ID10	Domain4