Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ compile_commands.json

**/.DS_Store
flexiplex.dSYM/
debug/
debug/

**/flexiplex_reads_barcodes.txt
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ PYTHON?=python3

# Default goal: build the flexiplex binary.
# Declared phony because `all` names a command, not a file; a stray file
# called `all` must not make this target look up to date.
.PHONY: all
all: flexiplex

# Optimised build of the flexiplex binary.
# The bundled edlib source is compiled into the same binary, so it is listed
# as a prerequisite: editing it now triggers a rebuild. $^ therefore expands
# to both translation units. ${CFLAGS} stays last so user-supplied flags can
# override the defaults.
flexiplex: flexiplex.c++ edlib-1.2.7/edlib.cpp
	${CXX} $^ -Ofast -pthread -std=c++11 -o $@ -I edlib-1.2.7/ ${CFLAGS}

# Debug build: same sources as the release build, compiled with -g -O0.
# The recipe writes the binary to ./flexiplex, not to a file named after the
# target, so the target is declared phony: it always runs and cannot be
# shadowed by a stray file called `flexiplex-debug`.
.PHONY: flexiplex-debug
flexiplex-debug: flexiplex.c++
	${CXX} $^ -g -O0 -pthread -std=c++11 -o flexiplex edlib-1.2.7/edlib.cpp -I edlib-1.2.7/ ${CFLAGS}

# Remove the built binary.
# ${RM} is Make's built-in `rm -f`, so cleaning an already-clean tree is not
# an error. Phony so that a file named `clean` cannot disable the target.
.PHONY: clean
clean:
	${RM} flexiplex

Expand All @@ -19,7 +22,7 @@ install:
${PYTHON} -m pip install scripts/

# Run the integration test suite against the built binary.
# The script is started through `bash <path>` so it does not need the
# executable bit, and its exit status is propagated: a failing comparison
# makes `make test` fail instead of only printing a message.
.PHONY: test
test: flexiplex
	@bash tests/integration_tests.sh

uninstall:
rm ${DIR}/flexiplex
Expand Down
11 changes: 7 additions & 4 deletions flexiplex.c++
Original file line number Diff line number Diff line change
Expand Up @@ -214,12 +214,14 @@ std::string get_umi(const std::string &seq,

int umi_start, umi_length;
std::string umi_pad = "";
umi_length = search_pattern[umi_index].second.length();

if (umi_index == -1) {
return ""; // protocol does not have UMI
}

} else if (umi_index == bc_index + 1) {
umi_length = search_pattern[umi_index].second.length();

if (umi_index == bc_index + 1) {
// UMI right after BC
if (sliding_window_match) {
umi_start = left_bound + endDistance;
Expand All @@ -232,12 +234,11 @@ std::string get_umi(const std::string &seq,
umi_pad = std::string(search_pattern[umi_index].second.length() - umi_length, 'N');
}
return seq.substr(umi_start, umi_length) + umi_pad;

} else if (umi_index == bc_index - 1) {
// UMI right before BC
int bc_start = sliding_window_match ? left_bound + endDistance : read_to_subpatterns[bc_index];
// umi should start umi_offset bases before BC
int umi_offset = search_pattern[bc_index].second.length() + search_pattern[umi_index].second.length();
int umi_offset = search_pattern[umi_index].second.length();
if (bc_start < umi_offset) {
// not enough bases before BC
umi_pad = std::string(umi_offset - bc_start, 'N');
Expand All @@ -246,6 +247,8 @@ std::string get_umi(const std::string &seq,
} else {
umi_start = bc_start - umi_offset;
}
auto temp = seq.substr(umi_start, umi_length) + umi_pad;

return umi_pad + seq.substr(umi_start, umi_length);

} else {
Expand Down
17 changes: 17 additions & 0 deletions tests/integration_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Integration tests for flexiplex.
#
# Runs the flexiplex binary from the parent directory against the fixtures
# that sit next to this script and compares its standard output with the
# expected FASTQ files. Because of `set -e`, the script stops with a non-zero
# status at the first failing check.
set -euo pipefail

# Change into the directory that contains this script (and the fixtures), so
# the tests work from any working directory, not only the repository root.
cd "$(dirname "${BASH_SOURCE[0]}")"

# flexiplex_reads_barcodes.txt appears in this directory during the runs
# (presumably written by flexiplex itself). Remove it on every exit path,
# including early exits caused by a failing check; -f keeps the cleanup from
# failing when the file was never created.
trap 'rm -f flexiplex_reads_barcodes.txt' EXIT

# NOTE(review): in the commands below the backslash-escaped quotes (\") reach
# flexiplex as literal '"' characters that become part of the -b / -u patterns.
# Confirm this is intended; the expected output files look like they were
# generated with them, so removing the quotes requires regenerating fixtures.

# test primary input
diff --color test_1_output.fastq \
  <(../flexiplex -k barcodes.txt -b GGGG -x TTT test_1_input.fastq 2>/dev/null)

# test segfault when no UMI is provided: only the exit status matters here,
# so all output is discarded
../flexiplex -x ATCGGCGTACGACT -b \"????????\" -x ATCCACGTGCTTGAGACTGTGG -k test_23_barcodes.txt -f 2 -e 1 test_2_input.fastq >/dev/null 2>&1

# test prefixed UMI - sample read
diff --color test_3_output.fastq \
  <(../flexiplex -u \"??????????\" -b \"????????\" -x GTGGCCGATGTTTCGCATCGGCGTACGACT -k test_23_barcodes.txt -f 4 -e 1 test_3_input.fastq 2>/dev/null)
File renamed without changes.
File renamed without changes.
96 changes: 96 additions & 0 deletions tests/test_23_barcodes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
AACGTGAT
AAACATCG
ATGCCTAA
AGTGGTCA
ACCACTGT
ACATTGGC
CAGATCTG
CATCAAGT
CGCTGATC
ACAAGCTA
CTGTAGCC
AGTACAAG
AACAACCA
AACCGAGA
AACGCTTA
AAGACGGA
AAGGTACA
ACACAGAA
ACAGCAGA
ACCTCCAA
ACGCTCGA
ACGTATCA
ACTATGCA
AGAGTCAA
AGATCGCA
AGCAGGAA
AGTCACTA
ATCCTGTA
ATTGAGGA
CAACCACA
GACTAGTA
CAATGGAA
CACTTCGA
CAGCGTTA
CATACCAA
CCAGTTCA
CCGAAGTA
CCGTGAGA
CCTCCTGA
CGAACTTA
CGACTGGA
CGCATACA
CTCAATGA
CTGAGCCA
CTGGCATA
GAATCTGA
CAAGACTA
GAGCTGAA
GATAGACA
GCCACATA
GCGAGTAA
GCTAACGA
GCTCGGTA
GGAGAACA
GGTGCGAA
GTACGCAA
GTCGTAGA
GTCTGTCA
GTGTTCTA
TAGGATGA
TATCAGCA
TCCGTCTA
TCTTCACA
TGAAGAGA
TGGAACAA
TGGCTTCA
TGGTGGTA
TTCACGCA
AACTCACC
AAGAGATC
AAGGACAC
AATCCGTC
AATGTTGC
ACACGACC
ACAGATTC
AGATGTAC
AGCACCTC
AGCCATGC
AGGCTAAC
ATAGCGAC
ATCATTCC
ATTGGCTC
CAAGGAGC
CACCTTAC
CCATCCTC
CCGACAAC
CCTAATCC
CCTCTATC
CGACACAC
CGGATTGC
CTAAGGTC
GAACAGGC
GACAGTGC
GAGTTAGC
GATGAATC
GCCAAGAC
10,000 changes: 10,000 additions & 0 deletions tests/test_2_input.fastq

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions tests/test_3_input.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@SRR13948564.233 233 length=1658
ATGAAAGCAGCTGTGTGGTTCTGGTGAGCTTGTTCACAGAAGCAGCTCGTTGGGTTTGGTCTCGGGCACTTGGGATCTGCCAGCATATTGGCTTAATGATAGCTACCTCTTTAGGAAGTGGTTAAAGATGGTGAGGCTGGCATCTGGAATCTCTATCTGCTCTTGTGGTTGCAGTTTTTCTAGTAGCATCTCTGTGGCTCCAATGTCCACTCCTGAACTGGCGGTGGTCAGTGGTTGTACTCCCAGATAAACCAAGCACACCTTTGACCAGCTTCCTACAGGGCCAGAGGACAGAGAGAGATCCGGATGTGCAAACTATGGCCCAGAAAGGAGCACTCACCTGCCTGCCTTTTGTGATTACTATAGATAGGACAGATTATAGGATGGACAAAGAAGTTCCAAAGGCAGGCATATGTTGTGATGGAAGTGTGACAGGGACCCAAGTGGGCCTGCATCTCTTGTAGGGAGAAAAGAGGCTTTTCAGAGCTGTGGAGTCTGAGAAGAGCACGGCTCCGGTGGGTTTGTGCTGTCTTGCTCTGTCTGGCCACATATTTACATTTCCTTTAAAATATTCTAGGATCCAGCTCAGTCCTCTGGTTTTCATCCTTCCGGGTCTGTGGTTGCAGTGGGGACACTGACTGGGAGGTGGTTTGTGTTTGACACGGAGACAAAGGACTTGGTCACCGTCCACACGGATGGGAATGAGCAGCTGTCCGTGATGCGGTATTCTCCAGATGGGAACTTCTTAGCAATCGGCTCCCATGACAACTGCATCTACATATATGGAGTTACCGACAATGGAAGGAAGTACACACGAGTTGGCAAGTGCTCCGGCCATTCCAGCTTCATCACCCACTTGGACTGGTCCGTGAACTCACAATTCCTGGTGTCAAATTCCGGGGACTACGAGATCCTCTACTGTGAGTAACAGGCATCAATCAGGCTTGTCGGGGTTTAGATTACACAGGACACTCACTCTGACATTCCCATGGGCCCGTGTGGAGCGAGGCCCTCTCCATCCCTCAGTGTCCTTTGTGACATGAGGATTTGGCCTTACAGCCTCGTGGAGTCTTCCTGACTGGGAGGGGTCTTGTTGAAGAGCGGGACCCACCCGGGGGAAGAAGCAGGTGAACCCTGTGCAGAGGTCCTAGAGGTGGGACAGTCCCACAGTCACAGGATGAGTGACCCTCGTAAAGGTACCCACCTCAAGCAGGCAGAGACCCTCCCATTTCAGTGAATCAAAGCCCCTGGACCCTTTTCTTCCAGCCTGCCTTTAGGGCATCTCTGTGCCTACGTAAGACCTGGGAGGAGGGAGGATTTACGGAGTACCTGCTACATGCCATGCTCTGGGCAGAGCGGAGGGGGTCGTGGGGGGGGGGAGTCGTGGGATGTGAGAGCACCCCGACCTCCACCCACAGGATGTTACTTAGGACGCACACTGCTGTTTGCCTGGCTTTCTCATGATCCACAGTAGTATTTTCTGACAATTTCAGGCTCATCCCTTTGTATCGATACAAACAAACAAACAAACAAACAAACAAAAAAACAAAAAAAACCAAAAAAAAAAAAAAATTCGAGTGCCACAGTCTCAAGCACGTGGATTGAACTGGAGTCGTACGCCGATGCGAAACATCGGCCACGACGGATTATACGTCTTA
+SRR13948564.233 233 length=1658

4 changes: 4 additions & 0 deletions tests/test_3_output.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@AATCCGTC_GTATAATCCGTC#SRR13948564.233_-1of1 CB:Z:AATCCGTC UB:Z:GTATAATCCGTC
CCAGTTCAATCCACGTGCTTGAGACTGTGGCACTCGAATTTTTTTTTTTTTTTGGTTTTTTTTGTTTTTTTGTTTGTTTGTTTGTTTGTTTGTTTGTATCGATACAAAGGGATGAGCCTGAAATTGTCAGAAAATACTACTGTGGATCATGAGAAAGCCAGGCAAACAGCAGTGTGCGTCCTAAGTAACATCCTGTGGGTGGAGGTCGGGGTGCTCTCACATCCCACGACTCCCCCCCCCCACGACCCCCTCCGCTCTGCCCAGAGCATGGCATGTAGCAGGTACTCCGTAAATCCTCCCTCCTCCCAGGTCTTACGTAGGCACAGAGATGCCCTAAAGGCAGGCTGGAAGAAAAGGGTCCAGGGGCTTTGATTCACTGAAATGGGAGGGTCTCTGCCTGCTTGAGGTGGGTACCTTTACGAGGGTCACTCATCCTGTGACTGTGGGACTGTCCCACCTCTAGGACCTCTGCACAGGGTTCACCTGCTTCTTCCCCCGGGTGGGTCCCGCTCTTCAACAAGACCCCTCCCAGTCAGGAAGACTCCACGAGGCTGTAAGGCCAAATCCTCATGTCACAAAGGACACTGAGGGATGGAGAGGGCCTCGCTCCACACGGGCCCATGGGAATGTCAGAGTGAGTGTCCTGTGTAATCTAAACCCCGACAAGCCTGATTGATGCCTGTTACTCACAGTAGAGGATCTCGTAGTCCCCGGAATTTGACACCAGGAATTGTGAGTTCACGGACCAGTCCAAGTGGGTGATGAAGCTGGAATGGCCGGAGCACTTGCCAACTCGTGTGTACTTCCTTCCATTGTCGGTAACTCCATATATGTAGATGCAGTTGTCATGGGAGCCGATTGCTAAGAAGTTCCCATCTGGAGAATACCGCATCACGGACAGCTGCTCATTCCCATCCGTGTGGACGGTGACCAAGTCCTTTGTCTCCGTGTCAAACACAAACCACCTCCCAGTCAGTGTCCCCACTGCAACCACAGACCCGGAAGGATGAAAACCAGAGGACTGAGCTGGATCCTAGAATATTTTAAAGGAAATGTAAATATGTGGCCAGACAGAGCAAGACAGCACAAACCCACCGGAGCCGTGCTCTTCTCAGACTCCACAGCTCTGAAAAGCCTCTTTTCTCCCTACAAGAGATGCAGGCCCACTTGGGTCCCTGTCACACTTCCATCACAACATATGCCTGCCTTTGGAACTTCTTTGTCCATCCTATAATCTGTCCTATCTATAGTAATCACAAAAGGCAGGCAGGTGAGTGCTCCTTTCTGGGCCATAGTTTGCACATCCGGATCTCTCTCTGTCCTCTGGCCCTGTAGGAAGCTGGTCAAAGGTGTGCTTGGTTTATCTGGGAGTACAACCACTGACCACCGCCAGTTCAGGAGTGGACATTGGAGCCACAGAGATGCTACTAGAAAAACTGCAACCACAAGAGCAGATAGAGATTCCAGATGCCAGCCTCACCATCTTTAACCACTTCCTAAAGAGGTAGCTATCATTAAGCCAATATGCTGGCAGATCCCAAGTGCCCGAGACCAAACCCAACGAGCTGCTTCTGTGAACAAGCTCACCAGAACCACACAGCTGCTTTCAT
+AATCCGTC_GTATAATCCGTC#SRR13948564.233_-1of1 CB:Z:AATCCGTC UB:Z:GTATAATCCGTC

Loading