From 18e5afa7405e36db09c930801bb758333bd6d616 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 13:08:48 -0600 Subject: [PATCH 01/13] init commit 1.1.0 --- src/cli.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.hpp b/src/cli.hpp index c4f1175..280eb53 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -5,7 +5,7 @@ #ifndef ULTRA_CLI_HPP #define ULTRA_CLI_HPP -#define ULTRA_VERSION_STRING "1.0.4" +#define ULTRA_VERSION_STRING "1.1.0" #define DEBUG_STRING "" #ifdef DEBUG_PRAGMA #undef DEBUG_STRING From c124fb371d49eb2681233a91a4990c1a3051add9 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 13:26:26 -0600 Subject: [PATCH 02/13] steps for standalone get repeats function --- src/ultra.cpp | 11 ++++++++++- src/ultra.hpp | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/ultra.cpp b/src/ultra.cpp index 6a183d7..c572d30 100644 --- a/src/ultra.cpp +++ b/src/ultra.cpp @@ -95,7 +95,7 @@ SequenceWindow *Ultra::GetSequenceWindow(SequenceWindow *seq, uthread *uth) { // NOTE: // THIS IS NOT THREAD SAFE -// WE ASSUME THATS OK (FOR NOW) +// WE ASSUME THAT'S OK (FOR NOW) int Ultra::SmallestReadID() { int smallest = 100000000; @@ -151,6 +151,15 @@ double Ultra::PvalForScore(float score) const { return exp(-1.0 * (score - loc) / scale) * freq; } +std::vector* GetRepeatsForSequence(const std::string &s) { + // Create sequence window from the string + // Store and change primary thread + // Run AnalyzeSequenceWindow + // Change back primary thread to stored value + // Steal the repeat array from the uth thread and give the uth thread a new one + // Return the repeat array +} + void Ultra::AnalyzeSequenceWindow(SequenceWindow *sequence, uthread *uth) { diff --git a/src/ultra.hpp b/src/ultra.hpp index 81de9d5..ec730b3 100644 --- a/src/ultra.hpp +++ b/src/ultra.hpp @@ -93,6 +93,8 @@ class Ultra { void OutputRepeats(bool flush = false); void OutputRepeat(RepeatRegion *r, bool isSubRep = false); + std::vector* GetRepeatsForSequence(const std::string &s); + void OutputULTRASettings(); void InitializeWriter(); From dce671c01a2c9f01e1f2234a0d6115d6f70052c2 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 14:05:23 -0600 Subject: [PATCH 03/13] An implementation of GetRepeatsForSequence has been added --- src/ultra.cpp | 48 ++++++++++++++++-- src/umodel.cpp | 134 ------------------------------------------------- 2 files changed, 44 insertions(+), 138 deletions(-) diff --git a/src/ultra.cpp b/src/ultra.cpp index c572d30..ed07d6c 100644 --- a/src/ultra.cpp +++ b/src/ultra.cpp @@ -151,13 +151,53 @@ double Ultra::PvalForScore(float score) const { return exp(-1.0 * (score - loc) / scale) * freq; } -std::vector* GetRepeatsForSequence(const std::string &s) { +std::vector* Ultra::GetRepeatsForSequence(const std::string &seq) { + // Make sure that the seq window can fit in the DP matrix + uthread *uth = this->threads[0]; + if (seq.length() > uth->model->matrix->length) { + fprintf(stderr, "ULTRA model has maximum size %llu but string has length %zu\n", + uth->model->matrix->length, + seq.length()); + + return nullptr; + } + + // Make sure that uth isn't holding any repeats right now + if (uth->repeats.size() > 0) { + fprintf(stderr, "ULTRA repeat array is not empty.\n"); + return nullptr; + } + // Create sequence window from the string + SequenceWindow *seq_window = new SequenceWindow(seq.length(), 0); + seq_window->length = seq.length(); + seq_window->start = 0; + seq_window->end = seq.length(); + + // Fill sequence window + for (int i = 0; i < seq.length(); ++i) { + seq_window->seq[i] = SymbolForChar(seq[i]); + } + // Store and change primary thread + int pthread = this->primaryThread; + this->primaryThread = -10; + // Run AnalyzeSequenceWindow - // Change back primary thread to stored value - // Steal the repeat array from the uth thread and give the uth thread a new one - // Return the repeat array + this->AnalyzeSequenceWindow(seq_window, uth); + + // Gather repeats + std::vector *repeats = new std::vector(); + for (int i = 0; i < uth->repeats.size(); ++i) { + repeats->push_back(uth->repeats[i]); + } + + // Clean up after ourselves + uth->repeats.clear(); + this->primaryThread = pthread; + delete seq_window; + + return repeats; } void Ultra::AnalyzeSequenceWindow(SequenceWindow *sequence, uthread *uth) { diff --git a/src/umodel.cpp b/src/umodel.cpp index 8df2e06..98d212e 100644 --- a/src/umodel.cpp +++ b/src/umodel.cpp @@ -91,144 +91,18 @@ void UModel::CalculateScores() { bscore[i] = log2(backgroundProbabilties[i]); } - /*if (adjustMatchMismatchMatrix) { - - for (int i = 0; i < 256; ++i) { - - float at = (float)i / 256.0; - float cg = 1.0 - at; - - float a = at / 2.0; - float t = a; - float c = cg / 2.0; - float g = c; - - backgroundScores[i][0] = 0.25; - backgroundScores[i][N_A] = a; - backgroundScores[i][N_T] = t; - backgroundScores[i][N_C] = c; - backgroundScores[i][N_G] = g; - - // printf("%f %f %f %f\n", backgroundScores[i][N_A], - backgroundScores[i][N_T], backgroundScores[i][N_C], - backgroundScores[i][N_G]); - - for (int x = 0; x < 5; ++x) { - float s = 0.0; - for (int y = 0; y < 5; ++y) { - matchScores[i][x][y] = log2(backgroundScores[i][x] * - backgroundScores[i][y] + 0.0001) * matchProbabilities[x][y]; s += - matchScores[i][x][y]; - } - - for (int y = 0; y < 5; ++y) { - // printf("%f=", matchScores[i][x][y] / s); - matchScores[i][x][y] = log2(matchScores[i][x][y] / s); - // printf("%f ", matchScores[i][x][y]); - } - // printf("\n"); - } - - for (int x = 0; x < 5; ++x) { - - backgroundScores[i][x] = log2(backgroundScores[i][x] + 0.0001); - } - - - } - } - - else { - - for (int i = 0; i < 256; ++i) { - - float at = (float)i / 256.0; - float cg = 1.0 - at; - - float a = at / 2.0; - float t = a; - float c = cg / 2.0; - float g = c; - - backgroundScores[i][0] = 0.25; - backgroundScores[i][N_A] = a; - backgroundScores[i][N_T] = t; - backgroundScores[i][N_C] = c; - backgroundScores[i][N_G] = g; - - // printf("%f %f %f %f\n", backgroundScores[i][N_A], - backgroundScores[i][N_T], backgroundScores[i][N_C], - backgroundScores[i][N_G]); - - for (int x = 0; x < 5; ++x) { - for (int y = 0; y < 5; ++y) { - matchScores[i][x][y] = log2(matchProbabilities[x][y]); - } - - } - - for (int x = 0; x < 5; ++x) { - - backgroundScores[i][x] = log2(backgroundScores[i][x] + 0.0001); - } - - - } - } - */ } -/*void UModel::CalculateEmissionScores (unsigned char symbolFreq) { - - // Do linear mixing of background probabilities with window frequencies - - int at = symbolFreq; - - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < 5; ++j) { - mscore[i][j] = matchScores[at][i][j]; - - } - bscore[i] = backgroundScores[at][i]; - } - -}*/ - -/*** UMODEL SCORE CALCULATION ***/ - // This does not check if index - d > 0 float UModel::PreviousEmissionScore(SequenceWindow *seq, int index, int order, int d) { int p = index - d; float b = bscore[seq->seq[p]]; - // symbol s = seq->seq[p]; - // printf("%i, %i %i %i, %f\n", p, order, p-order, (int)seq->seq[p], - // mscore[p-order][seq->seq[p]]); float v = mscore[seq->seq[p - order]][seq->seq[p]]; return v - b; - // return emissionScore(seq, index, order) - - // Calculate emission scores for index - d - /*int p = index - d; - - int at = seq->symbolFreqs[p]; - - float mp[5][5]; - float b = backgroundScores[at][seq->seq[p]]; - - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < 5; ++j) { - mp[i][j] = matchScores[at][i][j]; - - } - } - - float v = mp[seq->seq[p - order]][seq->seq[p]]; - - return v - b;*/ } float UModel::EmissionScore(SequenceWindow *seq, int index, int order) { @@ -353,17 +227,9 @@ void UModel::CalculateCurrentColumn(SequenceWindow *sequence, int nucIndex, float score = p[row] + tscore[CT_INSERTION][CT_MATCH] + EmissionScore(sequence, nucIndex, order); if (score > c[parentIndex]) { - // printf("%i %i %f %f\n", order, indelNum, score, - // c[parentIndex]); c[parentIndex] = score; ct[desc[row].order] = row; - // printf("in[%i for %i:%i,%i,%i,%i] :%f vs %f to p %f (%f, - // %f= (%f with %f)\n", row, nucIndex, minIndex, - // parentIndex, order, indelNum, p[row], score, - // c[parentIndex], d, f, matrix->PreviousScore(parentIndex, - // peridocity), matrix->PreviousScore(parentIndex, - // peridocity + 1)); } // Calculate new score based off of previous score From 93c7085e4d655e12d793f67f3ad77bfde8ee757d Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 16:02:09 -0600 Subject: [PATCH 04/13] Includes some lib prep testing, also fixes the exclusion of last nucleotide bug --- src/cli.cpp | 2 +- src/cli.hpp | 3 +- src/main.cpp | 33 +++++++ src/repeat.cpp | 4 +- src/ultra.cpp | 25 ++++-- src/ultra.hpp | 2 +- src/umatrix.cpp | 17 ++-- src/umatrix.hpp | 2 +- src/umodel.cpp | 229 ++++++++++++++++++++++++++++++++++++++++++++++++ src/umodel.hpp | 1 + 10 files changed, 299 insertions(+), 19 deletions(-) diff --git a/src/cli.cpp b/src/cli.cpp index 979f5de..4eb142a 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -390,7 +390,7 @@ bool Settings::parse_input(int argc, const char **argv) { } bool passed = true; - if (this->in_file.empty() && !this->show_memory) { + if (this->in_file.empty() && !this->show_memory && !this->run_without_reader) { fprintf(stderr, "Input file required.\n"); passed = false; } diff --git a/src/cli.hpp b/src/cli.hpp index 280eb53..41857e2 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -70,7 +70,6 @@ struct Settings { unsigned long long min_units = 2; // Tuning parameters - double tune_fdr = 0.05; bool tune = false; bool tune_medium = false; @@ -112,6 +111,8 @@ struct Settings { unsigned long long max_namable_period = 50; unsigned long long max_highfi_naming = 20; + bool run_without_reader = false; + CLI::App app{"\n" "=================================================\n" "(U)ltra (L)ocates (T)andemly (R)epetitive (A)reas\n" diff --git a/src/main.cpp b/src/main.cpp index b5d1037..4d10262 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,6 +5,38 @@ #include // for std::bad_alloc +int test_lib(int argc, const char * argv[]) { + printf("Starting test.\n"); + Settings *settings = new Settings(); + + settings->run_without_reader = true; + settings->windows=1; + settings->window_size = 1000000; + settings->overlap = 0; + settings->max_period=25; + settings->max_insert=3; + settings->max_delete=3; + + settings->prepare_settings(); + if (!settings->parse_input(argc, argv)) { + exit(0); + } + settings->assign_settings(); + + printf("%llu + %llu\n", settings->window_size, + settings->overlap); + + auto ultra = new Ultra(settings); + + auto repeats = ultra->FindRepeatsInString("aaaaaaaaaaaaaaaaaaaaaaaaaa"); + printf("%llu\n", repeats->size()); + for (int i= 0; i < repeats->size(); ++i) { + auto r = repeats->at(i); + printf("%lu %lu %i\n", r->windowStart, r->repeatLength, r->repeatPeriod); + } + return 0; +} + int main_wrapper(int argc, const char * argv[]) { // Prepare settings Settings *settings = new Settings(); @@ -174,6 +206,7 @@ int main_wrapper(int argc, const char * argv[]) { int main(int argc, const char *argv[]) { + return test_lib(argc, argv); char *reserve_memory = (char *)malloc(65536); try { diff --git a/src/repeat.cpp b/src/repeat.cpp index 79cf53a..b41c6a6 100644 --- a/src/repeat.cpp +++ b/src/repeat.cpp @@ -692,7 +692,9 @@ RepeatRegion *GetNextRepeat(SequenceWindow *window, UMatrix *matrix, int *pos) { int length = region->repeatPeriod; - for (; i < seqLength; ++i) { + + // We use a <= here because we calculate one additional column + for (; i <= seqLength; ++i) { if (matrix->traceback[i] != 0) { ++length; } diff --git a/src/ultra.cpp b/src/ultra.cpp index ed07d6c..94ff15b 100644 --- a/src/ultra.cpp +++ b/src/ultra.cpp @@ -151,7 +151,7 @@ double Ultra::PvalForScore(float score) const { return exp(-1.0 * (score - loc) / scale) * freq; } -std::vector* Ultra::GetRepeatsForSequence(const std::string &seq) { +std::vector* Ultra::FindRepeatsInString(const std::string &seq) { // Make sure that the seq window can fit in the DP matrix uthread *uth = this->threads[0]; if (seq.length() > uth->model->matrix->length) { @@ -215,6 +215,7 @@ void Ultra::AnalyzeSequenceWindow(SequenceWindow *sequence, uthread *uth) { for (int i = 0; i < sleng; ++i) { model->CalculateCurrentColumn(sequence, i); } + model->CalculateCurrentColumnWithoutEmission(); // WE'RE RIGHT HERE // GOING TO TRY TO PUSH ALL THE CODE IN !!! @@ -488,11 +489,12 @@ Ultra::Ultra(Settings *s) { outputRepeatSequence = settings->show_seq; passID = 0; - - reader = new FileReader(settings->in_file, settings->windows, - settings->window_size, settings->overlap, - settings->threads > 1); - reader->fastaReader->shuffle = shuffleSequence; + if (!settings->run_without_reader) { + reader = new FileReader(settings->in_file, settings->windows, + settings->window_size, settings->overlap, + settings->threads > 1); + reader->fastaReader->shuffle = shuffleSequence; + } int leng = settings->window_size + (settings->overlap + 2); storeTraceAndSequence = true; @@ -509,7 +511,7 @@ Ultra::Ultra(Settings *s) { // We now are making the v_maxPeriod setting more intuitive, by adding 1 to // it. This makes a v_maxPeriod of 10 able to detect repeats of length 10. UModel *mod = new UModel(settings->max_period + 1, settings->max_insert, - settings->max_delete, leng); + settings->max_delete, leng + 1); mod->periodDecay = settings->period_decay; @@ -548,18 +550,23 @@ Ultra::Ultra(Settings *s) { } if (numberOfThreads == 1) { - reader->multithread = false; + if (!settings->run_without_reader) { + reader->multithread = false; + } multithreading = false; } else { - reader->multithread = true; + if (!settings->run_without_reader) { + reader->multithread = true; + } multithreading = true; } } Ultra::~Ultra() { settings = nullptr; + delete reader; reader = nullptr; diff --git a/src/ultra.hpp b/src/ultra.hpp index ec730b3..5e44c72 100644 --- a/src/ultra.hpp +++ b/src/ultra.hpp @@ -93,7 +93,7 @@ class Ultra { void OutputRepeats(bool flush = false); void OutputRepeat(RepeatRegion *r, bool isSubRep = false); - std::vector* GetRepeatsForSequence(const std::string &s); + std::vector* FindRepeatsInString(const std::string &s); void OutputULTRASettings(); void InitializeWriter(); diff --git a/src/umatrix.cpp b/src/umatrix.cpp index 94a95fd..59a5c50 100644 --- a/src/umatrix.cpp +++ b/src/umatrix.cpp @@ -218,13 +218,20 @@ int *UMatrix::ForwardTraceback(int *traceArray, int windowLength, int row) { } // This has not been debugged yet -void UMatrix::CalculateTraceback(int startColumn) { +void UMatrix::CalculateTraceback(unsigned long long startColumn) { // Assume best row is 0 - int row = 0; - + unsigned long long row = 0; + float best_value = scoreColumns[startColumn][0]; + + for (unsigned long long i = 1; i < cellsPerColumn; ++i) { + if (cellDescriptions[i].type == CT_MATCH) + if (scoreColumns[startColumn][i] > best_value) { + row = i; + best_value = scoreColumns[startColumn][i]; + } + } // Do the normal calculations - - for (int i = 0; i <= startColumn; ++i) { + for (unsigned long long i = 0; i <= startColumn; ++i) { cell desc = cellDescriptions[row]; int rowOrder = desc.order; diff --git a/src/umatrix.hpp b/src/umatrix.hpp index 60d3a28..12ed423 100644 --- a/src/umatrix.hpp +++ b/src/umatrix.hpp @@ -92,7 +92,7 @@ class UMatrix { int *ForwardTraceback(int *traceArray, int windowLength, int row); void RestartMatrix(); - void CalculateTraceback(int startColumn); + void CalculateTraceback(unsigned long long startColumn); // Class management void CreateMatrix(); diff --git a/src/umodel.cpp b/src/umodel.cpp index 98d212e..d56d39f 100644 --- a/src/umodel.cpp +++ b/src/umodel.cpp @@ -378,6 +378,235 @@ void UModel::CalculateCurrentColumn(SequenceWindow *sequence, int nucIndex, // Right now we don't do anything with CanBeRepetitive } +void UModel::CalculateCurrentColumnWithoutEmission(bool *canBeRepetitive) { + bool cbr = false; // can be repetitive - true if it is possible for next + // character to be repetitve + float bestRepeatScore = NEG_INF; + + cell *desc = matrix->cellDescriptions; + float *p = matrix->previousScoreColumn; + float *c = matrix->currentScoreColumn; + + int *ct = matrix->currentTracebackColumn; + + int matind = matrix->previousColumnIndex; + + // CalculateEmissionScores(sequence->symbolFreqs[nucIndex]); + + for (int row = 0; row < matrix->cellsPerColumn; ++row) { + int bestToZero = 0; + switch (desc[row].type) { + + // ****ZEROTH ORDER CELL**** + // Can be transfered to from: zeroth order, nth order + case CT_BACKGROUND: { + // Calculate none->none + c[row] = p[row] + tscore[CT_BACKGROUND][CT_BACKGROUND]; + ct[0] = row; + + break; + } + + // ****NTH ORDER CELL**** + // Can be transfered to from: zeroth order, nth order, insertion, + // deletion This will also check to see if it C_NONE inherits from + // C_MATCH This does not check insertion->match or deletion->match - + // those are done in + // insertion and deletion cells + case CT_MATCH: { + + // Check to see if match->none is a better score than none->none + int parentIndex = desc[row].parentIndex; + int order = desc[row].order; + float score = p[row] + tscore[CT_MATCH][CT_BACKGROUND]; + score += (order)*periodDecay; + score += periodDecayOffset; + + if (score > c[parentIndex]) { + c[parentIndex] = score; + ct[0] = row; + cbr = true; + bestToZero = order; + } + + // Before we calculate normal match scores we need to check to see + // if we have had enough characters pass + if (matind < order) { + c[row] = NEG_INF; + ct[desc[row].order] = 0; + break; + } + + // Calculate none->match + + // Testing out having transitions to state n come from 0th order n + // characters previous + + float zscore = 0.0; + + if (immediateTransitionToRepeat) { + zscore = p[0]; + } + + else { + zscore = matrix->PreviousScore(0, order - 1); + zscore += tscore[CT_BACKGROUND][CT_BACKGROUND] * order; + } + + float es = 0; + + score = zscore + tscore[CT_BACKGROUND][CT_MATCH] + es; + + c[row] = score; + ct[desc[row].order] = 0; + + // Calculate match->match + score = p[row] + tscore[CT_MATCH][CT_MATCH] + es; + if (score > c[row]) { + c[row] = score; + ct[desc[row].order] = row; + } + + // Check to see if this is the best chance of being a m->0 + // transition + if (c[row] > bestRepeatScore) + bestRepeatScore = c[row]; + + break; + } + + case CT_INSERTION: { + + int order = desc[row].order; + int parentIndex = desc[row].parentIndex; + int peridocity = order + desc[row].indelNumber; + int minIndex = peridocity + order + 1; // 2 * order + indelnum + + // int indelNum = desc[row].indelNumber; + + // We can proceed normally` + if (matind > minIndex) { + + // Check to see if the match state should transition from here + float score = p[row] + tscore[CT_INSERTION][CT_MATCH]; + if (score > c[parentIndex]) { + c[parentIndex] = score; + ct[desc[row].order] = row; + + } + + // Calculate new score based off of previous score + score = p[row]; + + // Update changes in match state score + score -= matrix->PreviousScore(parentIndex, peridocity + 1); + score += matrix->PreviousScore(parentIndex, peridocity); + + + // Update insertion values + // There is effectively no score for insertions + + c[row] = score; + + } + + // We have to do a manual calculation of the score + else if (matind == minIndex) { + float score = matrix->PreviousScore( + parentIndex, + peridocity); // The match score this insertion comes from + score += 0.0; // The emission score of the actual insertion = 0 + // = bscore[n] - bscore[n] + + // Transition cost + score += tscore[CT_MATCH][CT_INSERTION]; + score += tscore[CT_INSERTION][CT_INSERTION] * + (float)(desc[row].indelNumber - 1); + score += log2(1.0 - tp_matchToZero) * (float)order; + + // We don not need to consider the insertion cost at this point + // because the insertion state is 0th order, and will have + // emission score = 1 = bscore/bscore = e^0 + + c[row] = score; + + } + + else { + c[row] = NEG_INF; + } + + } break; + + case CT_DELETION: { + + int parentIndex = desc[row].parentIndex; + int order = desc[row].order; + int indelNum = desc[row].indelNumber; + int minIndex = 3 * order; + + int delOrder = order - indelNum; + int EOrder = 2 * order - indelNum; + + // Proceed normally + if (matind > minIndex) { + + // Check to see if the match state can transition from the del + // state + float score = p[row] + tscore[CT_DELETION][CT_MATCH]; + if (score > c[parentIndex]) { + c[parentIndex] = score; + ct[desc[row].order] = row; + + // printf("del[%i for %i:%i,%i,%i,%i] :%f vs %f to p %f (%f, + // %f= (%f with %f)\n", row, nucIndex, minIndex, + // parentIndex, order, indelNum, p[row], score, + // c[parentIndex], d, f, matrix->PreviousScore(parentIndex, + // order), matrix->PreviousScore(parentIndex, order + 1)); + } + + score = p[row]; + // if (nucIndex > 3000000) + // printf("%f\n", score); + // Update changes in match state score + score -= matrix->PreviousScore(parentIndex, order + 1); + score += matrix->PreviousScore(parentIndex, order); + + c[row] = score; // score; + + } + + else if (matind == minIndex) { + + // Get the match state score + float score = matrix->PreviousScore(parentIndex, order); + // float oscore = p[row]; + + // Calculate transition score + score += tscore[CT_MATCH][CT_DELETION]; + score += tscore[CT_DELETION][CT_DELETION] * (float)(indelNum - 1); + score += log2(1.0 - tp_matchToZero) * (float)order; + + c[row] = score; + + } + + else { + c[row] = NEG_INF; + } + + break; + } + + default: + break; + } + } + + matrix->MoveMatrixForward(); + // Right now we don't do anything with CanBeRepetitive +} + /*** UMODEL CLASS MANAGEMENT ***/ diff --git a/src/umodel.hpp b/src/umodel.hpp index 63623e7..4c1444f 100644 --- a/src/umodel.hpp +++ b/src/umodel.hpp @@ -66,6 +66,7 @@ class UModel { void CalculateCurrentColumn(SequenceWindow *seq, int nucIndex, bool *canBeRepetitive = NULL); + void CalculateCurrentColumnWithoutEmission(bool *canBeRepetitive = NULL); // Clas management UModel(int maxPeriod, int maxInsertions, int maxDeletions, int matrixLength); From 9791468101ea55f63e74f799fcf8bc75b2635b5a Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 16:04:58 -0600 Subject: [PATCH 05/13] Prepping update for release. --- src/main.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 4d10262..ba2bce0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -206,8 +206,6 @@ int main_wrapper(int argc, const char * argv[]) { int main(int argc, const char *argv[]) { - return test_lib(argc, argv); - char *reserve_memory = (char *)malloc(65536); try { int r = main_wrapper(argc, argv); From 3a59b976eb7aaec81762becb01d49d51aa74cf71 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 16:12:20 -0600 Subject: [PATCH 06/13] Prepping update for release. --- src/main.cpp | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index ba2bce0..4be1885 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,38 +5,6 @@ #include // for std::bad_alloc -int test_lib(int argc, const char * argv[]) { - printf("Starting test.\n"); - Settings *settings = new Settings(); - - settings->run_without_reader = true; - settings->windows=1; - settings->window_size = 1000000; - settings->overlap = 0; - settings->max_period=25; - settings->max_insert=3; - settings->max_delete=3; - - settings->prepare_settings(); - if (!settings->parse_input(argc, argv)) { - exit(0); - } - settings->assign_settings(); - - printf("%llu + %llu\n", settings->window_size, - settings->overlap); - - auto ultra = new Ultra(settings); - - auto repeats = ultra->FindRepeatsInString("aaaaaaaaaaaaaaaaaaaaaaaaaa"); - printf("%llu\n", repeats->size()); - for (int i= 0; i < repeats->size(); ++i) { - auto r = repeats->at(i); - printf("%lu %lu %i\n", r->windowStart, r->repeatLength, r->repeatPeriod); - } - return 0; -} - int main_wrapper(int argc, const char * argv[]) { // Prepare settings Settings *settings = new Settings(); From c025be5e06687b337e5b193b1befd9ae0b9e2377 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Fri, 2 May 2025 16:15:31 -0600 Subject: [PATCH 07/13] Prepping update for release. --- src/main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index 4be1885..d38adb5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -194,6 +194,5 @@ int main(int argc, const char *argv[]) { // This block catches any other non-standard exceptions std::cerr << "Unknown exception caught!" << std::endl; } - return -1; } From be709fa4b064f7ba20de8507e2c116fe588bbd89 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 5 May 2025 07:05:01 -0600 Subject: [PATCH 08/13] vhangin ubuntu version in github actions --- .github/workflows/build-and-test.yml | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 709c384..7a72e76 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: check-build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest container: image: traviswheelerlab/ultra-build volumes: @@ -12,19 +12,9 @@ jobs: steps: - uses: actions/checkout@v3 - run: cmake . && make - - check-examples: - runs-on: ubuntu-20.04 - container: - image: traviswheelerlab/ultra-build - volumes: - - ${{ github.workspace }}:/code - steps: - - uses: actions/checkout@v3 - - run: cmake . && make examples check-format: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest container: image: traviswheelerlab/ultra-build volumes: From 7fab5063a43237093d0b0cd0188e7d7ec85b701c Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 5 May 2025 07:06:23 -0600 Subject: [PATCH 09/13] changing ubuntu version in github actions and did a tool/run-format.sh --- src/BEDFileWriter.cpp | 8 ++- src/FASTAReader.cpp | 2 - src/FASTAReader.hpp | 8 +-- src/FileReader.cpp | 1 - src/FileReader.hpp | 1 - src/JSONFileWriter.cpp | 8 +-- src/RepeatSplitter.cpp | 1 - src/SequenceWindow.cpp | 5 +- src/SequenceWindow.hpp | 3 +- src/TabFileWriter.cpp | 31 +++++----- src/cli.cpp | 133 ++++++++++++++++++++++------------------- src/cli.hpp | 17 +++--- src/main.cpp | 40 ++++++------- src/repeat.cpp | 1 - src/repeat.hpp | 1 - src/ultra.cpp | 56 +++++++++-------- src/ultra.hpp | 5 +- src/umatrix.cpp | 4 +- src/umatrix.hpp | 6 +- src/umodel.cpp | 5 -- 20 files changed, 165 insertions(+), 171 deletions(-) diff --git a/src/BEDFileWriter.cpp b/src/BEDFileWriter.cpp index 81b7cca..af2ee4b 100644 --- a/src/BEDFileWriter.cpp +++ b/src/BEDFileWriter.cpp @@ -7,7 +7,10 @@ #include "ultra.hpp" #include #include -void BEDFileWriter::InitializeWriter(Ultra *ultra, FILE *out_file) { owner = ultra; out=out_file; } +void BEDFileWriter::InitializeWriter(Ultra *ultra, FILE *out_file) { + owner = ultra; + out = out_file; +} void BEDFileWriter::WriteRepeat(RepeatRegion *repeat) { @@ -35,7 +38,8 @@ void BEDFileWriter::WriteRepeat(RepeatRegion *repeat) { // We need to decide what to do with the overall sequence std::string rep_con = std::to_string(repeat->repeatPeriod); - if (owner->settings->max_consensus_period >= repeat->repeatPeriod && !repeat->string_consensus.empty()) + if (owner->settings->max_consensus_period >= repeat->repeatPeriod && + !repeat->string_consensus.empty()) rep_con = repeat->string_consensus; fprintf(out, "\t%s\n", rep_con.c_str()); diff --git a/src/FASTAReader.cpp b/src/FASTAReader.cpp index 515e041..86ebd30 100644 --- a/src/FASTAReader.cpp +++ b/src/FASTAReader.cpp @@ -130,7 +130,6 @@ bool FASTAReader::ReadWindow(SequenceWindow *window) { window->PrepareWindow(sequenceName, sequenceID, symbolsReadInSeq, overlapLength); - if (overlapLength > 0) window->CopyOverlap(overlapBuffer); @@ -188,7 +187,6 @@ bool FASTAReader::ReadWindow(SequenceWindow *window) { window->readID = readID++; - return true; } diff --git a/src/FASTAReader.hpp b/src/FASTAReader.hpp index 16f2dd9..53a3e78 100644 --- a/src/FASTAReader.hpp +++ b/src/FASTAReader.hpp @@ -45,15 +45,15 @@ class FASTAReader { double C_pctg; double G_pctg; - symbol *overlapBuffer; // = NULL; - unsigned long overlapLength = 0; // = 0; + symbol *overlapBuffer; // = NULL; + unsigned long overlapLength = 0; // = 0; - std::string sequenceName = ""; // = ""; + std::string sequenceName = ""; // = ""; unsigned long sequenceID = 0; // = 0; unsigned long readID = 0; // = 0; // read id's may not be contiguous unsigned long symbolsReadInSeq = 0; // = 0; - bool doneReadingFile; // = false; + bool doneReadingFile; // = false; bool isReading; bool CopyOverlapBufferFromWindow(SequenceWindow *window, diff --git a/src/FileReader.cpp b/src/FileReader.cpp index 3fe8026..58c18af 100644 --- a/src/FileReader.cpp +++ b/src/FileReader.cpp @@ -36,7 +36,6 @@ FileReader::~FileReader() { delete fastaReader; fastaReader = nullptr; } - } SequenceWindow *FileReader::GetReadyWindow() { diff --git a/src/FileReader.hpp b/src/FileReader.hpp index 5a82687..f662fdb 100644 --- a/src/FileReader.hpp +++ b/src/FileReader.hpp @@ -17,7 +17,6 @@ class FileReader { unsigned long maxSeqLength; unsigned long maxOverlapLength; - bool multithread = true; file_type format = UNKNOWN; diff --git a/src/JSONFileWriter.cpp b/src/JSONFileWriter.cpp index ba5b730..0be4623 100644 --- a/src/JSONFileWriter.cpp +++ b/src/JSONFileWriter.cpp @@ -79,8 +79,7 @@ void JSONFileWriter::WriteRepeat(RepeatRegion *repeat) { ++this->repeatsOutput; - fprintf(out, "{\"SequenceName\": \"%s\"", - repeat->sequenceName.c_str()); + fprintf(out, "{\"SequenceName\": \"%s\"", repeat->sequenceName.c_str()); this->OutputJSONKeyValue("Start", std::to_string(repeat->sequenceStart)); this->OutputJSONKeyValue("Length", std::to_string(repeat->repeatLength)); @@ -91,9 +90,10 @@ void JSONFileWriter::WriteRepeat(RepeatRegion *repeat) { this->OutputJSONKeyValue("PVal", std::to_string(pval)); } - if (owner->settings->show_counts) { - auto copies = (repeat->repeatLength - repeat->insertions + repeat->deletions) / repeat->repeatPeriod; + auto copies = + (repeat->repeatLength - repeat->insertions + repeat->deletions) / + repeat->repeatPeriod; this->OutputJSONKeyValue("Copies", std::to_string(copies)); this->OutputJSONKeyValue("Substitutions", std::to_string(repeat->mismatches)); diff --git a/src/RepeatSplitter.cpp b/src/RepeatSplitter.cpp index aa0d687..3830bae 100644 --- a/src/RepeatSplitter.cpp +++ b/src/RepeatSplitter.cpp @@ -423,7 +423,6 @@ void ValidateSplits(std::vector *consensi, join_threshold)) { consensi->at(i) = consensi->at(i - 1); splits->at(i - 1) = -1; - } } } \ No newline at end of file diff --git a/src/SequenceWindow.cpp b/src/SequenceWindow.cpp index ceed41f..5002fb0 100644 --- a/src/SequenceWindow.cpp +++ b/src/SequenceWindow.cpp @@ -32,7 +32,8 @@ void SequenceWindow::PrepareWindow(std::string seqName, unsigned long sid, } } -long long SequenceWindow::ReadLine(std::string line, long long place, unsigned long long &total_seq_length) { +long long SequenceWindow::ReadLine(std::string line, long long place, + unsigned long long &total_seq_length) { // printf ("(mem: %llx overlap:%llx newseq: %llx, length: %llu, place %llu) // Reading line: %s\n", (unsigned long)seqMem, (unsigned long)overlapSeq, // (unsigned long)newSeq, length, place, line.c_str()); @@ -156,7 +157,7 @@ bool CompareSequenceWindows::operator()(SequenceWindow *lhs, } void ShuffleSequenceWindow(SequenceWindow *window) { - std::random_device rd; // a seed source for the random number engine + std::random_device rd; // a seed source for the random number engine std::mt19937 gen(rd()); std::uniform_int_distribution dist; for (unsigned long long i = 0; i < window->length; ++i) { diff --git a/src/SequenceWindow.hpp b/src/SequenceWindow.hpp index 000f378..ec8cb80 100644 --- a/src/SequenceWindow.hpp +++ b/src/SequenceWindow.hpp @@ -44,7 +44,8 @@ class SequenceWindow { // ReadLine() returns how much of line was read // and will returns -1 if the line is a new sequence - long long ReadLine(std::string line, long long place, unsigned long long &total_seq_length); + long long ReadLine(std::string line, long long place, + unsigned long long &total_seq_length); void CopyOverlap(symbol *b); void CalculateSymbolFrequencies(); diff --git a/src/TabFileWriter.cpp b/src/TabFileWriter.cpp index bb71f73..afbfbf1 100644 --- a/src/TabFileWriter.cpp +++ b/src/TabFileWriter.cpp @@ -36,9 +36,7 @@ void TabFileWriter::InitializeWriter(Ultra *ultra, FILE *out_f) { fprintf(out, "\tSequence"); } - fprintf(out, "\n"); - } void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { @@ -48,9 +46,9 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { for (int i = 0; i < name.size(); ++i) { if ((name[i] >= 'a' && name[i] <= 'z') || (name[i] >= 'A' && name[i] <= 'Z') || - (name[i] >= '0' && name[i] <= '9') || - name[i] == '-' || name[i] == '_' || name[i] == '.' || - name[i] == ':' || name[i] == '*' || name[i] == '#') { + (name[i] >= '0' && name[i] <= '9') || name[i] == '-' || + name[i] == '_' || name[i] == '.' || name[i] == ':' || name[i] == '*' || + name[i] == '#') { continue; } @@ -59,14 +57,15 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { break; } - /* else { - name[i] = '_'; - }*/ + /* else { + name[i] = '_'; + }*/ } // Columns 1 (name) 2 (start) 3 (end) 4 (score) fprintf(out, "%s\t%lu\t%lu\t%i\t%f", name.c_str(), repeat->sequenceStart, - repeat->sequenceStart + repeat->repeatLength, repeat->repeatPeriod, repeat->regionScore); + repeat->sequenceStart + repeat->repeatLength, repeat->repeatPeriod, + repeat->regionScore); if (owner->settings->pval) { fprintf(out, ",%g", owner->PvalForScore(repeat->regionScore)); } @@ -82,9 +81,12 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { } if (owner->settings->show_counts) { - auto copies = (repeat->repeatLength - repeat->insertions + repeat->deletions) / repeat->repeatPeriod; + auto copies = + (repeat->repeatLength - repeat->insertions + repeat->deletions) / + repeat->repeatPeriod; - fprintf(out, "\t%lu\t%d\t%d\t%d", copies, repeat->mismatches, repeat->insertions, repeat->deletions); + fprintf(out, "\t%lu\t%d\t%d\t%d", copies, repeat->mismatches, + repeat->insertions, repeat->deletions); } if (owner->settings->max_split >= 0) { @@ -116,7 +118,8 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { std::string con = "."; if (owner->settings->max_consensus_period >= repeat->repeatPeriod) { if (repeat->consensi != nullptr && repeat->consensi->size() > i) { - if (i > 0 && repeat->consensi->at(i) == repeat->consensi->at(i - 1)) + if (i > 0 && + repeat->consensi->at(i) == repeat->consensi->at(i - 1)) continue; con = repeat->consensi->at(i); } @@ -131,8 +134,7 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { sizes.push_back(','); sizes += std::to_string(repeat->repeatLength - cstart); - fprintf(out, "\t%i\t%s", numberOfValidSplits + 1, - starts.c_str()); + fprintf(out, "\t%i\t%s", numberOfValidSplits + 1, starts.c_str()); if (owner->settings->max_consensus_period >= 0) { fprintf(out, "\t%s", consensi.c_str()); } @@ -151,7 +153,6 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { fprintf(out, "\t%s", repeat->sequence.c_str()); } - fprintf(out, "\n"); } diff --git a/src/cli.cpp b/src/cli.cpp index 4eb142a..0ac98a9 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -35,11 +35,11 @@ void Settings::prepare_settings() { app.add_option("-o,--out", this->out_file, "Output file path") ->group("Output"); app.add_flag("--disable_streaming_out", this->disable_streaming_out, - "Disables streaming output; no output will be created until all analysis has been completed") + "Disables streaming output; no output will be created until all " + "analysis has been completed") ->group("Output"); - app.add_flag("-c, --show_counts", - this->show_counts, + app.add_flag("-c, --show_counts", this->show_counts, "Output #copies, #substitutions, #insertions, #deletions") ->group("Output"); @@ -47,24 +47,22 @@ void Settings::prepare_settings() { "Use p-values instead of scores in BED output") ->group("Output"); - app.add_flag("--pval_loc", this->p_value_loc, - "The exponential location used for converting scores to p-values.") + app.add_flag( + "--pval_loc", this->p_value_loc, + "The exponential location used for converting scores to p-values.") ->group("Output"); app.add_flag("--pval_scale", this->p_value_scale, "The exponential scale used for converting scores to p-values") ->group("Output"); - app.add_flag("--tsv", this->ultra_out, - "Use TSV output format") + app.add_flag("--tsv", this->ultra_out, "Use TSV output format") ->group("Output"); - app.add_flag("--json", this->json_out, - "Use JSON output format") + app.add_flag("--json", this->json_out, "Use JSON output format") ->group("Output"); - app.add_flag("--bed", this->bed_out, - "Use BED output format") + app.add_flag("--bed", this->bed_out, "Use BED output format") ->group("Output"); app.add_option("--max_consensus", this->max_consensus_period, @@ -72,8 +70,7 @@ void Settings::prepare_settings() { ->default_val(this->max_consensus_period) ->group("Output"); - app.add_flag("--show_seq", this->show_seq, - "Output repetitive region") + app.add_flag("--show_seq", this->show_seq, "Output repetitive region") ->group("Output"); app.add_flag("--show_delta", this->show_deltas, @@ -107,16 +104,16 @@ void Settings::prepare_settings() { app.add_flag("--hs, --hide_settings", this->hide_settings, "Do not output settings") ->group("Output"); - app.add_flag("--hidesettings", this->hide_settings, - "Do not output settings") + app.add_flag("--hidesettings", this->hide_settings, "Do not output settings") ->group(""); app.add_flag("--suppress", this->suppress_out, "Do not output BED or JSON annotation") ->group("Output"); - app.add_flag("--fdr", this->estimate_fdr, - "Estimate the False Discovery rate (runtime will be twice as long)") + app.add_flag( + "--fdr", this->estimate_fdr, + "Estimate the False Discovery rate (runtime will be twice as long)") ->group("Output"); // ************* @@ -139,8 +136,7 @@ void Settings::prepare_settings() { app.add_option("--win_size", this->window_size, "Manually set sequence window size") ->group("System"); - app.add_option("--winsize", this->window_size) - ->group(""); + app.add_option("--winsize", this->window_size)->group(""); app.add_option("--overlap", this->overlap, "Manually set sequence window overlap size") @@ -185,9 +181,9 @@ void Settings::prepare_settings() { // Tune options // ************* - app.add_flag("--tune", this->tune, - "Tune parameters using a small search grid before running (see README)") + "Tune parameters using a small search grid before running (see " + "README)") ->group("Parameter Tuning"); app.add_flag("--tune_medium", this->tune_medium, @@ -195,7 +191,8 @@ void Settings::prepare_settings() { ->group("Parameter Tuning"); app.add_flag("--tune_large", this->tune_large, - "Tune parameters using a larger search grid before running (see README)") + "Tune parameters using a larger search grid before running (see " + "README)") ->group("Parameter Tuning"); app.add_option("--tune_file", this->tune_param_path, @@ -203,11 +200,9 @@ void Settings::prepare_settings() { ->default_val("") ->group("Parameter Tuning"); - app.add_flag("--tune_indel", this->tune_indels, - "Enable indels while tuning") + app.add_flag("--tune_indel", this->tune_indels, "Enable indels while tuning") ->group("Parameter Tuning"); - app.add_flag("--tune_indels", this->tune_indels, - "Enable indels while tuning") + app.add_flag("--tune_indels", this->tune_indels, "Enable indels while tuning") ->group(""); app.add_option("--tune_fdr", this->tune_fdr, @@ -305,7 +300,8 @@ void Settings::prepare_settings() { ->default_val(this->max_split) ->group(""); - app.add_option("--split_threshold", this->split_threshold, "Split threshold value") + app.add_option("--split_threshold", this->split_threshold, + "Split threshold value") ->default_val(this->split_threshold) ->group("Splitting and Naming"); app.add_option("--splitval", this->split_threshold, "Split threshold value") @@ -351,8 +347,9 @@ bool Settings::parse_input(int argc, const char **argv) { if (strlen(argv[i]) >= 3) { if (argv[i][0] == '-') { if (isalpha(argv[i][1])) { - fprintf(stderr, "Argument '%s' is not allowed (long arguments begin with --, " - "filenames may not begin with -)\n", + fprintf(stderr, + "Argument '%s' is not allowed (long arguments begin with --, " + "filenames may not begin with -)\n", argv[i]); return false; } @@ -374,7 +371,8 @@ bool Settings::parse_input(int argc, const char **argv) { printf("BibTeX: \n" "@article{10.1093/bioadv/vbae149,\n" " author = {Olson, Daniel R and Wheeler, Travis J},\n" - " title = {ULTRA-effective labeling of tandem repeats in genomic sequence},\n" + " title = {ULTRA-effective labeling of tandem repeats in genomic " + "sequence},\n" " journal = {Bioinformatics Advances},\n" " volume = {4},\n" " number = {1},\n" @@ -384,13 +382,16 @@ bool Settings::parse_input(int argc, const char **argv) { " issn = {2635-0041},\n" " doi = {10.1093/bioadv/vbae149},\n" " url = {https://doi.org/10.1093/bioadv/vbae149},\n" - " eprint = {https://academic.oup.com/bioinformaticsadvances/article-pdf/4/1/vbae149/60779841/vbae149.pdf},\n" + " eprint = " + "{https://academic.oup.com/bioinformaticsadvances/article-pdf/4/1/" + "vbae149/60779841/vbae149.pdf},\n" "}\n"); exit(0); } bool passed = true; - if (this->in_file.empty() && !this->show_memory && !this->run_without_reader) { + if (this->in_file.empty() && !this->show_memory && + !this->run_without_reader) { fprintf(stderr, "Input file required.\n"); passed = false; } @@ -413,13 +414,16 @@ bool Settings::parse_input(int argc, const char **argv) { } } - if ((this->ultra_out || this->json_out || this->bed_out) && this->suppress_out) { - fprintf(stderr, "--suppress is incompatible with --tsv, --json, and --bed\n"); + if ((this->ultra_out || this->json_out || this->bed_out) && + this->suppress_out) { + fprintf(stderr, + "--suppress is incompatible with --tsv, --json, and --bed\n"); passed = false; } if (this->mask_file.empty() && this->mask_with_n) { - fprintf(stderr, "--maskn requires an output file path provided to --mask\n"); + fprintf(stderr, + "--maskn requires an output file path provided to --mask\n"); passed = false; } @@ -514,7 +518,8 @@ bool Settings::parse_input(int argc, const char **argv) { passed = false; } - if (this->tune_only || this->tune_medium || this->tune_large || this->tune_indels) { + if (this->tune_only || this->tune_medium || this->tune_large || + this->tune_indels) { this->tune = true; } @@ -523,19 +528,26 @@ bool Settings::parse_input(int argc, const char **argv) { passed = false; } - if (!this->tune_param_path.empty() && (this->tune_medium || this->tune_large)) { - fprintf(stderr, "Cannot use both --tune_file and (--tune_small or --tune_large).\n"); + if (!this->tune_param_path.empty() && + (this->tune_medium || this->tune_large)) { + fprintf( + stderr, + "Cannot use both --tune_file and (--tune_small or --tune_large).\n"); passed = false; } int c = 0; - if (this->ultra_out) c++; - if (this->json_out) c++; - if (this->bed_out) c++; + if (this->ultra_out) + c++; + if (this->json_out) + c++; + if (this->bed_out) + c++; if (c > 1) { if (this->out_file.empty()) { - fprintf(stderr, "Output file path must be provided when using multiple output formats .\n"); + fprintf(stderr, "Output file path must be provided when using multiple " + "output formats .\n"); passed = false; } } @@ -547,7 +559,8 @@ bool Settings::parse_input(int argc, const char **argv) { return passed; } -bool Settings::parse_multi_input(int argc, const char **argv, std::string arg_str) { +bool Settings::parse_multi_input(int argc, const char **argv, + std::string arg_str) { // Create combined arguments int new_argc; char **new_argv; @@ -556,7 +569,7 @@ bool Settings::parse_multi_input(int argc, const char **argv, std::string arg_st auto combined_argc = pair.first; auto combined_argv = pair.second; // Parse the combined arguments - bool result = parse_input(combined_argc, (const char**)combined_argv); + bool result = parse_input(combined_argc, (const char **)combined_argv); // Free the argument memory for (int i = 0; i < new_argc; ++i) { @@ -652,10 +665,9 @@ void Settings::assign_settings() { } // Large models use less than 4 GB per thread - else { + else { this->window_size = 25 * this->max_period; } - } this->a_freq = this->at / 2.0; @@ -808,8 +820,7 @@ std::string Settings::json_string() { } #undef JSONMACRO -std::vector small_tune_settings() -{ +std::vector small_tune_settings() { std::vector settings; @@ -833,8 +844,7 @@ std::vector small_tune_settings() return settings; } -std::vector medium_tune_settings() -{ +std::vector medium_tune_settings() { std::vector settings; @@ -859,13 +869,13 @@ std::vector medium_tune_settings() return settings; } -std::vector large_tune_settings() -{ +std::vector large_tune_settings() { std::vector settings; std::vector match_settings = std::vector{0.6, 0.7, 0.8, 0.9}; - std::vector at_settings = std::vector{0.3, 0.35, 0.4, 0.5, 0.6, 0.65, 0.7}; + std::vector at_settings = + std::vector{0.3, 0.35, 0.4, 0.5, 0.6, 0.65, 0.7}; std::vector repeat_start = std::vector{0.001, 0.005, 0.01}; std::vector repeat_stop = std::vector{0.005, 0.01, 0.05}; @@ -908,8 +918,9 @@ std::vector tune_settings_for_path(std::string path) { string_to_args(line, argc, argv); Settings *test_settings = new Settings(); test_settings->prepare_settings(); - if (!test_settings->parse_input(argc, (const char**)argv)) { - std::cerr << "Invalid arguments on line " << line_num << " in tune file. \"" << line << "\"" << std::endl; + if (!test_settings->parse_input(argc, (const char **)argv)) { + std::cerr << "Invalid arguments on line " << line_num + << " in tune file. \"" << line << "\"" << std::endl; exit(0); } @@ -921,16 +932,13 @@ std::vector tune_settings_for_path(std::string path) { settings.push_back(line); } - } file.close(); return settings; } - - -void string_to_args(const std::string& str, int& argc, char**& argv) { +void string_to_args(const std::string &str, int &argc, char **&argv) { std::istringstream iss(str); std::vector tokens; std::string token; @@ -944,7 +952,7 @@ void string_to_args(const std::string& str, int& argc, char**& argv) { argc = tokens.size(); // Allocate argv - argv = new char*[argc + 1]; + argv = new char *[argc + 1]; // Copy tokens to argv for (int i = 0; i < argc; ++i) { @@ -956,9 +964,10 @@ void string_to_args(const std::string& str, int& argc, char**& argv) { argv[argc] = nullptr; } -std::pair combine_args(int argc1, const char** argv1, int argc2, char** argv2) { +std::pair combine_args(int argc1, const char **argv1, int argc2, + char **argv2) { int combinedArgc = argc1 + argc2; - char** combinedArgv = new char*[combinedArgc + 1]; + char **combinedArgv = new char *[combinedArgc + 1]; for (int i = 0; i < argc1; ++i) { combinedArgv[i] = new char[std::strlen(argv1[i]) + 1]; diff --git a/src/cli.hpp b/src/cli.hpp index 41857e2..e1710b1 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -9,11 +9,10 @@ #define DEBUG_STRING "" #ifdef DEBUG_PRAGMA #undef DEBUG_STRING -#define DEBUG_STRING " **WARNING** BUILT WITHOUT RELEASE OPTIMIZATION **WARNING** \n" +#define DEBUG_STRING \ + " **WARNING** BUILT WITHOUT RELEASE OPTIMIZATION **WARNING** \n" #endif - - #include "../lib/CLI11.hpp" #include #include @@ -42,7 +41,6 @@ struct Settings { bool bed_out = false; bool json_out = false; - bool show_counts = false; bool show_seq = false; bool show_deltas = false; @@ -117,9 +115,8 @@ struct Settings { "=================================================\n" "(U)ltra (L)ocates (T)andemly (R)epetitive (A)reas\n" " Daniel R. Olson and Travis J. Wheeler\n" - " Version " ULTRA_VERSION_STRING "\n" - DEBUG_STRING - " Use '--cite' for citation instructions\n" + " Version " ULTRA_VERSION_STRING + "\n" DEBUG_STRING " Use '--cite' for citation instructions\n" "=================================================\n"}; void prepare_settings(); @@ -132,13 +129,13 @@ struct Settings { std::string json_string(); }; - std::vector small_tune_settings(); std::vector medium_tune_settings(); std::vector large_tune_settings(); std::vector tune_settings_for_path(std::string path); -void string_to_args(const std::string& str, int& argc, char**& argv); -std::pair combine_args(int argc1, const char** argv1, int argc2, char** argv2); +void string_to_args(const std::string &str, int &argc, char **&argv); +std::pair combine_args(int argc1, const char **argv1, int argc2, + char **argv2); #endif // ULTRA_CLI_HPP diff --git a/src/main.cpp b/src/main.cpp index d38adb5..a2c7a61 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,11 +1,10 @@ #include "cli.hpp" #include "mask.hpp" #include "ultra.hpp" +#include // for std::bad_alloc #include -#include // for std::bad_alloc - -int main_wrapper(int argc, const char * argv[]) { +int main_wrapper(int argc, const char *argv[]) { // Prepare settings Settings *settings = new Settings(); settings->prepare_settings(); @@ -34,7 +33,7 @@ int main_wrapper(int argc, const char * argv[]) { param_strings = large_tune_settings(); else param_strings = small_tune_settings(); - }else { + } else { param_strings = tune_settings_for_path(settings->tune_param_path); } @@ -102,26 +101,27 @@ int main_wrapper(int argc, const char * argv[]) { } } - printf("(%zu/%zu): %.5f, %.5f, %s\n",(coverage.size()), - param_strings.size(), - real_coverage, fdr, arg_string.c_str()); + printf("(%zu/%zu): %.5f, %.5f, %s\n", (coverage.size()), + param_strings.size(), real_coverage, fdr, arg_string.c_str()); } printf("-----------\n"); if (best_coverage_index >= 0) { - double real_coverage = (double)coverage[best_coverage_index] / (double)seq_length; - double false_coverage = (double)shuffled_coverage[best_coverage_index] / (double)seq_length; + double real_coverage = + (double)coverage[best_coverage_index] / (double)seq_length; + double false_coverage = + (double)shuffled_coverage[best_coverage_index] / (double)seq_length; double fdr = false_coverage / real_coverage; printf("Best coverage within FDR limit: %.5f, %.5f, %s\n", real_coverage, - fdr, - param_strings[best_coverage_index].c_str()); + fdr, param_strings[best_coverage_index].c_str()); delete settings; settings = new Settings(); settings->prepare_settings(); settings->set_multi_option(); - if (!settings->parse_multi_input(argc, argv, param_strings[best_coverage_index])) { + if (!settings->parse_multi_input(argc, argv, + param_strings[best_coverage_index])) { exit(0); } settings->assign_settings(); @@ -172,25 +172,23 @@ int main_wrapper(int argc, const char * argv[]) { return 0; } - int main(int argc, const char *argv[]) { char *reserve_memory = (char *)malloc(65536); try { int r = main_wrapper(argc, argv); return r; - } - catch (const std::bad_alloc& e) { + } catch (const std::bad_alloc &e) { // This block is executed if memory allocation fails free(reserve_memory); // May be necessary in order to print std::cerr << "Memory allocation failed: " << e.what() << std::endl; - std::cerr << "Your model may be too large to fit in memory" << std::endl; - std::cerr << "Try running: ultra --mem to see expected memory usage" << std::endl; - } - catch (const std::exception& e) { + std::cerr << "Your model may be too large to fit in memory" << std::endl; + std::cerr << "Try running: ultra --mem to see expected " + "memory usage" + << std::endl; + } catch (const std::exception &e) { // This block is executed for any other standard exceptions std::cerr << "Standard exception caught: " << e.what() << std::endl; - } - catch (...) { + } catch (...) { // This block catches any other non-standard exceptions std::cerr << "Unknown exception caught!" << std::endl; } diff --git a/src/repeat.cpp b/src/repeat.cpp index b41c6a6..e1338c0 100644 --- a/src/repeat.cpp +++ b/src/repeat.cpp @@ -692,7 +692,6 @@ RepeatRegion *GetNextRepeat(SequenceWindow *window, UMatrix *matrix, int *pos) { int length = region->repeatPeriod; - // We use a <= here because we calculate one additional column for (; i <= seqLength; ++i) { if (matrix->traceback[i] != 0) { diff --git a/src/repeat.hpp b/src/repeat.hpp index 49c221b..7b22f1c 100644 --- a/src/repeat.hpp +++ b/src/repeat.hpp @@ -65,7 +65,6 @@ class RepeatRegion { int overlapCorrection; - void CreateLogo(SequenceWindow *window, UMatrix *matrix); void CreateLogoWithoutMatrix(); // Requires traceback + sequence + lookBack diff --git a/src/ultra.cpp b/src/ultra.cpp index 94ff15b..335083b 100644 --- a/src/ultra.cpp +++ b/src/ultra.cpp @@ -82,7 +82,6 @@ SequenceWindow *Ultra::GetSequenceWindow(SequenceWindow *seq, uthread *uth) { if (shouldRead) { reader->FillWindows(); reader->SetIsReading(false); - } retval = reader->GetReadyWindow(); @@ -100,7 +99,7 @@ int Ultra::SmallestReadID() { int smallest = 100000000; for (int i = 0; i < threads.size(); ++i) { - //printf("%i: %i\n", i, threads[i]->smallestReadID); + // printf("%i: %i\n", i, threads[i]->smallestReadID); if (threads[i]->smallestReadID < smallest) { smallest = threads[i]->smallestReadID; } @@ -151,13 +150,14 @@ double Ultra::PvalForScore(float score) const { return exp(-1.0 * (score - loc) / scale) * freq; } -std::vector* Ultra::FindRepeatsInString(const std::string &seq) { +std::vector * +Ultra::FindRepeatsInString(const std::string &seq) { // Make sure that the seq window can fit in the DP matrix uthread *uth = this->threads[0]; if (seq.length() > uth->model->matrix->length) { - fprintf(stderr, "ULTRA model has maximum size %llu but string has length %zu\n", - uth->model->matrix->length, - seq.length()); + fprintf(stderr, + "ULTRA model has maximum size %llu but string has length %zu\n", + uth->model->matrix->length, seq.length()); return nullptr; } @@ -202,7 +202,6 @@ std::vector* Ultra::FindRepeatsInString(const std::string &seq) void Ultra::AnalyzeSequenceWindow(SequenceWindow *sequence, uthread *uth) { - int sleng = (int)sequence->length + (int)sequence->overlap; uth->activeReadID = sequence->readID; @@ -264,7 +263,8 @@ void Ultra::AnalyzeSequenceWindow(SequenceWindow *sequence, uthread *uth) { uth->repeats.clear(); uth->smallestReadID = uth->activeReadID; - if (outRepeats.size() > repeatBuffer && !this->settings->disable_streaming_out) { + if (outRepeats.size() > repeatBuffer && + !this->settings->disable_streaming_out) { OutputRepeats(); } @@ -292,8 +292,6 @@ void Ultra::OutputRepeats(bool flush) { maxReadID = 100000000; } - - SortRepeatRegions(); while (!outRepeats.empty()) { @@ -396,9 +394,7 @@ void Ultra::SortRepeatRegions() { std::sort(outRepeats.begin(), outRepeats.end(), CompareRepeatOrder()); } -unsigned long long Ultra::Coverage() { - return total_coverage; -} +unsigned long long Ultra::Coverage() { return total_coverage; } Ultra::Ultra(Settings *s) { settings = s; @@ -408,15 +404,19 @@ Ultra::Ultra(Settings *s) { if (!settings->out_file.empty()) { int c = 0; - if (settings->ultra_out) c++; - if (settings->json_out) c++; - if (settings->bed_out) c++; + if (settings->ultra_out) + c++; + if (settings->json_out) + c++; + if (settings->bed_out) + c++; if (c > 1) { if (settings->ultra_out) { std::string ultra_path = settings->out_file + ".tsv"; FILE *out = fopen(ultra_path.c_str(), "w"); if (out == NULL) { - fprintf(stderr, "Unable to open output file %s\n", ultra_path.c_str()); + fprintf(stderr, "Unable to open output file %s\n", + ultra_path.c_str()); exit(-1); } outs.push_back(out); @@ -449,7 +449,8 @@ Ultra::Ultra(Settings *s) { else { FILE *out = fopen(settings->out_file.c_str(), "w"); if (out == NULL) { - fprintf(stderr, "Unable to open output file %s\n", settings->out_file.c_str()); + fprintf(stderr, "Unable to open output file %s\n", + settings->out_file.c_str()); exit(-1); } outs.push_back(out); @@ -459,19 +460,19 @@ Ultra::Ultra(Settings *s) { writers.push_back(new JSONFileWriter()); else if (settings->bed_out) writers.push_back(new BEDFileWriter()); - } std::string settings_file_path = settings->out_file + ".settings"; if (!settings->hide_settings) { settings_out = fopen(settings_file_path.c_str(), "w"); if (settings_out == NULL) { - fprintf(stderr, "Unable to open settings output file %s\n", settings_file_path.c_str()); + fprintf(stderr, "Unable to open settings output file %s\n", + settings_file_path.c_str()); exit(-1); } } - }else { + } else { if (settings->ultra_out) writers.push_back(new TabFileWriter()); else if (settings->json_out) @@ -481,8 +482,6 @@ Ultra::Ultra(Settings *s) { outs.push_back(stdout); } - - numberOfThreads = settings->threads; scoreThreshold = settings->min_score; outputReadID = settings->show_wid; @@ -582,18 +581,18 @@ Ultra::~Ultra() { } outs.clear(); - for (auto& pair : masks_for_seq) { - delete pair.second; // pair.second is a std::vector * + for (auto &pair : masks_for_seq) { + delete pair.second; // pair.second is a std::vector * } masks_for_seq.clear(); for (auto val : outRepeats) { - delete val; // pair.second is a std::vector * + delete val; // pair.second is a std::vector * } outRepeats.clear(); for (auto val : models) { - delete val; // pair.second is a std::vector * + delete val; // pair.second is a std::vector * } models.clear(); @@ -603,7 +602,7 @@ Ultra::~Ultra() { } val->splitter->DeallocSplitWindow(); delete val->splitter; - delete val; // pair.second is a std::vector * + delete val; // pair.second is a std::vector * } threads.clear(); } @@ -615,4 +614,3 @@ bool CompareRepeatOrder::operator()(RepeatRegion *lhs, RepeatRegion *rhs) { return lhs->sequenceStart > rhs->sequenceStart; } - diff --git a/src/ultra.hpp b/src/ultra.hpp index 5e44c72..fa924c2 100644 --- a/src/ultra.hpp +++ b/src/ultra.hpp @@ -13,12 +13,12 @@ #include #include "BEDFileWriter.hpp" -#include "TabFileWriter.hpp" #include "FASTAReader.hpp" #include "FileReader.hpp" #include "JSONFileWriter.hpp" #include "RepeatFileWriter.hpp" #include "RepeatSplitter.hpp" +#include "TabFileWriter.hpp" #include "cli.hpp" #include "mask.hpp" #include "repeat.hpp" @@ -74,7 +74,6 @@ class Ultra { bool storeTraceAndSequence = false; - std::unordered_map *> masks_for_seq{}; std::vector outRepeats{}; @@ -93,7 +92,7 @@ class Ultra { void OutputRepeats(bool flush = false); void OutputRepeat(RepeatRegion *r, bool isSubRep = false); - std::vector* FindRepeatsInString(const std::string &s); + std::vector *FindRepeatsInString(const std::string &s); void OutputULTRASettings(); void InitializeWriter(); diff --git a/src/umatrix.cpp b/src/umatrix.cpp index 59a5c50..2bc70c3 100644 --- a/src/umatrix.cpp +++ b/src/umatrix.cpp @@ -21,7 +21,6 @@ bool UMatrix::MoveMatrixForward() { previousColumnIndex = currentColumnIndex; ++currentColumnIndex; - // Check to see if we need to wrap around /*if (currentColumnIndex >= length) { currentColumnIndex = 0; @@ -35,7 +34,6 @@ bool UMatrix::MoveMatrixForward() { currentTracebackColumn = tracebackColumns[currentColumnIndex]; previousTracebackColumn = tracebackColumns[previousColumnIndex]; - return wrap; } @@ -418,7 +416,7 @@ void UMatrix::CreateMatrix() { scoreColumns[i] = &(scoreMatrix[(i * (cellsPerColumn))]); tracebackColumns[i] = &(tracebackMatrix[(i * (maxPeriod + 1))]); } - + previousScoreColumn = scoreColumns[0]; currentScoreColumn = scoreColumns[1]; diff --git a/src/umatrix.hpp b/src/umatrix.hpp index 12ed423..ba76547 100644 --- a/src/umatrix.hpp +++ b/src/umatrix.hpp @@ -45,9 +45,9 @@ typedef struct { class UMatrix { public: // Class variables - unsigned long long maxPeriod; // Max period is included - unsigned long long maxInsertions; // Max insertion is included - unsigned long long maxDeletions; // Max deletion is included + unsigned long long maxPeriod; // Max period is included + unsigned long long maxInsertions; // Max insertion is included + unsigned long long maxDeletions; // Max deletion is included unsigned long long length; // Number of columns in matrix unsigned long long cellsPerColumn; // Number of rows in matrix diff --git a/src/umodel.cpp b/src/umodel.cpp index d56d39f..5630600 100644 --- a/src/umodel.cpp +++ b/src/umodel.cpp @@ -90,7 +90,6 @@ void UModel::CalculateScores() { bscore[i] = log2(backgroundProbabilties[i]); } - } // This does not check if index - d > 0 @@ -229,7 +228,6 @@ void UModel::CalculateCurrentColumn(SequenceWindow *sequence, int nucIndex, if (score > c[parentIndex]) { c[parentIndex] = score; ct[desc[row].order] = row; - } // Calculate new score based off of previous score @@ -492,7 +490,6 @@ void UModel::CalculateCurrentColumnWithoutEmission(bool *canBeRepetitive) { if (score > c[parentIndex]) { c[parentIndex] = score; ct[desc[row].order] = row; - } // Calculate new score based off of previous score @@ -502,7 +499,6 @@ void UModel::CalculateCurrentColumnWithoutEmission(bool *canBeRepetitive) { score -= matrix->PreviousScore(parentIndex, peridocity + 1); score += matrix->PreviousScore(parentIndex, peridocity); - // Update insertion values // There is effectively no score for insertions @@ -609,7 +605,6 @@ void UModel::CalculateCurrentColumnWithoutEmission(bool *canBeRepetitive) { /*** UMODEL CLASS MANAGEMENT ***/ - UModel::UModel(int maxPeriod, int maxInsertions, int maxDeletions, int matrixLength) { From 15d7dcddb43f015fa10076933224d79de2cc6c6d Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 5 May 2025 08:09:33 -0600 Subject: [PATCH 10/13] Changing how the library is compiled and adding an example of using the library. --- CMakeLists.txt | 61 +++++++++++++++++++++---------- examples/Makefile | 33 +++++++++++++++++ examples/library_example.cpp | 70 ++++++++++++++++++++++++++++++++++++ examples/run-all.sh | 18 ---------- lib/README.md | 6 ---- {lib => src}/CLI11.hpp | 0 src/cli.hpp | 2 +- 7 files changed, 147 insertions(+), 43 deletions(-) create mode 100644 examples/Makefile create mode 100644 examples/library_example.cpp delete mode 100755 examples/run-all.sh delete mode 100644 lib/README.md rename {lib => src}/CLI11.hpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b8e048..446b9c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,25 +4,25 @@ project( HOMEPAGE_URL https://github.com/TravisWheelerLab/ULTRA ) +# By default we do NOT build the standalone ULTRA library +option(BUILD_ULTRA_LIB "Also build ULTRA as a standalone library" OFF) +# Default to Release if no build type is set if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") message(STATUS "Setting CMAKE_BUILD_TYPE=Release") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) endif() -if(NOT CMAKE_BUILD_TYPE STREQUAL "Release") +# Add a debug flag when not in Release +if (NOT CMAKE_BUILD_TYPE STREQUAL "Release") add_definitions(-DDEBUG_PRAGMA=1) endif() set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set( - LIB_CPP_FILES -) -set( - HPP_FILES - +# Header files (for installation / PUBLIC_HEADER) +set(HPP_FILES src/FASTAReader.hpp src/FileReader.hpp src/SequenceWindow.hpp @@ -39,10 +39,10 @@ set( src/RepeatSplitter.hpp src/cli.hpp src/mask.hpp -) -set( - CPP_FILES + ) +# All source files +set(CPP_FILES src/FASTAReader.cpp src/FileReader.cpp src/SequenceWindow.cpp @@ -63,18 +63,43 @@ set( find_package(Threads REQUIRED) -add_executable( - ultra +# LIB_SOURCES = everything except main.cpp +set(LIB_SOURCES ${CPP_FILES}) +list(REMOVE_ITEM LIB_SOURCES src/main.cpp) - ${LIB_CPP_FILES} - ${CPP_FILES} -) -target_link_libraries(ultra PRIVATE Threads::Threads) +# 1) Build static library only if requested +if (BUILD_ULTRA_LIB) + add_library(ultra_core STATIC ${LIB_SOURCES}) + target_include_directories(ultra_core + PUBLIC ${CMAKE_SOURCE_DIR}/src + ) + target_link_libraries(ultra_core + PUBLIC Threads::Threads + ) + + install( + TARGETS ultra_core + ARCHIVE DESTINATION lib + PUBLIC_HEADER DESTINATION include + ) +endif() + +# 2) Build the CLI executable +if (BUILD_ULTRA_LIB) + # Link only main.cpp against ultra_core + add_executable(ultra src/main.cpp) + target_link_libraries(ultra PRIVATE ultra_core) +else() + # Monolithic: compile everything into the exe + add_executable(ultra ${CPP_FILES}) + target_link_libraries(ultra PRIVATE Threads::Threads) +endif() -target_include_directories(ultra SYSTEM PRIVATE ${CMAKE_SOURCE_DIR}/lib) +target_include_directories(ultra PRIVATE ${CMAKE_SOURCE_DIR}/src) -install(TARGETS ultra RUNTIME) +install(TARGETS ultra RUNTIME DESTINATION bin) +# Optional examples target add_custom_target( examples COMMAND examples/run-all.sh diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..8f93a82 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,33 @@ +# examples/Makefile — build the ULTRA example program + +# C++ compiler and flags +CXX := g++ +CXXFLAGS := -std=c++11 -Wall -I../src + +# Path to the ULTRA static library +ULTRA_LIB := ../build/libultra_core.a + +# Libraries to link against +LDLIBS := $(ULTRA_LIB) -pthread + +# Your example source & target +SRC := library_example.cpp +OBJ := $(SRC:.cpp=.o) +TARGET := library_example + +# Default target: build the example executable +all: $(TARGET) + +# Link step +$(TARGET): $(OBJ) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDLIBS) + +# Compile step +%.o: %.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + +# Clean up build artifacts +clean: + rm -f $(OBJ) $(TARGET) + +.PHONY: all clean diff --git a/examples/library_example.cpp b/examples/library_example.cpp new file mode 100644 index 0000000..f2f1950 --- /dev/null +++ b/examples/library_example.cpp @@ -0,0 +1,70 @@ +// +// Created by Daniel Olson on 5/5/25. +// +#include +#include +#include + +int main() { + printf("Note that the Makefile is currently linking against ULTRA/build/libultra_core.a\n"); + printf(" and is using ULTRA/src/ as the source of ULTRA header files.\n"); + printf("The location of the ULTRA lib and header files will need to be adjusted in real use-cases.\n\n\n"); + + printf("Starting test of library.\n"); + + + // These are the settings we will use with ULTRA + Settings *settings = new Settings(); + settings->run_without_reader = true; // This must be set to true when using as lib + settings->window_size = 1000000; // window size should be equal to the largest sequence you will analyze + settings->overlap = 0; // overlap should be turned off + + + // We can also pass in an argument string to settings like so: + int argc; + char **argv; + std::string arg_string = "-p 25 -i 3 -d 3"; + string_to_args(arg_string, argc, argv); + + settings->prepare_settings(); // This should be called before running settings->parse_input() + if (!settings->parse_input(argc, (const char**)argv)) { + exit(0); + } + + // Finally, we call settings->assign_settings() + settings->assign_settings(); + + // We create a reusable ULTRA object + auto ultra = new Ultra(settings); + // We can find repeats in a C++ string using "FindRepeatsInString(std::string)" + + printf("Finding repeats in string ``aaaaaaaaaaaaaaaaaaaaaaaaaa''\n"); + auto repeats = ultra->FindRepeatsInString("aaaaaaaaaaaaaaaaaaaaaaaaaa"); + + printf("Start Length Pattern\n"); + for (int i= 0; i < repeats->size(); ++i) { + auto r = repeats->at(i); + printf("%lu %lu %i %s\n", r->windowStart, r->repeatLength, r->repeatPeriod, r->GetConsensus().c_str()); + delete r; // We are in charge of the memory management of the repeats returned by FindRepeatsInString + } + + delete repeats; // We are also in charge of the memory for the repeat array itself + printf("---------\n"); + + + printf("Finding repeats in string ``aggtaaggtaaggtaaggtaaggtaagcggtataacatacagatctgactactactactactactactactactac''\n"); + repeats = ultra->FindRepeatsInString("aggtaaggtaaggtaaggtaaggtaagcggtataacatacagatctgactactactactactactactactactac"); + printf("Start Length Pattern\n"); + for (int i= 0; i < repeats->size(); ++i) { + auto r = repeats->at(i); + printf("%lu %lu %i %s\n", r->windowStart, r->repeatLength, r->repeatPeriod, r->GetConsensus().c_str()); + delete r; + } + + delete repeats; + printf("---------\n"); + + printf("All positions reported are 0 indexed.\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/run-all.sh b/examples/run-all.sh deleted file mode 100755 index 5e7da00..0000000 --- a/examples/run-all.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env sh - -set -e - -run_one() { - EX_PATH="$1" - EX_LINE=$(echo "$EX_PATH" | tr -C "\n" "-") - - echo "+-$EX_LINE-+" - echo "| $EX_PATH |" - echo "+-$EX_LINE-+" - echo "" - ./ultra "$EX_PATH" - echo "" -} - -run_one examples/example_1.fa -run_one examples/example_2.fa diff --git a/lib/README.md b/lib/README.md deleted file mode 100644 index 8c4ae04..0000000 --- a/lib/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# External Dependencies - -This is where vendored dependencies live. They may have separate licenses, see -the source files themselves. - - - `json11` (MIT) - diff --git a/lib/CLI11.hpp b/src/CLI11.hpp similarity index 100% rename from lib/CLI11.hpp rename to src/CLI11.hpp diff --git a/src/cli.hpp b/src/cli.hpp index e1710b1..e924cda 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -13,7 +13,7 @@ " **WARNING** BUILT WITHOUT RELEASE OPTIMIZATION **WARNING** \n" #endif -#include "../lib/CLI11.hpp" +#include "CLI11.hpp" #include #include From 1a2adb4166d129527c5ad39ca46469dcc3a5f7bf Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 5 May 2025 08:14:50 -0600 Subject: [PATCH 11/13] Adding back in the examples check to the github actions --- .github/workflows/build-and-test.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 7a72e76..9e263b9 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -12,7 +12,17 @@ jobs: steps: - uses: actions/checkout@v3 - run: cmake . && make - + + check-examples: + runs-on: ubuntu-latest + container: + image: traviswheelerlab/ultra-build + volumes: + - ${{ github.workspace }}:/code + steps: + - uses: actions/checkout@v3 + - run: cmake . && make examples + check-format: runs-on: ubuntu-latest container: From 05fa54deed3416c71cd759c84e3be8f86e2520e3 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 5 May 2025 08:19:22 -0600 Subject: [PATCH 12/13] Updating run-all examples code --- examples/run-all.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/run-all.sh diff --git a/examples/run-all.sh b/examples/run-all.sh new file mode 100644 index 0000000..c8dc878 --- /dev/null +++ b/examples/run-all.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env sh + +set -e + +run_one() { + EX_PATH="$1" + EX_LINE=$(echo "$EX_PATH" | tr -C "\n" "-") + + echo "+-$EX_LINE-+" + echo "| $EX_PATH |" + echo "+-$EX_LINE-+" + echo "" + ./ultra "$EX_PATH" + echo "" +} + +run_one examples/example_1.fa +run_one examples/example_2.fa +run_one examples/example_3.fa \ No newline at end of file From 1ba4d710780203e57411d54fda38f06e1567dda5 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 5 May 2025 08:25:34 -0600 Subject: [PATCH 13/13] Updating run-all examples code --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 446b9c2..f493aa6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,7 +102,7 @@ install(TARGETS ultra RUNTIME DESTINATION bin) # Optional examples target add_custom_target( examples - COMMAND examples/run-all.sh + COMMAND /usr/bin/env sh examples/run-all.sh VERBATIM ) add_dependencies(examples ultra)