@@ -6,7 +6,7 @@ All main functionality is given by the function [`random_sequence`](@ref).
66"""
77module MotifSequenceGenerator
88
9- using Combinatorics, Random
9+ using Combinatorics, Random, StatsBase
1010
1111export random_sequence, all_possible_sums
1212
@@ -23,11 +23,12 @@ Base.showerror(io::IO, e::DeadEndMotifs) = print(io,
2323The algorithm works as follows: First a random sequence of motifs is created,
2424so that it has length of `q - δq ≤ ℓ ≤ q - δq + maximum(motiflengths)`.
2525The possible tries of random sequences is set by the `tries` keyword (default `5`).
26+ The sequence is optionally sampled given a probability vector.
2627
27- For each random try, it is first check whether the sequence is already correct.
28+ For each random try, it is first checked whether the sequence is already correct.
2829If not, the last entry of the sequence is dropped. Then, since the sequence is now
2930already smaller than `q`, all possible sums of `summands` out of the motif pool
30- are checked. If some combination of `summands` sums to exactly the difference,
31+ are checked. If some combination of `summands` sums to the difference,
3132they are added to the sequence.
3233For multiple satisfactory combinations, a random one is picked.
3334
@@ -50,7 +51,9 @@ a proper one, an error is thrown.
5051Create a random sequence of motifs of type `M`, under the constraint that the
5152sequence has "length" `ℓ` **exactly** within `q - δq ≤ ℓ ≤ q + δq`.
5253Return the sequence itself as well as the
53- sequence of indices of `motifs` used to create it.
54+ sequence of indices of `motifs` used to create it. A vector of probabilities `weights`
55+ can be given as a keyword argument, which then dictates the sampling probability
56+ for each entry of `motifs` for the initial sequence created.
5457
5558"length" here means an abstracted length defined by the struct `M`,
5659based on the `limits` and `translate` functions.
@@ -64,7 +67,7 @@ It does **not** refer to the amount of elements!
6467 motif which is translated by `t` (either negative or positive), with
6568 respect to the same units as `q`.
6669
67- ## Keywords
70+ ## Other Keywords
6871Please see the source code (use `@which`) for a full description of the algorithm.
6972
7073* `tries = 5` : Up to how many initial random sequences are accepted.
@@ -74,7 +77,10 @@ Please see the source code (use `@which`) for a full description of the algorith
7477"""
7578function random_sequence (motifs:: Vector{M} , q,
7679 limits, translate, δq = 0 ;
77- tries = 5 , summands = 3 , tailcut = 2 ) where {M}
80+ tries = 5 , summands = 3 , tailcut = 2 ,
81+ weights = ones (length (motifs))) where {M}
82+
83+ ws = _toweight (weights)
7884
7985 idxs = 1 : length (motifs)
8086 motifs0, motiflens = _motifs_at_origin (motifs, limits, translate)
@@ -94,7 +100,7 @@ function random_sequence(motifs::Vector{M}, q,
94100 while worked == false
95101 count > tries && throw (DeadEndMotifs (tries, summands, tailcut))
96102
97- seq, seq_length = _random_sequence_try (motiflens, q, δq)
103+ seq, seq_length = _random_sequence_try (motiflens, q, δq, ws )
98104
99105 worked = _complete_sequence! (seq, motiflens, q, δq, summands, tailcut)
100106
@@ -104,6 +110,8 @@ function random_sequence(motifs::Vector{M}, q,
104110 return _instantiate_sequence (motifs0, motiflens, seq, translate), seq
105111end
106112
113+ _toweight (a) = (s = sum (a); ProbabilityWeights (a./ s, 1 ))
114+
107115"""
108116 _motifs_at_origin(motifs, limits, translate) -> (motifs0, motiflens)
109117Bring all motifs to the origin and compute the motif lengths.
@@ -121,15 +129,15 @@ function _motifs_at_origin(motifs::Vector{M}, limits, translate) where M
121129end
122130
123131"""
124- _random_sequence_try(motiflens, q) -> seq, seq_length
132+ _random_sequence_try(motiflens, q, δq [, ws] ) -> seq, seq_length
125133Return a random sequence of motif indices
126134so that the total sequence is *guaranteed* to have total length of
127135`q - δq ≤ ℓ ≤ q - δq + maximum(motiflens)`.
128136"""
129- function _random_sequence_try (motiflens, q, δq)
137+ function _random_sequence_try (motiflens, q, δq, ws = defaultweights (motiflens) )
130138 seq = Int[]; seq_length = 0 ; idxs = 1 : length (motiflens)
131139 while seq_length < q - δq
132- i = rand (idxs)
140+ i = sample (idxs, ws )
133141 push! (seq, i)
134142 seq_length += motiflens[i]
135143 end
@@ -174,13 +182,6 @@ function _complete_sequence_remainder!(seq, motiflens, q, δq, summands, tailcut
174182 pop! (seq)
175183 isempty (seq) && return false
176184
177- # I Think the following if is unecessary?...
178- # if q - δq - sum(motiflens[k] for k in seq) < 0
179- # ok = _complete_sequence_extra!(seq, motiflens, q, δq)
180- # isempty(seq) && return false
181- # ok && return true
182- # end
183-
184185 # At this point ℓ is guaranteed less than q - δq
185186 remainder = q - δq - sum (motiflens[k] for k in seq)
186187 @assert remainder > 0
0 commit comments