1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // The class SeedSet. ScoringScheme and related tags are defined in
35 // seeds_scoring_scheme.h
36 // ==========================================================================
37
38 #ifndef SEQAN_SEEDS_SEEDS_SEED_SET_BASE_H_
39 #define SEQAN_SEEDS_SEEDS_SEED_SET_BASE_H_
40
41 namespace seqan {
42
43 // ===========================================================================
44 // Forwards
45 // ===========================================================================
46
47 // TODO(holtgrew): Add DiagonalSorted as default specialization.
48
49 struct Unordered_;
50 typedef Tag<Unordered_> Unordered;
51
52 // ===========================================================================
53 // Enums, Tags, Classes, Specializations
54 // ===========================================================================
55
56 // ---------------------------------------------------------------------------
57 // Class SeedSet
58 // ---------------------------------------------------------------------------
59
60 /*!
61 * @class SeedSet
62 * @headerfile <seqan/seeds.h>
63 * @implements ContainerConcept
64 * @brief Handles a set of seeds with local chaining on adding seeds.
65 * @note At the moment only <tt>Unordered SeedSets</tt> are supported.
66 *
67 * @signature template <typename TSeed[, typename TSpec]>
68 * class SeedSet;
69 *
70 * @tparam TSeed Type of the @link Seed @endlink objects stored in the seed set.
71 * @tparam TSpec Optional tag for seed set specialization. Defaults to <tt>Unordered</tt>.
72 */
73
74 // ..param.TScored:Either UnScored or a seed set scoring scheme specification.
75 // ..param.TSpec:Specialization of the seed set.
76 // ..param.TSeedConfig:Configuration for the seeds. Sensible defaults are chosen based on the other template parameters.
77
78 template <typename TSeed, typename TSpec = Unordered>
79 class SeedSet;
80
81 // ===========================================================================
82 // Metafunctions
83 // ===========================================================================
84
85 // ===========================================================================
86 // Functions
87 // ===========================================================================
88
89 // Basic Container Functions
90
91 // TODO(holtgrew): dddoc {begin,end,length,front,back}All(T)
92
93 // SeedSet Functions
94
95 /*!
96 * @fn SeedSet#addSeed
97 *
98 * @headerfile <seqan/seeds.h>
99 *
100 * @brief Adds a seed to an existing @link SeedSet @endlink using different
101 * algorithms for local chaining.
102 *
103 * @signature bool addSeed(seedSet, seed, distance, bandwidth, score, seqH, seqV, tag);
104 * @signature bool addSeed(seedSet, seed, distance, score, SimpleChain);
105 * @signature bool addSeed(seedSet, seed, distance, Merge);
106 * @signature bool addSeed(seedSet, seed, Single);
107 *
108 * @param[in,out] seedSet The SeedSet to add the seed to.
109 * @param[in] seed The seed to be added.
110 * @param[in] distance The maximal distance between the end point of the upper left and the begin point of the
111 * lower right @link Seed @endlink allowed for local chaining. NB: only Chaos, SimpleChain
112 * and Merge require the distance information.
113 * @param[in] bandwidth The window size to search for a chainable @link Seed @endlink. Note, only <tt>Chaos</tt>
114 * requires the bandwidth information.
115 * @param[in] score The scoring scheme.Note, only Chaos and SimpleChain require the score.
116 * Type: @link SimpleScore @endlink.
117 * @param[in] seqH Database sequence (horizontal). Only required for Chaos Chaining. Types: ContainerConcept.
118 * @param[in] seqV Query sequence (vertical). Only required for Chaos Chaining. Types: ContainerConcept.
119 * @param[in] tag Select the algorithm that is used to add the new seed. Note that not every algorithm can
120 * be used with each type of @link Seed @endlink. See special signatures above. The seed is
121 * copied and then added.
122 *
123 * @return bool <tt>true</tt> if successful. Adding can fail if no appropriate seed is there for chaining or merging.
124 * Adding using <tt>Single</tt> ever fails.
125 *
126 * @section Examples
127 *
128 * @include demos/seeds/seeds_add_seed.cpp
129 *
130 * The output is as follows:
131 *
132 * @code{.console}
133 * Single Method:
134 * Seed: Seed<Simple, TConfig>(4, 5, 8, 9, lower diag = -1, upper diag = -1)
135 * Seed: Seed<Simple, TConfig>(10, 10, 15, 15, lower diag = 0, upper diag = 0)
136 * Seed: Seed<Simple, TConfig>(14, 14, 18, 18, lower diag = 0, upper diag = 0)
137 * Seed: Seed<Simple, TConfig>(21, 21, 24, 24, lower diag = 0, upper diag = 0)
138 *
139 *
140 * Merge Method:
141 * Seed: Seed<Simple, TConfig>(4, 5, 8, 9, lower diag = -1, upper diag = -1)
142 * Seed: Seed<Simple, TConfig>(10, 10, 18, 18, lower diag = 0, upper diag = 0)
143 * Seed: Seed<Simple, TConfig>(21, 21, 24, 24, lower diag = 0, upper diag = 0)
144 *
145 *
146 * Chaos Method:
147 * Seed: Seed<Simple, TConfig>(4, 5, 15, 15, lower diag = -1, upper diag = 0)
148 * Seed: Seed<Simple, TConfig>(14, 14, 18, 18, lower diag = 0, upper diag = 0)
149 * Seed: Seed<Simple, TConfig>(21, 21, 24, 24, lower diag = 0, upper diag = 0)
150 * @endcode
151 */
152
153
154 // ---------------------------------------------------------------------------
155 // Function minScore()
156 // ---------------------------------------------------------------------------
157
158 /*!
159 * @fn SeedSet#minScore
160 * @headerfile <seqan/seeds.h>
161 * @brief Returns the threshold to distinguish between high-scoring and low-scoring seeds.
162 *
163 * @signature TSeedScore minScore(seedSet);
164 *
165 * @param[in] seedSet The SeedSet for which the threshold is set. If the score of a seed is higher than the given
166 * threshold, then it is virtually put into a container storing the high-scoring seeds which can
167 * be iterated separately.
168 *
169 * @return TSeedScore The score threshold. TSeedScore is the @link Seed#SeedScore @endlink of the contained seeds.
170 *
171 * @see SeedSet#setMinScore
172 */
173
174 template <typename TSeed, typename TSeedSetSpec>
175 typename SeedScore<typename Value<SeedSet<TSeed, TSeedSetSpec> >::Type >::Type
minScore(SeedSet<TSeed,TSeedSetSpec> const & seedSet)176 minScore(SeedSet<TSeed, TSeedSetSpec> const & seedSet)
177 {
178 return seedSet._minScore;
179 }
180
181 // ---------------------------------------------------------------------------
182 // Function setMinScore()
183 // ---------------------------------------------------------------------------
184
185 /*!
186 * @fn SeedSet#setMinScore
187 * @headerfile <seqan/seeds.h>
188 * @brief Sets the threshold at which seeds are considered high-scoring.
189 *
190 * @signature void setMinScore(seedSet, scoreValue);
191 *
192 * @param[in,out] seedSet The SeedSet for which the threshold is to be set.
193 * @param[in] scoreValue The new threshold to set. If the score of a seed is higher than the given threshold, then
194 * it is virtually put into a container storing the high-scoring seeds which can be iterated
195 * separately (@link IntegerConcept @endlink).
196 *
197 * @see SeedSet#minScore
198 */
199
200 template <typename TSeed, typename TSeedSetSpec, typename TScoreValue>
setMinScore(SeedSet<TSeed,TSeedSetSpec> & seedSet,TScoreValue val)201 void setMinScore(SeedSet<TSeed, TSeedSetSpec> & seedSet, TScoreValue val)
202 {
203 seedSet._minScore = val;
204 }
205
206 // ---------------------------------------------------------------------------
207 // Function minSeedSize()
208 // ---------------------------------------------------------------------------
209
210 template <typename TSeed, typename TSeedSetSpec>
211 typename Size<typename Value<SeedSet<TSeed, TSeedSetSpec> >::Type >::Type
minSeedSize(SeedSet<TSeed,TSeedSetSpec> const & seedSet)212 minSeedSize(SeedSet<TSeed, TSeedSetSpec> const & seedSet)
213 {
214 return seedSet._minSeedSize;
215 }
216
217 // ---------------------------------------------------------------------------
218 // Function setMinSeedSize()
219 // ---------------------------------------------------------------------------
220
221 template <typename TSeed, typename TSeedSetSpec, typename TSize>
setMinSeedSize(SeedSet<TSeed,TSeedSetSpec> & seedSet,TSize siz)222 void setMinSeedSize(SeedSet<TSeed, TSeedSetSpec> & seedSet, TSize siz)
223 {
224 seedSet._minSeedSize = siz;
225 }
226
227 // ---------------------------------------------------------------------------
228 // Helper Function _qualityReached()
229 // ---------------------------------------------------------------------------
230
231 // TODO(rmaerker): Is this function used anywhere?
232 template <typename TSeed, typename TSeedSetSpec, typename TSeedSpec, typename TSeedConfig>
_qualityReached(SeedSet<TSeed,TSeedSetSpec> const & seedSet,Seed<TSeedSpec,TSeedConfig> const & seed)233 inline bool _qualityReached(SeedSet<TSeed, TSeedSetSpec> const & seedSet,
234 Seed<TSeedSpec, TSeedConfig> const & seed)
235 {
236 // TODO(rmaerker): If different seed configs are supported we must make sure, that the scoreValues are comparable to avoid compiler warnings.
237 return score(seed) >= minScore(seedSet) && seedSize(seed) >= minSeedSize(seedSet);
238 }
239
240 // ---------------------------------------------------------------------------
241 // Function clear()
242 // ---------------------------------------------------------------------------
243
244 /*!
245 * @fn SeedSet#clear
246 * @headerfile <seqan/sequence.h>
247 * @brief Clear the SeedSet.
248 *
249 * @signature void clear(seedSet);
250 *
251 * @param[in,out] seedSet The SeedSet to clear.
252 */
253
254 template <typename TSeed, typename TSeedSetSpec>
clear(SeedSet<TSeed,TSeedSetSpec> & seedSet)255 inline void clear(SeedSet<TSeed, TSeedSetSpec> & seedSet)
256 {
257 seedSet._seeds.clear();
258 seedSet._minScore = 0;
259 seedSet._minSeedSize = 0;
260 }
261
262
263 // Debugging / TikZ Output
264
265 template <typename TStream, typename TQuerySequence, typename TDatabaseSequence, typename TSeedSetSpec, typename TSeed>
266 inline void
__write(TStream & stream,TQuerySequence & sequence0,TDatabaseSequence & sequence1,SeedSet<TSeed,TSeedSetSpec> const & seedSet,Tikz_ const &)267 __write(TStream & stream,
268 TQuerySequence & sequence0,
269 TDatabaseSequence & sequence1,
270 SeedSet<TSeed, TSeedSetSpec> const & seedSet,
271 Tikz_ const &)
272 {
273 //IOREV _nodoc_ specialization not documented
274 typedef SeedSet<TSeed, TSeedSetSpec> TSeedSet;
275
276 stream << "\\begin{tikzpicture}[" << std::endl
277 << " seed/.style={very thick}," << std::endl
278 << " seed diagonal/.style={red,<->}" << std::endl
279 << " ]" << std::endl;
280
281 // Draw sequences.
282 stream << " \\draw";
283 // Draw query / sequence 0;
284 for (unsigned i = 0; i < length(sequence0); ++i)
285 stream << std::endl << " (0, -" << i << ") node {" << sequence0[i] << "}";
286 stream << std::endl;
287 // Draw database / sequence 1.
288 for (unsigned i = 0; i < length(sequence1); ++i)
289 stream << std::endl << " (" << i << ", 0) node {" << sequence1[i] << "}";
290 stream << ";" << std::endl;
291
292 // Draw seeds.
293 typedef typename Iterator<TSeedSet const, Standard>::Type TIterator;
294 for (TIterator it = begin(seedSet); it != end(seedSet); ++it)
295 _write(stream, value(it), Tikz_());
296 stream << "\\end{tikzpicture}" << std::endl;
297 }
298
299 } // namespace seqan
300
301 #endif // SEQAN_SEEDS_SEEDS_SEED_SET_BASE_H_
302