1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // The class SeedSet.  ScoringScheme and related tags are defined in
35 // seeds_scoring_scheme.h
36 // ==========================================================================
37 
38 #ifndef SEQAN_SEEDS_SEEDS_SEED_SET_BASE_H_
39 #define SEQAN_SEEDS_SEEDS_SEED_SET_BASE_H_
40 
41 namespace seqan {
42 
43 // ===========================================================================
44 // Forwards
45 // ===========================================================================
46 
47 // TODO(holtgrew): Add DiagonalSorted as default specialization.
48 
49 struct Unordered_;
50 typedef Tag<Unordered_> Unordered;
51 
52 // ===========================================================================
53 // Enums, Tags, Classes, Specializations
54 // ===========================================================================
55 
56 // ---------------------------------------------------------------------------
57 // Class SeedSet
58 // ---------------------------------------------------------------------------
59 
60 /*!
61  * @class SeedSet
62  * @headerfile <seqan/seeds.h>
63  * @implements ContainerConcept
64  * @brief Handles a set of seeds with local chaining on adding seeds.
65  * @note At the moment only <tt>Unordered SeedSets</tt> are supported.
66  *
67  * @signature template <typename TSeed[, typename TSpec]>
68  *            class SeedSet;
69  *
70  * @tparam TSeed     Type of the @link Seed @endlink objects stored in the seed set.
71  * @tparam TSpec     Optional tag for seed set specialization. Defaults to <tt>Unordered</tt>.
72  */
73 
74 // ..param.TScored:Either UnScored or a seed set scoring scheme specification.
75 // ..param.TSpec:Specialization of the seed set.
76 // ..param.TSeedConfig:Configuration for the seeds.  Sensible defaults are chosen based on the other template parameters.
77 
78 template <typename TSeed, typename TSpec = Unordered>
79 class SeedSet;
80 
81 // ===========================================================================
82 // Metafunctions
83 // ===========================================================================
84 
85 // ===========================================================================
86 // Functions
87 // ===========================================================================
88 
89 // Basic Container Functions
90 
91 // TODO(holtgrew): dddoc {begin,end,length,front,back}All(T)
92 
93 // SeedSet Functions
94 
95 /*!
96  * @fn SeedSet#addSeed
97  *
98  * @headerfile <seqan/seeds.h>
99  *
100  * @brief Adds a seed to an existing @link SeedSet @endlink using different
101  *        algorithms for local chaining.
102  *
103  * @signature bool addSeed(seedSet, seed, distance, bandwidth, score, seqH, seqV, tag);
104  * @signature bool addSeed(seedSet, seed, distance, score, SimpleChain);
105  * @signature bool addSeed(seedSet, seed, distance, Merge);
106  * @signature bool addSeed(seedSet, seed, Single);
107  *
108  * @param[in,out] seedSet   The SeedSet to add the seed to.
109  * @param[in]     seed      The seed to be added.
110  * @param[in]     distance  The maximal distance between the end point of the upper left and the begin point of the
111  *                          lower right @link Seed @endlink allowed for local chaining.  NB: only Chaos, SimpleChain
112  *                          and Merge require the distance information.
113  * @param[in]     bandwidth The window size to search for a chainable @link Seed @endlink.  Note, only <tt>Chaos</tt>
114  *                          requires the bandwidth information.
115  * @param[in]     score     The scoring scheme.Note, only Chaos and SimpleChain require the score.
116  *                          Type: @link SimpleScore @endlink.
117  * @param[in]     seqH      Database sequence (horizontal).  Only required for Chaos Chaining.  Types: ContainerConcept.
118  * @param[in]     seqV      Query sequence (vertical).  Only required for Chaos Chaining.  Types: ContainerConcept.
119  * @param[in]     tag       Select the algorithm that is used to add the new seed.  Note that not every algorithm can
120  *                          be used with each type of @link Seed @endlink.  See special signatures above.  The seed is
121  *                          copied and then added.
122  *
123  * @return bool <tt>true</tt> if successful.  Adding can fail if no appropriate seed is there for chaining or merging.
124  *              Adding using <tt>Single</tt> ever fails.
125  *
126  * @section Examples
127  *
128  * @include demos/seeds/seeds_add_seed.cpp
129  *
130  * The output is as follows:
131  *
132  * @code{.console}
133  * Single Method:
134  * Seed: Seed<Simple, TConfig>(4, 5, 8, 9, lower diag = -1, upper diag = -1)
135  * Seed: Seed<Simple, TConfig>(10, 10, 15, 15, lower diag = 0, upper diag = 0)
136  * Seed: Seed<Simple, TConfig>(14, 14, 18, 18, lower diag = 0, upper diag = 0)
137  * Seed: Seed<Simple, TConfig>(21, 21, 24, 24, lower diag = 0, upper diag = 0)
138  *
139  *
140  * Merge Method:
141  * Seed: Seed<Simple, TConfig>(4, 5, 8, 9, lower diag = -1, upper diag = -1)
142  * Seed: Seed<Simple, TConfig>(10, 10, 18, 18, lower diag = 0, upper diag = 0)
143  * Seed: Seed<Simple, TConfig>(21, 21, 24, 24, lower diag = 0, upper diag = 0)
144  *
145  *
146  * Chaos Method:
147  * Seed: Seed<Simple, TConfig>(4, 5, 15, 15, lower diag = -1, upper diag = 0)
148  * Seed: Seed<Simple, TConfig>(14, 14, 18, 18, lower diag = 0, upper diag = 0)
149  * Seed: Seed<Simple, TConfig>(21, 21, 24, 24, lower diag = 0, upper diag = 0)
150  * @endcode
151  */
152 
153 
154 // ---------------------------------------------------------------------------
155 // Function minScore()
156 // ---------------------------------------------------------------------------
157 
158 /*!
159  * @fn SeedSet#minScore
160  * @headerfile <seqan/seeds.h>
161  * @brief Returns the threshold to distinguish between high-scoring and low-scoring seeds.
162  *
163  * @signature TSeedScore minScore(seedSet);
164  *
165  * @param[in] seedSet The SeedSet for which the threshold is set.  If the score of a seed is higher than the given
166  *                    threshold, then it is virtually put into a container storing the high-scoring seeds which can
167  *                    be iterated separately.
168  *
169  * @return TSeedScore The score threshold.  TSeedScore is the @link Seed#SeedScore @endlink of the contained seeds.
170  *
171  * @see SeedSet#setMinScore
172  */
173 
174 template <typename TSeed, typename TSeedSetSpec>
175 typename SeedScore<typename Value<SeedSet<TSeed, TSeedSetSpec> >::Type >::Type
minScore(SeedSet<TSeed,TSeedSetSpec> const & seedSet)176 minScore(SeedSet<TSeed, TSeedSetSpec> const & seedSet)
177 {
178     return seedSet._minScore;
179 }
180 
181 // ---------------------------------------------------------------------------
182 // Function setMinScore()
183 // ---------------------------------------------------------------------------
184 
185 /*!
186  * @fn SeedSet#setMinScore
187  * @headerfile <seqan/seeds.h>
188  * @brief Sets the threshold at which seeds are considered high-scoring.
189  *
190  * @signature void setMinScore(seedSet, scoreValue);
191  *
192  * @param[in,out] seedSet    The SeedSet for which the threshold is to be set.
193  * @param[in]     scoreValue The new threshold to set.  If the score of a seed is higher than the given threshold, then
194  *                           it is virtually put into a container storing the high-scoring seeds which can be iterated
195  *                           separately  (@link IntegerConcept @endlink).
196  *
197  * @see SeedSet#minScore
198  */
199 
200 template <typename TSeed, typename TSeedSetSpec, typename TScoreValue>
setMinScore(SeedSet<TSeed,TSeedSetSpec> & seedSet,TScoreValue val)201 void setMinScore(SeedSet<TSeed, TSeedSetSpec> & seedSet, TScoreValue val)
202 {
203     seedSet._minScore = val;
204 }
205 
206 // ---------------------------------------------------------------------------
207 // Function minSeedSize()
208 // ---------------------------------------------------------------------------
209 
210 template <typename TSeed, typename TSeedSetSpec>
211 typename Size<typename Value<SeedSet<TSeed, TSeedSetSpec> >::Type >::Type
minSeedSize(SeedSet<TSeed,TSeedSetSpec> const & seedSet)212 minSeedSize(SeedSet<TSeed, TSeedSetSpec> const & seedSet)
213 {
214     return seedSet._minSeedSize;
215 }
216 
217 // ---------------------------------------------------------------------------
218 // Function setMinSeedSize()
219 // ---------------------------------------------------------------------------
220 
221 template <typename TSeed, typename TSeedSetSpec, typename TSize>
setMinSeedSize(SeedSet<TSeed,TSeedSetSpec> & seedSet,TSize siz)222 void setMinSeedSize(SeedSet<TSeed, TSeedSetSpec> & seedSet, TSize siz)
223 {
224     seedSet._minSeedSize = siz;
225 }
226 
227 // ---------------------------------------------------------------------------
228 // Helper Function _qualityReached()
229 // ---------------------------------------------------------------------------
230 
231 // TODO(rmaerker): Is this function used anywhere?
232 template <typename TSeed, typename TSeedSetSpec, typename TSeedSpec, typename TSeedConfig>
_qualityReached(SeedSet<TSeed,TSeedSetSpec> const & seedSet,Seed<TSeedSpec,TSeedConfig> const & seed)233 inline bool _qualityReached(SeedSet<TSeed, TSeedSetSpec> const & seedSet,
234                             Seed<TSeedSpec, TSeedConfig> const & seed)
235 {
236     // TODO(rmaerker): If different seed configs are supported we must make sure, that the scoreValues are comparable to avoid compiler warnings.
237     return score(seed) >= minScore(seedSet) && seedSize(seed) >= minSeedSize(seedSet);
238 }
239 
240 // ---------------------------------------------------------------------------
241 // Function clear()
242 // ---------------------------------------------------------------------------
243 
244 /*!
245  * @fn SeedSet#clear
246  * @headerfile <seqan/sequence.h>
247  * @brief Clear the SeedSet.
248  *
249  * @signature void clear(seedSet);
250  *
251  * @param[in,out] seedSet The SeedSet to clear.
252  */
253 
254 template <typename TSeed, typename TSeedSetSpec>
clear(SeedSet<TSeed,TSeedSetSpec> & seedSet)255 inline void clear(SeedSet<TSeed, TSeedSetSpec> & seedSet)
256 {
257     seedSet._seeds.clear();
258     seedSet._minScore = 0;
259     seedSet._minSeedSize = 0;
260 }
261 
262 
263 // Debugging / TikZ Output
264 
265 template <typename TStream, typename TQuerySequence, typename TDatabaseSequence, typename TSeedSetSpec, typename TSeed>
266 inline void
__write(TStream & stream,TQuerySequence & sequence0,TDatabaseSequence & sequence1,SeedSet<TSeed,TSeedSetSpec> const & seedSet,Tikz_ const &)267 __write(TStream & stream,
268        TQuerySequence & sequence0,
269        TDatabaseSequence & sequence1,
270        SeedSet<TSeed, TSeedSetSpec> const & seedSet,
271        Tikz_ const &)
272 {
273 //IOREV _nodoc_ specialization not documented
274     typedef SeedSet<TSeed, TSeedSetSpec> TSeedSet;
275 
276     stream << "\\begin{tikzpicture}[" << std::endl
277            << "    seed/.style={very thick}," << std::endl
278            << "    seed diagonal/.style={red,<->}" << std::endl
279            << "    ]" << std::endl;
280 
281     // Draw sequences.
282     stream << "  \\draw";
283     // Draw query / sequence 0;
284     for (unsigned i = 0; i < length(sequence0); ++i)
285         stream << std::endl << "    (0, -" << i << ") node {" << sequence0[i] << "}";
286     stream << std::endl;
287     // Draw database / sequence 1.
288     for (unsigned i = 0; i < length(sequence1); ++i)
289         stream << std::endl << "    (" << i << ", 0) node {" << sequence1[i] << "}";
290     stream << ";" << std::endl;
291 
292     // Draw seeds.
293     typedef typename Iterator<TSeedSet const, Standard>::Type TIterator;
294     for (TIterator it = begin(seedSet); it != end(seedSet); ++it)
295         _write(stream, value(it), Tikz_());
296     stream << "\\end{tikzpicture}" << std::endl;
297 }
298 
299 }  // namespace seqan
300 
301 #endif  // SEQAN_SEEDS_SEEDS_SEED_SET_BASE_H_
302