1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Andreas Gogol-Doering <andreas.doering@mdc-berlin.de>
33 // ==========================================================================
34 // Tests for the SeqAn module find.
35 // ==========================================================================
36 
37 #include <iostream>
38 #include <fstream>
39 #include <typeinfo>
40 #include <time.h>
41 #include <cstring>  // size_t
42 #include <cstdio>
43 #include <vector>
44 #include <time.h>
45 
46 #define SEQAN_DEBUG
47 //#define SEQAN_TEST
48 
49 #define SEQAN_DEBUG_MYERSBITVECTOR
50 //#define SEQAN_DEBUG_MYERSBITVECTOR_DUMP
51 //#define SEQAN_TEST_MYERS_STRICTBANDED
52 
53 #include <seqan/basic.h>
54 #include <seqan/find.h>
55 
56 #include "test_find_hamming.h"
57 #include "test_find_myers_banded.h"
58 
59 using namespace std;
60 using namespace seqan;
61 
62 
63 template <typename TAlgorithmSpec>
Test_OnlineAlg()64 void Test_OnlineAlg() {
65 		typedef typename Position<CharString>::Type TPosition;
66 
67     String<TPosition> pos;
68 
69     //____________________________________________________________________________
70     // Test1 - small needle
71 
72     String<char> haystack("Dies ist ein Haystack. Ja, das ist wirklich einer!");
73     Finder<String<char> > finder(haystack);
74 
75     String<char> needle("ist");
76     Pattern<String<char>, TAlgorithmSpec> pattern(needle);
77 
78     while (find(finder, pattern)) {
79         appendValue(pos,position(finder));
80         SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
81         SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
82         SEQAN_ASSERT_EQ(length(finder), length(needle));
83         SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
84         SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
85         SEQAN_ASSERT_EQ(infix(finder), needle);
86     }
87 
88     SEQAN_ASSERT_EQ(host(pattern), needle);
89     SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<String<char>, TAlgorithmSpec> const &>(pattern)), needle);
90     SEQAN_ASSERT_EQ(pos[0], 5u);
91     SEQAN_ASSERT_EQ(pos[1], 31u);
92     SEQAN_ASSERT_EQ(length(pos), 2u);
93 
94     //____________________________________________________________________________
95     // Test2 - large needle
96 
97     haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgaabcdef";
98     setHost(finder, haystack);
99     clear(finder);
100 
101     needle = "abcdefghijklmnopqrstuvwxyzabcdefg";
102     setHost(pattern, needle);
103 
104     clear(pos);
105     while (find(finder, pattern)) {
106         append(pos,position(finder));
107         SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
108         SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
109         SEQAN_ASSERT_EQ(length(finder), length(needle));
110         SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
111         SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
112         SEQAN_ASSERT_EQ(infix(finder), needle);
113     }
114 
115     SEQAN_ASSERT_EQ(pos[0], 0u);
116     SEQAN_ASSERT_EQ(pos[1], 26u);
117     SEQAN_ASSERT_EQ(length(pos), 2u);
118 
119     //____________________________________________________________________________
120     // Test3 - different alphabet, small needle
121 
122     String<Dna> hstk = "aaaaaaacaa";
123     Finder<String<Dna> > finderDna(hstk);
124 
125     String<Dna> ndl = "aa";
126     setHost(pattern, ndl);
127 
128     clear(pos);
129     while (find(finderDna, pattern)) {
130         append(pos,position(finderDna));
131         SEQAN_ASSERT_EQ(position(finderDna), beginPosition(finderDna));
132         SEQAN_ASSERT_EQ(endPosition(finderDna), beginPosition(finderDna) + length(finderDna));
133         SEQAN_ASSERT_EQ(length(finderDna), length(ndl));
134         SEQAN_ASSERT_EQ(begin(finderDna), begin(hstk) + beginPosition(finderDna));
135         SEQAN_ASSERT_EQ(end(finderDna), begin(hstk) + endPosition(finderDna));
136         SEQAN_ASSERT_EQ(infix(finderDna), ndl);
137     }
138 
139     SEQAN_ASSERT_EQ(pos[0], 0u);
140     SEQAN_ASSERT_EQ(pos[1], 1u);
141     SEQAN_ASSERT_EQ(pos[2], 2u);
142     SEQAN_ASSERT_EQ(pos[3], 3u);
143     SEQAN_ASSERT_EQ(pos[4], 4u);
144     SEQAN_ASSERT_EQ(pos[5], 5u);
145     SEQAN_ASSERT_EQ(pos[6], 8u);
146     SEQAN_ASSERT_EQ(length(pos), 7u);
147 
148     //____________________________________________________________________________
149     // Test3b - different alphabet, small needle, jumping finder
150 
151     goBegin(finderDna); // That's a repositioning
152     clear(finderDna);       // That's why, clear state
153     clear(pos);
154 
155     bool firstHit = true;
156     while (find(finderDna, pattern)) {
157         if (firstHit) {
158             firstHit = false;
159             finderDna += 2;
160             clear(finderDna);  // clear the state of the finder
161         } else {
162             //unsigned int p = position(finderDna);
163             append(pos,position(finderDna));
164         }
165     }
166 
167     SEQAN_ASSERT_EQ(pos[0], 2u);
168     SEQAN_ASSERT_EQ(pos[1], 3u);
169     SEQAN_ASSERT_EQ(pos[2], 4u);
170     SEQAN_ASSERT_EQ(pos[3], 5u);
171     SEQAN_ASSERT_EQ(pos[4], 8u);
172     SEQAN_ASSERT_EQ(length(pos), 5u);
173 
174     //____________________________________________________________________________
175     // Test4 - different alphabet, large needle
176     String<Dna> text = "taaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaat";
177     Finder<String<Dna> > finderText(text);
178 
179     String<Dna> query = "taaaataaaataaaataaaataaaataaaataaaataaaataaaat";
180     setHost(pattern, query);
181 
182     clear(pos);
183     while (find(finderText, pattern)) {
184         append(pos,position(finderText));
185         SEQAN_ASSERT_EQ(position(finderText), beginPosition(finderText));
186         SEQAN_ASSERT_EQ(endPosition(finderText), beginPosition(finderText) + length(finderText));
187         SEQAN_ASSERT_EQ(length(finderText), length(query));
188         SEQAN_ASSERT_EQ(begin(finderText), begin(text) + beginPosition(finderText));
189         SEQAN_ASSERT_EQ(end(finderText), begin(text) + endPosition(finderText));
190         SEQAN_ASSERT_EQ(infix(finderText), query);
191     }
192 
193     SEQAN_ASSERT_EQ(pos[0], 0u);
194     SEQAN_ASSERT_EQ(pos[1], 5u);
195     SEQAN_ASSERT_EQ(pos[2], 10u);
196     SEQAN_ASSERT_EQ(pos[3], 15u);
197     SEQAN_ASSERT_EQ(pos[4], 20u);
198     SEQAN_ASSERT_EQ(pos[5], 25u);
199     SEQAN_ASSERT_EQ(length(pos), 6u);
200 }
201 
202 
203 template <typename TAlgorithmSpec>
Test_OnlineAlgMulti(bool order_by_begin_position)204 void Test_OnlineAlgMulti(bool order_by_begin_position) {
205 	typedef typename Position<CharString>::Type TPosition;
206 
207     String<TPosition> pos;
208 
209     //____________________________________________________________________________
210     // Test1 - Single keyword
211     String<char> haystack("Dies ist ein Haystack. Ja, das ist wirklich einer!");
212     Finder<String<char> > finder(haystack);
213 
214     typedef String<String<char> > TNeedle;
215     TNeedle keywords;
216     appendValue(keywords, String<char>("ist"));
217     Pattern<TNeedle, TAlgorithmSpec> pattern(keywords);
218 
219     while (find(finder, pattern)) {
220         append(pos,position(finder));
221         SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
222         SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
223         SEQAN_ASSERT_EQ(length(finder), length(keywords[position(pattern)]));
224         SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
225         SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
226         SEQAN_ASSERT_EQ(infix(finder), keywords[position(pattern)]);
227     }
228 
229     SEQAN_ASSERT_EQ(host(pattern), keywords);
230     SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<TNeedle, TAlgorithmSpec> const &>(pattern)), keywords);
231     SEQAN_ASSERT_EQ(pos[0], 5u);
232     SEQAN_ASSERT_EQ(pos[1], 31u);
233     SEQAN_ASSERT_EQ(length(pos), 2u);
234 
235     haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgaabcdef";
236     setHost(finder, haystack);
237     clear(finder);
238 
239     clear(keywords);
240     appendValue(keywords, String<char>("abcdefghijklmnopqrstuvwxyzabcdefg"));
241     setHost(pattern, keywords);
242     clear(pos);
243 
244     while (find(finder, pattern)) {
245         append(pos,position(finder));
246         SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
247         SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
248         SEQAN_ASSERT_EQ(length(finder), length(keywords[position(pattern)]));
249         SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
250         SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
251         SEQAN_ASSERT_EQ(infix(finder), keywords[position(pattern)]);
252     }
253 
254     SEQAN_ASSERT_EQ(pos[0], 0u);
255     SEQAN_ASSERT_EQ(pos[1], 26u);
256     SEQAN_ASSERT_EQ(length(pos), 2u);
257 
258 
259     String<Dna> hstk = "aaaaaaacaa";
260     Finder<String<Dna> > finderDna(hstk);
261 
262     typedef String<String<Dna> > TDnaNeedle;
263     Pattern<TDnaNeedle, TAlgorithmSpec> pattern_dna(keywords);
264 
265     TDnaNeedle dna_keywords;
266     appendValue(dna_keywords, String<Dna>("aa"));
267     setHost(pattern_dna, dna_keywords);
268 
269     clear(pos);
270     while (find(finderDna, pattern_dna)) {
271         append(pos,position(finderDna));
272         SEQAN_ASSERT_EQ(position(finderDna), beginPosition(finderDna));
273         SEQAN_ASSERT_EQ(endPosition(finderDna), beginPosition(finderDna) + length(finderDna));
274         SEQAN_ASSERT_EQ(length(finderDna), length(dna_keywords[position(pattern_dna)]));
275         SEQAN_ASSERT_EQ(begin(finderDna), begin(hstk) + beginPosition(finderDna));
276         SEQAN_ASSERT_EQ(end(finderDna), begin(hstk) + endPosition(finderDna));
277         SEQAN_ASSERT_EQ(infix(finderDna), dna_keywords[position(pattern_dna)]);
278     }
279 
280     SEQAN_ASSERT_EQ(pos[0], 0u);
281     SEQAN_ASSERT_EQ(pos[1], 1u);
282     SEQAN_ASSERT_EQ(pos[2], 2u);
283     SEQAN_ASSERT_EQ(pos[3], 3u);
284     SEQAN_ASSERT_EQ(pos[4], 4u);
285     SEQAN_ASSERT_EQ(pos[5], 5u);
286     SEQAN_ASSERT_EQ(pos[6], 8u);
287     SEQAN_ASSERT_EQ(length(pos), 7u);
288 
289     String<Dna> text = "taaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaat";
290     Finder<String<Dna> > finderText(text);
291 
292     clear(dna_keywords);
293     appendValue(dna_keywords, String<Dna>("taaaataaaataaaataaaataaaataaaataaaataaaataaaat"));
294     setHost(pattern_dna, dna_keywords);
295 
296     clear(pos);
297     while (find(finderText, pattern_dna)) {
298         append(pos,position(finderText));
299         SEQAN_ASSERT_EQ(position(finderText), beginPosition(finderText));
300         SEQAN_ASSERT_EQ(endPosition(finderText), beginPosition(finderText) + length(finderText));
301         SEQAN_ASSERT_EQ(length(finderText), length(dna_keywords[position(pattern_dna)]));
302         SEQAN_ASSERT_EQ(begin(finderText), begin(text) + beginPosition(finderText));
303         SEQAN_ASSERT_EQ(end(finderText), begin(text) + endPosition(finderText));
304         SEQAN_ASSERT_EQ(infix(finderText), dna_keywords[position(pattern_dna)]);
305     }
306 
307     SEQAN_ASSERT_EQ(pos[0], 0u);
308     SEQAN_ASSERT_EQ(pos[1], 5u);
309     SEQAN_ASSERT_EQ(pos[2], 10u);
310     SEQAN_ASSERT_EQ(pos[3], 15u);
311     SEQAN_ASSERT_EQ(pos[4], 20u);
312     SEQAN_ASSERT_EQ(pos[5], 25u);
313     SEQAN_ASSERT_EQ(length(pos), 6u);
314 
315     //____________________________________________________________________________
316     // Test2 - Multiple keywords
317     String<char> hst("annual_announce_any_annually");
318     Finder<String<char> > fd(hst);
319 
320     typedef String<String<char> > TN;
321     TN kyw;
322     appendValue(kyw, String<char>("announce"));
323     appendValue(kyw, String<char>("annual"));
324     appendValue(kyw, String<char>("annually"));
325     Pattern<TN, TAlgorithmSpec> pt(kyw);
326 
327     String<TPosition> finderPos;
328     String<TPosition> keywordIndex;
329     while (find(fd, pt)) {
330         append(finderPos,position(fd));
331         append(keywordIndex,position(pt));
332         SEQAN_ASSERT_EQ(position(fd), beginPosition(fd));
333         SEQAN_ASSERT_EQ(endPosition(fd), beginPosition(fd) + length(fd));
334         SEQAN_ASSERT_EQ(length(fd), length(kyw[position(pt)]));
335         SEQAN_ASSERT_EQ(begin(fd), begin(hst) + beginPosition(fd));
336         SEQAN_ASSERT_EQ(end(fd), begin(hst) + endPosition(fd));
337         SEQAN_ASSERT_EQ(infix(fd), kyw[position(pt)]);
338     }
339 
340     SEQAN_ASSERT_EQ(length(finderPos), 4u);
341     SEQAN_ASSERT_EQ(length(keywordIndex), 4u);
342     SEQAN_ASSERT_EQ(finderPos[0], 0u);
343     SEQAN_ASSERT_EQ(keywordIndex[0], 1u);
344     SEQAN_ASSERT_EQ(finderPos[1], 7u);
345     SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
346     SEQAN_ASSERT_EQ(finderPos[2], 20u);
347     SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
348     SEQAN_ASSERT_EQ(finderPos[3], 20u);
349     SEQAN_ASSERT_EQ(keywordIndex[3], 2u);
350 
351     String<Dna> hstDna("AGATACGATATATAC");
352     Finder<String<Dna> > fdDna(hstDna);
353 
354     typedef String<String<Dna> > TNDna;
355     TNDna kywDna;
356     appendValue(kywDna, String<Dna>("ATATATA"));
357     appendValue(kywDna, String<Dna>("TATAT"));
358     appendValue(kywDna, String<Dna>("ACGATAT"));
359     Pattern<TNDna, TAlgorithmSpec> ptDna(kywDna);
360 
361     clear(finderPos);
362     clear(keywordIndex);
363     while (find(fdDna, ptDna)) {
364         appendValue(finderPos, position(fdDna));
365         appendValue(keywordIndex, position(ptDna));
366         SEQAN_ASSERT_EQ(position(fdDna), beginPosition(fdDna));
367         SEQAN_ASSERT_EQ(endPosition(fdDna), beginPosition(fdDna) + length(fdDna));
368         SEQAN_ASSERT_EQ(length(fdDna), length(kywDna[position(ptDna)]));
369         SEQAN_ASSERT_EQ(begin(fdDna), begin(hstDna) + beginPosition(fdDna));
370         SEQAN_ASSERT_EQ(end(fdDna), begin(hstDna) + endPosition(fdDna));
371         SEQAN_ASSERT_EQ(infix(fdDna), kywDna[position(ptDna)]);
372     }
373 
374     SEQAN_ASSERT_EQ(length(finderPos), 3u);
375     SEQAN_ASSERT_EQ(length(keywordIndex), 3u);
376     if (order_by_begin_position) {
377         SEQAN_ASSERT_EQ(finderPos[0], 4u);
378         SEQAN_ASSERT_EQ(keywordIndex[0], 2u);
379         SEQAN_ASSERT_EQ(finderPos[1], 7u);
380         SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
381         SEQAN_ASSERT_EQ(finderPos[2], 8u);
382         SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
383     } else {
384         SEQAN_ASSERT_EQ(finderPos[0], 4u);
385         SEQAN_ASSERT_EQ(keywordIndex[0], 2u);
386         SEQAN_ASSERT_EQ(finderPos[1], 8u);
387         SEQAN_ASSERT_EQ(keywordIndex[1], 1u);
388         SEQAN_ASSERT_EQ(finderPos[2], 7u);
389         SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
390     }
391     //____________________________________________________________________________
392     // Test2 - Multiple keywords that do not fit into a machine word
393     String<Dna> my_haystack("AGATACGATATATACAGATACGATATATACAGATACGATATATACAGATACGATATATACAGATACGATATATAC");
394     Finder<String<Dna> > my_finder(my_haystack);
395 
396     typedef String<String<Dna> > TNeedle_My;
397     TNeedle_My my_keywords;
398     appendValue(my_keywords, String<Dna>("ATATATA"));
399     appendValue(my_keywords, String<Dna>("ACCGATCCAT"));
400     appendValue(my_keywords, String<Dna>("TATAT"));
401     appendValue(my_keywords, String<Dna>("ACCGAT"));
402     appendValue(my_keywords, String<Dna>("ACGATAT"));
403     appendValue(my_keywords, String<Dna>("CCAA"));
404     Pattern<TNeedle_My, TAlgorithmSpec> my_pattern(my_keywords);
405 
406     clear(finderPos);
407     clear(keywordIndex);
408     while (find(my_finder, my_pattern)) {
409         //std::cout << position(my_finder) << "-" << position(my_pattern) << std::endl;
410         append(finderPos,position(my_finder));
411         append(keywordIndex,position(my_pattern));
412         SEQAN_ASSERT_EQ(position(my_finder), beginPosition(my_finder));
413         SEQAN_ASSERT_EQ(endPosition(my_finder), beginPosition(my_finder) + length(my_finder));
414         SEQAN_ASSERT_EQ(length(my_finder), length(my_keywords[position(my_pattern)]));
415         SEQAN_ASSERT_EQ(begin(my_finder), begin(my_haystack) + beginPosition(my_finder));
416         SEQAN_ASSERT_EQ(end(my_finder), begin(my_haystack) + endPosition(my_finder));
417         SEQAN_ASSERT_EQ(infix(my_finder), my_keywords[position(my_pattern)]);
418     }
419 
420     SEQAN_ASSERT_EQ(length(finderPos), 15u);
421     SEQAN_ASSERT_EQ(length(keywordIndex), 15u);
422     if (order_by_begin_position) {
423         SEQAN_ASSERT_EQ(finderPos[0], 4u);
424         SEQAN_ASSERT_EQ(keywordIndex[0], 4u);
425         SEQAN_ASSERT_EQ(finderPos[1], 7u);
426         SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
427         SEQAN_ASSERT_EQ(finderPos[2], 8u);
428         SEQAN_ASSERT_EQ(keywordIndex[2], 2u);
429         SEQAN_ASSERT_EQ(finderPos[3], 19u);
430         SEQAN_ASSERT_EQ(keywordIndex[3], 4u);
431         SEQAN_ASSERT_EQ(finderPos[4], 22u);
432         SEQAN_ASSERT_EQ(keywordIndex[4], 0u);
433         SEQAN_ASSERT_EQ(finderPos[5], 23u);
434         SEQAN_ASSERT_EQ(keywordIndex[5], 2u);
435         SEQAN_ASSERT_EQ(finderPos[6], 34u);
436         SEQAN_ASSERT_EQ(keywordIndex[6], 4u);
437         SEQAN_ASSERT_EQ(finderPos[7], 37u);
438         SEQAN_ASSERT_EQ(keywordIndex[7], 0u);
439         SEQAN_ASSERT_EQ(finderPos[8], 38u);
440         SEQAN_ASSERT_EQ(keywordIndex[8], 2u);
441         SEQAN_ASSERT_EQ(finderPos[9], 49u);
442         SEQAN_ASSERT_EQ(keywordIndex[9], 4u);
443         SEQAN_ASSERT_EQ(finderPos[10], 52u);
444         SEQAN_ASSERT_EQ(keywordIndex[10], 0u);
445         SEQAN_ASSERT_EQ(finderPos[11], 53u);
446         SEQAN_ASSERT_EQ(keywordIndex[11], 2u);
447         SEQAN_ASSERT_EQ(finderPos[12], 64u);
448         SEQAN_ASSERT_EQ(keywordIndex[12], 4u);
449         SEQAN_ASSERT_EQ(finderPos[13], 67u);
450         SEQAN_ASSERT_EQ(keywordIndex[13], 0u);
451         SEQAN_ASSERT_EQ(finderPos[14], 68u);
452         SEQAN_ASSERT_EQ(keywordIndex[14], 2u);
453     } else {
454         SEQAN_ASSERT_EQ(finderPos[0], 4u);
455         SEQAN_ASSERT_EQ(keywordIndex[0], 4u);
456         SEQAN_ASSERT_EQ(finderPos[1], 8u);
457         SEQAN_ASSERT_EQ(keywordIndex[1], 2u);
458         SEQAN_ASSERT_EQ(finderPos[2], 7u);
459         SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
460         SEQAN_ASSERT_EQ(finderPos[3], 19u);
461         SEQAN_ASSERT_EQ(keywordIndex[3], 4u);
462         SEQAN_ASSERT_EQ(finderPos[4], 23u);
463         SEQAN_ASSERT_EQ(keywordIndex[4], 2u);
464         SEQAN_ASSERT_EQ(finderPos[5], 22u);
465         SEQAN_ASSERT_EQ(keywordIndex[5], 0u);
466         SEQAN_ASSERT_EQ(finderPos[6], 34u);
467         SEQAN_ASSERT_EQ(keywordIndex[6], 4u);
468         SEQAN_ASSERT_EQ(finderPos[7], 38u);
469         SEQAN_ASSERT_EQ(keywordIndex[7], 2u);
470         SEQAN_ASSERT_EQ(finderPos[8], 37u);
471         SEQAN_ASSERT_EQ(keywordIndex[8], 0u);
472         SEQAN_ASSERT_EQ(finderPos[9], 49u);
473         SEQAN_ASSERT_EQ(keywordIndex[9], 4u);
474         SEQAN_ASSERT_EQ(finderPos[10], 53u);
475         SEQAN_ASSERT_EQ(keywordIndex[10], 2u);
476         SEQAN_ASSERT_EQ(finderPos[11], 52u);
477         SEQAN_ASSERT_EQ(keywordIndex[11], 0u);
478         SEQAN_ASSERT_EQ(finderPos[12], 64u);
479         SEQAN_ASSERT_EQ(keywordIndex[12], 4u);
480         SEQAN_ASSERT_EQ(finderPos[13], 68u);
481         SEQAN_ASSERT_EQ(keywordIndex[13], 2u);
482         SEQAN_ASSERT_EQ(finderPos[14], 67u);
483         SEQAN_ASSERT_EQ(keywordIndex[14], 0u);
484     }
485 
486     //____________________________________________________________________________
487     // Multiple keywords with overlapping matches
488     String<Dna> my2_haystack("aaaacaaa");
489     Finder<String<Dna> > my2_finder(my2_haystack);
490 
491     typedef String<String<Dna> > TNeedle_My2;
492     TNeedle_My2 my2_keywords;
493     appendValue(my2_keywords, String<Dna>("aa"));
494     appendValue(my2_keywords, String<Dna>("aaa"));
495     appendValue(my2_keywords, String<Dna>("ac"));
496     appendValue(my2_keywords, String<Dna>("aac"));
497     appendValue(my2_keywords, String<Dna>("gctccacctgacctagcccatggggcccaaatttccggccttaattcccattt"));
498     Pattern<TNeedle_My2, TAlgorithmSpec> my2_pattern(my2_keywords);
499 
500     clear(finderPos);
501     clear(keywordIndex);
502     while (find(my2_finder, my2_pattern)) {
503         //std::cout << position(my2_finder) << ":" << position(my2_pattern) << std::endl;
504         append(finderPos,position(my2_finder));
505         append(keywordIndex,position(my2_pattern));
506         SEQAN_ASSERT_EQ(position(my2_finder), beginPosition(my2_finder));
507         SEQAN_ASSERT_EQ(endPosition(my2_finder), beginPosition(my2_finder) + length(my2_finder));
508         SEQAN_ASSERT_EQ(length(my2_finder), length(my2_keywords[position(my2_pattern)]));
509         SEQAN_ASSERT_EQ(begin(my2_finder), begin(my2_haystack) + beginPosition(my2_finder));
510         SEQAN_ASSERT_EQ(end(my2_finder), begin(my2_haystack) + endPosition(my2_finder));
511         SEQAN_ASSERT_EQ(infix(my2_finder), my2_keywords[position(my2_pattern)]);
512     }
513 
514     if (order_by_begin_position) {
515         SEQAN_ASSERT_EQ(finderPos[0], 0u);
516         SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
517         SEQAN_ASSERT_EQ(finderPos[1], 0u);
518         SEQAN_ASSERT_EQ(keywordIndex[1], 1u);
519         SEQAN_ASSERT_EQ(finderPos[2], 1u);
520         SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
521         SEQAN_ASSERT_EQ(finderPos[3], 1u);
522         SEQAN_ASSERT_EQ(keywordIndex[3], 1u);
523         SEQAN_ASSERT_EQ(finderPos[4], 2u);
524         SEQAN_ASSERT_EQ(keywordIndex[4], 0u);
525         SEQAN_ASSERT_EQ(finderPos[5], 2u);
526         SEQAN_ASSERT_EQ(keywordIndex[5], 3u);
527         SEQAN_ASSERT_EQ(finderPos[6], 3u);
528         SEQAN_ASSERT_EQ(keywordIndex[6], 2u);
529         SEQAN_ASSERT_EQ(finderPos[7], 5u);
530         SEQAN_ASSERT_EQ(keywordIndex[7], 0u);
531         SEQAN_ASSERT_EQ(finderPos[8], 5u);
532         SEQAN_ASSERT_EQ(keywordIndex[8], 1u);
533         SEQAN_ASSERT_EQ(finderPos[9], 6u);
534         SEQAN_ASSERT_EQ(keywordIndex[9], 0u);
535     } else{
536         SEQAN_ASSERT_EQ(finderPos[0], 0u);
537         SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
538         SEQAN_ASSERT_EQ(finderPos[1], 1u);
539         SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
540         SEQAN_ASSERT_EQ(finderPos[2], 0u);
541         SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
542         SEQAN_ASSERT_EQ(finderPos[3], 2u);
543         SEQAN_ASSERT_EQ(keywordIndex[3], 0u);
544         SEQAN_ASSERT_EQ(finderPos[4], 1u);
545         SEQAN_ASSERT_EQ(keywordIndex[4], 1u);
546         SEQAN_ASSERT_EQ(finderPos[5], 3u);
547         SEQAN_ASSERT_EQ(keywordIndex[5], 2u);
548         SEQAN_ASSERT_EQ(finderPos[6], 2u);
549         SEQAN_ASSERT_EQ(keywordIndex[6], 3u);
550         SEQAN_ASSERT_EQ(finderPos[7], 5u);
551         SEQAN_ASSERT_EQ(keywordIndex[7], 0u);
552         SEQAN_ASSERT_EQ(finderPos[8], 6u);
553         SEQAN_ASSERT_EQ(keywordIndex[8], 0u);
554         SEQAN_ASSERT_EQ(finderPos[9], 5u);
555         SEQAN_ASSERT_EQ(keywordIndex[9], 1u);
556     }
557 
558     //____________________________________________________________________________
559     // Multiple duplicated keywords with overlapping matches, jumping finder
560     goBegin(my2_finder); // That's a repositioning
561     clear(my2_finder);      // That's why, clear state
562     clear(finderPos);
563     clear(keywordIndex);
564 
565     unsigned int hits = 0;
566     while (find(my2_finder, my2_pattern)) {
567         if (hits < 2) break;
568         //std::cout << position(my2_finder) << ":" << position(my2_pattern) << std::endl;
569         append(finderPos,position(my2_finder));
570         append(keywordIndex,position(my2_pattern));
571         ++hits;
572     }
573     goBegin(my2_finder);
574     my2_finder+=5;
575     clear(my2_finder);
576     clear(finderPos);
577     clear(keywordIndex);
578     while (find(my2_finder, my2_pattern)) {
579         //std::cout << position(my2_finder) << ":" << position(my2_pattern) << std::endl;
580         append(finderPos,position(my2_finder));
581         append(keywordIndex,position(my2_pattern));
582     }
583 
584     if (order_by_begin_position) {
585         SEQAN_ASSERT_EQ(finderPos[0], 5u);
586         SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
587         SEQAN_ASSERT_EQ(finderPos[1], 5u);
588         SEQAN_ASSERT_EQ(keywordIndex[1], 1u);
589         SEQAN_ASSERT_EQ(finderPos[2], 6u);
590         SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
591     } else {
592         SEQAN_ASSERT_EQ(finderPos[0], 5u);
593         SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
594         SEQAN_ASSERT_EQ(finderPos[1], 6u);
595         SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
596         SEQAN_ASSERT_EQ(finderPos[2], 5u);
597         SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
598     }
599 }
600 
601 
602 template <typename TAlgorithmSpec>
Test_OnlineAlgWildcards()603 void Test_OnlineAlgWildcards() {
604 	typedef typename Position<CharString>::Type TPosition;
605 
606 	String<TPosition> pos;
607 
608     //____________________________________________________________________________
609     // Test1 - simple find wo wildcards
610     String<char> haystack("Dies ist ein Haystack. Ja, das ist wirklich einer!");
611     Finder<String<char> > finder(haystack);
612 
613     String<char> needle("ist");
614     Pattern<String<char>, TAlgorithmSpec> pattern(needle);
615     clear(pos);
616     while (find(finder, pattern))
617         appendValue(pos, position(finder));
618 
619     SEQAN_ASSERT_EQ(pos[0], 7u);
620     SEQAN_ASSERT_EQ(pos[1], 33u);
621     SEQAN_ASSERT_EQ(host(pattern), needle);
622     SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<String<char>, TAlgorithmSpec> const &>(pattern)), needle);
623 
624     //____________________________________________________________________________
625     // Test - validation of patterns
626     needle = "ist";
627     setHost(pattern, needle);
628     SEQAN_ASSERT_EQ(valid(pattern), true);
629     SEQAN_ASSERT_EQ(valid(reinterpret_cast<Pattern<String<char>, TAlgorithmSpec> const &>(pattern)), true);
630 
631     needle = "i[a-z]s{3,4}t?a*a+c..\\a";
632     setHost(pattern, needle);
633     SEQAN_ASSERT_EQ(valid(pattern), true);
634 
635     needle = "i[st";
636     setHost(pattern, needle);
637     SEQAN_ASSERT_EQ(valid(pattern), false);
638 
639     needle = "ist\\";
640     setHost(pattern, needle);
641     SEQAN_ASSERT_EQ(valid(pattern), false);
642 
643     needle = "ist?*";
644     setHost(pattern, needle);
645     SEQAN_ASSERT_EQ(valid(pattern), false);
646 
647     needle = "ist{5,4}";
648     setHost(pattern, needle);
649     SEQAN_ASSERT_EQ(valid(pattern), false);
650 
651     needle = "ist{5,a}";
652     setHost(pattern, needle);
653     SEQAN_ASSERT_EQ(valid(pattern), false);
654 
655     needle = "ist{5,";
656     setHost(pattern, needle);
657     SEQAN_ASSERT_EQ(valid(pattern), false);
658 
659     //____________________________________________________________________________
660     // Test - searching with invalid needles
661     haystack = "Dies i[st ein Haystack. Ja, das i[st wirklich einer!";
662     setHost(finder, haystack);
663     clear(finder);
664 
665     needle = "i[st";
666     setHost(pattern, needle);
667 
668     clear(pos);
669     while (find(finder, pattern))
670         append(pos,position(finder));
671 
672     SEQAN_ASSERT_EQ(length(pos), 0u);
673 
674     //____________________________________________________________________________
675     // Test - handle needles with wildcards
676     // to produce two \ in the pattern you need to escape both of them
677     needle = "aa+c*[a-z]xx?aa\\\\";
678     SEQAN_ASSERT_EQ(_lengthWithoutWildcards(needle), 9u);
679 
680     //____________________________________________________________________________
681     // Test - optional characters (?)
682     haystack = "abc__ac";
683     setHost(finder, haystack);
684     clear(finder);
685 
686     needle = "ab?c";
687     setHost(pattern, needle);
688 
689     clear(pos);
690     while (find(finder, pattern))
691         append(pos,position(finder));
692 
693     SEQAN_ASSERT_EQ(pos[0], 2u);
694     SEQAN_ASSERT_EQ(pos[1], 6u);
695     SEQAN_ASSERT_EQ(length(pos), 2u);
696 
697     //____________________________________________________________________________
698     // Test - repeatable characters (+)
699     haystack = "abc__abbbbbc_ac";
700     setHost(finder, haystack);
701     clear(finder);
702 
703     needle = "ab+c";
704     setHost(pattern, needle);
705 
706     clear(pos);
707     while (find(finder, pattern))
708         append(pos,position(finder));
709 
710     SEQAN_ASSERT_EQ(pos[0], 2u);
711     SEQAN_ASSERT_EQ(pos[1], 11u);
712     SEQAN_ASSERT_EQ(length(pos), 2u);
713 
714     //____________________________________________________________________________
715     // Test - repeatable characters (*)
716     haystack = "abc__abbbbbc_ac";
717     setHost(finder, haystack);
718     clear(finder);
719 
720     needle = "ab*c";
721     setHost(pattern, needle);
722 
723     clear(pos);
724     while (find(finder, pattern))
725         append(pos,position(finder));
726 
727     SEQAN_ASSERT_EQ(pos[0], 2u);
728     SEQAN_ASSERT_EQ(pos[1], 11u);
729     SEQAN_ASSERT_EQ(pos[2], 14u);
730 
731     SEQAN_ASSERT_EQ(length(pos), 3u);
732 
733     //____________________________________________________________________________
734     // Test - wildcard matching
735     haystack = "acccdfabdeeef";
736     setHost(finder, haystack);
737     clear(finder);
738 
739     needle = "ab?c*de+f";
740     setHost(pattern, needle);
741 
742     clear(pos);
743     while (find(finder, pattern))
744         append(pos,position(finder));
745 
746     SEQAN_ASSERT_EQ(pos[0], 12u);
747     SEQAN_ASSERT_EQ(length(pos), 1u);
748 
749     //____________________________________________________________________________
750     // Test - wildcard matching (hard case)
751     haystack = "aacccdfacccdeeef";
752     setHost(finder, haystack);
753     clear(finder);
754 
755     needle = "a*c*de+f";
756     setHost(pattern, needle);
757 
758     clear(pos);
759     while (find(finder, pattern))
760         append(pos,position(finder));
761 
762     SEQAN_ASSERT_EQ(pos[0], 15u);
763     SEQAN_ASSERT_EQ(length(pos), 1u);
764 
765 
766     //____________________________________________________________________________
767     // Test - character classes matching
768     haystack = "annual_Annual_znnual_Znnual";
769     setHost(finder, haystack);
770     clear(finder);
771 
772     needle = "[a-zA]nnual";
773     setHost(pattern, needle);
774 
775     clear(pos);
776     while (find(finder, pattern))
777         append(pos,position(finder));
778 
779     SEQAN_ASSERT_EQ(pos[0], 5u);
780     SEQAN_ASSERT_EQ(pos[1], 12u);
781     SEQAN_ASSERT_EQ(pos[2], 19u);
782     SEQAN_ASSERT_EQ(length(pos), 3u);
783 
784     //____________________________________________________________________________
785     // Test - long needles
786     haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgaabcdef";
787     setHost(finder, haystack);
788     clear(finder);
789 
790     needle = "abcdefghijklmnopqrstuvwxyzabcdefg";
791     setHost(pattern, needle);
792 
793     clear(pos);
794     while (find(finder, pattern))
795         append(pos,position(finder));
796 
797     SEQAN_ASSERT_EQ(pos[0], 32u);
798     SEQAN_ASSERT_EQ(pos[1], 58u);
799     SEQAN_ASSERT_EQ(length(pos), 2u);
800 
801     //____________________________________________________________________________
802     // Test - long needles with character classes
803     //              abcdefghijklmnopqrstuvwxyzabcdefghijkl
804     //                                        abcdefghijklmnopqrstuvwxyzabcdefghijkl
805     haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuzwxyzabcdefghzjklaabcdefhijkl";
806     setHost(finder, haystack);
807     clear(finder);
808 
809     needle = "abcdefghijklmnopqrstu[vz]wxyzabcdefgh[iz]jkl";
810     setHost(pattern, needle);
811 
812     clear(pos);
813     while (find(finder, pattern))
814         append(pos,position(finder));
815 
816     SEQAN_ASSERT_EQ(pos[0], 37u);
817     SEQAN_ASSERT_EQ(pos[1], 63u);
818     SEQAN_ASSERT_EQ(length(pos), 2u);
819 
820 
821     //____________________________________________________________________________
822     // Test - long needles with repeating characters
823     //              abcdefghijklmnopqrstuvwxyzabcdefghijkl
824     //                                                                                                        abcdefghijklmnopqrstuvwxyzabcdefghijkl
825     haystack = "abcdefghijklmnopqrstuvwxyzabcdefghiiiiijkl____aaaaabcdefghijklmnopqrstuvwxyzabcdeghijkl__aaaaabcdefghijklmnopqrstuvwxyzabcdefghjkl";
826     setHost(finder, haystack);
827     clear(finder);
828 
829     needle = "aa*bcdefghijklmnopqrstuvwxyzabcdef?g?hi+jkl";
830     setHost(pattern, needle);
831 
832     clear(pos);
833     while (find(finder, pattern))
834         append(pos,position(finder));
835 
836     SEQAN_ASSERT_EQ(pos[0], 41u);
837     SEQAN_ASSERT_EQ(pos[1], 86u);
838     SEQAN_ASSERT_EQ(length(pos), 2u);
839 
840     //____________________________________________________________________________
841     // Test - handle .
842     haystack = "annual_Annual_znnual";
843     setHost(finder, haystack);
844     clear(finder);
845 
846     needle = ".nnual";
847     setHost(pattern, needle);
848     clear(pos);
849     while (find(finder, pattern)) {
850         append(pos,position(finder));
851     }
852     SEQAN_ASSERT_EQ(pos[0], 5u);
853     SEQAN_ASSERT_EQ(pos[1], 12u);
854     SEQAN_ASSERT_EQ(pos[2], 19u);
855     SEQAN_ASSERT_EQ(length(pos), 3u);
856 
857     //____________________________________________________________________________
858     // Test - handle backslash
859     haystack = "annual_Annual_.nnual";
860     setHost(finder, haystack);
861     clear(finder);
862 
863     needle = "\\.nnual";
864     setHost(pattern, needle);
865     clear(pos);
866     while (find(finder, pattern)){
867         append(pos,position(finder));
868     }
869     SEQAN_ASSERT_EQ(pos[0], 19u);
870     SEQAN_ASSERT_EQ(length(pos), 1u);
871 
872 
873 
874     //____________________________________________________________________________
875     // Test - handle bounded length repeats {n,m}
876     haystack = "aannual_aaaannual_annual";
877     setHost(finder, haystack);
878     clear(finder);
879 
880     needle = "a{2,5}n{2}ual";
881 
882     SEQAN_ASSERT_EQ(_lengthWithoutWildcards(needle), 10u);
883 
884     setHost(pattern, needle);
885     clear(pos);
886     while (find(finder, pattern)) {
887         append(pos,position(finder));
888     }
889     SEQAN_ASSERT_EQ(pos[0], 6u);
890     SEQAN_ASSERT_EQ(pos[1], 16u);
891     SEQAN_ASSERT_EQ(length(pos), 2u);
892 
893 
894     //____________________________________________________________________________
895     // Test - handle different types of Pattern and Needle
896     String<Dna> dna_haystack("AAACCTATGGGTTTAAAACCCTGAAACCCC");
897     Finder<String<Dna> > dna_finder(dna_haystack);
898 
899     String<char> char_needle("a{3}c+t[ag].");
900     Pattern<String<Dna>, TAlgorithmSpec> dna_pattern(char_needle);
901     clear(pos);
902 
903     while (find(dna_finder, dna_pattern))
904         append(pos,position(dna_finder));
905 
906     SEQAN_ASSERT_EQ(pos[0], 7u);
907     SEQAN_ASSERT_EQ(pos[1], 23u);
908     SEQAN_ASSERT_EQ(length(pos), 2u);
909 
910 }
911 
912 
913 template <typename TPatternSpec>
Test_Approx_EditDist()914 void Test_Approx_EditDist() {
915     //test DPSearch
916     String<char> hstk("any_annealing");
917     String<char> nl("annual");
918 
919     Finder<String<char> > fd(hstk);
920 
921     Pattern<String<char>, TPatternSpec> pt(nl, -2);
922 
923     SEQAN_ASSERT(find(fd, pt));
924     SEQAN_ASSERT_EQ(position(fd), 8u);
925     SEQAN_ASSERT_EQ(getScore(pt), -2);
926     SEQAN_ASSERT(find(fd, pt));
927     SEQAN_ASSERT_EQ(position(fd), 9u);
928     SEQAN_ASSERT_EQ(getScore(pt), -1);
929     SEQAN_ASSERT(find(fd, pt));
930     SEQAN_ASSERT_EQ(position(fd), 10u);
931     SEQAN_ASSERT_EQ(getScore(pt), -2);
932 
933     SEQAN_ASSERT_NOT(find(fd,pt));
934 
935     String<char> haystk("Dies ist der Haystack des Tests. Ja, das ist er wirklich!");
936     String<char> ndl("des");
937 
938     Finder<String<char> > fnd(haystk);
939 
940     Pattern<String<char>, TPatternSpec> pat(ndl, -2);
941     SEQAN_ASSERT_EQ(host(pat), ndl);
942     SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<String<char>, TPatternSpec> const &>(pat)), ndl);
943 
944     SEQAN_ASSERT_EQ(scoreLimit(pat), -2);
945     setScoreLimit(pat, -1);
946     SEQAN_ASSERT_EQ(scoreLimit(pat), -1);
947 
948     SEQAN_ASSERT(find(fnd, pat));
949     SEQAN_ASSERT_EQ(position(fnd), 3u);
950     SEQAN_ASSERT_EQ(getScore(pat), -1);
951 
952     SEQAN_ASSERT(find(fnd, pat));
953     SEQAN_ASSERT_EQ(position(fnd), 10u);
954     SEQAN_ASSERT_EQ(getScore(pat), -1);
955 
956     SEQAN_ASSERT(find(fnd, pat));
957     SEQAN_ASSERT_EQ(position(fnd), 11u);
958     SEQAN_ASSERT_EQ(getScore(pat), -1);
959 
960     SEQAN_ASSERT(find(fnd, pat));
961     SEQAN_ASSERT_EQ(position(fnd), 23u);
962     SEQAN_ASSERT_EQ(getScore(pat), -1);
963 
964     SEQAN_ASSERT(find(fnd, pat));
965     SEQAN_ASSERT_EQ(position(fnd), 24u);
966     SEQAN_ASSERT_EQ(getScore(pat), 0);
967 
968     SEQAN_ASSERT(find(fnd, pat));
969     SEQAN_ASSERT_EQ(position(fnd), 25u);
970     SEQAN_ASSERT_EQ(getScore(pat), -1);
971 
972     SEQAN_ASSERT(find(fnd, pat));
973     SEQAN_ASSERT_EQ(position(fnd), 28u);
974     SEQAN_ASSERT_EQ(getScore(pat), -1);
975 
976     SEQAN_ASSERT(find(fnd, pat));
977     SEQAN_ASSERT_EQ(position(fnd), 39u);
978     SEQAN_ASSERT_EQ(getScore(pat), -1);
979 
980     SEQAN_ASSERT_NOT(find(fnd, pat));
981 
982     // Test with long needles and a Dna Alphabet
983     String<Dna> long_haystk("taaaataaaatacaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaat");
984     String<Dna> long_ndl("taaaataaaatacaataaaataaaatataataaaataaaataaaat");
985 
986     Finder<String<Dna> > long_fnd(long_haystk);
987 
988     Pattern<String<Dna>, TPatternSpec> long_pat(long_ndl, -2);
989 
990     SEQAN_ASSERT(find(long_fnd,long_pat));
991     SEQAN_ASSERT_EQ(position(long_fnd), 44u);
992     SEQAN_ASSERT_EQ(getScore(long_pat), -2);
993 
994     SEQAN_ASSERT(find(long_fnd,long_pat));
995     SEQAN_ASSERT_EQ(position(long_fnd), 45u);
996     SEQAN_ASSERT_EQ(getScore(long_pat), -1);
997 
998     SEQAN_ASSERT(find(long_fnd,long_pat));
999     SEQAN_ASSERT_EQ(position(long_fnd), 46u);
1000     SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1001 
1002     SEQAN_ASSERT(find(long_fnd,long_pat));
1003     SEQAN_ASSERT_EQ(position(long_fnd), 60u);
1004     SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1005 
1006     SEQAN_ASSERT(find(long_fnd,long_pat));
1007     SEQAN_ASSERT_EQ(position(long_fnd), 65u);
1008     SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1009 
1010     SEQAN_ASSERT(find(long_fnd,long_pat));
1011     SEQAN_ASSERT_EQ(position(long_fnd), 70u);
1012     SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1013 
1014     SEQAN_ASSERT_NOT(find(long_fnd,long_pat));
1015 
1016     //____________________________________________________________________________
1017 
1018     String<char> haystack_1 = "123XXXabaXXX45aba123";
1019     String<char> needle_1 = "XXXaba";
1020     Finder<String<char> > finder_1(haystack_1);
1021     Pattern<String<char>, TPatternSpec> pattern_1(needle_1, -2);
1022 
1023     SEQAN_ASSERT(find(finder_1, pattern_1));
1024     SEQAN_ASSERT_EQ(endPosition(finder_1), 7u);
1025     SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1026     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1027     SEQAN_ASSERT_EQ(infix(finder_1), "XXXa");
1028     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1029     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1030 
1031     SEQAN_ASSERT(find(finder_1, pattern_1));
1032     SEQAN_ASSERT_EQ(endPosition(finder_1), 8u);
1033     SEQAN_ASSERT_EQ(getScore(pattern_1), -1);
1034     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1035     SEQAN_ASSERT_EQ(infix(finder_1), "XXab");
1036     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1037     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1038     SEQAN_ASSERT_EQ(infix(finder_1), "XXXab");
1039     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1040     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1041     SEQAN_ASSERT_EQ(infix(finder_1), "3XXXab");
1042     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1043     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1044 
1045     SEQAN_ASSERT(find(finder_1, pattern_1));
1046     SEQAN_ASSERT_EQ(endPosition(finder_1), 9u);
1047     SEQAN_ASSERT_EQ(getScore(pattern_1), 0);
1048     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1049     SEQAN_ASSERT_EQ(infix(finder_1), "Xaba");
1050     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1051     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1052     SEQAN_ASSERT_EQ(infix(finder_1), "XXaba");
1053     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1054     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1055     SEQAN_ASSERT_EQ(infix(finder_1), "XXXaba");
1056     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), 0);
1057     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1058     SEQAN_ASSERT_EQ(infix(finder_1), "3XXXaba");
1059     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1060     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1061     SEQAN_ASSERT_EQ(infix(finder_1), "23XXXaba");
1062     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1063     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1064 
1065     SEQAN_ASSERT(find(finder_1, pattern_1));
1066     SEQAN_ASSERT_EQ(endPosition(finder_1), 10u);
1067     SEQAN_ASSERT_EQ(getScore(pattern_1), -1);
1068     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1069     SEQAN_ASSERT_EQ(infix(finder_1), "XXabaX");
1070     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1071     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1072     SEQAN_ASSERT_EQ(infix(finder_1), "XXXabaX");
1073     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1074     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1075     SEQAN_ASSERT_EQ(infix(finder_1), "3XXXabaX");
1076     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1077     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1078 
1079     SEQAN_ASSERT(find(finder_1, pattern_1));
1080     SEQAN_ASSERT_EQ(endPosition(finder_1), 11u);
1081     SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1082     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1083     SEQAN_ASSERT_EQ(infix(finder_1), "XXXabaXX");
1084     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1085     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1086 
1087     SEQAN_ASSERT(find(finder_1, pattern_1));
1088     SEQAN_ASSERT_EQ(endPosition(finder_1), 15u);
1089     SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1090     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1091     SEQAN_ASSERT_EQ(infix(finder_1), "XXX45a");
1092     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1093     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1094 
1095     SEQAN_ASSERT(find(finder_1, pattern_1));
1096     SEQAN_ASSERT_EQ(endPosition(finder_1), 17u);
1097     SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1098     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1099     SEQAN_ASSERT_EQ(infix(finder_1), "X45aba");
1100     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1101     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1102     SEQAN_ASSERT_EQ(infix(finder_1), "XX45aba");
1103     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1104     SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1105     SEQAN_ASSERT_EQ(infix(finder_1), "XXX45aba");
1106     SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1107     SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1108 
1109     SEQAN_ASSERT_NOT(find(finder_1, pattern_1));
1110 }
1111 
1112 
1113 // Test prefix search.
1114 template <typename TPatternSpec>
Test_Approx_Prefix_EditDist()1115 void Test_Approx_Prefix_EditDist() {
1116     String<char> haystack_1 = "mississippi";
1117     String<char> needle_1 = "misssi";
1118     Finder<String<char> > finder_1(haystack_1);
1119     Pattern<String<char>, TPatternSpec> pattern_1(needle_1, -2);
1120     SEQAN_ASSERT(find(finder_1, pattern_1));
1121     SEQAN_ASSERT_EQ(position(finder_1), 3u);
1122     SEQAN_ASSERT_EQ(length(finder_1), 4u);
1123     SEQAN_ASSERT_EQ(beginPosition(finder_1), 0u);
1124     SEQAN_ASSERT_EQ(endPosition(finder_1), 4u);
1125     SEQAN_ASSERT_EQ(infix(finder_1), "miss");
1126     SEQAN_ASSERT(find(finder_1, pattern_1));
1127     SEQAN_ASSERT_EQ(position(finder_1), 4u);
1128     SEQAN_ASSERT_EQ(infix(finder_1), "missi");
1129     SEQAN_ASSERT(find(finder_1, pattern_1));
1130     SEQAN_ASSERT_EQ(position(finder_1), 5u);
1131     SEQAN_ASSERT_EQ(infix(finder_1), "missis");
1132     SEQAN_ASSERT(find(finder_1, pattern_1));
1133     SEQAN_ASSERT_EQ(position(finder_1), 6u);
1134     SEQAN_ASSERT(find(finder_1, pattern_1));
1135     SEQAN_ASSERT_EQ(position(finder_1), 7u);
1136     SEQAN_ASSERT_NOT(find(finder_1, pattern_1));
1137 
1138 
1139     String<char> haystack_2 = "yyyXXaba";
1140     String<char> needle_2 = "yyyaba";
1141     Finder<String<char> > finder_2(haystack_2);
1142     Pattern<String<char>, TPatternSpec> pattern_2(needle_2, -2);
1143     SEQAN_ASSERT(find(finder_2, pattern_2));
1144     SEQAN_ASSERT_EQ(position(finder_2), 5u);
1145     SEQAN_ASSERT_EQ(infix(finder_2), "yyyXXa");
1146     SEQAN_ASSERT(find(finder_2, pattern_2));
1147     SEQAN_ASSERT_EQ(position(finder_2), 7u);
1148     SEQAN_ASSERT_EQ(infix(finder_2), "yyyXXaba");
1149     SEQAN_ASSERT_NOT(find(finder_2, pattern_2));
1150 
1151 
1152     String<char> haystack_3 = "testtexttext";
1153     String<char> needle_3 = "mismatch";
1154     Finder<String<char> > finder_3(haystack_3);
1155     Pattern<String<char>, TPatternSpec> pattern_3(needle_3, -2);
1156     SEQAN_ASSERT_NOT(find(finder_3, pattern_3));
1157 
1158 
1159     String<char> haystack_4 = "testtext";
1160     String<char> needle_4 = "a longer mismatch";
1161     Finder<String<char> > finder_4(haystack_4);
1162     Pattern<String<char>, TPatternSpec> pattern_4(needle_4, -2);
1163     SEQAN_ASSERT_NOT(find(finder_4, pattern_4));
1164 
1165 
1166     String<char> haystack_5 = "exactmatching";
1167     String<char> needle_5 = "exact";
1168     Finder<String<char> > finder_5(haystack_5);
1169     Pattern<String<char>, TPatternSpec> pattern_5(needle_5, 0);
1170     SEQAN_ASSERT(find(finder_5, pattern_5));
1171     SEQAN_ASSERT_EQ(position(finder_5), 4u);
1172     SEQAN_ASSERT_NOT(find(finder_5, pattern_5));
1173 
1174 
1175     String<char> haystack_6 = "this is a text that is a bit longer than one machine word of 32 or 64 bits. AAXYX";
1176     String<char> needle_6 =   "this is a text that is a bit longer than one machine word of 32 or 64 bits. XYX";
1177     Finder<String<char> > finder_6(haystack_6);
1178     Pattern<String<char>, TPatternSpec> pattern_6(needle_6, -2);
1179     SEQAN_ASSERT(find(finder_6, pattern_6));
1180     SEQAN_ASSERT_EQ(infix(finder_6), "this is a text that is a bit longer than one machine word of 32 or 64 bits. AAX");
1181     SEQAN_ASSERT(find(finder_6, pattern_6));
1182     SEQAN_ASSERT_EQ(infix(finder_6), "this is a text that is a bit longer than one machine word of 32 or 64 bits. AAXYX");
1183     SEQAN_ASSERT_NOT(find(finder_6, pattern_6));
1184 
1185     // The very high score limit with short needle all-mismatching the
1186     // pattern.  This is used to mirror the test below that use a long
1187     // needle.
1188     {
1189         const char *kHaystackStr = "CTCT";
1190         const char *kNeedleStr =   "AAAA";
1191         const int kNeedleLen = strlen(kNeedleStr);
1192         const int kScoreLimit = -1000;
1193         // Haystack has a length of 100, needle of 80.
1194         String<char> haystack(kHaystackStr);
1195         String<char> needle(kNeedleStr);
1196         Finder<String<char> > finder(haystack);
1197         Pattern<String<char>, TPatternSpec> pattern(needle, kScoreLimit);
1198 
1199         while (find(finder, pattern)) {
1200             SEQAN_ASSERT_EQ(-kNeedleLen, getScore(pattern));
1201         }
1202     }
1203 
1204     // Test very high score limit with large needles and very high
1205     // maximum scores.
1206     {
1207         const char *kHaystackStr = "CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT";
1208         const char *kNeedleStr =   "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
1209         const int kNeedleLen = strlen(kNeedleStr);
1210         const int kScoreLimit = -1000;
1211         // Haystack has a length of 100, needle of 80.
1212         String<char> haystack(kHaystackStr);
1213         String<char> needle(kNeedleStr);
1214         Finder<String<char> > finder(haystack);
1215         Pattern<String<char>, TPatternSpec> pattern(needle, kScoreLimit);
1216 
1217         while (find(finder, pattern)) {
1218             SEQAN_ASSERT_EQ(-kNeedleLen, getScore(pattern));
1219         }
1220     }
1221 }
1222 
1223 
SEQAN_DEFINE_TEST(test_find_online_Simple)1224 SEQAN_DEFINE_TEST(test_find_online_Simple) {
1225     Test_OnlineAlg<Simple>();
1226 }
1227 
1228 
SEQAN_DEFINE_TEST(test_find_online_Horspool)1229 SEQAN_DEFINE_TEST(test_find_online_Horspool) {
1230     Test_OnlineAlg<Horspool>();
1231 }
1232 
1233 
SEQAN_DEFINE_TEST(test_find_online_ShiftAnd)1234 SEQAN_DEFINE_TEST(test_find_online_ShiftAnd) {
1235     Test_OnlineAlg<ShiftAnd>();
1236 }
1237 
1238 
SEQAN_DEFINE_TEST(test_find_online_ShiftOr)1239 SEQAN_DEFINE_TEST(test_find_online_ShiftOr) {
1240     Test_OnlineAlg<ShiftOr>();
1241 }
1242 
1243 
SEQAN_DEFINE_TEST(test_find_online_BndmAlgo)1244 SEQAN_DEFINE_TEST(test_find_online_BndmAlgo) {
1245     Test_OnlineAlg<BndmAlgo>();
1246 }
1247 
1248 
SEQAN_DEFINE_TEST(test_find_online_BFAM_Oracle)1249 SEQAN_DEFINE_TEST(test_find_online_BFAM_Oracle) {
1250     Test_OnlineAlg<Bfam<Oracle> >();
1251 }
1252 
1253 
SEQAN_DEFINE_TEST(test_find_online_BFAM_Trie)1254 SEQAN_DEFINE_TEST(test_find_online_BFAM_Trie) {
1255     Test_OnlineAlg<Bfam<Trie> >();
1256 }
1257 
1258 
SEQAN_DEFINE_TEST(test_find_online_wildcards)1259 SEQAN_DEFINE_TEST(test_find_online_wildcards) {
1260     Test_OnlineAlgWildcards<WildShiftAnd>();
1261 }
1262 
1263 
SEQAN_DEFINE_TEST(test_find_online_multi_AhoCorasick)1264 SEQAN_DEFINE_TEST(test_find_online_multi_AhoCorasick) {
1265     Test_OnlineAlgMulti<AhoCorasick>(false);
1266 }
1267 
1268 
SEQAN_DEFINE_TEST(test_find_online_multi_MultipleShiftAnd)1269 SEQAN_DEFINE_TEST(test_find_online_multi_MultipleShiftAnd) {
1270     // TODO(holtgrew): Original comment: "leaks".
1271     // TODO(holtgrew): Fails, but was commented out in original code.
1272     // Test_OnlineAlgMulti<MultipleShiftAnd>(false);
1273 }
1274 
1275 
SEQAN_DEFINE_TEST(test_find_online_multi_SetHorspool)1276 SEQAN_DEFINE_TEST(test_find_online_multi_SetHorspool) {
1277     Test_OnlineAlgMulti<SetHorspool>(false);
1278 }
1279 
1280 
SEQAN_DEFINE_TEST(test_find_online_multi_WuManber)1281 SEQAN_DEFINE_TEST(test_find_online_multi_WuManber) {
1282     Test_OnlineAlgMulti<WuManber>(true);
1283 }
1284 
1285 
SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Oracle)1286 SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Oracle) {
1287     Test_OnlineAlgMulti<MultiBfam<Oracle> >(true);
1288 }
1289 
1290 
SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Trie)1291 SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Trie) {
1292     Test_OnlineAlgMulti<MultiBfam<Trie> >(true);
1293 }
1294 
1295 
SEQAN_DEFINE_TEST(test_find_approx_prefix_edit_dist_dpsearch)1296 SEQAN_DEFINE_TEST(test_find_approx_prefix_edit_dist_dpsearch) {
1297     Test_Approx_Prefix_EditDist<DPSearch<Score<>, FindPrefix> >();
1298 }
1299 
1300 
SEQAN_DEFINE_TEST(test_approx_prefix_edit_dist_myers)1301 SEQAN_DEFINE_TEST(test_approx_prefix_edit_dist_myers) {
1302     Test_Approx_Prefix_EditDist<Myers<FindPrefix> >();
1303 }
1304 
1305 
SEQAN_DEFINE_TEST(test_approx_edit_dist_dp_search_simple_score)1306 SEQAN_DEFINE_TEST(test_approx_edit_dist_dp_search_simple_score) {
1307     Test_Approx_EditDist<DPSearch<SimpleScore> >();
1308 }
1309 
1310 
SEQAN_DEFINE_TEST(test_approx_edit_dp_search_simple_score_legacy_case)1311 SEQAN_DEFINE_TEST(test_approx_edit_dp_search_simple_score_legacy_case) {
1312     // TODO(holtgrew): This was written out like this in Test_Approx() in original code.
1313     // Test DPSearch.
1314     Pattern<String<char>, DPSearch<SimpleScore> > pat1;
1315 
1316     SimpleScore sc;
1317     setScoreGap(sc, -10);
1318     setScoringScheme(pat1, sc);
1319     SEQAN_ASSERT_EQ(scoreGap(scoringScheme(pat1)), -10);
1320 
1321     setScoreGap(sc, -1);
1322     SEQAN_ASSERT_EQ(scoreGap(scoringScheme(pat1)), -10);
1323     setScoringScheme(pat1, sc);
1324     SEQAN_ASSERT_EQ(scoreGap(scoringScheme(pat1)), -1);
1325 }
1326 
1327 
SEQAN_DEFINE_TEST(test_approx_edit_dist_myers)1328 SEQAN_DEFINE_TEST(test_approx_edit_dist_myers) {
1329     Test_Approx_EditDist<Myers<> >();
1330 }
1331 
1332 
SEQAN_DEFINE_TEST(test_approx_edit_dist_abndm_algo)1333 SEQAN_DEFINE_TEST(test_approx_edit_dist_abndm_algo) {
1334     Test_Approx_EditDist<AbndmAlgo >();
1335 }
1336 
1337 
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical)1338 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical) {
1339     Test_Approx_EditDist<PexNonHierarchical>();
1340 }
1341 
1342 
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_hierarchical)1343 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_hierarchical) {
1344     Test_Approx_EditDist<PexHierarchical>();
1345 }
1346 
1347 
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_aho_corasick)1348 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_aho_corasick) {
1349     Test_Approx_EditDist< Pex<NonHierarchical,AhoCorasick > >();
1350 }
1351 
1352 
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_multi_bfam)1353 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_multi_bfam) {
1354     Test_Approx_EditDist< Pex<NonHierarchical,MultiBfam<> > >();
1355 }
1356 
1357 
1358 // Test that shows the problem with Myers-Ukkonen approximate search
1359 // when large needles were used and the maximum score limit was set
1360 // higher than needle length.
SEQAN_DEFINE_TEST(test_regression_rmbench)1361 SEQAN_DEFINE_TEST(test_regression_rmbench) {
1362     // The data to test with:  Needle, haystack and score limit.
1363     const char *kCharNeedle = "CCATATGCTTGTGTCGCGGGTTTATTTGCATTCGACCCAGTTGACTCGGAAGTCGAAATGTTCCTGCCCCGTTTCTGCGTTCCGTGCAGTTGCGCGGTCTGGTTGGGCGGGTCCCCCCCTGA";
1364     const char *kCharHaystack = "ATTCCATATGCTTGTGTCGCGGGTTTATTTGCATTCGACCCAGTTGACTCGGAAGTCGAAATGTTCCTGCCCCGTTTCTGCGTTCCGTGCAGTTGCGCGGTCTGGTTGGGCGGGTCCCCCGCCTACGGATGCACGTTCTCCCGGGCTCGTAAATCC";
1365     const int kScoreLimit = -100000;
1366 
1367     // Build actual DNA needle and haystack, the pattern and the
1368     // finder.
1369     DnaString needle(kCharNeedle);
1370     DnaString haystack(kCharHaystack);
1371     Finder<DnaString> finder(haystack);
1372     Pattern<DnaString, MyersUkkonen> pattern(needle);
1373     setScoreLimit(pattern, kScoreLimit);
1374 
1375     // The following invariant should always hold: The pattern score
1376     // should be smaller than the needle size.
1377     while (find(finder, pattern)) {
1378         SEQAN_ASSERT_LEQ(pattern.errors, pattern.needleSize);
1379     }
1380     SEQAN_ASSERT_LEQ(pattern.errors, pattern.needleSize);
1381 }
1382 
1383 
1384 // Tests for the Simple Hamming Finder.
SEQAN_DEFINE_TEST(test_find_hamming_simple)1385 SEQAN_DEFINE_TEST(test_find_hamming_simple) {
1386     // Test that the interface works.
1387     {
1388         DnaString haystack("AC");
1389         DnaString needle("AA");
1390         // Define finder and pattern.
1391         Finder<DnaString> finder(haystack);
1392         Pattern<DnaString, HammingSimple> pattern(needle, -1);
1393 
1394         // Perform a search, run all functions defined on the pattern.
1395         bool res = find(finder, pattern);
1396         SEQAN_ASSERT(res);
1397         SEQAN_ASSERT_EQ(0u, position(finder));
1398         SEQAN_ASSERT_EQ(2u, endPosition(finder));
1399         SEQAN_ASSERT_EQ(-1, score(pattern));
1400         SEQAN_ASSERT_EQ(-1, getScore(pattern));
1401     }
1402 
1403     // Test for distance 0;
1404     {
1405         // TODO(holtgrew): Should be const, but finder does not allow this.
1406         // Define haystack and needle.
1407         DnaString haystack("ACA");
1408         DnaString needle("AA");
1409         // Define finder and pattern.
1410         Finder<DnaString> finder(haystack);
1411         Pattern<DnaString, HammingSimple> pattern(needle, 0);
1412         // Perform the searches;
1413         bool res;
1414 
1415         res = find(finder, pattern);
1416         SEQAN_ASSERT_NOT(res);
1417     }
1418 
1419     // Test for distance -1, -2, -3.  Should yield the same results,
1420     // as tested for below.  The numbers are interesting since they
1421     // are the first > 0, length of pattern, first greater than length
1422     // of pattern.
1423     for (int i = 1; i < 3; ++i) {
1424         // TODO(holtgrew): Should be const, but finder does not allow this.
1425         // Define haystack and needle.
1426         DnaString haystack("ACA");
1427         DnaString needle("AA");
1428         // Define finder and pattern.
1429         Finder<DnaString> finder(haystack);
1430         Pattern<DnaString, HammingSimple> pattern(needle, -i);
1431         // Perform the searches;
1432         bool res;
1433 
1434         res = find(finder, pattern);
1435         SEQAN_ASSERT(res);
1436         SEQAN_ASSERT_EQ(0u, position(finder));
1437         SEQAN_ASSERT_EQ(2u, endPosition(finder));
1438         SEQAN_ASSERT_EQ(-1, score(pattern));
1439 
1440         res = find(finder, pattern);
1441         SEQAN_ASSERT(res);
1442         SEQAN_ASSERT_EQ(1u, position(finder));
1443         SEQAN_ASSERT_EQ(3u, endPosition(finder));
1444         SEQAN_ASSERT_EQ(-1, score(pattern));
1445     }
1446 
1447     // Test setting the score limit.
1448     {
1449         // TODO(holtgrew): Should be const, but finder does not allow this.
1450         // Define haystack and needle.
1451         DnaString haystack("AAC");
1452         DnaString needle("AA");
1453         // Define finder and pattern.
1454         Finder<DnaString> finder(haystack);
1455         Pattern<DnaString, HammingSimple> pattern(needle, 0);
1456         // Perform the searches;
1457         bool res;
1458 
1459         res = find(finder, pattern);
1460         SEQAN_ASSERT(res);
1461         SEQAN_ASSERT_EQ(0u, position(finder));
1462 
1463         setScoreLimit(pattern, -1);
1464 
1465         res = find(finder, pattern);
1466         SEQAN_ASSERT(res);
1467         SEQAN_ASSERT_EQ(1u, position(finder));
1468     }
1469 }
1470 
1471 
1472 // Tests for a regression found in read mapper benchmark.
SEQAN_DEFINE_TEST(test_find_hamming_simple_regression_rmbench)1473 SEQAN_DEFINE_TEST(test_find_hamming_simple_regression_rmbench) {
1474     // TODO(holtgrew): Should be const, but finder does not allow this.
1475     // Define haystack and needle.
1476     DnaString haystack("CCCCCCCCCCCCCCCCCCCCA");
1477     DnaString needle("CCCCCCCCCCCCCCCCCCCC");
1478     // Define finder and pattern.
1479     Finder<DnaString> finder(haystack);
1480     Pattern<DnaString, HammingSimple> pattern(needle);
1481     setScoreLimit(pattern, -1000);
1482     // Perform the searches;
1483     bool res;
1484     res = find(finder, pattern);
1485     SEQAN_ASSERT(res);
1486     SEQAN_ASSERT_EQ(0u, position(finder));
1487     SEQAN_ASSERT_EQ(length(needle), endPosition(finder));
1488     SEQAN_ASSERT_EQ(0, getScore(pattern));
1489 
1490     res = find(finder, pattern);
1491     SEQAN_ASSERT(res);
1492     SEQAN_ASSERT_EQ(1u, position(finder));
1493     SEQAN_ASSERT_EQ(length(needle) + 1, endPosition(finder));
1494     SEQAN_ASSERT_EQ(-1, getScore(pattern));
1495 }
1496 
1497 /*
1498 SEQAN_DEFINE_TEST(test_find_hamming_simple) {
1499     testFindApproximateHamming<HammingSimple, Dna>();
1500     }*/
1501 
SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_at_start)1502 SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_at_start) {
1503     String<char> haystack = "___AAA___AAA";
1504     String<char> needle = "___AAA";
1505     Finder<String<char> > finder(haystack);
1506     Pattern<String<char>, Myers<FindInfix> > pattern(needle, -2);
1507 
1508     // Find match: ___AAA___AAA
1509     //             ___A
1510     bool ret = find(finder, pattern);
1511     SEQAN_ASSERT(ret);
1512     SEQAN_ASSERT_EQ(4u, endPosition(finder));
1513     ret = findBegin(finder, pattern, getScore(pattern));  // TODO(holtgrew): getScore(pattern) is in book but should not be necessary
1514     SEQAN_ASSERT(ret);
1515     SEQAN_ASSERT_EQ(0u, beginPosition(finder));
1516 
1517     // Find match: ___AAA___AAA
1518     //             ___AA
1519     ret = find(finder, pattern);
1520     SEQAN_ASSERT(ret);
1521     SEQAN_ASSERT_EQ(5u, endPosition(finder));
1522     ret = findBegin(finder, pattern, getScore(pattern));  // TODO(holtgrew): getScore(pattern) is in book but should not be necessary
1523     SEQAN_ASSERT(ret);
1524     SEQAN_ASSERT_EQ(0u, beginPosition(finder));
1525 
1526     // Find match: ___AAA___AAA
1527     //             ___AAA
1528     ret = find(finder, pattern);
1529     SEQAN_ASSERT(ret);
1530     SEQAN_ASSERT_EQ(6u, endPosition(finder));
1531     ret = findBegin(finder, pattern, getScore(pattern));  // TODO(holtgrew): getScore(pattern) is in book but should not be necessary
1532     SEQAN_ASSERT(ret);
1533     SEQAN_ASSERT_EQ(0u, beginPosition(finder));
1534 }
1535 
1536 
SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_within)1537 SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_within) {
1538     String<char> haystack = "A___AAA___AAA";
1539     String<char> needle = "___AAA";
1540     Finder<String<char> > finder(haystack);
1541     Pattern<String<char>, Myers<FindInfix> > pattern(needle, -1);
1542 
1543     // Find match: A___AAA___AAA
1544     //              ___AA
1545     bool ret = find(finder, pattern);
1546     SEQAN_ASSERT(ret);
1547     SEQAN_ASSERT_EQ(6u, endPosition(finder));
1548     ret = findBegin(finder, pattern, getScore(pattern));  // getScore(pattern) is in book but should not be necessary
1549     SEQAN_ASSERT(ret);
1550     SEQAN_ASSERT_EQ(1u, beginPosition(finder));
1551 
1552     // Find match: A___AAA___AAA
1553     //              ___AAA
1554     ret = find(finder, pattern);
1555     SEQAN_ASSERT(ret);
1556     SEQAN_ASSERT_EQ(7u, endPosition(finder));
1557     ret = findBegin(finder, pattern, getScore(pattern));  // getScore(pattern) is in book but should not be necessary
1558     SEQAN_ASSERT(ret);
1559     SEQAN_ASSERT_EQ(1u, beginPosition(finder));
1560 }
1561 
1562 
1563 template <typename TString, typename TSegmentOrString>
test_find_on_segments_Helper(TString & haystack,const TSegmentOrString & needle)1564 void test_find_on_segments_Helper(TString &haystack, const TSegmentOrString &needle) {
1565     Finder<TString> finder(haystack);
1566     Pattern<TSegmentOrString, Myers<FindInfix> > pattern(needle);
1567 
1568     bool didFind = false;
1569     while (find(finder, pattern)) {
1570         findBegin(finder, pattern);
1571         didFind = true;
1572     }
1573     SEQAN_ASSERT(didFind);
1574 
1575     // TODO(holtgrew): Some kind of assertion on the results.
1576 }
1577 
1578 
1579 // Test string search code on segments.  At the moment, this only
1580 // tests whether the code compiles and runs through without any
1581 // obvious errors.  No checks are done on the results.
SEQAN_DEFINE_TEST(test_find_on_segments)1582 SEQAN_DEFINE_TEST(test_find_on_segments) {
1583     // TODO(holtgrew): Should be const.
1584     CharString kHaystack = "CGATCGAT";
1585     CharString kNeedle = "GATC";
1586 
1587     test_find_on_segments_Helper<>(kHaystack, kNeedle);
1588 
1589     Segment<CharString, PrefixSegment> myPrefix(prefix(kNeedle, 1));
1590     test_find_on_segments_Helper<>(kHaystack, myPrefix);
1591     test_find_on_segments_Helper<>(kHaystack, prefix(kNeedle, 1));
1592 
1593     Segment<CharString, InfixSegment> myInfix(infix(kNeedle, 1, 2));
1594     test_find_on_segments_Helper<>(kHaystack, myInfix);
1595     test_find_on_segments_Helper<>(kHaystack, infix(kNeedle, 1, 2));
1596 
1597     Segment<CharString, SuffixSegment> mySuffix(suffix(kNeedle, 1));
1598     test_find_on_segments_Helper<>(kHaystack, mySuffix);
1599     test_find_on_segments_Helper<>(kHaystack, suffix(kNeedle, 1));
1600 }
1601 
1602 
SEQAN_DEFINE_TEST(test_myers_trigger_bug)1603 SEQAN_DEFINE_TEST(test_myers_trigger_bug) {
1604     DnaString haystackString = "TCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCCCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTC";
1605     DnaString needleString = "GAGAAGAGAAGAGAAGAGAAGAGAAGAGAAGAGAAGAAGAAG";
1606     reverseComplement(needleString);
1607     typedef Segment<DnaString, InfixSegment> TSegment;
1608     typedef ModifiedString<TSegment, ModReverse> TSegmentRev;
1609 //     std::cout << "haystack = " << haystackString << std::endl;
1610 //     std::cout << "needle = " << needleString << std::endl;
1611 //     std::cout << "haystack length = " << length(haystackString) << std::endl
1612 //               << "needle length = " << length(needleString) << std::endl
1613 //               << "machine word length = " << sizeof(unsigned) * 8 << std::endl;
1614 
1615     for (int maxDistance = 0; maxDistance < 10; ++maxDistance) {
1616         for (unsigned beginPosition = 0; beginPosition < length(haystackString) - length(needleString); ++beginPosition) {
1617 //             std::cout << "max distance = " << maxDistance << ", begin position = " << beginPosition << std::endl;
1618             TSegmentRev haystackSegment(infix(haystackString, beginPosition, length(haystackString)));
1619 //             std::cout << "haystack = " << haystackSegment << std::endl;
1620 //             std::cout << "needle = " << needleString << std::endl;
1621 //             std::cout << "-----------------------" << std::endl;
1622 
1623             String<size_t> positionsMyers;
1624             {
1625                 TSegmentRev segment(infix(haystackString, beginPosition, length(haystackString)));
1626                 Finder<TSegmentRev> finder(segment);
1627                 Pattern<DnaString, MyersUkkonenGlobal > pattern(needleString, -maxDistance);
1628                 while (find(finder, pattern)) {
1629                     appendValue(positionsMyers, endPosition(finder));
1630                 }
1631             }
1632 
1633             String<size_t> positionsDpSearch;
1634             {
1635                 TSegmentRev segment(infix(haystackString, beginPosition, length(haystackString)));
1636                 Finder<TSegmentRev> finder(segment);
1637                 Pattern<DnaString, DPSearch<EditDistanceScore, FindPrefix> > pattern(needleString, -maxDistance);
1638                 while (find(finder, pattern)) {
1639                     appendValue(positionsDpSearch, endPosition(finder));
1640                 }
1641             }
1642 
1643 //             std::cout << "Myers end positions: " << std::endl;
1644 //             for (size_t i = 0; i < length(positionsMyers); ++i)
1645 //                 std::cout << positionsMyers[i] << " ";
1646 //             std::cout << std::endl;
1647 //             std::cout << "DPSearch end positions: " << std::endl;
1648 //             for (size_t i = 0; i < length(positionsDpSearch); ++i)
1649 //                 std::cout << positionsDpSearch[i] << " ";
1650 //             std::cout << std::endl;
1651 
1652             SEQAN_ASSERT_EQ(length(positionsDpSearch), length(positionsMyers));
1653             for (unsigned i = 0; i < length(positionsMyers); ++i)
1654                 SEQAN_ASSERT_EQ_MSG(positionsDpSearch[i], positionsMyers[i], "i = %u", i);
1655         }
1656     }
1657 }
1658 
1659 
1660 // Test myers algorithm: Palindrom vs. non-palindrom.
SEQAN_DEFINE_TEST(test_myers_find_begin)1661 SEQAN_DEFINE_TEST(test_myers_find_begin) {
1662     {
1663         String<char> haystack_1 = "AABBAA";
1664         String<char> needle_1 = "ABBA";
1665         Finder<String<char> > finder_1(haystack_1);
1666         Pattern<String<char>, Myers<FindInfix> > pattern_1(needle_1, 0);
1667         SEQAN_ASSERT(find(finder_1, pattern_1));
1668         SEQAN_ASSERT_EQ(endPosition(finder_1), 5u);
1669         SEQAN_ASSERT_EQ(getScore(pattern_1), 0);
1670         SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1671         SEQAN_ASSERT_EQ(infix(finder_1), "ABBA");
1672         SEQAN_ASSERT_EQ(getBeginScore(pattern_1), 0);
1673         SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1674     }
1675     {
1676         String<char> haystack_1 = "ABCD";
1677         String<char> needle_1 = "BCD";
1678         Finder<String<char> > finder_1(haystack_1);
1679         Pattern<String<char>, Myers<FindInfix> > pattern_1(needle_1, 0);
1680         SEQAN_ASSERT(find(finder_1, pattern_1));
1681         SEQAN_ASSERT_EQ(endPosition(finder_1), 4u);
1682         SEQAN_ASSERT_EQ(getScore(pattern_1), 0);
1683         SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1684         SEQAN_ASSERT_EQ(infix(finder_1), "BCD");
1685         SEQAN_ASSERT_EQ(getBeginScore(pattern_1), 0);
1686         SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1687     }
1688 }
1689 
1690 template <typename TPatternSpec>
test_pattern_copycon()1691 void test_pattern_copycon() {
1692     typedef Pattern<CharString, TPatternSpec> TPattern;
1693     TPattern p1("Some needle");
1694     TPattern p2(p1);
1695     TPattern const p3(p2);
1696     TPattern const p4(p3);
1697 }
1698 
1699 template <typename TPatternSpec>
test_pattern_assign()1700 void test_pattern_assign() {
1701     typedef Pattern<CharString, TPatternSpec> TPattern;
1702     TPattern p1("Some needle");
1703     TPattern p2;
1704     TPattern const p3(p1);
1705     TPattern p4;
1706     p2 = p1;
1707     p4 = p3;
1708 }
1709 
1710 template <typename TPatternSpec>
test_pattern_movecon()1711 void test_pattern_movecon() {
1712     typedef Pattern<CharString, TPatternSpec> TPattern;
1713     TPattern p1("Some needle");
1714     TPattern p2(std::move(p1));
1715     TPattern const p3(std::move(p2));
1716     TPattern const p4(std::move(p3));
1717 }
1718 
1719 template <typename TPatternSpec>
test_pattern_moveassign()1720 void test_pattern_moveassign() {
1721     typedef Pattern<CharString, TPatternSpec> TPattern;
1722     TPattern p1("Some needle");
1723     TPattern p2;
1724     TPattern const p3(p1);
1725     TPattern p4;
1726     p2 = std::move(p1);
1727     p4 = std::move(p3);
1728 }
1729 
1730 template <typename TPatternSpec>
test_pattern_set_host()1731 void test_pattern_set_host()
1732 {
1733     typedef Pattern<DnaString, TPatternSpec> TPattern;
1734 
1735     {  // Set to lvalue reference.
1736         TPattern p;
1737         DnaString ndl = "AGTGGATAGAGAT";
1738         setHost(p, ndl);
1739 
1740         SEQAN_ASSERT(&host(p) == &ndl);
1741     }
1742 
1743     {  // Set to lvalue with different alphabet causing the source to be changed.
1744         DnaString ndl = "AGTGGATAGAGAT";
1745         TPattern p;
1746         setHost(p, ndl);
1747         SEQAN_ASSERT_EQ(&host(p), &ndl);
1748 
1749         CharString ndl2 = "AT CARLS";
1750         setHost(p, ndl2);
1751         SEQAN_ASSERT(&host(p) == &ndl);
1752         SEQAN_ASSERT_EQ(ndl, "ATACAAAA");
1753         SEQAN_ASSERT_EQ(ndl2, "AT CARLS");
1754     }
1755 
1756     {  // Set to const lvalue reference with different alphabet causing the source to be changed.
1757         DnaString const ndl = "AGTGGATAGAGAT";
1758         TPattern p;
1759         setHost(p, ndl);
1760         SEQAN_ASSERT(&host(p) != &ndl);
1761         SEQAN_ASSERT_EQ(host(p), ndl);
1762 
1763         CharString ndl2 = "AT CARLS";
1764         setHost(p, ndl2);
1765         SEQAN_ASSERT(&host(p) != &ndl);
1766         SEQAN_ASSERT_EQ(ndl, "AGTGGATAGAGAT");
1767         SEQAN_ASSERT_EQ(ndl2, "AT CARLS");
1768         SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1769     }
1770 
1771     {  // Set with rvalue reference.
1772         DnaString ndl = "AGTGGATAGAGAT";
1773         TPattern p;
1774         setHost(p, std::move(ndl));
1775         SEQAN_ASSERT(&host(p) != &ndl);
1776         SEQAN_ASSERT_EQ(host(p), "AGTGGATAGAGAT");
1777     }
1778 
1779     {  // Set to rvalue reference after setting holder dependent.
1780         DnaString ndl = "AGTGGATAGAGAT";
1781         TPattern p;
1782         setHost(p, ndl);
1783         SEQAN_ASSERT(&host(p) == &ndl);
1784 
1785         CharString ndl2 = "AT CARLS";
1786         setHost(p, std::move(ndl2));
1787         SEQAN_ASSERT(&host(p) == &ndl);
1788         SEQAN_ASSERT_EQ(ndl, "ATACAAAA");
1789         SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1790     }
1791 
1792     {  // Set to rvalue reference before setting holder dependent.
1793         DnaString ndl = "AGTGGATAGAGAT";
1794         TPattern p;
1795         setHost(p, std::move(ndl));
1796         SEQAN_ASSERT(&host(p) != &ndl);
1797         SEQAN_ASSERT_EQ(host(p), "AGTGGATAGAGAT");
1798 
1799         CharString ndl2 = "AT CARLS";
1800         setHost(p, ndl2);
1801         SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1802     }
1803 
1804     {  // Set to rvalue with different alphabet.
1805         TPattern p;
1806         setHost(p, "AT CARLS");
1807         SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1808     }
1809 }
1810 
SEQAN_DEFINE_TEST(test_pattern_copycon)1811 SEQAN_DEFINE_TEST(test_pattern_copycon) {
1812     // Test whether the needle is preserved in copying a pattern.
1813     // See http://trac.mi.fu-berlin.de/seqan/ticket/318
1814     test_pattern_copycon<Simple>();
1815     test_pattern_copycon<Horspool>();
1816     test_pattern_copycon<ShiftAnd>();
1817     test_pattern_copycon<ShiftOr>();
1818     test_pattern_copycon<HammingSimple>();
1819     test_pattern_copycon<WildShiftAnd>();
1820     test_pattern_copycon<Bfam<Oracle> >();
1821     test_pattern_copycon<Bfam<Trie> >();
1822 }
1823 
SEQAN_DEFINE_TEST(test_pattern_assign)1824 SEQAN_DEFINE_TEST(test_pattern_assign) {
1825     // Test whether the needle is preserved in assigning a pattern.
1826     // See http://trac.mi.fu-berlin.de/seqan/ticket/318
1827     test_pattern_assign<Simple>();
1828     test_pattern_assign<Horspool>();
1829     test_pattern_assign<ShiftAnd>();
1830     test_pattern_assign<ShiftOr>();
1831     test_pattern_assign<HammingSimple>();
1832     test_pattern_assign<WildShiftAnd>();
1833     test_pattern_assign<Bfam<Oracle> >();
1834     test_pattern_assign<Bfam<Trie> >();
1835 }
1836 
SEQAN_DEFINE_TEST(test_pattern_movecon)1837 SEQAN_DEFINE_TEST(test_pattern_movecon) {
1838     test_pattern_movecon<Simple>();
1839     test_pattern_movecon<Horspool>();
1840     test_pattern_movecon<ShiftAnd>();
1841     test_pattern_movecon<ShiftOr>();
1842     test_pattern_copycon<HammingSimple>();
1843     test_pattern_copycon<WildShiftAnd>();
1844     test_pattern_copycon<Bfam<Oracle> >();
1845     test_pattern_copycon<Bfam<Trie> >();
1846 }
1847 
SEQAN_DEFINE_TEST(test_pattern_moveassign)1848 SEQAN_DEFINE_TEST(test_pattern_moveassign) {
1849     test_pattern_moveassign<Simple>();
1850     test_pattern_moveassign<Horspool>();
1851     test_pattern_moveassign<ShiftAnd>();
1852     test_pattern_moveassign<ShiftOr>();
1853     test_pattern_assign<HammingSimple>();
1854     test_pattern_assign<WildShiftAnd>();
1855     test_pattern_assign<Bfam<Oracle> >();
1856     test_pattern_assign<Bfam<Trie> >();
1857 }
1858 
1859 // TODO(rrahn): Should be a typed test for all pattern classes.
SEQAN_DEFINE_TEST(test_pattern_set_host)1860 SEQAN_DEFINE_TEST(test_pattern_set_host) {
1861     test_pattern_set_host<Simple>();
1862     test_pattern_set_host<Horspool>();
1863     test_pattern_set_host<ShiftAnd>();
1864     test_pattern_set_host<ShiftOr>();
1865     test_pattern_set_host<HammingSimple>();
1866     test_pattern_set_host<WildShiftAnd>();
1867     test_pattern_set_host<Bfam<Oracle> >();
1868     test_pattern_set_host<Bfam<Trie> >();
1869     test_pattern_set_host<Myers<AlignTextBanded<FindInfix, NMatchesN_, NMatchesN_>, void> >();
1870     test_pattern_set_host<Myers<AlignTextBanded<FindInfix, NMatchesN_, NMatchesN_>, Myers<FindPrefix> > >();
1871     test_pattern_set_host<Myers<AlignTextBanded<FindPrefix, NMatchesN_, NMatchesN_>, void> >();
1872     test_pattern_set_host<Myers<AlignTextBanded<FindPrefix, NMatchesN_, NMatchesN_>, Myers<FindPrefix> > >();
1873     test_pattern_set_host<Myers<FindInfix, void> >();
1874     test_pattern_set_host<Myers<FindInfix, Myers<FindPrefix> > >();
1875     test_pattern_set_host<Myers<FindPrefix, void> >();
1876     test_pattern_set_host<Myers<FindPrefix, Myers<FindPrefix> > >();
1877 }
1878 
SEQAN_BEGIN_TESTSUITE(test_find)1879 SEQAN_BEGIN_TESTSUITE(test_find) {
1880 //     SEQAN_CALL_TEST(test_myers_trigger_bug);
1881     SEQAN_CALL_TEST(test_myers_find_begin);
1882     SEQAN_CALL_TEST(test_myers_find_banded);
1883     SEQAN_CALL_TEST(test_myers_find_banded_csp);
1884 
1885     // Testing Myers<FindInfix> with findBegin().
1886     SEQAN_CALL_TEST(test_myers_find_infix_find_begin_at_start);
1887     SEQAN_CALL_TEST(test_myers_find_infix_find_begin_within);
1888 
1889     SEQAN_CALL_TEST(test_find_on_segments);
1890 
1891     SEQAN_CALL_TEST(test_find_hamming_simple);
1892 
1893     SEQAN_CALL_TEST(test_find_hamming_simple_regression_rmbench);
1894 
1895     // Testing MyersUkkonen with large needle and manual score limit.
1896     SEQAN_CALL_TEST(test_regression_rmbench);
1897 
1898     // Call all tests.
1899     SEQAN_CALL_TEST(test_find_online_Simple);
1900     SEQAN_CALL_TEST(test_find_online_Horspool);
1901     SEQAN_CALL_TEST(test_find_online_ShiftAnd);
1902     SEQAN_CALL_TEST(test_find_online_ShiftOr);
1903     SEQAN_CALL_TEST(test_find_online_BndmAlgo);
1904     SEQAN_CALL_TEST(test_find_online_BFAM_Oracle);
1905     SEQAN_CALL_TEST(test_find_online_BFAM_Trie);
1906     SEQAN_CALL_TEST(test_find_online_wildcards);
1907     SEQAN_CALL_TEST(test_find_online_multi_AhoCorasick);
1908     SEQAN_CALL_TEST(test_find_online_multi_MultipleShiftAnd);
1909     SEQAN_CALL_TEST(test_find_online_multi_SetHorspool);
1910     SEQAN_CALL_TEST(test_find_online_multi_WuManber);
1911     SEQAN_CALL_TEST(test_find_online_multi_MultiBFAM_Oracle);
1912     SEQAN_CALL_TEST(test_find_online_multi_MultiBFAM_Trie);
1913     SEQAN_CALL_TEST(test_find_approx_prefix_edit_dist_dpsearch);
1914     SEQAN_CALL_TEST(test_approx_prefix_edit_dist_myers);
1915     SEQAN_CALL_TEST(test_approx_edit_dist_dp_search_simple_score);
1916     SEQAN_CALL_TEST(test_approx_edit_dist_myers);
1917     // Test DP Serach.
1918     SEQAN_CALL_TEST(test_approx_edit_dp_search_simple_score_legacy_case);
1919     // Test other approximate search algorithms.
1920     SEQAN_CALL_TEST(test_approx_edit_dist_abndm_algo);
1921     SEQAN_CALL_TEST(test_approx_edit_dist_pex_non_hierarchical);
1922     SEQAN_CALL_TEST(test_approx_edit_dist_pex_hierarchical);
1923     // Tests with different multifinder.
1924     SEQAN_CALL_TEST(test_approx_edit_dist_pex_non_hierarchical_aho_corasick);
1925     SEQAN_CALL_TEST(test_approx_edit_dist_pex_non_hierarchical_multi_bfam);
1926     // Test for hamming distance approximate matching.
1927     SEQAN_CALL_TEST(test_find_hamming_simple);
1928 
1929     SEQAN_CALL_TEST(test_pattern_copycon);
1930     SEQAN_CALL_TEST(test_pattern_assign);
1931 
1932     SEQAN_CALL_TEST(test_pattern_movecon);
1933     SEQAN_CALL_TEST(test_pattern_moveassign);
1934     SEQAN_CALL_TEST(test_pattern_set_host);
1935 }
1936 SEQAN_END_TESTSUITE
1937