1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Andreas Gogol-Doering <andreas.doering@mdc-berlin.de>
33 // ==========================================================================
34 // Tests for the SeqAn module find.
35 // ==========================================================================
36
37 #include <iostream>
38 #include <fstream>
39 #include <typeinfo>
40 #include <time.h>
41 #include <cstring> // size_t
42 #include <cstdio>
43 #include <vector>
44 #include <time.h>
45
46 #define SEQAN_DEBUG
47 //#define SEQAN_TEST
48
49 #define SEQAN_DEBUG_MYERSBITVECTOR
50 //#define SEQAN_DEBUG_MYERSBITVECTOR_DUMP
51 //#define SEQAN_TEST_MYERS_STRICTBANDED
52
53 #include <seqan/basic.h>
54 #include <seqan/find.h>
55
56 #include "test_find_hamming.h"
57 #include "test_find_myers_banded.h"
58
59 using namespace std;
60 using namespace seqan;
61
62
63 template <typename TAlgorithmSpec>
Test_OnlineAlg()64 void Test_OnlineAlg() {
65 typedef typename Position<CharString>::Type TPosition;
66
67 String<TPosition> pos;
68
69 //____________________________________________________________________________
70 // Test1 - small needle
71
72 String<char> haystack("Dies ist ein Haystack. Ja, das ist wirklich einer!");
73 Finder<String<char> > finder(haystack);
74
75 String<char> needle("ist");
76 Pattern<String<char>, TAlgorithmSpec> pattern(needle);
77
78 while (find(finder, pattern)) {
79 appendValue(pos,position(finder));
80 SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
81 SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
82 SEQAN_ASSERT_EQ(length(finder), length(needle));
83 SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
84 SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
85 SEQAN_ASSERT_EQ(infix(finder), needle);
86 }
87
88 SEQAN_ASSERT_EQ(host(pattern), needle);
89 SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<String<char>, TAlgorithmSpec> const &>(pattern)), needle);
90 SEQAN_ASSERT_EQ(pos[0], 5u);
91 SEQAN_ASSERT_EQ(pos[1], 31u);
92 SEQAN_ASSERT_EQ(length(pos), 2u);
93
94 //____________________________________________________________________________
95 // Test2 - large needle
96
97 haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgaabcdef";
98 setHost(finder, haystack);
99 clear(finder);
100
101 needle = "abcdefghijklmnopqrstuvwxyzabcdefg";
102 setHost(pattern, needle);
103
104 clear(pos);
105 while (find(finder, pattern)) {
106 append(pos,position(finder));
107 SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
108 SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
109 SEQAN_ASSERT_EQ(length(finder), length(needle));
110 SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
111 SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
112 SEQAN_ASSERT_EQ(infix(finder), needle);
113 }
114
115 SEQAN_ASSERT_EQ(pos[0], 0u);
116 SEQAN_ASSERT_EQ(pos[1], 26u);
117 SEQAN_ASSERT_EQ(length(pos), 2u);
118
119 //____________________________________________________________________________
120 // Test3 - different alphabet, small needle
121
122 String<Dna> hstk = "aaaaaaacaa";
123 Finder<String<Dna> > finderDna(hstk);
124
125 String<Dna> ndl = "aa";
126 setHost(pattern, ndl);
127
128 clear(pos);
129 while (find(finderDna, pattern)) {
130 append(pos,position(finderDna));
131 SEQAN_ASSERT_EQ(position(finderDna), beginPosition(finderDna));
132 SEQAN_ASSERT_EQ(endPosition(finderDna), beginPosition(finderDna) + length(finderDna));
133 SEQAN_ASSERT_EQ(length(finderDna), length(ndl));
134 SEQAN_ASSERT_EQ(begin(finderDna), begin(hstk) + beginPosition(finderDna));
135 SEQAN_ASSERT_EQ(end(finderDna), begin(hstk) + endPosition(finderDna));
136 SEQAN_ASSERT_EQ(infix(finderDna), ndl);
137 }
138
139 SEQAN_ASSERT_EQ(pos[0], 0u);
140 SEQAN_ASSERT_EQ(pos[1], 1u);
141 SEQAN_ASSERT_EQ(pos[2], 2u);
142 SEQAN_ASSERT_EQ(pos[3], 3u);
143 SEQAN_ASSERT_EQ(pos[4], 4u);
144 SEQAN_ASSERT_EQ(pos[5], 5u);
145 SEQAN_ASSERT_EQ(pos[6], 8u);
146 SEQAN_ASSERT_EQ(length(pos), 7u);
147
148 //____________________________________________________________________________
149 // Test3b - different alphabet, small needle, jumping finder
150
151 goBegin(finderDna); // That's a repositioning
152 clear(finderDna); // That's why, clear state
153 clear(pos);
154
155 bool firstHit = true;
156 while (find(finderDna, pattern)) {
157 if (firstHit) {
158 firstHit = false;
159 finderDna += 2;
160 clear(finderDna); // clear the state of the finder
161 } else {
162 //unsigned int p = position(finderDna);
163 append(pos,position(finderDna));
164 }
165 }
166
167 SEQAN_ASSERT_EQ(pos[0], 2u);
168 SEQAN_ASSERT_EQ(pos[1], 3u);
169 SEQAN_ASSERT_EQ(pos[2], 4u);
170 SEQAN_ASSERT_EQ(pos[3], 5u);
171 SEQAN_ASSERT_EQ(pos[4], 8u);
172 SEQAN_ASSERT_EQ(length(pos), 5u);
173
174 //____________________________________________________________________________
175 // Test4 - different alphabet, large needle
176 String<Dna> text = "taaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaat";
177 Finder<String<Dna> > finderText(text);
178
179 String<Dna> query = "taaaataaaataaaataaaataaaataaaataaaataaaataaaat";
180 setHost(pattern, query);
181
182 clear(pos);
183 while (find(finderText, pattern)) {
184 append(pos,position(finderText));
185 SEQAN_ASSERT_EQ(position(finderText), beginPosition(finderText));
186 SEQAN_ASSERT_EQ(endPosition(finderText), beginPosition(finderText) + length(finderText));
187 SEQAN_ASSERT_EQ(length(finderText), length(query));
188 SEQAN_ASSERT_EQ(begin(finderText), begin(text) + beginPosition(finderText));
189 SEQAN_ASSERT_EQ(end(finderText), begin(text) + endPosition(finderText));
190 SEQAN_ASSERT_EQ(infix(finderText), query);
191 }
192
193 SEQAN_ASSERT_EQ(pos[0], 0u);
194 SEQAN_ASSERT_EQ(pos[1], 5u);
195 SEQAN_ASSERT_EQ(pos[2], 10u);
196 SEQAN_ASSERT_EQ(pos[3], 15u);
197 SEQAN_ASSERT_EQ(pos[4], 20u);
198 SEQAN_ASSERT_EQ(pos[5], 25u);
199 SEQAN_ASSERT_EQ(length(pos), 6u);
200 }
201
202
203 template <typename TAlgorithmSpec>
Test_OnlineAlgMulti(bool order_by_begin_position)204 void Test_OnlineAlgMulti(bool order_by_begin_position) {
205 typedef typename Position<CharString>::Type TPosition;
206
207 String<TPosition> pos;
208
209 //____________________________________________________________________________
210 // Test1 - Single keyword
211 String<char> haystack("Dies ist ein Haystack. Ja, das ist wirklich einer!");
212 Finder<String<char> > finder(haystack);
213
214 typedef String<String<char> > TNeedle;
215 TNeedle keywords;
216 appendValue(keywords, String<char>("ist"));
217 Pattern<TNeedle, TAlgorithmSpec> pattern(keywords);
218
219 while (find(finder, pattern)) {
220 append(pos,position(finder));
221 SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
222 SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
223 SEQAN_ASSERT_EQ(length(finder), length(keywords[position(pattern)]));
224 SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
225 SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
226 SEQAN_ASSERT_EQ(infix(finder), keywords[position(pattern)]);
227 }
228
229 SEQAN_ASSERT_EQ(host(pattern), keywords);
230 SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<TNeedle, TAlgorithmSpec> const &>(pattern)), keywords);
231 SEQAN_ASSERT_EQ(pos[0], 5u);
232 SEQAN_ASSERT_EQ(pos[1], 31u);
233 SEQAN_ASSERT_EQ(length(pos), 2u);
234
235 haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgaabcdef";
236 setHost(finder, haystack);
237 clear(finder);
238
239 clear(keywords);
240 appendValue(keywords, String<char>("abcdefghijklmnopqrstuvwxyzabcdefg"));
241 setHost(pattern, keywords);
242 clear(pos);
243
244 while (find(finder, pattern)) {
245 append(pos,position(finder));
246 SEQAN_ASSERT_EQ(position(finder), beginPosition(finder));
247 SEQAN_ASSERT_EQ(endPosition(finder), beginPosition(finder) + length(finder));
248 SEQAN_ASSERT_EQ(length(finder), length(keywords[position(pattern)]));
249 SEQAN_ASSERT_EQ(begin(finder), begin(haystack) + beginPosition(finder));
250 SEQAN_ASSERT_EQ(end(finder), begin(haystack) + endPosition(finder));
251 SEQAN_ASSERT_EQ(infix(finder), keywords[position(pattern)]);
252 }
253
254 SEQAN_ASSERT_EQ(pos[0], 0u);
255 SEQAN_ASSERT_EQ(pos[1], 26u);
256 SEQAN_ASSERT_EQ(length(pos), 2u);
257
258
259 String<Dna> hstk = "aaaaaaacaa";
260 Finder<String<Dna> > finderDna(hstk);
261
262 typedef String<String<Dna> > TDnaNeedle;
263 Pattern<TDnaNeedle, TAlgorithmSpec> pattern_dna(keywords);
264
265 TDnaNeedle dna_keywords;
266 appendValue(dna_keywords, String<Dna>("aa"));
267 setHost(pattern_dna, dna_keywords);
268
269 clear(pos);
270 while (find(finderDna, pattern_dna)) {
271 append(pos,position(finderDna));
272 SEQAN_ASSERT_EQ(position(finderDna), beginPosition(finderDna));
273 SEQAN_ASSERT_EQ(endPosition(finderDna), beginPosition(finderDna) + length(finderDna));
274 SEQAN_ASSERT_EQ(length(finderDna), length(dna_keywords[position(pattern_dna)]));
275 SEQAN_ASSERT_EQ(begin(finderDna), begin(hstk) + beginPosition(finderDna));
276 SEQAN_ASSERT_EQ(end(finderDna), begin(hstk) + endPosition(finderDna));
277 SEQAN_ASSERT_EQ(infix(finderDna), dna_keywords[position(pattern_dna)]);
278 }
279
280 SEQAN_ASSERT_EQ(pos[0], 0u);
281 SEQAN_ASSERT_EQ(pos[1], 1u);
282 SEQAN_ASSERT_EQ(pos[2], 2u);
283 SEQAN_ASSERT_EQ(pos[3], 3u);
284 SEQAN_ASSERT_EQ(pos[4], 4u);
285 SEQAN_ASSERT_EQ(pos[5], 5u);
286 SEQAN_ASSERT_EQ(pos[6], 8u);
287 SEQAN_ASSERT_EQ(length(pos), 7u);
288
289 String<Dna> text = "taaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaat";
290 Finder<String<Dna> > finderText(text);
291
292 clear(dna_keywords);
293 appendValue(dna_keywords, String<Dna>("taaaataaaataaaataaaataaaataaaataaaataaaataaaat"));
294 setHost(pattern_dna, dna_keywords);
295
296 clear(pos);
297 while (find(finderText, pattern_dna)) {
298 append(pos,position(finderText));
299 SEQAN_ASSERT_EQ(position(finderText), beginPosition(finderText));
300 SEQAN_ASSERT_EQ(endPosition(finderText), beginPosition(finderText) + length(finderText));
301 SEQAN_ASSERT_EQ(length(finderText), length(dna_keywords[position(pattern_dna)]));
302 SEQAN_ASSERT_EQ(begin(finderText), begin(text) + beginPosition(finderText));
303 SEQAN_ASSERT_EQ(end(finderText), begin(text) + endPosition(finderText));
304 SEQAN_ASSERT_EQ(infix(finderText), dna_keywords[position(pattern_dna)]);
305 }
306
307 SEQAN_ASSERT_EQ(pos[0], 0u);
308 SEQAN_ASSERT_EQ(pos[1], 5u);
309 SEQAN_ASSERT_EQ(pos[2], 10u);
310 SEQAN_ASSERT_EQ(pos[3], 15u);
311 SEQAN_ASSERT_EQ(pos[4], 20u);
312 SEQAN_ASSERT_EQ(pos[5], 25u);
313 SEQAN_ASSERT_EQ(length(pos), 6u);
314
315 //____________________________________________________________________________
316 // Test2 - Multiple keywords
317 String<char> hst("annual_announce_any_annually");
318 Finder<String<char> > fd(hst);
319
320 typedef String<String<char> > TN;
321 TN kyw;
322 appendValue(kyw, String<char>("announce"));
323 appendValue(kyw, String<char>("annual"));
324 appendValue(kyw, String<char>("annually"));
325 Pattern<TN, TAlgorithmSpec> pt(kyw);
326
327 String<TPosition> finderPos;
328 String<TPosition> keywordIndex;
329 while (find(fd, pt)) {
330 append(finderPos,position(fd));
331 append(keywordIndex,position(pt));
332 SEQAN_ASSERT_EQ(position(fd), beginPosition(fd));
333 SEQAN_ASSERT_EQ(endPosition(fd), beginPosition(fd) + length(fd));
334 SEQAN_ASSERT_EQ(length(fd), length(kyw[position(pt)]));
335 SEQAN_ASSERT_EQ(begin(fd), begin(hst) + beginPosition(fd));
336 SEQAN_ASSERT_EQ(end(fd), begin(hst) + endPosition(fd));
337 SEQAN_ASSERT_EQ(infix(fd), kyw[position(pt)]);
338 }
339
340 SEQAN_ASSERT_EQ(length(finderPos), 4u);
341 SEQAN_ASSERT_EQ(length(keywordIndex), 4u);
342 SEQAN_ASSERT_EQ(finderPos[0], 0u);
343 SEQAN_ASSERT_EQ(keywordIndex[0], 1u);
344 SEQAN_ASSERT_EQ(finderPos[1], 7u);
345 SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
346 SEQAN_ASSERT_EQ(finderPos[2], 20u);
347 SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
348 SEQAN_ASSERT_EQ(finderPos[3], 20u);
349 SEQAN_ASSERT_EQ(keywordIndex[3], 2u);
350
351 String<Dna> hstDna("AGATACGATATATAC");
352 Finder<String<Dna> > fdDna(hstDna);
353
354 typedef String<String<Dna> > TNDna;
355 TNDna kywDna;
356 appendValue(kywDna, String<Dna>("ATATATA"));
357 appendValue(kywDna, String<Dna>("TATAT"));
358 appendValue(kywDna, String<Dna>("ACGATAT"));
359 Pattern<TNDna, TAlgorithmSpec> ptDna(kywDna);
360
361 clear(finderPos);
362 clear(keywordIndex);
363 while (find(fdDna, ptDna)) {
364 appendValue(finderPos, position(fdDna));
365 appendValue(keywordIndex, position(ptDna));
366 SEQAN_ASSERT_EQ(position(fdDna), beginPosition(fdDna));
367 SEQAN_ASSERT_EQ(endPosition(fdDna), beginPosition(fdDna) + length(fdDna));
368 SEQAN_ASSERT_EQ(length(fdDna), length(kywDna[position(ptDna)]));
369 SEQAN_ASSERT_EQ(begin(fdDna), begin(hstDna) + beginPosition(fdDna));
370 SEQAN_ASSERT_EQ(end(fdDna), begin(hstDna) + endPosition(fdDna));
371 SEQAN_ASSERT_EQ(infix(fdDna), kywDna[position(ptDna)]);
372 }
373
374 SEQAN_ASSERT_EQ(length(finderPos), 3u);
375 SEQAN_ASSERT_EQ(length(keywordIndex), 3u);
376 if (order_by_begin_position) {
377 SEQAN_ASSERT_EQ(finderPos[0], 4u);
378 SEQAN_ASSERT_EQ(keywordIndex[0], 2u);
379 SEQAN_ASSERT_EQ(finderPos[1], 7u);
380 SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
381 SEQAN_ASSERT_EQ(finderPos[2], 8u);
382 SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
383 } else {
384 SEQAN_ASSERT_EQ(finderPos[0], 4u);
385 SEQAN_ASSERT_EQ(keywordIndex[0], 2u);
386 SEQAN_ASSERT_EQ(finderPos[1], 8u);
387 SEQAN_ASSERT_EQ(keywordIndex[1], 1u);
388 SEQAN_ASSERT_EQ(finderPos[2], 7u);
389 SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
390 }
391 //____________________________________________________________________________
392 // Test2 - Multiple keywords that do not fit into a machine word
393 String<Dna> my_haystack("AGATACGATATATACAGATACGATATATACAGATACGATATATACAGATACGATATATACAGATACGATATATAC");
394 Finder<String<Dna> > my_finder(my_haystack);
395
396 typedef String<String<Dna> > TNeedle_My;
397 TNeedle_My my_keywords;
398 appendValue(my_keywords, String<Dna>("ATATATA"));
399 appendValue(my_keywords, String<Dna>("ACCGATCCAT"));
400 appendValue(my_keywords, String<Dna>("TATAT"));
401 appendValue(my_keywords, String<Dna>("ACCGAT"));
402 appendValue(my_keywords, String<Dna>("ACGATAT"));
403 appendValue(my_keywords, String<Dna>("CCAA"));
404 Pattern<TNeedle_My, TAlgorithmSpec> my_pattern(my_keywords);
405
406 clear(finderPos);
407 clear(keywordIndex);
408 while (find(my_finder, my_pattern)) {
409 //std::cout << position(my_finder) << "-" << position(my_pattern) << std::endl;
410 append(finderPos,position(my_finder));
411 append(keywordIndex,position(my_pattern));
412 SEQAN_ASSERT_EQ(position(my_finder), beginPosition(my_finder));
413 SEQAN_ASSERT_EQ(endPosition(my_finder), beginPosition(my_finder) + length(my_finder));
414 SEQAN_ASSERT_EQ(length(my_finder), length(my_keywords[position(my_pattern)]));
415 SEQAN_ASSERT_EQ(begin(my_finder), begin(my_haystack) + beginPosition(my_finder));
416 SEQAN_ASSERT_EQ(end(my_finder), begin(my_haystack) + endPosition(my_finder));
417 SEQAN_ASSERT_EQ(infix(my_finder), my_keywords[position(my_pattern)]);
418 }
419
420 SEQAN_ASSERT_EQ(length(finderPos), 15u);
421 SEQAN_ASSERT_EQ(length(keywordIndex), 15u);
422 if (order_by_begin_position) {
423 SEQAN_ASSERT_EQ(finderPos[0], 4u);
424 SEQAN_ASSERT_EQ(keywordIndex[0], 4u);
425 SEQAN_ASSERT_EQ(finderPos[1], 7u);
426 SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
427 SEQAN_ASSERT_EQ(finderPos[2], 8u);
428 SEQAN_ASSERT_EQ(keywordIndex[2], 2u);
429 SEQAN_ASSERT_EQ(finderPos[3], 19u);
430 SEQAN_ASSERT_EQ(keywordIndex[3], 4u);
431 SEQAN_ASSERT_EQ(finderPos[4], 22u);
432 SEQAN_ASSERT_EQ(keywordIndex[4], 0u);
433 SEQAN_ASSERT_EQ(finderPos[5], 23u);
434 SEQAN_ASSERT_EQ(keywordIndex[5], 2u);
435 SEQAN_ASSERT_EQ(finderPos[6], 34u);
436 SEQAN_ASSERT_EQ(keywordIndex[6], 4u);
437 SEQAN_ASSERT_EQ(finderPos[7], 37u);
438 SEQAN_ASSERT_EQ(keywordIndex[7], 0u);
439 SEQAN_ASSERT_EQ(finderPos[8], 38u);
440 SEQAN_ASSERT_EQ(keywordIndex[8], 2u);
441 SEQAN_ASSERT_EQ(finderPos[9], 49u);
442 SEQAN_ASSERT_EQ(keywordIndex[9], 4u);
443 SEQAN_ASSERT_EQ(finderPos[10], 52u);
444 SEQAN_ASSERT_EQ(keywordIndex[10], 0u);
445 SEQAN_ASSERT_EQ(finderPos[11], 53u);
446 SEQAN_ASSERT_EQ(keywordIndex[11], 2u);
447 SEQAN_ASSERT_EQ(finderPos[12], 64u);
448 SEQAN_ASSERT_EQ(keywordIndex[12], 4u);
449 SEQAN_ASSERT_EQ(finderPos[13], 67u);
450 SEQAN_ASSERT_EQ(keywordIndex[13], 0u);
451 SEQAN_ASSERT_EQ(finderPos[14], 68u);
452 SEQAN_ASSERT_EQ(keywordIndex[14], 2u);
453 } else {
454 SEQAN_ASSERT_EQ(finderPos[0], 4u);
455 SEQAN_ASSERT_EQ(keywordIndex[0], 4u);
456 SEQAN_ASSERT_EQ(finderPos[1], 8u);
457 SEQAN_ASSERT_EQ(keywordIndex[1], 2u);
458 SEQAN_ASSERT_EQ(finderPos[2], 7u);
459 SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
460 SEQAN_ASSERT_EQ(finderPos[3], 19u);
461 SEQAN_ASSERT_EQ(keywordIndex[3], 4u);
462 SEQAN_ASSERT_EQ(finderPos[4], 23u);
463 SEQAN_ASSERT_EQ(keywordIndex[4], 2u);
464 SEQAN_ASSERT_EQ(finderPos[5], 22u);
465 SEQAN_ASSERT_EQ(keywordIndex[5], 0u);
466 SEQAN_ASSERT_EQ(finderPos[6], 34u);
467 SEQAN_ASSERT_EQ(keywordIndex[6], 4u);
468 SEQAN_ASSERT_EQ(finderPos[7], 38u);
469 SEQAN_ASSERT_EQ(keywordIndex[7], 2u);
470 SEQAN_ASSERT_EQ(finderPos[8], 37u);
471 SEQAN_ASSERT_EQ(keywordIndex[8], 0u);
472 SEQAN_ASSERT_EQ(finderPos[9], 49u);
473 SEQAN_ASSERT_EQ(keywordIndex[9], 4u);
474 SEQAN_ASSERT_EQ(finderPos[10], 53u);
475 SEQAN_ASSERT_EQ(keywordIndex[10], 2u);
476 SEQAN_ASSERT_EQ(finderPos[11], 52u);
477 SEQAN_ASSERT_EQ(keywordIndex[11], 0u);
478 SEQAN_ASSERT_EQ(finderPos[12], 64u);
479 SEQAN_ASSERT_EQ(keywordIndex[12], 4u);
480 SEQAN_ASSERT_EQ(finderPos[13], 68u);
481 SEQAN_ASSERT_EQ(keywordIndex[13], 2u);
482 SEQAN_ASSERT_EQ(finderPos[14], 67u);
483 SEQAN_ASSERT_EQ(keywordIndex[14], 0u);
484 }
485
486 //____________________________________________________________________________
487 // Multiple keywords with overlapping matches
488 String<Dna> my2_haystack("aaaacaaa");
489 Finder<String<Dna> > my2_finder(my2_haystack);
490
491 typedef String<String<Dna> > TNeedle_My2;
492 TNeedle_My2 my2_keywords;
493 appendValue(my2_keywords, String<Dna>("aa"));
494 appendValue(my2_keywords, String<Dna>("aaa"));
495 appendValue(my2_keywords, String<Dna>("ac"));
496 appendValue(my2_keywords, String<Dna>("aac"));
497 appendValue(my2_keywords, String<Dna>("gctccacctgacctagcccatggggcccaaatttccggccttaattcccattt"));
498 Pattern<TNeedle_My2, TAlgorithmSpec> my2_pattern(my2_keywords);
499
500 clear(finderPos);
501 clear(keywordIndex);
502 while (find(my2_finder, my2_pattern)) {
503 //std::cout << position(my2_finder) << ":" << position(my2_pattern) << std::endl;
504 append(finderPos,position(my2_finder));
505 append(keywordIndex,position(my2_pattern));
506 SEQAN_ASSERT_EQ(position(my2_finder), beginPosition(my2_finder));
507 SEQAN_ASSERT_EQ(endPosition(my2_finder), beginPosition(my2_finder) + length(my2_finder));
508 SEQAN_ASSERT_EQ(length(my2_finder), length(my2_keywords[position(my2_pattern)]));
509 SEQAN_ASSERT_EQ(begin(my2_finder), begin(my2_haystack) + beginPosition(my2_finder));
510 SEQAN_ASSERT_EQ(end(my2_finder), begin(my2_haystack) + endPosition(my2_finder));
511 SEQAN_ASSERT_EQ(infix(my2_finder), my2_keywords[position(my2_pattern)]);
512 }
513
514 if (order_by_begin_position) {
515 SEQAN_ASSERT_EQ(finderPos[0], 0u);
516 SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
517 SEQAN_ASSERT_EQ(finderPos[1], 0u);
518 SEQAN_ASSERT_EQ(keywordIndex[1], 1u);
519 SEQAN_ASSERT_EQ(finderPos[2], 1u);
520 SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
521 SEQAN_ASSERT_EQ(finderPos[3], 1u);
522 SEQAN_ASSERT_EQ(keywordIndex[3], 1u);
523 SEQAN_ASSERT_EQ(finderPos[4], 2u);
524 SEQAN_ASSERT_EQ(keywordIndex[4], 0u);
525 SEQAN_ASSERT_EQ(finderPos[5], 2u);
526 SEQAN_ASSERT_EQ(keywordIndex[5], 3u);
527 SEQAN_ASSERT_EQ(finderPos[6], 3u);
528 SEQAN_ASSERT_EQ(keywordIndex[6], 2u);
529 SEQAN_ASSERT_EQ(finderPos[7], 5u);
530 SEQAN_ASSERT_EQ(keywordIndex[7], 0u);
531 SEQAN_ASSERT_EQ(finderPos[8], 5u);
532 SEQAN_ASSERT_EQ(keywordIndex[8], 1u);
533 SEQAN_ASSERT_EQ(finderPos[9], 6u);
534 SEQAN_ASSERT_EQ(keywordIndex[9], 0u);
535 } else{
536 SEQAN_ASSERT_EQ(finderPos[0], 0u);
537 SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
538 SEQAN_ASSERT_EQ(finderPos[1], 1u);
539 SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
540 SEQAN_ASSERT_EQ(finderPos[2], 0u);
541 SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
542 SEQAN_ASSERT_EQ(finderPos[3], 2u);
543 SEQAN_ASSERT_EQ(keywordIndex[3], 0u);
544 SEQAN_ASSERT_EQ(finderPos[4], 1u);
545 SEQAN_ASSERT_EQ(keywordIndex[4], 1u);
546 SEQAN_ASSERT_EQ(finderPos[5], 3u);
547 SEQAN_ASSERT_EQ(keywordIndex[5], 2u);
548 SEQAN_ASSERT_EQ(finderPos[6], 2u);
549 SEQAN_ASSERT_EQ(keywordIndex[6], 3u);
550 SEQAN_ASSERT_EQ(finderPos[7], 5u);
551 SEQAN_ASSERT_EQ(keywordIndex[7], 0u);
552 SEQAN_ASSERT_EQ(finderPos[8], 6u);
553 SEQAN_ASSERT_EQ(keywordIndex[8], 0u);
554 SEQAN_ASSERT_EQ(finderPos[9], 5u);
555 SEQAN_ASSERT_EQ(keywordIndex[9], 1u);
556 }
557
558 //____________________________________________________________________________
559 // Multiple duplicated keywords with overlapping matches, jumping finder
560 goBegin(my2_finder); // That's a repositioning
561 clear(my2_finder); // That's why, clear state
562 clear(finderPos);
563 clear(keywordIndex);
564
565 unsigned int hits = 0;
566 while (find(my2_finder, my2_pattern)) {
567 if (hits < 2) break;
568 //std::cout << position(my2_finder) << ":" << position(my2_pattern) << std::endl;
569 append(finderPos,position(my2_finder));
570 append(keywordIndex,position(my2_pattern));
571 ++hits;
572 }
573 goBegin(my2_finder);
574 my2_finder+=5;
575 clear(my2_finder);
576 clear(finderPos);
577 clear(keywordIndex);
578 while (find(my2_finder, my2_pattern)) {
579 //std::cout << position(my2_finder) << ":" << position(my2_pattern) << std::endl;
580 append(finderPos,position(my2_finder));
581 append(keywordIndex,position(my2_pattern));
582 }
583
584 if (order_by_begin_position) {
585 SEQAN_ASSERT_EQ(finderPos[0], 5u);
586 SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
587 SEQAN_ASSERT_EQ(finderPos[1], 5u);
588 SEQAN_ASSERT_EQ(keywordIndex[1], 1u);
589 SEQAN_ASSERT_EQ(finderPos[2], 6u);
590 SEQAN_ASSERT_EQ(keywordIndex[2], 0u);
591 } else {
592 SEQAN_ASSERT_EQ(finderPos[0], 5u);
593 SEQAN_ASSERT_EQ(keywordIndex[0], 0u);
594 SEQAN_ASSERT_EQ(finderPos[1], 6u);
595 SEQAN_ASSERT_EQ(keywordIndex[1], 0u);
596 SEQAN_ASSERT_EQ(finderPos[2], 5u);
597 SEQAN_ASSERT_EQ(keywordIndex[2], 1u);
598 }
599 }
600
601
602 template <typename TAlgorithmSpec>
Test_OnlineAlgWildcards()603 void Test_OnlineAlgWildcards() {
604 typedef typename Position<CharString>::Type TPosition;
605
606 String<TPosition> pos;
607
608 //____________________________________________________________________________
609 // Test1 - simple find wo wildcards
610 String<char> haystack("Dies ist ein Haystack. Ja, das ist wirklich einer!");
611 Finder<String<char> > finder(haystack);
612
613 String<char> needle("ist");
614 Pattern<String<char>, TAlgorithmSpec> pattern(needle);
615 clear(pos);
616 while (find(finder, pattern))
617 appendValue(pos, position(finder));
618
619 SEQAN_ASSERT_EQ(pos[0], 7u);
620 SEQAN_ASSERT_EQ(pos[1], 33u);
621 SEQAN_ASSERT_EQ(host(pattern), needle);
622 SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<String<char>, TAlgorithmSpec> const &>(pattern)), needle);
623
624 //____________________________________________________________________________
625 // Test - validation of patterns
626 needle = "ist";
627 setHost(pattern, needle);
628 SEQAN_ASSERT_EQ(valid(pattern), true);
629 SEQAN_ASSERT_EQ(valid(reinterpret_cast<Pattern<String<char>, TAlgorithmSpec> const &>(pattern)), true);
630
631 needle = "i[a-z]s{3,4}t?a*a+c..\\a";
632 setHost(pattern, needle);
633 SEQAN_ASSERT_EQ(valid(pattern), true);
634
635 needle = "i[st";
636 setHost(pattern, needle);
637 SEQAN_ASSERT_EQ(valid(pattern), false);
638
639 needle = "ist\\";
640 setHost(pattern, needle);
641 SEQAN_ASSERT_EQ(valid(pattern), false);
642
643 needle = "ist?*";
644 setHost(pattern, needle);
645 SEQAN_ASSERT_EQ(valid(pattern), false);
646
647 needle = "ist{5,4}";
648 setHost(pattern, needle);
649 SEQAN_ASSERT_EQ(valid(pattern), false);
650
651 needle = "ist{5,a}";
652 setHost(pattern, needle);
653 SEQAN_ASSERT_EQ(valid(pattern), false);
654
655 needle = "ist{5,";
656 setHost(pattern, needle);
657 SEQAN_ASSERT_EQ(valid(pattern), false);
658
659 //____________________________________________________________________________
660 // Test - searching with invalid needles
661 haystack = "Dies i[st ein Haystack. Ja, das i[st wirklich einer!";
662 setHost(finder, haystack);
663 clear(finder);
664
665 needle = "i[st";
666 setHost(pattern, needle);
667
668 clear(pos);
669 while (find(finder, pattern))
670 append(pos,position(finder));
671
672 SEQAN_ASSERT_EQ(length(pos), 0u);
673
674 //____________________________________________________________________________
675 // Test - handle needles with wildcards
676 // to produce two \ in the pattern you need to escape both of them
677 needle = "aa+c*[a-z]xx?aa\\\\";
678 SEQAN_ASSERT_EQ(_lengthWithoutWildcards(needle), 9u);
679
680 //____________________________________________________________________________
681 // Test - optional characters (?)
682 haystack = "abc__ac";
683 setHost(finder, haystack);
684 clear(finder);
685
686 needle = "ab?c";
687 setHost(pattern, needle);
688
689 clear(pos);
690 while (find(finder, pattern))
691 append(pos,position(finder));
692
693 SEQAN_ASSERT_EQ(pos[0], 2u);
694 SEQAN_ASSERT_EQ(pos[1], 6u);
695 SEQAN_ASSERT_EQ(length(pos), 2u);
696
697 //____________________________________________________________________________
698 // Test - repeatable characters (+)
699 haystack = "abc__abbbbbc_ac";
700 setHost(finder, haystack);
701 clear(finder);
702
703 needle = "ab+c";
704 setHost(pattern, needle);
705
706 clear(pos);
707 while (find(finder, pattern))
708 append(pos,position(finder));
709
710 SEQAN_ASSERT_EQ(pos[0], 2u);
711 SEQAN_ASSERT_EQ(pos[1], 11u);
712 SEQAN_ASSERT_EQ(length(pos), 2u);
713
714 //____________________________________________________________________________
715 // Test - repeatable characters (*)
716 haystack = "abc__abbbbbc_ac";
717 setHost(finder, haystack);
718 clear(finder);
719
720 needle = "ab*c";
721 setHost(pattern, needle);
722
723 clear(pos);
724 while (find(finder, pattern))
725 append(pos,position(finder));
726
727 SEQAN_ASSERT_EQ(pos[0], 2u);
728 SEQAN_ASSERT_EQ(pos[1], 11u);
729 SEQAN_ASSERT_EQ(pos[2], 14u);
730
731 SEQAN_ASSERT_EQ(length(pos), 3u);
732
733 //____________________________________________________________________________
734 // Test - wildcard matching
735 haystack = "acccdfabdeeef";
736 setHost(finder, haystack);
737 clear(finder);
738
739 needle = "ab?c*de+f";
740 setHost(pattern, needle);
741
742 clear(pos);
743 while (find(finder, pattern))
744 append(pos,position(finder));
745
746 SEQAN_ASSERT_EQ(pos[0], 12u);
747 SEQAN_ASSERT_EQ(length(pos), 1u);
748
749 //____________________________________________________________________________
750 // Test - wildcard matching (hard case)
751 haystack = "aacccdfacccdeeef";
752 setHost(finder, haystack);
753 clear(finder);
754
755 needle = "a*c*de+f";
756 setHost(pattern, needle);
757
758 clear(pos);
759 while (find(finder, pattern))
760 append(pos,position(finder));
761
762 SEQAN_ASSERT_EQ(pos[0], 15u);
763 SEQAN_ASSERT_EQ(length(pos), 1u);
764
765
766 //____________________________________________________________________________
767 // Test - character classes matching
768 haystack = "annual_Annual_znnual_Znnual";
769 setHost(finder, haystack);
770 clear(finder);
771
772 needle = "[a-zA]nnual";
773 setHost(pattern, needle);
774
775 clear(pos);
776 while (find(finder, pattern))
777 append(pos,position(finder));
778
779 SEQAN_ASSERT_EQ(pos[0], 5u);
780 SEQAN_ASSERT_EQ(pos[1], 12u);
781 SEQAN_ASSERT_EQ(pos[2], 19u);
782 SEQAN_ASSERT_EQ(length(pos), 3u);
783
784 //____________________________________________________________________________
785 // Test - long needles
786 haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefgaabcdef";
787 setHost(finder, haystack);
788 clear(finder);
789
790 needle = "abcdefghijklmnopqrstuvwxyzabcdefg";
791 setHost(pattern, needle);
792
793 clear(pos);
794 while (find(finder, pattern))
795 append(pos,position(finder));
796
797 SEQAN_ASSERT_EQ(pos[0], 32u);
798 SEQAN_ASSERT_EQ(pos[1], 58u);
799 SEQAN_ASSERT_EQ(length(pos), 2u);
800
801 //____________________________________________________________________________
802 // Test - long needles with character classes
803 // abcdefghijklmnopqrstuvwxyzabcdefghijkl
804 // abcdefghijklmnopqrstuvwxyzabcdefghijkl
805 haystack = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuzwxyzabcdefghzjklaabcdefhijkl";
806 setHost(finder, haystack);
807 clear(finder);
808
809 needle = "abcdefghijklmnopqrstu[vz]wxyzabcdefgh[iz]jkl";
810 setHost(pattern, needle);
811
812 clear(pos);
813 while (find(finder, pattern))
814 append(pos,position(finder));
815
816 SEQAN_ASSERT_EQ(pos[0], 37u);
817 SEQAN_ASSERT_EQ(pos[1], 63u);
818 SEQAN_ASSERT_EQ(length(pos), 2u);
819
820
821 //____________________________________________________________________________
822 // Test - long needles with repeating characters
823 // abcdefghijklmnopqrstuvwxyzabcdefghijkl
824 // abcdefghijklmnopqrstuvwxyzabcdefghijkl
825 haystack = "abcdefghijklmnopqrstuvwxyzabcdefghiiiiijkl____aaaaabcdefghijklmnopqrstuvwxyzabcdeghijkl__aaaaabcdefghijklmnopqrstuvwxyzabcdefghjkl";
826 setHost(finder, haystack);
827 clear(finder);
828
829 needle = "aa*bcdefghijklmnopqrstuvwxyzabcdef?g?hi+jkl";
830 setHost(pattern, needle);
831
832 clear(pos);
833 while (find(finder, pattern))
834 append(pos,position(finder));
835
836 SEQAN_ASSERT_EQ(pos[0], 41u);
837 SEQAN_ASSERT_EQ(pos[1], 86u);
838 SEQAN_ASSERT_EQ(length(pos), 2u);
839
840 //____________________________________________________________________________
841 // Test - handle .
842 haystack = "annual_Annual_znnual";
843 setHost(finder, haystack);
844 clear(finder);
845
846 needle = ".nnual";
847 setHost(pattern, needle);
848 clear(pos);
849 while (find(finder, pattern)) {
850 append(pos,position(finder));
851 }
852 SEQAN_ASSERT_EQ(pos[0], 5u);
853 SEQAN_ASSERT_EQ(pos[1], 12u);
854 SEQAN_ASSERT_EQ(pos[2], 19u);
855 SEQAN_ASSERT_EQ(length(pos), 3u);
856
857 //____________________________________________________________________________
858 // Test - handle backslash
859 haystack = "annual_Annual_.nnual";
860 setHost(finder, haystack);
861 clear(finder);
862
863 needle = "\\.nnual";
864 setHost(pattern, needle);
865 clear(pos);
866 while (find(finder, pattern)){
867 append(pos,position(finder));
868 }
869 SEQAN_ASSERT_EQ(pos[0], 19u);
870 SEQAN_ASSERT_EQ(length(pos), 1u);
871
872
873
874 //____________________________________________________________________________
875 // Test - handle bounded length repeats {n,m}
876 haystack = "aannual_aaaannual_annual";
877 setHost(finder, haystack);
878 clear(finder);
879
880 needle = "a{2,5}n{2}ual";
881
882 SEQAN_ASSERT_EQ(_lengthWithoutWildcards(needle), 10u);
883
884 setHost(pattern, needle);
885 clear(pos);
886 while (find(finder, pattern)) {
887 append(pos,position(finder));
888 }
889 SEQAN_ASSERT_EQ(pos[0], 6u);
890 SEQAN_ASSERT_EQ(pos[1], 16u);
891 SEQAN_ASSERT_EQ(length(pos), 2u);
892
893
894 //____________________________________________________________________________
895 // Test - handle different types of Pattern and Needle
896 String<Dna> dna_haystack("AAACCTATGGGTTTAAAACCCTGAAACCCC");
897 Finder<String<Dna> > dna_finder(dna_haystack);
898
899 String<char> char_needle("a{3}c+t[ag].");
900 Pattern<String<Dna>, TAlgorithmSpec> dna_pattern(char_needle);
901 clear(pos);
902
903 while (find(dna_finder, dna_pattern))
904 append(pos,position(dna_finder));
905
906 SEQAN_ASSERT_EQ(pos[0], 7u);
907 SEQAN_ASSERT_EQ(pos[1], 23u);
908 SEQAN_ASSERT_EQ(length(pos), 2u);
909
910 }
911
912
913 template <typename TPatternSpec>
Test_Approx_EditDist()914 void Test_Approx_EditDist() {
915 //test DPSearch
916 String<char> hstk("any_annealing");
917 String<char> nl("annual");
918
919 Finder<String<char> > fd(hstk);
920
921 Pattern<String<char>, TPatternSpec> pt(nl, -2);
922
923 SEQAN_ASSERT(find(fd, pt));
924 SEQAN_ASSERT_EQ(position(fd), 8u);
925 SEQAN_ASSERT_EQ(getScore(pt), -2);
926 SEQAN_ASSERT(find(fd, pt));
927 SEQAN_ASSERT_EQ(position(fd), 9u);
928 SEQAN_ASSERT_EQ(getScore(pt), -1);
929 SEQAN_ASSERT(find(fd, pt));
930 SEQAN_ASSERT_EQ(position(fd), 10u);
931 SEQAN_ASSERT_EQ(getScore(pt), -2);
932
933 SEQAN_ASSERT_NOT(find(fd,pt));
934
935 String<char> haystk("Dies ist der Haystack des Tests. Ja, das ist er wirklich!");
936 String<char> ndl("des");
937
938 Finder<String<char> > fnd(haystk);
939
940 Pattern<String<char>, TPatternSpec> pat(ndl, -2);
941 SEQAN_ASSERT_EQ(host(pat), ndl);
942 SEQAN_ASSERT_EQ(host(reinterpret_cast<Pattern<String<char>, TPatternSpec> const &>(pat)), ndl);
943
944 SEQAN_ASSERT_EQ(scoreLimit(pat), -2);
945 setScoreLimit(pat, -1);
946 SEQAN_ASSERT_EQ(scoreLimit(pat), -1);
947
948 SEQAN_ASSERT(find(fnd, pat));
949 SEQAN_ASSERT_EQ(position(fnd), 3u);
950 SEQAN_ASSERT_EQ(getScore(pat), -1);
951
952 SEQAN_ASSERT(find(fnd, pat));
953 SEQAN_ASSERT_EQ(position(fnd), 10u);
954 SEQAN_ASSERT_EQ(getScore(pat), -1);
955
956 SEQAN_ASSERT(find(fnd, pat));
957 SEQAN_ASSERT_EQ(position(fnd), 11u);
958 SEQAN_ASSERT_EQ(getScore(pat), -1);
959
960 SEQAN_ASSERT(find(fnd, pat));
961 SEQAN_ASSERT_EQ(position(fnd), 23u);
962 SEQAN_ASSERT_EQ(getScore(pat), -1);
963
964 SEQAN_ASSERT(find(fnd, pat));
965 SEQAN_ASSERT_EQ(position(fnd), 24u);
966 SEQAN_ASSERT_EQ(getScore(pat), 0);
967
968 SEQAN_ASSERT(find(fnd, pat));
969 SEQAN_ASSERT_EQ(position(fnd), 25u);
970 SEQAN_ASSERT_EQ(getScore(pat), -1);
971
972 SEQAN_ASSERT(find(fnd, pat));
973 SEQAN_ASSERT_EQ(position(fnd), 28u);
974 SEQAN_ASSERT_EQ(getScore(pat), -1);
975
976 SEQAN_ASSERT(find(fnd, pat));
977 SEQAN_ASSERT_EQ(position(fnd), 39u);
978 SEQAN_ASSERT_EQ(getScore(pat), -1);
979
980 SEQAN_ASSERT_NOT(find(fnd, pat));
981
982 // Test with long needles and a Dna Alphabet
983 String<Dna> long_haystk("taaaataaaatacaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaataaaat");
984 String<Dna> long_ndl("taaaataaaatacaataaaataaaatataataaaataaaataaaat");
985
986 Finder<String<Dna> > long_fnd(long_haystk);
987
988 Pattern<String<Dna>, TPatternSpec> long_pat(long_ndl, -2);
989
990 SEQAN_ASSERT(find(long_fnd,long_pat));
991 SEQAN_ASSERT_EQ(position(long_fnd), 44u);
992 SEQAN_ASSERT_EQ(getScore(long_pat), -2);
993
994 SEQAN_ASSERT(find(long_fnd,long_pat));
995 SEQAN_ASSERT_EQ(position(long_fnd), 45u);
996 SEQAN_ASSERT_EQ(getScore(long_pat), -1);
997
998 SEQAN_ASSERT(find(long_fnd,long_pat));
999 SEQAN_ASSERT_EQ(position(long_fnd), 46u);
1000 SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1001
1002 SEQAN_ASSERT(find(long_fnd,long_pat));
1003 SEQAN_ASSERT_EQ(position(long_fnd), 60u);
1004 SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1005
1006 SEQAN_ASSERT(find(long_fnd,long_pat));
1007 SEQAN_ASSERT_EQ(position(long_fnd), 65u);
1008 SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1009
1010 SEQAN_ASSERT(find(long_fnd,long_pat));
1011 SEQAN_ASSERT_EQ(position(long_fnd), 70u);
1012 SEQAN_ASSERT_EQ(getScore(long_pat), -2);
1013
1014 SEQAN_ASSERT_NOT(find(long_fnd,long_pat));
1015
1016 //____________________________________________________________________________
1017
1018 String<char> haystack_1 = "123XXXabaXXX45aba123";
1019 String<char> needle_1 = "XXXaba";
1020 Finder<String<char> > finder_1(haystack_1);
1021 Pattern<String<char>, TPatternSpec> pattern_1(needle_1, -2);
1022
1023 SEQAN_ASSERT(find(finder_1, pattern_1));
1024 SEQAN_ASSERT_EQ(endPosition(finder_1), 7u);
1025 SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1026 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1027 SEQAN_ASSERT_EQ(infix(finder_1), "XXXa");
1028 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1029 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1030
1031 SEQAN_ASSERT(find(finder_1, pattern_1));
1032 SEQAN_ASSERT_EQ(endPosition(finder_1), 8u);
1033 SEQAN_ASSERT_EQ(getScore(pattern_1), -1);
1034 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1035 SEQAN_ASSERT_EQ(infix(finder_1), "XXab");
1036 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1037 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1038 SEQAN_ASSERT_EQ(infix(finder_1), "XXXab");
1039 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1040 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1041 SEQAN_ASSERT_EQ(infix(finder_1), "3XXXab");
1042 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1043 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1044
1045 SEQAN_ASSERT(find(finder_1, pattern_1));
1046 SEQAN_ASSERT_EQ(endPosition(finder_1), 9u);
1047 SEQAN_ASSERT_EQ(getScore(pattern_1), 0);
1048 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1049 SEQAN_ASSERT_EQ(infix(finder_1), "Xaba");
1050 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1051 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1052 SEQAN_ASSERT_EQ(infix(finder_1), "XXaba");
1053 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1054 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1055 SEQAN_ASSERT_EQ(infix(finder_1), "XXXaba");
1056 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), 0);
1057 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1058 SEQAN_ASSERT_EQ(infix(finder_1), "3XXXaba");
1059 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1060 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1061 SEQAN_ASSERT_EQ(infix(finder_1), "23XXXaba");
1062 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1063 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1064
1065 SEQAN_ASSERT(find(finder_1, pattern_1));
1066 SEQAN_ASSERT_EQ(endPosition(finder_1), 10u);
1067 SEQAN_ASSERT_EQ(getScore(pattern_1), -1);
1068 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1069 SEQAN_ASSERT_EQ(infix(finder_1), "XXabaX");
1070 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1071 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1072 SEQAN_ASSERT_EQ(infix(finder_1), "XXXabaX");
1073 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -1);
1074 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1075 SEQAN_ASSERT_EQ(infix(finder_1), "3XXXabaX");
1076 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1077 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1078
1079 SEQAN_ASSERT(find(finder_1, pattern_1));
1080 SEQAN_ASSERT_EQ(endPosition(finder_1), 11u);
1081 SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1082 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1083 SEQAN_ASSERT_EQ(infix(finder_1), "XXXabaXX");
1084 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1085 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1086
1087 SEQAN_ASSERT(find(finder_1, pattern_1));
1088 SEQAN_ASSERT_EQ(endPosition(finder_1), 15u);
1089 SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1090 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1091 SEQAN_ASSERT_EQ(infix(finder_1), "XXX45a");
1092 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1093 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1094
1095 SEQAN_ASSERT(find(finder_1, pattern_1));
1096 SEQAN_ASSERT_EQ(endPosition(finder_1), 17u);
1097 SEQAN_ASSERT_EQ(getScore(pattern_1), -2);
1098 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1099 SEQAN_ASSERT_EQ(infix(finder_1), "X45aba");
1100 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1101 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1102 SEQAN_ASSERT_EQ(infix(finder_1), "XX45aba");
1103 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1104 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1105 SEQAN_ASSERT_EQ(infix(finder_1), "XXX45aba");
1106 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), -2);
1107 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1108
1109 SEQAN_ASSERT_NOT(find(finder_1, pattern_1));
1110 }
1111
1112
1113 // Test prefix search.
1114 template <typename TPatternSpec>
Test_Approx_Prefix_EditDist()1115 void Test_Approx_Prefix_EditDist() {
1116 String<char> haystack_1 = "mississippi";
1117 String<char> needle_1 = "misssi";
1118 Finder<String<char> > finder_1(haystack_1);
1119 Pattern<String<char>, TPatternSpec> pattern_1(needle_1, -2);
1120 SEQAN_ASSERT(find(finder_1, pattern_1));
1121 SEQAN_ASSERT_EQ(position(finder_1), 3u);
1122 SEQAN_ASSERT_EQ(length(finder_1), 4u);
1123 SEQAN_ASSERT_EQ(beginPosition(finder_1), 0u);
1124 SEQAN_ASSERT_EQ(endPosition(finder_1), 4u);
1125 SEQAN_ASSERT_EQ(infix(finder_1), "miss");
1126 SEQAN_ASSERT(find(finder_1, pattern_1));
1127 SEQAN_ASSERT_EQ(position(finder_1), 4u);
1128 SEQAN_ASSERT_EQ(infix(finder_1), "missi");
1129 SEQAN_ASSERT(find(finder_1, pattern_1));
1130 SEQAN_ASSERT_EQ(position(finder_1), 5u);
1131 SEQAN_ASSERT_EQ(infix(finder_1), "missis");
1132 SEQAN_ASSERT(find(finder_1, pattern_1));
1133 SEQAN_ASSERT_EQ(position(finder_1), 6u);
1134 SEQAN_ASSERT(find(finder_1, pattern_1));
1135 SEQAN_ASSERT_EQ(position(finder_1), 7u);
1136 SEQAN_ASSERT_NOT(find(finder_1, pattern_1));
1137
1138
1139 String<char> haystack_2 = "yyyXXaba";
1140 String<char> needle_2 = "yyyaba";
1141 Finder<String<char> > finder_2(haystack_2);
1142 Pattern<String<char>, TPatternSpec> pattern_2(needle_2, -2);
1143 SEQAN_ASSERT(find(finder_2, pattern_2));
1144 SEQAN_ASSERT_EQ(position(finder_2), 5u);
1145 SEQAN_ASSERT_EQ(infix(finder_2), "yyyXXa");
1146 SEQAN_ASSERT(find(finder_2, pattern_2));
1147 SEQAN_ASSERT_EQ(position(finder_2), 7u);
1148 SEQAN_ASSERT_EQ(infix(finder_2), "yyyXXaba");
1149 SEQAN_ASSERT_NOT(find(finder_2, pattern_2));
1150
1151
1152 String<char> haystack_3 = "testtexttext";
1153 String<char> needle_3 = "mismatch";
1154 Finder<String<char> > finder_3(haystack_3);
1155 Pattern<String<char>, TPatternSpec> pattern_3(needle_3, -2);
1156 SEQAN_ASSERT_NOT(find(finder_3, pattern_3));
1157
1158
1159 String<char> haystack_4 = "testtext";
1160 String<char> needle_4 = "a longer mismatch";
1161 Finder<String<char> > finder_4(haystack_4);
1162 Pattern<String<char>, TPatternSpec> pattern_4(needle_4, -2);
1163 SEQAN_ASSERT_NOT(find(finder_4, pattern_4));
1164
1165
1166 String<char> haystack_5 = "exactmatching";
1167 String<char> needle_5 = "exact";
1168 Finder<String<char> > finder_5(haystack_5);
1169 Pattern<String<char>, TPatternSpec> pattern_5(needle_5, 0);
1170 SEQAN_ASSERT(find(finder_5, pattern_5));
1171 SEQAN_ASSERT_EQ(position(finder_5), 4u);
1172 SEQAN_ASSERT_NOT(find(finder_5, pattern_5));
1173
1174
1175 String<char> haystack_6 = "this is a text that is a bit longer than one machine word of 32 or 64 bits. AAXYX";
1176 String<char> needle_6 = "this is a text that is a bit longer than one machine word of 32 or 64 bits. XYX";
1177 Finder<String<char> > finder_6(haystack_6);
1178 Pattern<String<char>, TPatternSpec> pattern_6(needle_6, -2);
1179 SEQAN_ASSERT(find(finder_6, pattern_6));
1180 SEQAN_ASSERT_EQ(infix(finder_6), "this is a text that is a bit longer than one machine word of 32 or 64 bits. AAX");
1181 SEQAN_ASSERT(find(finder_6, pattern_6));
1182 SEQAN_ASSERT_EQ(infix(finder_6), "this is a text that is a bit longer than one machine word of 32 or 64 bits. AAXYX");
1183 SEQAN_ASSERT_NOT(find(finder_6, pattern_6));
1184
1185 // The very high score limit with short needle all-mismatching the
1186 // pattern. This is used to mirror the test below that use a long
1187 // needle.
1188 {
1189 const char *kHaystackStr = "CTCT";
1190 const char *kNeedleStr = "AAAA";
1191 const int kNeedleLen = strlen(kNeedleStr);
1192 const int kScoreLimit = -1000;
1193 // Haystack has a length of 100, needle of 80.
1194 String<char> haystack(kHaystackStr);
1195 String<char> needle(kNeedleStr);
1196 Finder<String<char> > finder(haystack);
1197 Pattern<String<char>, TPatternSpec> pattern(needle, kScoreLimit);
1198
1199 while (find(finder, pattern)) {
1200 SEQAN_ASSERT_EQ(-kNeedleLen, getScore(pattern));
1201 }
1202 }
1203
1204 // Test very high score limit with large needles and very high
1205 // maximum scores.
1206 {
1207 const char *kHaystackStr = "CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCT";
1208 const char *kNeedleStr = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
1209 const int kNeedleLen = strlen(kNeedleStr);
1210 const int kScoreLimit = -1000;
1211 // Haystack has a length of 100, needle of 80.
1212 String<char> haystack(kHaystackStr);
1213 String<char> needle(kNeedleStr);
1214 Finder<String<char> > finder(haystack);
1215 Pattern<String<char>, TPatternSpec> pattern(needle, kScoreLimit);
1216
1217 while (find(finder, pattern)) {
1218 SEQAN_ASSERT_EQ(-kNeedleLen, getScore(pattern));
1219 }
1220 }
1221 }
1222
1223
SEQAN_DEFINE_TEST(test_find_online_Simple)1224 SEQAN_DEFINE_TEST(test_find_online_Simple) {
1225 Test_OnlineAlg<Simple>();
1226 }
1227
1228
SEQAN_DEFINE_TEST(test_find_online_Horspool)1229 SEQAN_DEFINE_TEST(test_find_online_Horspool) {
1230 Test_OnlineAlg<Horspool>();
1231 }
1232
1233
SEQAN_DEFINE_TEST(test_find_online_ShiftAnd)1234 SEQAN_DEFINE_TEST(test_find_online_ShiftAnd) {
1235 Test_OnlineAlg<ShiftAnd>();
1236 }
1237
1238
SEQAN_DEFINE_TEST(test_find_online_ShiftOr)1239 SEQAN_DEFINE_TEST(test_find_online_ShiftOr) {
1240 Test_OnlineAlg<ShiftOr>();
1241 }
1242
1243
SEQAN_DEFINE_TEST(test_find_online_BndmAlgo)1244 SEQAN_DEFINE_TEST(test_find_online_BndmAlgo) {
1245 Test_OnlineAlg<BndmAlgo>();
1246 }
1247
1248
SEQAN_DEFINE_TEST(test_find_online_BFAM_Oracle)1249 SEQAN_DEFINE_TEST(test_find_online_BFAM_Oracle) {
1250 Test_OnlineAlg<Bfam<Oracle> >();
1251 }
1252
1253
SEQAN_DEFINE_TEST(test_find_online_BFAM_Trie)1254 SEQAN_DEFINE_TEST(test_find_online_BFAM_Trie) {
1255 Test_OnlineAlg<Bfam<Trie> >();
1256 }
1257
1258
SEQAN_DEFINE_TEST(test_find_online_wildcards)1259 SEQAN_DEFINE_TEST(test_find_online_wildcards) {
1260 Test_OnlineAlgWildcards<WildShiftAnd>();
1261 }
1262
1263
SEQAN_DEFINE_TEST(test_find_online_multi_AhoCorasick)1264 SEQAN_DEFINE_TEST(test_find_online_multi_AhoCorasick) {
1265 Test_OnlineAlgMulti<AhoCorasick>(false);
1266 }
1267
1268
SEQAN_DEFINE_TEST(test_find_online_multi_MultipleShiftAnd)1269 SEQAN_DEFINE_TEST(test_find_online_multi_MultipleShiftAnd) {
1270 // TODO(holtgrew): Original comment: "leaks".
1271 // TODO(holtgrew): Fails, but was commented out in original code.
1272 // Test_OnlineAlgMulti<MultipleShiftAnd>(false);
1273 }
1274
1275
SEQAN_DEFINE_TEST(test_find_online_multi_SetHorspool)1276 SEQAN_DEFINE_TEST(test_find_online_multi_SetHorspool) {
1277 Test_OnlineAlgMulti<SetHorspool>(false);
1278 }
1279
1280
SEQAN_DEFINE_TEST(test_find_online_multi_WuManber)1281 SEQAN_DEFINE_TEST(test_find_online_multi_WuManber) {
1282 Test_OnlineAlgMulti<WuManber>(true);
1283 }
1284
1285
SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Oracle)1286 SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Oracle) {
1287 Test_OnlineAlgMulti<MultiBfam<Oracle> >(true);
1288 }
1289
1290
SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Trie)1291 SEQAN_DEFINE_TEST(test_find_online_multi_MultiBFAM_Trie) {
1292 Test_OnlineAlgMulti<MultiBfam<Trie> >(true);
1293 }
1294
1295
SEQAN_DEFINE_TEST(test_find_approx_prefix_edit_dist_dpsearch)1296 SEQAN_DEFINE_TEST(test_find_approx_prefix_edit_dist_dpsearch) {
1297 Test_Approx_Prefix_EditDist<DPSearch<Score<>, FindPrefix> >();
1298 }
1299
1300
SEQAN_DEFINE_TEST(test_approx_prefix_edit_dist_myers)1301 SEQAN_DEFINE_TEST(test_approx_prefix_edit_dist_myers) {
1302 Test_Approx_Prefix_EditDist<Myers<FindPrefix> >();
1303 }
1304
1305
SEQAN_DEFINE_TEST(test_approx_edit_dist_dp_search_simple_score)1306 SEQAN_DEFINE_TEST(test_approx_edit_dist_dp_search_simple_score) {
1307 Test_Approx_EditDist<DPSearch<SimpleScore> >();
1308 }
1309
1310
SEQAN_DEFINE_TEST(test_approx_edit_dp_search_simple_score_legacy_case)1311 SEQAN_DEFINE_TEST(test_approx_edit_dp_search_simple_score_legacy_case) {
1312 // TODO(holtgrew): This was written out like this in Test_Approx() in original code.
1313 // Test DPSearch.
1314 Pattern<String<char>, DPSearch<SimpleScore> > pat1;
1315
1316 SimpleScore sc;
1317 setScoreGap(sc, -10);
1318 setScoringScheme(pat1, sc);
1319 SEQAN_ASSERT_EQ(scoreGap(scoringScheme(pat1)), -10);
1320
1321 setScoreGap(sc, -1);
1322 SEQAN_ASSERT_EQ(scoreGap(scoringScheme(pat1)), -10);
1323 setScoringScheme(pat1, sc);
1324 SEQAN_ASSERT_EQ(scoreGap(scoringScheme(pat1)), -1);
1325 }
1326
1327
SEQAN_DEFINE_TEST(test_approx_edit_dist_myers)1328 SEQAN_DEFINE_TEST(test_approx_edit_dist_myers) {
1329 Test_Approx_EditDist<Myers<> >();
1330 }
1331
1332
SEQAN_DEFINE_TEST(test_approx_edit_dist_abndm_algo)1333 SEQAN_DEFINE_TEST(test_approx_edit_dist_abndm_algo) {
1334 Test_Approx_EditDist<AbndmAlgo >();
1335 }
1336
1337
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical)1338 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical) {
1339 Test_Approx_EditDist<PexNonHierarchical>();
1340 }
1341
1342
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_hierarchical)1343 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_hierarchical) {
1344 Test_Approx_EditDist<PexHierarchical>();
1345 }
1346
1347
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_aho_corasick)1348 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_aho_corasick) {
1349 Test_Approx_EditDist< Pex<NonHierarchical,AhoCorasick > >();
1350 }
1351
1352
SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_multi_bfam)1353 SEQAN_DEFINE_TEST(test_approx_edit_dist_pex_non_hierarchical_multi_bfam) {
1354 Test_Approx_EditDist< Pex<NonHierarchical,MultiBfam<> > >();
1355 }
1356
1357
1358 // Test that shows the problem with Myers-Ukkonen approximate search
1359 // when large needles were used and the maximum score limit was set
1360 // higher than needle length.
SEQAN_DEFINE_TEST(test_regression_rmbench)1361 SEQAN_DEFINE_TEST(test_regression_rmbench) {
1362 // The data to test with: Needle, haystack and score limit.
1363 const char *kCharNeedle = "CCATATGCTTGTGTCGCGGGTTTATTTGCATTCGACCCAGTTGACTCGGAAGTCGAAATGTTCCTGCCCCGTTTCTGCGTTCCGTGCAGTTGCGCGGTCTGGTTGGGCGGGTCCCCCCCTGA";
1364 const char *kCharHaystack = "ATTCCATATGCTTGTGTCGCGGGTTTATTTGCATTCGACCCAGTTGACTCGGAAGTCGAAATGTTCCTGCCCCGTTTCTGCGTTCCGTGCAGTTGCGCGGTCTGGTTGGGCGGGTCCCCCGCCTACGGATGCACGTTCTCCCGGGCTCGTAAATCC";
1365 const int kScoreLimit = -100000;
1366
1367 // Build actual DNA needle and haystack, the pattern and the
1368 // finder.
1369 DnaString needle(kCharNeedle);
1370 DnaString haystack(kCharHaystack);
1371 Finder<DnaString> finder(haystack);
1372 Pattern<DnaString, MyersUkkonen> pattern(needle);
1373 setScoreLimit(pattern, kScoreLimit);
1374
1375 // The following invariant should always hold: The pattern score
1376 // should be smaller than the needle size.
1377 while (find(finder, pattern)) {
1378 SEQAN_ASSERT_LEQ(pattern.errors, pattern.needleSize);
1379 }
1380 SEQAN_ASSERT_LEQ(pattern.errors, pattern.needleSize);
1381 }
1382
1383
1384 // Tests for the Simple Hamming Finder.
SEQAN_DEFINE_TEST(test_find_hamming_simple)1385 SEQAN_DEFINE_TEST(test_find_hamming_simple) {
1386 // Test that the interface works.
1387 {
1388 DnaString haystack("AC");
1389 DnaString needle("AA");
1390 // Define finder and pattern.
1391 Finder<DnaString> finder(haystack);
1392 Pattern<DnaString, HammingSimple> pattern(needle, -1);
1393
1394 // Perform a search, run all functions defined on the pattern.
1395 bool res = find(finder, pattern);
1396 SEQAN_ASSERT(res);
1397 SEQAN_ASSERT_EQ(0u, position(finder));
1398 SEQAN_ASSERT_EQ(2u, endPosition(finder));
1399 SEQAN_ASSERT_EQ(-1, score(pattern));
1400 SEQAN_ASSERT_EQ(-1, getScore(pattern));
1401 }
1402
1403 // Test for distance 0;
1404 {
1405 // TODO(holtgrew): Should be const, but finder does not allow this.
1406 // Define haystack and needle.
1407 DnaString haystack("ACA");
1408 DnaString needle("AA");
1409 // Define finder and pattern.
1410 Finder<DnaString> finder(haystack);
1411 Pattern<DnaString, HammingSimple> pattern(needle, 0);
1412 // Perform the searches;
1413 bool res;
1414
1415 res = find(finder, pattern);
1416 SEQAN_ASSERT_NOT(res);
1417 }
1418
1419 // Test for distance -1, -2, -3. Should yield the same results,
1420 // as tested for below. The numbers are interesting since they
1421 // are the first > 0, length of pattern, first greater than length
1422 // of pattern.
1423 for (int i = 1; i < 3; ++i) {
1424 // TODO(holtgrew): Should be const, but finder does not allow this.
1425 // Define haystack and needle.
1426 DnaString haystack("ACA");
1427 DnaString needle("AA");
1428 // Define finder and pattern.
1429 Finder<DnaString> finder(haystack);
1430 Pattern<DnaString, HammingSimple> pattern(needle, -i);
1431 // Perform the searches;
1432 bool res;
1433
1434 res = find(finder, pattern);
1435 SEQAN_ASSERT(res);
1436 SEQAN_ASSERT_EQ(0u, position(finder));
1437 SEQAN_ASSERT_EQ(2u, endPosition(finder));
1438 SEQAN_ASSERT_EQ(-1, score(pattern));
1439
1440 res = find(finder, pattern);
1441 SEQAN_ASSERT(res);
1442 SEQAN_ASSERT_EQ(1u, position(finder));
1443 SEQAN_ASSERT_EQ(3u, endPosition(finder));
1444 SEQAN_ASSERT_EQ(-1, score(pattern));
1445 }
1446
1447 // Test setting the score limit.
1448 {
1449 // TODO(holtgrew): Should be const, but finder does not allow this.
1450 // Define haystack and needle.
1451 DnaString haystack("AAC");
1452 DnaString needle("AA");
1453 // Define finder and pattern.
1454 Finder<DnaString> finder(haystack);
1455 Pattern<DnaString, HammingSimple> pattern(needle, 0);
1456 // Perform the searches;
1457 bool res;
1458
1459 res = find(finder, pattern);
1460 SEQAN_ASSERT(res);
1461 SEQAN_ASSERT_EQ(0u, position(finder));
1462
1463 setScoreLimit(pattern, -1);
1464
1465 res = find(finder, pattern);
1466 SEQAN_ASSERT(res);
1467 SEQAN_ASSERT_EQ(1u, position(finder));
1468 }
1469 }
1470
1471
1472 // Tests for a regression found in read mapper benchmark.
SEQAN_DEFINE_TEST(test_find_hamming_simple_regression_rmbench)1473 SEQAN_DEFINE_TEST(test_find_hamming_simple_regression_rmbench) {
1474 // TODO(holtgrew): Should be const, but finder does not allow this.
1475 // Define haystack and needle.
1476 DnaString haystack("CCCCCCCCCCCCCCCCCCCCA");
1477 DnaString needle("CCCCCCCCCCCCCCCCCCCC");
1478 // Define finder and pattern.
1479 Finder<DnaString> finder(haystack);
1480 Pattern<DnaString, HammingSimple> pattern(needle);
1481 setScoreLimit(pattern, -1000);
1482 // Perform the searches;
1483 bool res;
1484 res = find(finder, pattern);
1485 SEQAN_ASSERT(res);
1486 SEQAN_ASSERT_EQ(0u, position(finder));
1487 SEQAN_ASSERT_EQ(length(needle), endPosition(finder));
1488 SEQAN_ASSERT_EQ(0, getScore(pattern));
1489
1490 res = find(finder, pattern);
1491 SEQAN_ASSERT(res);
1492 SEQAN_ASSERT_EQ(1u, position(finder));
1493 SEQAN_ASSERT_EQ(length(needle) + 1, endPosition(finder));
1494 SEQAN_ASSERT_EQ(-1, getScore(pattern));
1495 }
1496
1497 /*
1498 SEQAN_DEFINE_TEST(test_find_hamming_simple) {
1499 testFindApproximateHamming<HammingSimple, Dna>();
1500 }*/
1501
SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_at_start)1502 SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_at_start) {
1503 String<char> haystack = "___AAA___AAA";
1504 String<char> needle = "___AAA";
1505 Finder<String<char> > finder(haystack);
1506 Pattern<String<char>, Myers<FindInfix> > pattern(needle, -2);
1507
1508 // Find match: ___AAA___AAA
1509 // ___A
1510 bool ret = find(finder, pattern);
1511 SEQAN_ASSERT(ret);
1512 SEQAN_ASSERT_EQ(4u, endPosition(finder));
1513 ret = findBegin(finder, pattern, getScore(pattern)); // TODO(holtgrew): getScore(pattern) is in book but should not be necessary
1514 SEQAN_ASSERT(ret);
1515 SEQAN_ASSERT_EQ(0u, beginPosition(finder));
1516
1517 // Find match: ___AAA___AAA
1518 // ___AA
1519 ret = find(finder, pattern);
1520 SEQAN_ASSERT(ret);
1521 SEQAN_ASSERT_EQ(5u, endPosition(finder));
1522 ret = findBegin(finder, pattern, getScore(pattern)); // TODO(holtgrew): getScore(pattern) is in book but should not be necessary
1523 SEQAN_ASSERT(ret);
1524 SEQAN_ASSERT_EQ(0u, beginPosition(finder));
1525
1526 // Find match: ___AAA___AAA
1527 // ___AAA
1528 ret = find(finder, pattern);
1529 SEQAN_ASSERT(ret);
1530 SEQAN_ASSERT_EQ(6u, endPosition(finder));
1531 ret = findBegin(finder, pattern, getScore(pattern)); // TODO(holtgrew): getScore(pattern) is in book but should not be necessary
1532 SEQAN_ASSERT(ret);
1533 SEQAN_ASSERT_EQ(0u, beginPosition(finder));
1534 }
1535
1536
SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_within)1537 SEQAN_DEFINE_TEST(test_myers_find_infix_find_begin_within) {
1538 String<char> haystack = "A___AAA___AAA";
1539 String<char> needle = "___AAA";
1540 Finder<String<char> > finder(haystack);
1541 Pattern<String<char>, Myers<FindInfix> > pattern(needle, -1);
1542
1543 // Find match: A___AAA___AAA
1544 // ___AA
1545 bool ret = find(finder, pattern);
1546 SEQAN_ASSERT(ret);
1547 SEQAN_ASSERT_EQ(6u, endPosition(finder));
1548 ret = findBegin(finder, pattern, getScore(pattern)); // getScore(pattern) is in book but should not be necessary
1549 SEQAN_ASSERT(ret);
1550 SEQAN_ASSERT_EQ(1u, beginPosition(finder));
1551
1552 // Find match: A___AAA___AAA
1553 // ___AAA
1554 ret = find(finder, pattern);
1555 SEQAN_ASSERT(ret);
1556 SEQAN_ASSERT_EQ(7u, endPosition(finder));
1557 ret = findBegin(finder, pattern, getScore(pattern)); // getScore(pattern) is in book but should not be necessary
1558 SEQAN_ASSERT(ret);
1559 SEQAN_ASSERT_EQ(1u, beginPosition(finder));
1560 }
1561
1562
1563 template <typename TString, typename TSegmentOrString>
test_find_on_segments_Helper(TString & haystack,const TSegmentOrString & needle)1564 void test_find_on_segments_Helper(TString &haystack, const TSegmentOrString &needle) {
1565 Finder<TString> finder(haystack);
1566 Pattern<TSegmentOrString, Myers<FindInfix> > pattern(needle);
1567
1568 bool didFind = false;
1569 while (find(finder, pattern)) {
1570 findBegin(finder, pattern);
1571 didFind = true;
1572 }
1573 SEQAN_ASSERT(didFind);
1574
1575 // TODO(holtgrew): Some kind of assertion on the results.
1576 }
1577
1578
1579 // Test string search code on segments. At the moment, this only
1580 // tests whether the code compiles and runs through without any
1581 // obvious errors. No checks are done on the results.
SEQAN_DEFINE_TEST(test_find_on_segments)1582 SEQAN_DEFINE_TEST(test_find_on_segments) {
1583 // TODO(holtgrew): Should be const.
1584 CharString kHaystack = "CGATCGAT";
1585 CharString kNeedle = "GATC";
1586
1587 test_find_on_segments_Helper<>(kHaystack, kNeedle);
1588
1589 Segment<CharString, PrefixSegment> myPrefix(prefix(kNeedle, 1));
1590 test_find_on_segments_Helper<>(kHaystack, myPrefix);
1591 test_find_on_segments_Helper<>(kHaystack, prefix(kNeedle, 1));
1592
1593 Segment<CharString, InfixSegment> myInfix(infix(kNeedle, 1, 2));
1594 test_find_on_segments_Helper<>(kHaystack, myInfix);
1595 test_find_on_segments_Helper<>(kHaystack, infix(kNeedle, 1, 2));
1596
1597 Segment<CharString, SuffixSegment> mySuffix(suffix(kNeedle, 1));
1598 test_find_on_segments_Helper<>(kHaystack, mySuffix);
1599 test_find_on_segments_Helper<>(kHaystack, suffix(kNeedle, 1));
1600 }
1601
1602
SEQAN_DEFINE_TEST(test_myers_trigger_bug)1603 SEQAN_DEFINE_TEST(test_myers_trigger_bug) {
1604 DnaString haystackString = "TCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCCCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTCTCTTC";
1605 DnaString needleString = "GAGAAGAGAAGAGAAGAGAAGAGAAGAGAAGAGAAGAAGAAG";
1606 reverseComplement(needleString);
1607 typedef Segment<DnaString, InfixSegment> TSegment;
1608 typedef ModifiedString<TSegment, ModReverse> TSegmentRev;
1609 // std::cout << "haystack = " << haystackString << std::endl;
1610 // std::cout << "needle = " << needleString << std::endl;
1611 // std::cout << "haystack length = " << length(haystackString) << std::endl
1612 // << "needle length = " << length(needleString) << std::endl
1613 // << "machine word length = " << sizeof(unsigned) * 8 << std::endl;
1614
1615 for (int maxDistance = 0; maxDistance < 10; ++maxDistance) {
1616 for (unsigned beginPosition = 0; beginPosition < length(haystackString) - length(needleString); ++beginPosition) {
1617 // std::cout << "max distance = " << maxDistance << ", begin position = " << beginPosition << std::endl;
1618 TSegmentRev haystackSegment(infix(haystackString, beginPosition, length(haystackString)));
1619 // std::cout << "haystack = " << haystackSegment << std::endl;
1620 // std::cout << "needle = " << needleString << std::endl;
1621 // std::cout << "-----------------------" << std::endl;
1622
1623 String<size_t> positionsMyers;
1624 {
1625 TSegmentRev segment(infix(haystackString, beginPosition, length(haystackString)));
1626 Finder<TSegmentRev> finder(segment);
1627 Pattern<DnaString, MyersUkkonenGlobal > pattern(needleString, -maxDistance);
1628 while (find(finder, pattern)) {
1629 appendValue(positionsMyers, endPosition(finder));
1630 }
1631 }
1632
1633 String<size_t> positionsDpSearch;
1634 {
1635 TSegmentRev segment(infix(haystackString, beginPosition, length(haystackString)));
1636 Finder<TSegmentRev> finder(segment);
1637 Pattern<DnaString, DPSearch<EditDistanceScore, FindPrefix> > pattern(needleString, -maxDistance);
1638 while (find(finder, pattern)) {
1639 appendValue(positionsDpSearch, endPosition(finder));
1640 }
1641 }
1642
1643 // std::cout << "Myers end positions: " << std::endl;
1644 // for (size_t i = 0; i < length(positionsMyers); ++i)
1645 // std::cout << positionsMyers[i] << " ";
1646 // std::cout << std::endl;
1647 // std::cout << "DPSearch end positions: " << std::endl;
1648 // for (size_t i = 0; i < length(positionsDpSearch); ++i)
1649 // std::cout << positionsDpSearch[i] << " ";
1650 // std::cout << std::endl;
1651
1652 SEQAN_ASSERT_EQ(length(positionsDpSearch), length(positionsMyers));
1653 for (unsigned i = 0; i < length(positionsMyers); ++i)
1654 SEQAN_ASSERT_EQ_MSG(positionsDpSearch[i], positionsMyers[i], "i = %u", i);
1655 }
1656 }
1657 }
1658
1659
1660 // Test myers algorithm: Palindrom vs. non-palindrom.
SEQAN_DEFINE_TEST(test_myers_find_begin)1661 SEQAN_DEFINE_TEST(test_myers_find_begin) {
1662 {
1663 String<char> haystack_1 = "AABBAA";
1664 String<char> needle_1 = "ABBA";
1665 Finder<String<char> > finder_1(haystack_1);
1666 Pattern<String<char>, Myers<FindInfix> > pattern_1(needle_1, 0);
1667 SEQAN_ASSERT(find(finder_1, pattern_1));
1668 SEQAN_ASSERT_EQ(endPosition(finder_1), 5u);
1669 SEQAN_ASSERT_EQ(getScore(pattern_1), 0);
1670 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1671 SEQAN_ASSERT_EQ(infix(finder_1), "ABBA");
1672 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), 0);
1673 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1674 }
1675 {
1676 String<char> haystack_1 = "ABCD";
1677 String<char> needle_1 = "BCD";
1678 Finder<String<char> > finder_1(haystack_1);
1679 Pattern<String<char>, Myers<FindInfix> > pattern_1(needle_1, 0);
1680 SEQAN_ASSERT(find(finder_1, pattern_1));
1681 SEQAN_ASSERT_EQ(endPosition(finder_1), 4u);
1682 SEQAN_ASSERT_EQ(getScore(pattern_1), 0);
1683 SEQAN_ASSERT(findBegin(finder_1, pattern_1));
1684 SEQAN_ASSERT_EQ(infix(finder_1), "BCD");
1685 SEQAN_ASSERT_EQ(getBeginScore(pattern_1), 0);
1686 SEQAN_ASSERT_NOT(findBegin(finder_1, pattern_1));
1687 }
1688 }
1689
1690 template <typename TPatternSpec>
test_pattern_copycon()1691 void test_pattern_copycon() {
1692 typedef Pattern<CharString, TPatternSpec> TPattern;
1693 TPattern p1("Some needle");
1694 TPattern p2(p1);
1695 TPattern const p3(p2);
1696 TPattern const p4(p3);
1697 }
1698
1699 template <typename TPatternSpec>
test_pattern_assign()1700 void test_pattern_assign() {
1701 typedef Pattern<CharString, TPatternSpec> TPattern;
1702 TPattern p1("Some needle");
1703 TPattern p2;
1704 TPattern const p3(p1);
1705 TPattern p4;
1706 p2 = p1;
1707 p4 = p3;
1708 }
1709
1710 template <typename TPatternSpec>
test_pattern_movecon()1711 void test_pattern_movecon() {
1712 typedef Pattern<CharString, TPatternSpec> TPattern;
1713 TPattern p1("Some needle");
1714 TPattern p2(std::move(p1));
1715 TPattern const p3(std::move(p2));
1716 TPattern const p4(std::move(p3));
1717 }
1718
1719 template <typename TPatternSpec>
test_pattern_moveassign()1720 void test_pattern_moveassign() {
1721 typedef Pattern<CharString, TPatternSpec> TPattern;
1722 TPattern p1("Some needle");
1723 TPattern p2;
1724 TPattern const p3(p1);
1725 TPattern p4;
1726 p2 = std::move(p1);
1727 p4 = std::move(p3);
1728 }
1729
1730 template <typename TPatternSpec>
test_pattern_set_host()1731 void test_pattern_set_host()
1732 {
1733 typedef Pattern<DnaString, TPatternSpec> TPattern;
1734
1735 { // Set to lvalue reference.
1736 TPattern p;
1737 DnaString ndl = "AGTGGATAGAGAT";
1738 setHost(p, ndl);
1739
1740 SEQAN_ASSERT(&host(p) == &ndl);
1741 }
1742
1743 { // Set to lvalue with different alphabet causing the source to be changed.
1744 DnaString ndl = "AGTGGATAGAGAT";
1745 TPattern p;
1746 setHost(p, ndl);
1747 SEQAN_ASSERT_EQ(&host(p), &ndl);
1748
1749 CharString ndl2 = "AT CARLS";
1750 setHost(p, ndl2);
1751 SEQAN_ASSERT(&host(p) == &ndl);
1752 SEQAN_ASSERT_EQ(ndl, "ATACAAAA");
1753 SEQAN_ASSERT_EQ(ndl2, "AT CARLS");
1754 }
1755
1756 { // Set to const lvalue reference with different alphabet causing the source to be changed.
1757 DnaString const ndl = "AGTGGATAGAGAT";
1758 TPattern p;
1759 setHost(p, ndl);
1760 SEQAN_ASSERT(&host(p) != &ndl);
1761 SEQAN_ASSERT_EQ(host(p), ndl);
1762
1763 CharString ndl2 = "AT CARLS";
1764 setHost(p, ndl2);
1765 SEQAN_ASSERT(&host(p) != &ndl);
1766 SEQAN_ASSERT_EQ(ndl, "AGTGGATAGAGAT");
1767 SEQAN_ASSERT_EQ(ndl2, "AT CARLS");
1768 SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1769 }
1770
1771 { // Set with rvalue reference.
1772 DnaString ndl = "AGTGGATAGAGAT";
1773 TPattern p;
1774 setHost(p, std::move(ndl));
1775 SEQAN_ASSERT(&host(p) != &ndl);
1776 SEQAN_ASSERT_EQ(host(p), "AGTGGATAGAGAT");
1777 }
1778
1779 { // Set to rvalue reference after setting holder dependent.
1780 DnaString ndl = "AGTGGATAGAGAT";
1781 TPattern p;
1782 setHost(p, ndl);
1783 SEQAN_ASSERT(&host(p) == &ndl);
1784
1785 CharString ndl2 = "AT CARLS";
1786 setHost(p, std::move(ndl2));
1787 SEQAN_ASSERT(&host(p) == &ndl);
1788 SEQAN_ASSERT_EQ(ndl, "ATACAAAA");
1789 SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1790 }
1791
1792 { // Set to rvalue reference before setting holder dependent.
1793 DnaString ndl = "AGTGGATAGAGAT";
1794 TPattern p;
1795 setHost(p, std::move(ndl));
1796 SEQAN_ASSERT(&host(p) != &ndl);
1797 SEQAN_ASSERT_EQ(host(p), "AGTGGATAGAGAT");
1798
1799 CharString ndl2 = "AT CARLS";
1800 setHost(p, ndl2);
1801 SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1802 }
1803
1804 { // Set to rvalue with different alphabet.
1805 TPattern p;
1806 setHost(p, "AT CARLS");
1807 SEQAN_ASSERT_EQ(host(p), "ATACAAAA");
1808 }
1809 }
1810
SEQAN_DEFINE_TEST(test_pattern_copycon)1811 SEQAN_DEFINE_TEST(test_pattern_copycon) {
1812 // Test whether the needle is preserved in copying a pattern.
1813 // See http://trac.mi.fu-berlin.de/seqan/ticket/318
1814 test_pattern_copycon<Simple>();
1815 test_pattern_copycon<Horspool>();
1816 test_pattern_copycon<ShiftAnd>();
1817 test_pattern_copycon<ShiftOr>();
1818 test_pattern_copycon<HammingSimple>();
1819 test_pattern_copycon<WildShiftAnd>();
1820 test_pattern_copycon<Bfam<Oracle> >();
1821 test_pattern_copycon<Bfam<Trie> >();
1822 }
1823
SEQAN_DEFINE_TEST(test_pattern_assign)1824 SEQAN_DEFINE_TEST(test_pattern_assign) {
1825 // Test whether the needle is preserved in assigning a pattern.
1826 // See http://trac.mi.fu-berlin.de/seqan/ticket/318
1827 test_pattern_assign<Simple>();
1828 test_pattern_assign<Horspool>();
1829 test_pattern_assign<ShiftAnd>();
1830 test_pattern_assign<ShiftOr>();
1831 test_pattern_assign<HammingSimple>();
1832 test_pattern_assign<WildShiftAnd>();
1833 test_pattern_assign<Bfam<Oracle> >();
1834 test_pattern_assign<Bfam<Trie> >();
1835 }
1836
SEQAN_DEFINE_TEST(test_pattern_movecon)1837 SEQAN_DEFINE_TEST(test_pattern_movecon) {
1838 test_pattern_movecon<Simple>();
1839 test_pattern_movecon<Horspool>();
1840 test_pattern_movecon<ShiftAnd>();
1841 test_pattern_movecon<ShiftOr>();
1842 test_pattern_copycon<HammingSimple>();
1843 test_pattern_copycon<WildShiftAnd>();
1844 test_pattern_copycon<Bfam<Oracle> >();
1845 test_pattern_copycon<Bfam<Trie> >();
1846 }
1847
SEQAN_DEFINE_TEST(test_pattern_moveassign)1848 SEQAN_DEFINE_TEST(test_pattern_moveassign) {
1849 test_pattern_moveassign<Simple>();
1850 test_pattern_moveassign<Horspool>();
1851 test_pattern_moveassign<ShiftAnd>();
1852 test_pattern_moveassign<ShiftOr>();
1853 test_pattern_assign<HammingSimple>();
1854 test_pattern_assign<WildShiftAnd>();
1855 test_pattern_assign<Bfam<Oracle> >();
1856 test_pattern_assign<Bfam<Trie> >();
1857 }
1858
1859 // TODO(rrahn): Should be a typed test for all pattern classes.
SEQAN_DEFINE_TEST(test_pattern_set_host)1860 SEQAN_DEFINE_TEST(test_pattern_set_host) {
1861 test_pattern_set_host<Simple>();
1862 test_pattern_set_host<Horspool>();
1863 test_pattern_set_host<ShiftAnd>();
1864 test_pattern_set_host<ShiftOr>();
1865 test_pattern_set_host<HammingSimple>();
1866 test_pattern_set_host<WildShiftAnd>();
1867 test_pattern_set_host<Bfam<Oracle> >();
1868 test_pattern_set_host<Bfam<Trie> >();
1869 test_pattern_set_host<Myers<AlignTextBanded<FindInfix, NMatchesN_, NMatchesN_>, void> >();
1870 test_pattern_set_host<Myers<AlignTextBanded<FindInfix, NMatchesN_, NMatchesN_>, Myers<FindPrefix> > >();
1871 test_pattern_set_host<Myers<AlignTextBanded<FindPrefix, NMatchesN_, NMatchesN_>, void> >();
1872 test_pattern_set_host<Myers<AlignTextBanded<FindPrefix, NMatchesN_, NMatchesN_>, Myers<FindPrefix> > >();
1873 test_pattern_set_host<Myers<FindInfix, void> >();
1874 test_pattern_set_host<Myers<FindInfix, Myers<FindPrefix> > >();
1875 test_pattern_set_host<Myers<FindPrefix, void> >();
1876 test_pattern_set_host<Myers<FindPrefix, Myers<FindPrefix> > >();
1877 }
1878
SEQAN_BEGIN_TESTSUITE(test_find)1879 SEQAN_BEGIN_TESTSUITE(test_find) {
1880 // SEQAN_CALL_TEST(test_myers_trigger_bug);
1881 SEQAN_CALL_TEST(test_myers_find_begin);
1882 SEQAN_CALL_TEST(test_myers_find_banded);
1883 SEQAN_CALL_TEST(test_myers_find_banded_csp);
1884
1885 // Testing Myers<FindInfix> with findBegin().
1886 SEQAN_CALL_TEST(test_myers_find_infix_find_begin_at_start);
1887 SEQAN_CALL_TEST(test_myers_find_infix_find_begin_within);
1888
1889 SEQAN_CALL_TEST(test_find_on_segments);
1890
1891 SEQAN_CALL_TEST(test_find_hamming_simple);
1892
1893 SEQAN_CALL_TEST(test_find_hamming_simple_regression_rmbench);
1894
1895 // Testing MyersUkkonen with large needle and manual score limit.
1896 SEQAN_CALL_TEST(test_regression_rmbench);
1897
1898 // Call all tests.
1899 SEQAN_CALL_TEST(test_find_online_Simple);
1900 SEQAN_CALL_TEST(test_find_online_Horspool);
1901 SEQAN_CALL_TEST(test_find_online_ShiftAnd);
1902 SEQAN_CALL_TEST(test_find_online_ShiftOr);
1903 SEQAN_CALL_TEST(test_find_online_BndmAlgo);
1904 SEQAN_CALL_TEST(test_find_online_BFAM_Oracle);
1905 SEQAN_CALL_TEST(test_find_online_BFAM_Trie);
1906 SEQAN_CALL_TEST(test_find_online_wildcards);
1907 SEQAN_CALL_TEST(test_find_online_multi_AhoCorasick);
1908 SEQAN_CALL_TEST(test_find_online_multi_MultipleShiftAnd);
1909 SEQAN_CALL_TEST(test_find_online_multi_SetHorspool);
1910 SEQAN_CALL_TEST(test_find_online_multi_WuManber);
1911 SEQAN_CALL_TEST(test_find_online_multi_MultiBFAM_Oracle);
1912 SEQAN_CALL_TEST(test_find_online_multi_MultiBFAM_Trie);
1913 SEQAN_CALL_TEST(test_find_approx_prefix_edit_dist_dpsearch);
1914 SEQAN_CALL_TEST(test_approx_prefix_edit_dist_myers);
1915 SEQAN_CALL_TEST(test_approx_edit_dist_dp_search_simple_score);
1916 SEQAN_CALL_TEST(test_approx_edit_dist_myers);
1917 // Test DP Serach.
1918 SEQAN_CALL_TEST(test_approx_edit_dp_search_simple_score_legacy_case);
1919 // Test other approximate search algorithms.
1920 SEQAN_CALL_TEST(test_approx_edit_dist_abndm_algo);
1921 SEQAN_CALL_TEST(test_approx_edit_dist_pex_non_hierarchical);
1922 SEQAN_CALL_TEST(test_approx_edit_dist_pex_hierarchical);
1923 // Tests with different multifinder.
1924 SEQAN_CALL_TEST(test_approx_edit_dist_pex_non_hierarchical_aho_corasick);
1925 SEQAN_CALL_TEST(test_approx_edit_dist_pex_non_hierarchical_multi_bfam);
1926 // Test for hamming distance approximate matching.
1927 SEQAN_CALL_TEST(test_find_hamming_simple);
1928
1929 SEQAN_CALL_TEST(test_pattern_copycon);
1930 SEQAN_CALL_TEST(test_pattern_assign);
1931
1932 SEQAN_CALL_TEST(test_pattern_movecon);
1933 SEQAN_CALL_TEST(test_pattern_moveassign);
1934 SEQAN_CALL_TEST(test_pattern_set_host);
1935 }
1936 SEQAN_END_TESTSUITE
1937