1 /*  $Id: bl2seq_unit_test.cpp 607143 2020-04-30 13:01:21Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_unit_test.cpp
31  * Unit tests for the CBl2Seq class
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/test_boost.hpp>
36 #include <algo/blast/api/bl2seq.hpp>
37 #include <objects/seqalign/Seq_align.hpp>
38 #include <objects/seqalign/Seq_align_set.hpp>
39 #include <objects/seqalign/Std_seg.hpp>
40 #include <objects/seqalign/Dense_seg.hpp>
41 #include <objects/seqalign/Score.hpp>
42 #include <objects/general/Object_id.hpp>
43 
44 #include <serial/serial.hpp>
45 #include <serial/iterator.hpp>
46 #include <serial/objostr.hpp>
47 
48 #include <algo/blast/api/tblastn_options.hpp>
49 #include <algo/blast/format/blastfmtutil.hpp>
50 
51 #include <algo/blast/api/blast_options_handle.hpp>
52 #include <algo/blast/api/blast_prot_options.hpp>
53 #include <algo/blast/api/blastx_options.hpp>
54 #include <algo/blast/api/tblastn_options.hpp>
55 #include <algo/blast/api/blast_nucl_options.hpp>
56 #include <algo/blast/api/disc_nucl_options.hpp>
57 #include <algo/blast/api/local_blast.hpp>       // for CLocalBlast
58 #include <algo/blast/api/local_db_adapter.hpp>  // for CLocalDbAdapter
59 #include <algo/blast/api/objmgr_query_data.hpp> // for CObjMgr_QueryFactory
60 #include <algo/blast/blastinput/blast_input.hpp>
61 #include <algo/blast/blastinput/blast_fasta_input.hpp>
62 #include <algo/blast/api/windowmask_filter.hpp>
63 
64 #include <objtools/data_loaders/genbank/gbloader.hpp>
65 #include <objtools/simple/simple_om.hpp>        // for CSimpleOM
66 #include <objtools/readers/fasta.hpp>           // for CFastaReader
67 #include <objmgr/util/seq_loc_util.hpp>
68 #include <objmgr/util/sequence.hpp>
69 
70 #include "test_objmgr.hpp"
71 
72 #include <util/random_gen.hpp>
73 
74 #include <corelib/test_boost.hpp>
75 
76 #ifndef SKIP_DOXYGEN_PROCESSING
77 
78 USING_NCBI_SCOPE;
79 USING_SCOPE(blast);
80 USING_SCOPE(objects);
81 
BOOST_AUTO_TEST_SUITE(bl2seq)82 BOOST_AUTO_TEST_SUITE(bl2seq)
83 
84 CRef<CSeq_loc> s_MakePackedInt2(CRef<CSeq_id> id, vector<TSeqRange>& range_vec)
85 {
86      CRef<CSeq_loc> retval(new CSeq_loc());
87      NON_CONST_ITERATE(vector<TSeqRange>, itr, range_vec)
88      {
89           retval->SetPacked_int().AddInterval(*id, (*itr).GetFrom(), (*itr).GetTo());
90      }
91      return retval;
92 }
93 
BOOST_AUTO_TEST_CASE(NucleotideMultipleSeqLocs1)94 BOOST_AUTO_TEST_CASE(NucleotideMultipleSeqLocs1) {
95 
96     CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 224514841));
97     CRef<CScope> scope(CSimpleOM::NewScope());
98     TSeqLocVector query;
99 
100     vector<TSeqRange> range_vec;
101 
102     range_vec.push_back(TSeqRange(73011288, 73011591));
103     range_vec.push_back(TSeqRange(73080052, 73080223));
104     range_vec.push_back(TSeqRange(73096483, 73096589));
105     range_vec.push_back(TSeqRange(73097765, 73097864));
106     range_vec.push_back(TSeqRange(73113762, 73113809));
107     range_vec.push_back(TSeqRange(73119266, 73119340));
108     range_vec.push_back(TSeqRange(73168955, 73169141));
109     range_vec.push_back(TSeqRange(73178294, 73178376));
110     range_vec.push_back(TSeqRange(73220818, 73220920));
111     range_vec.push_back(TSeqRange(73223091, 73223365));
112 
113     CRef<CSeq_loc> temp_loc1 = s_MakePackedInt2(id, range_vec);
114 ofstream o1("temploc1.out.asn");
115 o1 << MSerial_AsnText << *temp_loc1 ;
116     query.push_back(SSeqLoc(temp_loc1, scope));
117 
118     TSeqLocVector subjects;
119     CRef<CSeq_id> sid1(new CSeq_id(CSeq_id::e_Gi, 262050671));
120     CRef<CSeq_loc> ssl1(new CSeq_loc());
121     ssl1->SetWhole(*sid1);
122     SSeqLoc subj_seqloc(ssl1, scope);
123     subjects.push_back(subj_seqloc);
124 
125     CBl2Seq bl2seq(query, subjects, eMegablast);
126     TSeqAlignVector sav(bl2seq.Run());
127     BOOST_CHECK_EQUAL(10, (int) sav.size());
128     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
129     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
130     int num_ident = 0;
131     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
132     BOOST_CHECK_EQUAL((int)303, num_ident);
133     BOOST_CHECK_EQUAL((TSeqPos)73011288, sar->GetSeqStart(0));
134     BOOST_CHECK_EQUAL((TSeqPos)1, sar->GetSeqStart(1));
135 
136 #if 1
137 ofstream o("seqalign.out.asn");
138 ITERATE(TSeqAlignVector, v, sav) {
139 o << MSerial_AsnText << **v ;
140 }
141 o.close();
142 #endif
143 }
144 
145 
BOOST_AUTO_TEST_CASE(ProteinBlastInvalidSeqIdSelfHit)146 BOOST_AUTO_TEST_CASE(ProteinBlastInvalidSeqIdSelfHit)
147 {
148     CRef<CSeq_loc> loc(new CSeq_loc());
149     loc->SetWhole().SetGi(INVALID_GI);
150 
151     CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
152     scope->AddDefaults();
153     SSeqLoc query(loc, scope);
154 
155     TSeqLocVector subjects;
156     {
157         CRef<CSeq_loc> local_loc(new CSeq_loc());
158         local_loc->SetWhole().SetGi(INVALID_GI);
159 
160         CScope* local_scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
161         local_scope->AddDefaults();
162         subjects.push_back(SSeqLoc(local_loc, local_scope));
163     }
164 
165     // BLAST by concatenating all queries
166     CBl2Seq blaster4all(query, subjects, eBlastp);
167     TSeqAlignVector sas_v;
168     BOOST_CHECK_THROW(sas_v = blaster4all.Run(), CBlastException);
169 }
170 
171 enum EBl2seqTest {
172     eBlastp_129295_129295 = 0,
173     eBlastn_555_555,
174     eMegablast_555_555,
175     eDiscMegablast_555_555,
176     eBlastx_555_129295,
177     eTblastn_129295_555,
178     eTblastn_129295_555_large_word,
179     eTblastx_555_555,
180     eTblastx_many_hits,
181     eBlastp_129295_7662354,
182     eBlastn_555_3090,
183     eBlastp_multi_q,
184     eBlastn_multi_q,
185     eBlastp_multi_q_s,
186     eTblastn_oof,
187     eBlastx_oof,
188     eDiscMegablast_U02544_U61969,
189     eMegablast_chrom_mrna
190 };
191 
192 /* The following functions are used to test the functionality to interrupt
193  * CBl2Seq runs */
194 
195 /// Returns true so that the processing stops upon the first invocation of this
196 /// callback
interrupt_immediately(SBlastProgress *)197 extern "C" Boolean interrupt_immediately(SBlastProgress* /*progress_info*/)
198 {
199     return TRUE;
200 }
201 
202 /// Returns false so that the processing never stops in spite of a callback
203 /// function to interrupt the process is provided
do_not_interrupt(SBlastProgress *)204 extern "C" Boolean do_not_interrupt(SBlastProgress* /*progress_info*/)
205 {
206     return FALSE;
207 }
208 
209 /// This callback never interrupts the BLAST search, its only purpose is to
210 /// count the number of times this is invoked for the given input. Also to be
211 /// used in CBl2SeqTest::testInterruptXExitAtRandom.
callback_counter(SBlastProgress * progress_info)212 extern "C" Boolean callback_counter(SBlastProgress* progress_info)
213 {
214     int& counter = *reinterpret_cast<int*>(progress_info->user_data);
215     counter++;
216     return FALSE;
217 }
218 
219 /// This callback interrupts the BLAST search after the callback has been
220 /// executed the requested number of times in the pair's second member.
221 /// This is used in CBl2SeqTest::testInterruptXExitAtRandom.
interrupt_at_random(SBlastProgress * progress_info)222 extern "C" Boolean interrupt_at_random(SBlastProgress* progress_info)
223 {
224     pair<int, int>& progress_pair =
225         *reinterpret_cast< pair<int, int>* >(progress_info->user_data);
226 
227     if (++progress_pair.first == progress_pair.second) {
228         return TRUE;
229     } else {
230         return FALSE;
231     }
232 }
233 
234 /// The interruption occurs after 3 invokations of this callback
interrupt_after3calls(SBlastProgress *)235 extern "C" Boolean interrupt_after3calls(SBlastProgress* /*progress_info*/)
236 {
237     static int num_calls = 0;
238     if (++num_calls < 3) {
239         return FALSE;
240     } else {
241         return TRUE;
242     }
243 }
244 
245 /// The interruption occurs after starting the traceback stage
interrupt_on_traceback(SBlastProgress * progress_info)246 extern "C" Boolean interrupt_on_traceback(SBlastProgress* progress_info)
247 {
248     if (progress_info->stage == eTracebackSearch) {
249         return TRUE;
250     } else {
251         return FALSE;
252     }
253 }
254 
testRawCutoffs(CBl2Seq & blaster,EProgram program,EBl2seqTest test_id)255 void testRawCutoffs(CBl2Seq& blaster, EProgram program,
256                     EBl2seqTest test_id)
257 {
258     BlastRawCutoffs* raw_cutoffs =
259         blaster.GetDiagnostics()->cutoffs;
260     int x_drop_ungapped;
261     int gap_trigger;
262 
263     if (program == eBlastn || program == eDiscMegablast) {
264         x_drop_ungapped = 22;
265         gap_trigger = 16;
266     } else if (program == eMegablast) {
267         x_drop_ungapped = 8;
268         gap_trigger = 8;
269     } else {
270         x_drop_ungapped = 16;
271         gap_trigger = 41;
272     }
273 
274     switch (test_id) {
275     case eBlastn_555_3090:
276         x_drop_ungapped = 22;
277         gap_trigger = 18;
278         break;
279     case eBlastn_multi_q:
280         x_drop_ungapped = 22;
281         gap_trigger = 18;
282         break;
283     case eMegablast_chrom_mrna:
284         x_drop_ungapped = 7;
285         gap_trigger = 7;
286         break;
287     case eDiscMegablast_U02544_U61969:
288         x_drop_ungapped = 22;
289         gap_trigger = 20;
290         break;
291     case eBlastp_multi_q:
292         gap_trigger = 23;
293         break;
294     case eBlastp_multi_q_s:
295         gap_trigger = 19;
296         break;
297     case eBlastp_129295_129295:
298         gap_trigger = 19; break;
299     case eTblastn_129295_555:
300         gap_trigger = 23; break;
301     case eTblastn_129295_555_large_word:
302         gap_trigger = 23; break;
303     case eBlastp_129295_7662354:
304         gap_trigger = 23; break;
305     case eBlastx_555_129295:
306         gap_trigger = 19; break;
307     case eTblastn_oof:
308         gap_trigger = 43;
309     default:
310         break;
311     }
312 
313     switch (program) {
314     case eBlastn: case eDiscMegablast:
315         BOOST_CHECK_EQUAL(x_drop_ungapped,
316                              raw_cutoffs->x_drop_ungapped);
317         BOOST_CHECK_EQUAL(33, raw_cutoffs->x_drop_gap);
318         // CC changed 08/07/08
319         //BOOST_CHECK_EQUAL(55, raw_cutoffs->x_drop_gap_final);
320         BOOST_CHECK_EQUAL(110, raw_cutoffs->x_drop_gap_final);
321         BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
322         break;
323     case eMegablast:
324         BOOST_CHECK_EQUAL(x_drop_ungapped,
325                              raw_cutoffs->x_drop_ungapped);
326         BOOST_CHECK_EQUAL(13, raw_cutoffs->x_drop_gap);
327         // CC changed 08/07/08
328         //BOOST_CHECK_EQUAL(27, raw_cutoffs->x_drop_gap_final);
329         BOOST_CHECK_EQUAL(54, raw_cutoffs->x_drop_gap_final);
330         BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
331         break;
332     case eBlastp: case eBlastx: case eTblastn:
333         BOOST_CHECK_EQUAL(38, raw_cutoffs->x_drop_gap);
334         BOOST_CHECK_EQUAL(64, raw_cutoffs->x_drop_gap_final);
335         BOOST_CHECK_EQUAL(gap_trigger, raw_cutoffs->ungapped_cutoff);
336         /* No break intentional: next test is valid for all the above
337            programs */
338     case eTblastx:
339         BOOST_CHECK_EQUAL(x_drop_ungapped,
340                              raw_cutoffs->x_drop_ungapped);
341         break;
342     default: break;
343     }
344 }
345 
testResultAlignments(size_t num_queries,size_t num_subjects,TSeqAlignVector result_alnvec)346 void testResultAlignments(size_t num_queries,
347                           size_t num_subjects,
348                           TSeqAlignVector result_alnvec)
349 {
350     size_t num_total_alns = num_queries * num_subjects;
351 
352     // test the number of resulting alignments
353     BOOST_REQUIRE_EQUAL(result_alnvec.size(), num_total_alns);
354 
355     // test the correct ordering of resulting alignments
356     // (q1 s1 q1 s2 ... q2 s1 q2 s2 ...)
357 
358     CConstRef<CSeq_id> id_query, id_prev_query;
359     CConstRef<CSeq_id> id_subject;
360     vector< CConstRef<CSeq_id> > id_prev_subjects;
361     id_prev_subjects.resize(num_subjects);
362 
363     bool prev_query_available = false;
364     vector<bool> prev_subjects_available(num_subjects, false);
365 
366     /* DEBUG OUTPUT
367     cerr << "................................................" << endl;
368     for (size_t i = 0; i < result_alnvec.size(); i++)
369         cerr << "\n<" << i << ">\n"
370             << MSerial_AsnText << result_alnvec[i].GetObject() << endl;
371     cerr << "................................................" << endl;
372     ------------ */
373 
374     for (size_t i_query = 0; i_query < num_queries; i_query++)
375     {
376         prev_query_available = false;
377         for (size_t i_subject = 0; i_subject < num_subjects; i_subject++)
378         {
379             size_t i_lin_index = i_query * num_subjects + i_subject;
380             CRef<CSeq_align_set> aln_set = result_alnvec[i_lin_index];
381 
382             // test if the alignment set is available (even if empty)
383             BOOST_REQUIRE(aln_set.NotNull());
384 
385             // if the alignment set is not empty, take the first alignment
386             // and see if the ID's are in correct order
387             if (aln_set->Get().size() > 0)
388             {
389                 CRef<CSeq_align> aln = aln_set->Get().front();
390                 id_query.Reset(&(aln->GetSeq_id(0)));
391                 id_subject.Reset(&(aln->GetSeq_id(1)));
392 
393                 // check if the query id was the same
394                 // for the previous subject
395                 if (i_subject > 0 &&
396                     prev_query_available)
397                 {
398                     BOOST_REQUIRE(
399                         id_query->Match(
400                         id_prev_query.GetObject()));
401                 }
402 
403                 // check if the subject id was the same
404                 // on the same position for the previous query
405                 if (i_query > 0 &&
406                     prev_subjects_available[i_subject])
407                 {
408                     BOOST_REQUIRE(
409                         id_subject->Match(
410                         id_prev_subjects[i_subject].GetObject()));
411                 }
412 
413                 // update the entry in previous subjects vector
414                 prev_subjects_available[i_subject] = true;
415                 id_prev_subjects[i_subject] = id_subject;
416 
417                 // update the previous query entry
418                 prev_query_available = true;
419                 id_prev_query = id_query;
420             }
421         }
422     }
423 }
424 
testBlastHitCounts(CBl2Seq & blaster,EBl2seqTest test_id)425 void testBlastHitCounts(CBl2Seq& blaster, EBl2seqTest test_id)
426 {
427     BlastUngappedStats* ungapped_stats =
428         blaster.GetDiagnostics()->ungapped_stat;
429     BlastGappedStats* gapped_stats =
430         blaster.GetDiagnostics()->gapped_stat;
431 
432     switch (test_id) {
433     case eBlastp_129295_129295:
434         BOOST_CHECK_EQUAL(314, (int)ungapped_stats->lookup_hits);
435         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
436         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
437         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
438         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
439         break;
440     case eBlastn_555_555:
441         BOOST_CHECK_EQUAL(157, (int)ungapped_stats->lookup_hits);
442         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
443         BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
444         BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
445         BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
446         break;
447     case eMegablast_555_555:
448         BOOST_CHECK_EQUAL(30, (int)ungapped_stats->lookup_hits);
449         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
450         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
451         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
452         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
453         break;
454     case eDiscMegablast_555_555:
455         BOOST_CHECK_EQUAL(582, (int)ungapped_stats->lookup_hits);
456         // CC changed 08/07/08
457         //BOOST_CHECK_EQUAL(32, ungapped_stats->init_extends);
458         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
459         // CC changed 08/07/08
460         //BOOST_CHECK_EQUAL(32, ungapped_stats->good_init_extends);
461         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
462         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
463         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
464         break;
465     case eBlastx_555_129295:
466         BOOST_CHECK_EQUAL(282, (int)ungapped_stats->lookup_hits);
467         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
468         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
469         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
470         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
471         break;
472     case eTblastn_129295_555:
473         BOOST_CHECK_EQUAL(157, (int)ungapped_stats->lookup_hits);
474         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
475         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
476         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
477         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
478         break;
479     case eTblastn_129295_555_large_word:
480         BOOST_CHECK_EQUAL(5, (int)ungapped_stats->lookup_hits);
481         BOOST_CHECK_EQUAL(4, ungapped_stats->init_extends);
482         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
483         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
484         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
485         break;
486     case eTblastx_555_555:
487         BOOST_CHECK_EQUAL(2590, (int)ungapped_stats->lookup_hits);
488         BOOST_CHECK_EQUAL(57, ungapped_stats->init_extends);
489         BOOST_CHECK_EQUAL(41, ungapped_stats->good_init_extends);
490         break;
491     case eTblastx_many_hits:
492         BOOST_CHECK_EQUAL(18587, (int)ungapped_stats->lookup_hits);
493         BOOST_CHECK_EQUAL(348, ungapped_stats->init_extends);
494         BOOST_CHECK_EQUAL(66, ungapped_stats->good_init_extends);
495         break;
496     case eBlastp_129295_7662354:
497         BOOST_CHECK_EQUAL(210, (int)ungapped_stats->lookup_hits);
498         BOOST_CHECK_EQUAL(10, ungapped_stats->init_extends);
499         BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
500         BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
501         BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
502         break;
503     case eBlastn_555_3090:
504         BOOST_CHECK_EQUAL(15, (int)ungapped_stats->lookup_hits);
505         BOOST_CHECK_EQUAL(2, ungapped_stats->init_extends);
506         BOOST_CHECK_EQUAL(2, ungapped_stats->good_init_extends);
507         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
508         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
509         break;
510     case eBlastp_multi_q:
511         BOOST_CHECK_EQUAL(2129, (int)ungapped_stats->lookup_hits);
512         BOOST_CHECK_EQUAL(76, ungapped_stats->init_extends);
513         BOOST_CHECK_EQUAL(14, ungapped_stats->good_init_extends);
514         BOOST_CHECK_EQUAL(8, gapped_stats->extensions);
515         BOOST_CHECK_EQUAL(8, gapped_stats->good_extensions);
516         break;
517     case eBlastn_multi_q:
518         BOOST_CHECK_EQUAL(963, (int)ungapped_stats->lookup_hits);
519         BOOST_CHECK_EQUAL(13, ungapped_stats->init_extends);
520         BOOST_CHECK_EQUAL(13, ungapped_stats->good_init_extends);
521         BOOST_CHECK_EQUAL(11, gapped_stats->extensions);
522         BOOST_CHECK_EQUAL(11, gapped_stats->good_extensions);
523         break;
524     case eBlastp_multi_q_s:
525 #if 0
526         // The following 2 numbers are different in Release and Debug modes
527         // due to a minor discrepancy in locations masked by seg filtering.
528         // The latter is due to a tiny difference in values involved in
529         // a comparison of real numbers inside the seg algorithm.
530         // In Debug mode:
531         BOOST_CHECK_EQUAL(3579, (int)ungapped_stats->lookup_hits);
532         BOOST_CHECK_EQUAL(138, ungapped_stats->init_extends);
533         // In Release mode:
534         BOOST_CHECK_EQUAL(3580, (int)ungapped_stats->lookup_hits);
535         BOOST_CHECK_EQUAL(140, ungapped_stats->init_extends);
536 #endif
537         // Note: Seg is not enabled for this case anymore
538         // (changed blastp defaults)
539         BOOST_CHECK_EQUAL(3939, (int)ungapped_stats->lookup_hits);
540         BOOST_CHECK_EQUAL(159, ungapped_stats->init_extends);
541         BOOST_CHECK_EQUAL(63, ungapped_stats->good_init_extends);
542         BOOST_CHECK_EQUAL(27, gapped_stats->extensions);
543         BOOST_CHECK_EQUAL(24, gapped_stats->good_extensions);
544         break;
545     case eTblastn_oof:
546         BOOST_CHECK_EQUAL(2666, (int)ungapped_stats->lookup_hits);
547         BOOST_CHECK_EQUAL(50, ungapped_stats->init_extends);
548         BOOST_CHECK_EQUAL(4, ungapped_stats->good_init_extends);
549         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
550         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
551         break;
552     case eBlastx_oof:
553         BOOST_CHECK_EQUAL(5950, (int)ungapped_stats->lookup_hits);
554         BOOST_CHECK_EQUAL(155, ungapped_stats->init_extends);
555         BOOST_CHECK_EQUAL(6, ungapped_stats->good_init_extends);
556         BOOST_CHECK_EQUAL(2, gapped_stats->extensions);
557         BOOST_CHECK_EQUAL(2, gapped_stats->good_extensions);
558         break;
559     case eDiscMegablast_U02544_U61969:
560         BOOST_CHECK_EQUAL(108, (int)ungapped_stats->lookup_hits);
561         // CC changed 08/07/08
562         //BOOST_CHECK_EQUAL(15, ungapped_stats->init_extends);
563         //BOOST_CHECK_EQUAL(15, ungapped_stats->good_init_extends);
564         BOOST_CHECK_EQUAL(3, ungapped_stats->init_extends);
565         BOOST_CHECK_EQUAL(3, ungapped_stats->good_init_extends);
566         BOOST_CHECK_EQUAL(3, gapped_stats->extensions);
567         BOOST_CHECK_EQUAL(3, gapped_stats->good_extensions);
568         break;
569     case eMegablast_chrom_mrna:
570         BOOST_CHECK_EQUAL(14, (int)ungapped_stats->lookup_hits);
571         BOOST_CHECK_EQUAL(1, ungapped_stats->init_extends);
572         BOOST_CHECK_EQUAL(1, ungapped_stats->good_init_extends);
573         BOOST_CHECK_EQUAL(1, gapped_stats->extensions);
574         BOOST_CHECK_EQUAL(1, gapped_stats->good_extensions);
575         break;
576     default: break;
577     }
578 }
579 
BOOST_AUTO_TEST_CASE(ProteinBlastSelfHit)580 BOOST_AUTO_TEST_CASE(ProteinBlastSelfHit)
581 {
582     //const int kSeqLength = 232;
583     CSeq_id id("gi|129295");
584     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
585 
586     CBl2Seq blaster(*sl, *sl, eBlastp);
587     TSeqAlignVector sav(blaster.Run());
588     BOOST_REQUIRE(sav[0].NotEmpty());
589     BOOST_REQUIRE( !sav[0]->IsEmpty() );
590     BOOST_REQUIRE(sav[0]->Get().begin()->NotEmpty());
591     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
592     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
593     testBlastHitCounts(blaster, eBlastp_129295_129295);
594     testRawCutoffs(blaster, eBlastp, eBlastp_129295_129295);
595 
596     // the number of identities is NOT calculated when composition based
597     // statistics is turned on (default for blastp)
598     int num_ident = 0;
599     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
600 #if 0
601     ofstream o("0.asn");
602     o << MSerial_AsnText << *sar ;
603     o.close();
604 #endif
605     BOOST_CHECK_EQUAL(232, num_ident);
606 
607     // calculate the number of identities using the BLAST formatter
608 /*
609     double percent_identity =
610         CBlastFormatUtil::GetPercentIdentity(*sar, *sl->scope, false);
611     BOOST_CHECK_EQUAL(1, (int) percent_identity);
612 */
613 
614     // Check the ancillary results
615     CSearchResultSet::TAncillaryVector ancillary_data;
616     blaster.GetAncillaryResults(ancillary_data);
617     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
618     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
619     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
620     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
621 
622 }
623 
BOOST_AUTO_TEST_CASE(TBlastn2Seqs)624 BOOST_AUTO_TEST_CASE(TBlastn2Seqs)
625 {
626     CSeq_id qid("gi|129295");
627     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
628 
629     CSeq_id sid("gi|555");
630     auto_ptr<SSeqLoc> subj(
631         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
632 
633     CBl2Seq blaster(*query, *subj, eTblastn);
634     TSeqAlignVector sav(blaster.Run());
635     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
636 
637 #if 0
638     ofstream o("1.asn");
639     o << MSerial_AsnText << *sar ;
640     o.close();
641 #endif
642 
643     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
644     testBlastHitCounts(blaster, eTblastn_129295_555);
645     testRawCutoffs(blaster, eTblastn, eTblastn_129295_555);
646 
647     int score = 0, comp_adj = 0;
648     sar->GetNamedScore(CSeq_align::eScore_Score, score);
649     sar->GetNamedScore(CSeq_align::eScore_CompAdjMethod, comp_adj);
650     BOOST_CHECK_EQUAL(26, score);
651     BOOST_CHECK_EQUAL(2, comp_adj);
652 
653     // Check the ancillary results
654     CSearchResultSet::TAncillaryVector ancillary_data;
655     blaster.GetAncillaryResults(ancillary_data);
656     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
657     BOOST_REQUIRE( ancillary_data.front().NotEmpty() );
658     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
659     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
660     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
661 }
662 
BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand1)663 BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand1)
664 {
665     CSeq_id qid("gi|1945390");
666     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
667 
668     pair<TSeqPos, TSeqPos> range(150000, 170000);
669     CSeq_id sid("gi|4755212");
670     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_minus));
671 
672     CBl2Seq blaster(*query, *subj, eTblastn);
673     TSeqAlignVector sav(blaster.Run());
674     BOOST_CHECK_EQUAL(11, (int) sav[0]->Get().size());
675     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
676     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
677     vector < CRef< CSeq_loc > > locs = sar->GetSegs().GetStd().front()->GetLoc();
678     BOOST_CHECK_EQUAL((int)eNa_strand_minus, (int)(locs[1])->GetStrand());
679     int num_ident = 0;
680     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
681     BOOST_CHECK_EQUAL(161, num_ident);
682 #if 0
683 ofstream o("minus1.new.asn");
684 ITERATE(TSeqAlignVector, v, sav) {
685 o << MSerial_AsnText << **v ;
686 }
687 o.close();
688 #endif
689 }
690 
BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand2)691 BOOST_AUTO_TEST_CASE(TBlastn2SeqsRevStrand2)
692 {
693     CSeq_id qid("gi|1945390");
694     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
695 
696     CSeq_id sid("gi|1945388");
697     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_minus));
698 
699     CBl2Seq blaster(*query, *subj, eTblastn);
700     TSeqAlignVector sav(blaster.Run());
701     BOOST_CHECK_EQUAL(1, (int) sav[0]->Get().size());
702     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
703     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
704     vector < CRef< CSeq_loc > > locs = sar->GetSegs().GetStd().front()->GetLoc();
705     BOOST_CHECK_EQUAL((int)eNa_strand_minus, (int)(locs[1])->GetStrand());
706     int num_ident = 0;
707     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
708     BOOST_CHECK_EQUAL(11, num_ident);
709 #if 0
710 ofstream o("minus2.asn");
711 o << MSerial_AsnText << *sar ;
712 o.close();
713 #endif
714 }
715 
716 
BOOST_AUTO_TEST_CASE(TBlastn2SeqsCompBasedStats)717 BOOST_AUTO_TEST_CASE(TBlastn2SeqsCompBasedStats)
718 {
719     CSeq_id qid("gi|68737"); // "pir|A01243|DXCH"
720     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
721 
722     CSeq_id sid("gi|118086484");
723     auto_ptr<SSeqLoc> subj(
724         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
725 
726     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eTblastn));
727     opts->SetOptions().SetCompositionBasedStats(eCompositionBasedStats);
728 
729     CBl2Seq blaster(*query, *subj, *opts);
730     TSeqAlignVector sav(blaster.Run());
731     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
732     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetStd().size());
733 
734     int num_ident = 0;
735     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
736     BOOST_CHECK_EQUAL(229, num_ident);
737 #if 0
738 ofstream o("2.asn");
739 o << MSerial_AsnText << *sar ;
740 o.close();
741 #endif
742 
743     // Check the ancillary results
744     CSearchResultSet::TAncillaryVector ancillary_data;
745     blaster.GetAncillaryResults(ancillary_data);
746     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
747     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
748     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
749     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
750 }
751 
BOOST_AUTO_TEST_CASE(TBlastn2SeqsLargeWord)752 BOOST_AUTO_TEST_CASE(TBlastn2SeqsLargeWord)
753 {
754     CSeq_id qid("gi|129295");
755     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
756 
757     CSeq_id sid("gi|555");
758     auto_ptr<SSeqLoc> subj(
759         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
760 
761     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eTblastn));
762     opts->SetOptions().SetWordSize(6);
763     opts->SetOptions().SetLookupTableType(eCompressedAaLookupTable);
764     opts->SetOptions().SetWordThreshold(21.69);
765     opts->SetOptions().SetWindowSize(0);
766     opts->SetOptions().SetCompositionBasedStats(eNoCompositionBasedStats);
767 
768     CBl2Seq blaster(*query, *subj, *opts);
769     TSeqAlignVector sav(blaster.Run());
770     BOOST_CHECK_EQUAL(1, (int)sav[0]->Size());
771     testBlastHitCounts(blaster, eTblastn_129295_555_large_word);
772     testRawCutoffs(blaster, eTblastn, eTblastn_129295_555_large_word);
773 
774     int num_ident = 0;
775     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
776     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
777 #if 0
778 ofstream o("3.asn");
779 o << MSerial_AsnText << *sar ;
780 o.close();
781 #endif
782     BOOST_CHECK_EQUAL(5, num_ident);
783 }
784 
BOOST_AUTO_TEST_CASE(IdenticalProteins)785 BOOST_AUTO_TEST_CASE(IdenticalProteins)
786 {
787     //const int kSeqLength = 377;
788     CSeq_id qid("gi|34810917");
789     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
790     CSeq_id sid("gi|34810916");
791     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
792 
793     CBl2Seq blaster(*query, *subj, eBlastp);
794     TSeqAlignVector sav(blaster.Run());
795     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
796     BOOST_CHECK_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
797 
798     // the number of identities is NOT calculated when composition based
799     // statistics is turned on (default for blastp)
800     int num_ident = 0;
801     sar->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
802 #if 0
803     ofstream o("4.asn");
804     o << MSerial_AsnText << *sar ;
805     o.close();
806 #endif
807     BOOST_CHECK_EQUAL(377, num_ident);
808 
809     // calculate the number of identities using the BLAST formatter
810 /*
811     double percent_identity =
812         CBlastFormatUtil::GetPercentIdentity(*sar, *query->scope, false);
813     BOOST_CHECK_EQUAL(1, (int) percent_identity);
814 */
815 
816     // Check the ancillary results
817     CSearchResultSet::TAncillaryVector ancillary_data;
818     blaster.GetAncillaryResults(ancillary_data);
819     BOOST_CHECK_EQUAL((size_t)1, ancillary_data.size());
820     BOOST_CHECK( ancillary_data.front()->GetGappedKarlinBlk() != NULL );
821     BOOST_CHECK( ancillary_data.front()->GetUngappedKarlinBlk() != NULL );
822     BOOST_CHECK( ancillary_data.front()->GetSearchSpace() != (Int8)0 );
823 }
824 
BOOST_AUTO_TEST_CASE(UnsupportedOption)825 BOOST_AUTO_TEST_CASE(UnsupportedOption) {
826     CDiscNucleotideOptionsHandle opts_handle;
827     BOOST_REQUIRE_THROW(opts_handle.SetTraditionalBlastnDefaults(),
828                         CBlastException);
829 }
830 
BOOST_AUTO_TEST_CASE(PositiveMismatchOption)831 BOOST_AUTO_TEST_CASE(PositiveMismatchOption) {
832     CSeq_id qid("gi|408478");  // zebrafish sequence U02544
833     CSeq_id sid("gi|1546012"); // mouse sequence U61969
834 
835     auto_ptr<SSeqLoc> query(
836         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
837     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
838 
839     const int kMatch = 2;
840     const int kMismatch = 5;  // Positive mismatch not allowed.
841 
842     CBlastNucleotideOptionsHandle nucl_options_handle;
843 
844     nucl_options_handle.SetMatchReward(kMatch);
845     nucl_options_handle.SetMismatchPenalty(kMismatch);
846     CBl2Seq blaster(*query, *subj, nucl_options_handle);
847     try {
848        TSeqAlignVector sav(blaster.Run());
849     } catch (const CException& e) {
850         BOOST_REQUIRE(
851             !strcmp("BLASTN penalty must be negative",
852                     e.GetMsg().c_str()));
853     }
854 }
855 
856 // If a single query is fully masked, throw an exception (i.e.: an error)
BOOST_AUTO_TEST_CASE(FullyMaskedSequence)857 BOOST_AUTO_TEST_CASE(FullyMaskedSequence) {
858     CSeq_id qid("ref|NT_024524.13");
859     pair<TSeqPos, TSeqPos> range(27886902, 27886932);
860     auto_ptr<SSeqLoc> query(
861         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
862                                                eNa_strand_plus));
863     range.first = 2052;
864     range.second = 2082;
865     CSeq_id sid("emb|BX641126.1");
866     auto_ptr<SSeqLoc> subj(
867         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
868                                                eNa_strand_minus));
869     CRef<CBlastNucleotideOptionsHandle> options(new CBlastNucleotideOptionsHandle);
870     options->SetTraditionalBlastnDefaults();
871     options->SetMismatchPenalty(-1);
872     options->SetMatchReward(1);
873     options->SetGapXDropoff(100);
874     options->SetMaskAtHash(false);
875     CBl2Seq blaster(*query, *subj, *options);
876     CRef<CSearchResultSet> results;
877     BOOST_REQUIRE_NO_THROW(results = blaster.RunEx());
878 }
879 
880 // In the case of multiple queries and one being masked, only emit a warning
881 // N.B.: this doesn't test the MT case as MT unit tests aren't supported
BOOST_AUTO_TEST_CASE(MultipleQueries1FullyMasked)882 BOOST_AUTO_TEST_CASE(MultipleQueries1FullyMasked) {
883     const bool kHasProteinQuery(true);
884     CBlastInputSourceConfig iconfig(kHasProteinQuery);
885     const CSearchDatabase dbinfo("data/nt.41646578", CSearchDatabase::eBlastDbIsNucleotide);
886     CRef<CBlastOptionsHandle> opts_handle(CBlastOptionsFactory::Create(eTblastn));
887 
888     CNcbiIfstream infile("data/mult_queries_1fully_masked.fsa");
889     CRef<CScope> scope = CBlastScopeSource(kHasProteinQuery).NewScope();
890     CRef<CBlastFastaInputSource> fasta_src
891         (new CBlastFastaInputSource(infile, iconfig));
892     CRef<CBlastInput> input(new CBlastInput(&*fasta_src));
893     TSeqLocVector query_vec = input->GetAllSeqLocs(*scope);
894 
895     // The first iteration runs the search *without* splitting, the second one
896     // forces to use query splitting
897     for (int env = 0; env < 2; env++) {
898 
899         CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(query_vec));
900         auto_ptr<CAutoEnvironmentVariable> envvar1, envvar2;
901         if (env) {
902             envvar1.reset(new CAutoEnvironmentVariable("OVERLAP_CHUNK_SIZE", NStr::SizetToString(10)));
903             envvar2.reset(new CAutoEnvironmentVariable("CHUNK_SIZE", NStr::SizetToString(100)));
904         }
905         CLocalBlast blaster(queries, opts_handle, dbinfo);
906         CRef<CSearchResultSet> results;
907         BOOST_REQUIRE_NO_THROW(results = blaster.Run());
908         BOOST_REQUIRE_EQUAL(3U, results->size());
909         for (size_t query_idx = 0; query_idx < results->size(); query_idx++) {
910             const string& warnings((*results)[query_idx].GetWarningStrings());
911             const string& errors((*results)[query_idx].GetErrorStrings());
912 
913             CNcbiOstrstream oss;
914             if (env) {
915                 oss << "Forced splitting of queries: ";
916             } else {
917                 oss << "No splitting of queries: ";
918             }
919 
920             if (query_idx == 0 || query_idx == 2) {
921                 oss << " expected no warnings/errors, got errors='" << errors << "'; "
922                     << "warnings='" << warnings << "'";
923                 const string msg = CNcbiOstrstreamToString(oss);
924                 BOOST_CHECK_MESSAGE(kEmptyStr == warnings, msg);
925                 BOOST_CHECK_MESSAGE(kEmptyStr == errors, msg);
926             } else {
927                 oss << " expected warnings to match '" << kBlastErrMsg_CantCalculateUngappedKAParams
928                     << "; instead got '" << warnings << "'";
929                 const string msg = CNcbiOstrstreamToString(oss);
930                 BOOST_CHECK_MESSAGE(warnings.find(kBlastErrMsg_CantCalculateUngappedKAParams) != NPOS, msg);
931             }
932         }
933     }
934 }
935 
BOOST_AUTO_TEST_CASE(testInterruptBlastpExitImmediately)936 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitImmediately) {
937     CSeq_id id("gi|129295");
938     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
939 
940     CBl2Seq blaster(*sl, *sl, eBlastp);
941     TInterruptFnPtr fnptr =
942         blaster.SetInterruptCallback(interrupt_immediately);
943     BOOST_REQUIRE(fnptr == NULL);
944 
945     TSeqAlignVector sav;
946     try { sav = blaster.Run(); }
947     catch (...) {
948         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
949     }
950 }
951 
952 // SB-814
BOOST_AUTO_TEST_CASE(Tblastn2Seqs_PlusStrandOnly)953 BOOST_AUTO_TEST_CASE(Tblastn2Seqs_PlusStrandOnly) {
954     // We expect only 1 hit between these two sequences in the plus strand
955     TSeqLocVector query_vec, subj_vec;
956     CSeq_id qid(CSeq_id::e_Gi, 7019569);
957     auto_ptr<SSeqLoc> ql(CTestObjMgr::Instance().CreateSSeqLoc(qid));
958     query_vec.push_back(*ql);
959     CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(query_vec));
960 
961     const ENa_strand target_strand = eNa_strand_plus;
962     CSeq_id sid(CSeq_id::e_Gi, 17865806);
963     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(sid,
964                                                                target_strand));
965     subj_vec.push_back(*sl);
966     CRef<IQueryFactory> subjects(new CObjMgr_QueryFactory(subj_vec));
967 
968     CRef<CBlastOptionsHandle> tblastn_opts(new CTBlastnOptionsHandle());
969     // N.B.: this setting is not applicable, instead the strand should be set
970     // in the subject seqlocs. FIXME: add a comment to this effect to
971     // SetStrandOption.
972     // N.B.2: BLAST doesn't support setting the subject strand for a database
973     // search
974     //tblastn_opts->SetOptions().SetStrandOption(target_strand);
975     CRef<CLocalDbAdapter> db_adapter(new CLocalDbAdapter(subjects, tblastn_opts));
976 
977     CLocalBlast blaster(queries, tblastn_opts, db_adapter);
978 
979     CRef<CSearchResultSet> results = blaster.Run();
980     BOOST_REQUIRE_EQUAL(eSequenceComparison, results->GetResultType());
981     BOOST_CHECK_EQUAL((size_t)1, results->GetNumResults());
982     const CSearchResults& result = (*results)[0];
983     BOOST_CHECK(result.HasAlignments());
984     CConstRef<CSeq_align_set> alignment = result.GetSeqAlign();
985     BOOST_CHECK_EQUAL((size_t)1, alignment->Size());
986     ITERATE(CSeq_align_set::Tdata, aln, alignment->Get()) {
987         // check the query's strand (protein)
988         BOOST_CHECK_EQUAL(static_cast<int>(eNa_strand_unknown), static_cast<int>((*aln)->GetSeqStrand(0)));
989         // check the subject's strand (nucleotide)
990         BOOST_CHECK_EQUAL(static_cast<int>(eNa_strand_plus), static_cast<int>((*aln)->GetSeqStrand(1)));
991     }
992 }
993 
994 // SB-386
995 BOOST_AUTO_TEST_CASE_TIMEOUT(testInterruptBlastSetup, 3);
BOOST_AUTO_TEST_CASE(testInterruptBlastSetup)996 BOOST_AUTO_TEST_CASE(testInterruptBlastSetup) {
997     CSeq_id id("NC_000002.11");
998     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
999 
1000     CBl2Seq blaster(*sl, *sl, eBlastn);
1001     TInterruptFnPtr fnptr =
1002         blaster.SetInterruptCallback(interrupt_immediately);
1003     BOOST_REQUIRE(fnptr == NULL);
1004 
1005     TSeqAlignVector sav;
1006     try { sav = blaster.Run(); }
1007     catch (...) {
1008         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1009     }
1010 }
1011 
BOOST_AUTO_TEST_CASE(testInterruptBlastnExitImmediately)1012 BOOST_AUTO_TEST_CASE(testInterruptBlastnExitImmediately) {
1013     CSeq_id id("gi|555");
1014     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
1015 
1016     CBl2Seq blaster(*sl, *sl, eBlastn);
1017     TInterruptFnPtr fnptr =
1018         blaster.SetInterruptCallback(interrupt_immediately);
1019     BOOST_REQUIRE(fnptr == NULL);
1020 
1021     TSeqAlignVector sav;
1022     try { sav = blaster.Run(); }
1023     catch (...) {
1024         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1025     }
1026 }
1027 
BOOST_AUTO_TEST_CASE(testInterruptBlastxExitImmediately)1028 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitImmediately) {
1029     CSeq_id query_id("gi|555");
1030     auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
1031     CSeq_id subj_id("gi|129295");
1032     auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
1033 
1034     CBl2Seq blaster(*slq, *sls, eBlastx);
1035     TInterruptFnPtr fnptr =
1036         blaster.SetInterruptCallback(interrupt_immediately);
1037     BOOST_REQUIRE(fnptr == NULL);
1038 
1039     TSeqAlignVector sav;
1040     try { sav = blaster.Run(); }
1041     catch (...) {
1042         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1043     }
1044 }
1045 
BOOST_AUTO_TEST_CASE(testInterruptTblastxExitImmediately)1046 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitImmediately) {
1047     CSeq_id query_id("gi|555");
1048     auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
1049     CSeq_id subj_id("gi|555");
1050     auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
1051 
1052     CBl2Seq blaster(*slq, *sls, eTblastx);
1053     TInterruptFnPtr fnptr =
1054         blaster.SetInterruptCallback(interrupt_immediately);
1055     BOOST_REQUIRE(fnptr == NULL);
1056 
1057     TSeqAlignVector sav;
1058     try { sav = blaster.Run(); }
1059     catch (...) {
1060         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1061     }
1062 }
1063 
BOOST_AUTO_TEST_CASE(testInterruptTblastnExitImmediately)1064 BOOST_AUTO_TEST_CASE(testInterruptTblastnExitImmediately) {
1065     CSeq_id query_id("gi|129295");
1066     auto_ptr<SSeqLoc> slq(CTestObjMgr::Instance().CreateSSeqLoc(query_id));
1067     CSeq_id subj_id("gi|555");
1068     auto_ptr<SSeqLoc> sls(CTestObjMgr::Instance().CreateSSeqLoc(subj_id));
1069 
1070     CBl2Seq blaster(*slq, *sls, eTblastn);
1071     TInterruptFnPtr fnptr =
1072         blaster.SetInterruptCallback(interrupt_immediately);
1073     BOOST_REQUIRE(fnptr == NULL);
1074 
1075     TSeqAlignVector sav;
1076     try { sav = blaster.Run(); }
1077     catch (...) {
1078         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1079     }
1080 }
1081 
1082 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a))
1083 static
s_SetupWithMultipleQueriesAndSubjects(bool query_is_nucl,bool subj_is_nucl,EProgram program)1084 CRef<CBl2Seq> s_SetupWithMultipleQueriesAndSubjects(bool query_is_nucl,
1085                                                     bool subj_is_nucl,
1086                                                     EProgram program) {
1087 
1088     TIntId protein_gis[] = { 6, 129295, 15606659, 4336138, 5556 };
1089     TIntId nucl_gis[] = { 272208, 272217, 272211, 272247, 272227, 272236,
1090         272219 };
1091 
1092     vector<TIntId> q_gis, s_gis;
1093     if (query_is_nucl) {
1094         copy(&nucl_gis[0],
1095              &nucl_gis[ARRAY_SIZE(nucl_gis)],
1096              back_inserter(q_gis));
1097     } else {
1098         copy(&protein_gis[0],
1099              &protein_gis[ARRAY_SIZE(protein_gis)],
1100              back_inserter(q_gis));
1101     }
1102 
1103     if (subj_is_nucl) {
1104         copy(&nucl_gis[0],
1105              &nucl_gis[ARRAY_SIZE(nucl_gis)],
1106              back_inserter(s_gis));
1107     } else {
1108         copy(&protein_gis[0],
1109              &protein_gis[ARRAY_SIZE(protein_gis)],
1110              back_inserter(s_gis));
1111     }
1112 
1113 
1114     TSeqLocVector queries;
1115     ITERATE(vector<TIntId>, itr, q_gis) {
1116         CRef<CSeq_loc> loc(new CSeq_loc());
1117         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
1118 
1119         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1120         scope->AddDefaults();
1121         queries.push_back(SSeqLoc(loc, scope));
1122     }
1123 
1124     TSeqLocVector subjects;
1125     ITERATE(vector<TIntId>, itr, s_gis) {
1126         CRef<CSeq_loc> loc(new CSeq_loc());
1127         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
1128 
1129         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1130         scope->AddDefaults();
1131         subjects.push_back(SSeqLoc(loc, scope));
1132     }
1133 
1134     return CRef<CBl2Seq>(new CBl2Seq(queries, subjects, program));
1135 }
1136 
BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAtRandom)1137 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAtRandom) {
1138 
1139     CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects(false,
1140                                                                   false,
1141                                                                   eBlastp);
1142 
1143     int num_callbacks_executed(0);
1144     TInterruptFnPtr fnptr =
1145         blaster->SetInterruptCallback(callback_counter,
1146                                       (void*) &num_callbacks_executed);
1147     BOOST_REQUIRE(fnptr == NULL);
1148 
1149     TSeqAlignVector sav(blaster->Run()); // won't throw
1150 	CRandom r((CRandom::TValue)time(0));
1151     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
1152     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1153 
1154     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
1155                                           (void*)&progress_pair);
1156     BOOST_REQUIRE(fnptr == callback_counter);
1157     sav.clear();
1158 
1159     try { sav = blaster->Run(); }
1160     catch (...) {
1161         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1162     }
1163 }
1164 
BOOST_AUTO_TEST_CASE(testInterruptBlastnExitAtRandom)1165 BOOST_AUTO_TEST_CASE(testInterruptBlastnExitAtRandom) {
1166 
1167     CRef<CBl2Seq> blaster =
1168         s_SetupWithMultipleQueriesAndSubjects(true, true, eBlastn);
1169 
1170     int num_callbacks_executed(0);
1171     TInterruptFnPtr fnptr =
1172         blaster->SetInterruptCallback(callback_counter,
1173                                       (void*)&num_callbacks_executed);
1174     BOOST_REQUIRE(fnptr == NULL);
1175 
1176     TSeqAlignVector sav(blaster->Run()); // won't throw
1177     CRandom r((CRandom::TValue)time(0));
1178     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
1179     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1180 
1181     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
1182                                           (void*)&progress_pair);
1183     BOOST_REQUIRE(fnptr == callback_counter);
1184     sav.clear();
1185 
1186     try { sav = blaster->Run(); }
1187     catch (...) {
1188         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1189     }
1190 }
1191 
1192 // interrupt_at_random.
BOOST_AUTO_TEST_CASE(testInterruptBlastxExitAtRandom)1193 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitAtRandom) {
1194 
1195     CRef<CBl2Seq> blaster =
1196         s_SetupWithMultipleQueriesAndSubjects(true, false, eBlastx);
1197 
1198     int num_callbacks_executed(0);
1199     TInterruptFnPtr fnptr =
1200         blaster->SetInterruptCallback(callback_counter,
1201                                       (void*) & num_callbacks_executed);
1202     BOOST_REQUIRE(fnptr == NULL);
1203 
1204     TSeqAlignVector sav(blaster->Run()); // won't throw
1205     CRandom r((CRandom::TValue)time(0));
1206     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
1207     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1208 
1209     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
1210                                           (void*)&progress_pair);
1211     BOOST_REQUIRE(fnptr == callback_counter);
1212     sav.clear();
1213 
1214     try { sav = blaster->Run(); }
1215     catch (...) {
1216         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1217     }
1218 }
1219 
BOOST_AUTO_TEST_CASE(testInterruptTblastnExitAtRandom)1220 BOOST_AUTO_TEST_CASE(testInterruptTblastnExitAtRandom) {
1221 
1222     CRef<CBl2Seq> blaster =
1223         s_SetupWithMultipleQueriesAndSubjects(false, true, eTblastn);
1224 
1225     int num_callbacks_executed(0);
1226     TInterruptFnPtr fnptr =
1227         blaster->SetInterruptCallback(callback_counter,
1228                                       (void*)&num_callbacks_executed);
1229     BOOST_REQUIRE(fnptr == NULL);
1230 
1231     TSeqAlignVector sav(blaster->Run()); // won't throw
1232     CRandom r((CRandom::TValue)time(0));
1233     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
1234     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1235 
1236     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
1237                                           (void*)&progress_pair);
1238     BOOST_REQUIRE(fnptr == callback_counter);
1239     sav.clear();
1240 
1241     try { sav = blaster->Run(); }
1242     catch (...) {
1243         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1244     }
1245 }
1246 
BOOST_AUTO_TEST_CASE(testInterruptTblastxExitAtRandom)1247 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitAtRandom) {
1248 
1249     CRef<CBl2Seq> blaster =
1250         s_SetupWithMultipleQueriesAndSubjects(true, true, eTblastx);
1251 
1252     int num_callbacks_executed(0);
1253     TInterruptFnPtr fnptr =
1254         blaster->SetInterruptCallback(callback_counter,
1255                                       (void*) & num_callbacks_executed);
1256     BOOST_REQUIRE(fnptr == NULL);
1257 
1258     TSeqAlignVector sav(blaster->Run()); // won't throw
1259     CRandom r((CRandom::TValue)time(0));
1260     int max_interrupt_callbacks = r.GetRand(1, num_callbacks_executed);
1261     pair<int, int> progress_pair(make_pair(0, max_interrupt_callbacks));
1262 
1263     fnptr = blaster->SetInterruptCallback(interrupt_at_random,
1264                                           (void*)&progress_pair);
1265     BOOST_REQUIRE(fnptr == callback_counter);
1266     sav.clear();
1267 
1268     try { sav = blaster->Run(); }
1269     catch (...) {
1270         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1271     }
1272 }
1273 
BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAfter3Callbacks)1274 BOOST_AUTO_TEST_CASE(testInterruptBlastpExitAfter3Callbacks) {
1275     CSeq_id id("gi|129295");
1276     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
1277 
1278     CBl2Seq blaster(*sl, *sl, eBlastp);
1279     TInterruptFnPtr fnptr =
1280         blaster.SetInterruptCallback(interrupt_after3calls);
1281     BOOST_REQUIRE(fnptr == NULL);
1282 
1283     TSeqAlignVector sav;
1284     try { sav = blaster.Run(); }
1285     catch (...) {
1286         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1287     }
1288 }
1289 
BOOST_AUTO_TEST_CASE(testInterruptBlastxExitOnTraceback)1290 BOOST_AUTO_TEST_CASE(testInterruptBlastxExitOnTraceback) {
1291 
1292     CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects(true,
1293                                                                   false,
1294                                                                   eBlastx);
1295     TInterruptFnPtr fnptr =
1296         blaster->SetInterruptCallback(interrupt_on_traceback);
1297     BOOST_REQUIRE(fnptr == NULL);
1298 
1299     TSeqAlignVector sav;
1300     try { sav = blaster->Run(); }
1301     catch (...) {
1302         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1303     }
1304 }
1305 
BOOST_AUTO_TEST_CASE(testInterruptTblastxExitOnTraceback)1306 BOOST_AUTO_TEST_CASE(testInterruptTblastxExitOnTraceback) {
1307 
1308     CRef<CBl2Seq> blaster = s_SetupWithMultipleQueriesAndSubjects
1309         (true, true, eTblastx);
1310     TInterruptFnPtr fnptr =
1311         blaster->SetInterruptCallback(interrupt_on_traceback);
1312     BOOST_REQUIRE(fnptr == NULL);
1313 
1314     TSeqAlignVector sav;
1315     try { sav = blaster->Run(); }
1316     catch (...) {
1317         BOOST_REQUIRE_EQUAL((size_t)0, sav.size());
1318     }
1319 }
1320 
BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueriesWithInvalidSeqId)1321 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueriesWithInvalidSeqId) {
1322     vector<TIntId> q_gis, s_gis;
1323 
1324     // Setup the queries
1325     q_gis.push_back(129295);
1326     q_gis.push_back(-1);        // invalid seqid
1327 
1328     // setup the subjects
1329     s_gis.push_back(129295);
1330     s_gis.push_back(4336138);   // no hits with gi 129295
1331 
1332     TSeqLocVector queries;
1333     ITERATE(vector<TIntId>, itr, q_gis) {
1334         CRef<CSeq_loc> loc(new CSeq_loc());
1335         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
1336 
1337         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1338         scope->AddDefaults();
1339         queries.push_back(SSeqLoc(loc, scope));
1340     }
1341 
1342     TSeqLocVector subjects;
1343     ITERATE(vector<TIntId>, itr, s_gis) {
1344         CRef<CSeq_loc> loc(new CSeq_loc());
1345         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
1346 
1347         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1348         scope->AddDefaults();
1349         subjects.push_back(SSeqLoc(loc, scope));
1350     }
1351 
1352     // BLAST by concatenating all queries
1353     CBl2Seq blaster4all(queries, subjects, eBlastp);
1354     TSeqAlignVector sas_v = blaster4all.Run();
1355 
1356     TSearchMessages m;
1357     blaster4all.GetMessages(m);
1358     BOOST_REQUIRE_EQUAL(subjects.size()*queries.size(), sas_v.size());
1359 
1360     BOOST_REQUIRE(m[0].empty());
1361     BOOST_REQUIRE(!m[1].empty());
1362 
1363     // Verify the error message
1364     TQueryMessages qm = m[1];
1365     BOOST_REQUIRE(qm.front()->GetMessage().find("Cannot resolve") !=
1366                    string::npos);
1367 
1368     // Verify that the alignments corresponding to the 2nd query are indeed empty
1369     // in older version this was sas_v[1], order has changed
1370     BOOST_REQUIRE_EQUAL(0, (int) sas_v[2]->Size());
1371 
1372     CRef<CSearchResultSet> res = blaster4all.RunEx();
1373     BOOST_REQUIRE(res.NotEmpty());
1374     BOOST_REQUIRE_EQUAL(q_gis.size()*s_gis.size(), res->GetNumResults());
1375     BOOST_REQUIRE(res->GetResults(0,0).HasAlignments());
1376     BOOST_REQUIRE(!res->GetResults(0,1).HasAlignments());
1377     BOOST_REQUIRE(!res->GetResults(1,0).HasAlignments());
1378     BOOST_REQUIRE(!res->GetResults(1,1).HasAlignments());
1379 }
1380 
1381 // JIRA:SB-1242
BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueriesWithBadQuery)1382 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueriesWithBadQuery) {
1383     vector<TIntId> q_gis, s_gis;
1384 
1385     // Setup the queries
1386     q_gis.push_back(296863684);    // All X's
1387     q_gis.push_back(129295);
1388 
1389     // setup the subjects
1390     s_gis.push_back(129296);
1391 
1392     TSeqLocVector queries;
1393     ITERATE(vector<TIntId>, itr, q_gis) {
1394         CRef<CSeq_loc> loc(new CSeq_loc());
1395         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
1396 
1397         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1398         scope->AddDefaults();
1399         queries.push_back(SSeqLoc(loc, scope));
1400     }
1401 
1402     TSeqLocVector subjects;
1403     ITERATE(vector<TIntId>, itr, s_gis) {
1404         CRef<CSeq_loc> loc(new CSeq_loc());
1405         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
1406 
1407         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1408         scope->AddDefaults();
1409         subjects.push_back(SSeqLoc(loc, scope));
1410     }
1411 
1412     // BLAST by concatenating all queries
1413     CBl2Seq blaster4all(queries, subjects, eBlastp);
1414     TSeqAlignVector sas_v = blaster4all.Run();
1415 
1416     TSearchMessages m;
1417     blaster4all.GetMessages(m);
1418     BOOST_REQUIRE_EQUAL(subjects.size()*queries.size(), sas_v.size());
1419 
1420     BOOST_REQUIRE(!m[0].empty());
1421     BOOST_REQUIRE(m[1].empty());
1422 }
1423 
BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueriesWithInvalidSeqId)1424 BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueriesWithInvalidSeqId) {
1425     CRef<CSeq_id> id1(new CSeq_id(CSeq_id::e_Gi, 555));
1426     auto_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(*id1));
1427     CRef<CSeq_id> id2(new CSeq_id(CSeq_id::e_Gi, 556));
1428     auto_ptr<SSeqLoc> sl2(CTestObjMgr::Instance().CreateSSeqLoc(*id2));
1429 
1430     const TSeqPos kFakeBioseqLength = 12;
1431     const char byte(0);   // string of 4 A's in ncbi2na
1432     vector<char> na_data(kFakeBioseqLength/4, byte);
1433 
1434     CRef<CSeq_id> fake_id(new CSeq_id("lcl|77"));
1435     CRef<CBioseq> fake_bioseq(new CBioseq);
1436     fake_bioseq->SetInst().SetLength(kFakeBioseqLength);
1437     fake_bioseq->SetInst().SetSeq_data().SetNcbi2na().Set().swap(na_data);
1438     fake_bioseq->SetInst().SetMol(CSeq_inst::eMol_na);
1439     fake_bioseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
1440     fake_bioseq->SetId().push_back(fake_id);
1441     CRef<CSeq_loc> fake_loc(new CSeq_loc);
1442     fake_loc->SetWhole(*fake_id);
1443 
1444     CRef<CScope> scope(CSimpleOM::NewScope(false));
1445     scope->AddBioseq(*fake_bioseq);
1446     auto_ptr<SSeqLoc> sl_bad(new SSeqLoc(*fake_loc, *scope));
1447 
1448     TSeqPos len = sequence::GetLength(*sl_bad->seqloc, sl_bad->scope);
1449     BOOST_REQUIRE_EQUAL(kFakeBioseqLength, len);
1450 
1451     TSeqLocVector queries;
1452     queries.push_back(*sl1);
1453     queries.push_back(*sl_bad);
1454     queries.push_back(*sl2);
1455 
1456     // All subjects have matches against this gi
1457     CRef<CSeq_id> subj_id(new CSeq_id(CSeq_id::e_Gi, 555));
1458     auto_ptr<SSeqLoc> subj_loc
1459         (CTestObjMgr::Instance().CreateSSeqLoc(*subj_id));
1460     TSeqLocVector subject;
1461     subject.push_back(*subj_loc);;
1462 
1463     CRef<CBlastNucleotideOptionsHandle> opts_handle(new CBlastNucleotideOptionsHandle);
1464     opts_handle->SetMaskAtHash(false);
1465     CBl2Seq bl2seq(queries, subject, *opts_handle);
1466     TSeqAlignVector sas_v = bl2seq.Run();
1467     sas_v = bl2seq.Run();
1468     TSearchMessages m;
1469     bl2seq.GetMessages(m);
1470     BOOST_REQUIRE_EQUAL(sas_v.size(), m.size());
1471     BOOST_REQUIRE_EQUAL(queries.size(), sas_v.size());
1472 
1473     BOOST_REQUIRE(m[0].empty());
1474     BOOST_REQUIRE(!m[1].empty());
1475     BOOST_REQUIRE(m[2].empty());
1476 
1477     TQueryMessages qm = m[1];
1478 
1479     // no duplicate messages for the contexts
1480     BOOST_REQUIRE(qm.size() == 1);
1481     // Verify the error message
1482     ITERATE(TQueryMessages, itr, qm) {
1483         BOOST_REQUIRE((*itr)->GetMessage().find("Could not calculate "
1484                                                  "ungapped Karlin-Altschul "
1485                                                  "parameters")
1486                        != string::npos);
1487     }
1488     // Verify that the alignments corresponding to the 2nd query are indeed
1489     // empty
1490     ITERATE(CSeq_align_set::Tdata, alignments, sas_v[1]->Get()) {
1491         BOOST_REQUIRE((*alignments)->GetSegs().IsDisc());
1492         BOOST_REQUIRE((*alignments)->GetSegs().GetDisc().Get().empty());
1493     }
1494 }
1495 
BOOST_AUTO_TEST_CASE(ProteinSelfHitWithMask)1496 BOOST_AUTO_TEST_CASE(ProteinSelfHitWithMask) {
1497     CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 129295));
1498     CRef<CSeq_loc> sl(new CSeq_loc());
1499     sl->SetWhole(*id);
1500     CRef<CSeq_loc> mask(new CSeq_loc(*id, 50, 100));
1501     CRef<CScope> scope(CSimpleOM::NewScope());
1502     SSeqLoc seqloc(sl, scope, mask);
1503 
1504     CBl2Seq bl2seq(seqloc, seqloc, eBlastp);
1505     TSeqAlignVector sav(bl2seq.Run());
1506     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1507     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
1508 }
1509 
1510 // Inspired by SB-285
BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation)1511 BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation) {
1512     CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 83219349));
1513     CRef<CSeq_loc> sl(new CSeq_loc());
1514     sl->SetWhole(*id);
1515     CRef<CSeq_loc> mask(new CSeq_loc(*id, 57, 484));
1516     CRef<CScope> scope(CSimpleOM::NewScope());
1517     SSeqLoc query_seqloc(sl, scope, mask);
1518 
1519     CRef<CSeq_id> sid(new CSeq_id(CSeq_id::e_Gi, 88954065));
1520     CRef<CSeq_loc> ssl(new CSeq_loc(*sid, 9909580-100, 9909607+100));
1521     SSeqLoc subj_seqloc(ssl, scope);
1522 
1523     CBl2Seq bl2seq(query_seqloc, subj_seqloc, eMegablast);
1524     TSeqAlignVector sav(bl2seq.Run());
1525     BOOST_REQUIRE_EQUAL(0U, sav[0]->Get().size());
1526 }
1527 
1528 // Inspired by SB-285
BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation_FromFile)1529 BOOST_AUTO_TEST_CASE(NucleotideMaskedLocation_FromFile) {
1530     CNcbiIfstream infile("data/masked.fsa");
1531     const bool is_protein(false);
1532     CBlastInputSourceConfig iconfig(is_protein);
1533     iconfig.SetLowercaseMask(true);
1534     CRef<CBlastFastaInputSource> fasta_src
1535         (new CBlastFastaInputSource(infile, iconfig));
1536     CRef<CBlastInput> input(new CBlastInput(&*fasta_src));
1537     //CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
1538     //scope->AddDefaults();
1539     CRef<CScope> scope = CBlastScopeSource(is_protein).NewScope();
1540 
1541     CRef<blast::CBlastQueryVector> seqs = input->GetNextSeqBatch(*scope);
1542     CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*seqs));
1543 
1544     TSeqLocVector subj_vec;
1545     CRef<CSeq_id> sid(new CSeq_id(CSeq_id::e_Gi, 88954065));
1546     CRef<CSeq_loc> ssl(new CSeq_loc(*sid, 9909580-100, 9909607+100));
1547     subj_vec.push_back(SSeqLoc(ssl, scope));
1548     CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
1549     CRef<CBlastOptionsHandle>
1550         opts_handle(CBlastOptionsFactory::Create(eBlastn));
1551     CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
1552                                                        opts_handle));
1553 
1554     size_t num_queries = seqs->Size();
1555     size_t num_subjects = subj_vec.size();
1556     BOOST_REQUIRE_EQUAL((size_t)1, num_queries);
1557     BOOST_REQUIRE_EQUAL((size_t)1, num_subjects);
1558 
1559     // BLAST by concatenating all queries
1560     CLocalBlast blaster(queries, opts_handle, subjects);
1561     CRef<CSearchResultSet> results = blaster.Run();
1562     BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
1563     BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
1564                         results->GetNumResults());
1565     BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->size());
1566     BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
1567     BOOST_REQUIRE_EQUAL(num_subjects,
1568                         results->GetNumResults()/results->GetNumQueries());
1569 
1570     CSearchResults& res = (*results)[0];
1571     BOOST_REQUIRE(res.HasAlignments() == false);
1572 }
1573 
1574 // JIRA SB-732
BOOST_AUTO_TEST_CASE(InvalidMaskingAlgorithm)1575 BOOST_AUTO_TEST_CASE(InvalidMaskingAlgorithm) {
1576     CRef<blast::CSearchDatabase> m_SearchDb;
1577     m_SearchDb.Reset(new CSearchDatabase("nr",
1578                                          CSearchDatabase::eBlastDbIsProtein));
1579     m_SearchDb->SetFilteringAlgorithm(66, eSoftSubjMasking);
1580     CRef<CLocalDbAdapter> subject_adapter(new CLocalDbAdapter(*m_SearchDb));
1581     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eBlastp));
1582     vector<TIntId> q_gis;
1583     // Setup the queries
1584     CSeq_id seq_id("P01013.1");
1585 
1586     TSeqLocVector queries;
1587     CRef<CSeq_loc> loc(new CSeq_loc());
1588     loc->SetWhole(seq_id);
1589 
1590     CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
1591     scope->AddDefaults();
1592     queries.push_back(SSeqLoc(loc, scope));
1593 
1594     CRef<IQueryFactory> query_fact(new CObjMgr_QueryFactory(queries));
1595     BOOST_REQUIRE_THROW(CLocalBlast blaster(query_fact, opts, subject_adapter),
1596                         CBlastException);
1597 }
1598 
1599 // test for the case where the use of composition based
1600 // satistics should have deleted a hit but did not (used to crash)
BOOST_AUTO_TEST_CASE(ProteinCompBasedStats)1601 BOOST_AUTO_TEST_CASE(ProteinCompBasedStats) {
1602 
1603     CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
1604     CRef<CScope> scope(new CScope(*kObjMgr));
1605     CRef<CSeq_entry> seq_entry1;
1606     const string kFileName("data/blastp_compstats.fa");
1607     ifstream in1(kFileName.c_str());
1608     if ( !in1 )
1609         throw runtime_error("Failed to open " + kFileName);
1610     if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
1611         throw runtime_error("Failed to read sequence from " + kFileName);
1612     scope->AddTopLevelSeqEntry(*seq_entry1);
1613     CRef<CSeq_loc> seqloc1(new CSeq_loc);
1614     const string kSeqIdString1("lcl|1");
1615     CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
1616     seqloc1->SetWhole(*id1);
1617     SSeqLoc ss1(seqloc1, scope);
1618 
1619     CSeq_id id("gi|4503637");
1620     auto_ptr<SSeqLoc> ss2(CTestObjMgr::Instance().CreateSSeqLoc(id));
1621 
1622     CRef<CBlastProteinOptionsHandle> opts_handle (new CBlastProteinOptionsHandle);
1623     opts_handle->SetWordSize(2);
1624     opts_handle->SetEvalueThreshold(20000);
1625     opts_handle->SetFilterString("F");/* NCBI_FAKE_WARNING */
1626     opts_handle->SetMatrixName("PAM30");
1627     opts_handle->SetGapOpeningCost(9);
1628     opts_handle->SetGapExtensionCost(1);
1629     opts_handle->SetOptions().SetCompositionBasedStats(
1630                                           eCompositionBasedStats);
1631 
1632     CBl2Seq blaster(ss1, *ss2, *opts_handle);
1633     TSeqAlignVector sav(blaster.Run());
1634     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1635     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
1636 }
1637 
BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryBothStrands)1638 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryBothStrands) {
1639     CSeq_id qid("gi|555");
1640     auto_ptr<SSeqLoc> query(
1641         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
1642     query->genetic_code_id = 1;
1643 
1644     CSeq_id sid("gi|129295");
1645     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
1646 
1647     CBl2Seq blaster(*query, *subj, eBlastx);
1648     TSeqAlignVector sav(blaster.Run());
1649     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1650             // cerr << "Align " << MSerial_AsnText << *sar << endl;
1651     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetStd().size());
1652     testBlastHitCounts(blaster, eBlastx_555_129295);
1653     testRawCutoffs(blaster, eBlastx, eBlastx_555_129295);
1654 }
1655 
BOOST_AUTO_TEST_CASE(NucleotideSelfHitWithSubjectMask)1656 BOOST_AUTO_TEST_CASE(NucleotideSelfHitWithSubjectMask) {
1657     CRef<CSeq_id> query_id(new CSeq_id(CSeq_id::e_Gi, 148727250));
1658     CRef<CSeq_id> subj_id(new CSeq_id(CSeq_id::e_Gi, 89059606));
1659     CRef<CSeq_loc> qsl(new CSeq_loc(*query_id, 0, 1000));
1660     CRef<CSeq_loc> ssl(new CSeq_loc(*subj_id, 0, 1000));
1661     CPacked_seqint::TRanges mask_vector;
1662     mask_vector.push_back(TSeqRange(0, 44));
1663     mask_vector.push_back(TSeqRange(69, 582));
1664     mask_vector.push_back(TSeqRange(610, 834));
1665     mask_vector.push_back(TSeqRange(854, 1000));
1666     CRef<CPacked_seqint> masks(new CPacked_seqint(*subj_id,
1667                                                   mask_vector));
1668     CRef<CSeq_loc> subj_mask(new CSeq_loc());
1669     subj_mask->SetPacked_int(*masks);
1670     CRef<CScope> scope(CSimpleOM::NewScope());
1671     SSeqLoc query(qsl, scope);
1672     auto_ptr<SSeqLoc> subject(new SSeqLoc(ssl, scope, subj_mask));
1673     {
1674         CBl2Seq bl2seq(query, *subject, eBlastn);
1675         TSeqAlignVector sav(bl2seq.Run());
1676         BOOST_REQUIRE_EQUAL((size_t)1, sav.front()->Get().size());
1677     }
1678 
1679     // Now compare the same sequences, without the subject masks
1680     subject.reset(new SSeqLoc(ssl, scope));
1681     {
1682         CBl2Seq bl2seq(query, *subject, eBlastn);
1683         TSeqAlignVector sav(bl2seq.Run());
1684         BOOST_REQUIRE_EQUAL((size_t)4, sav.front()->Get().size());
1685     }
1686 }
1687 
BOOST_AUTO_TEST_CASE(NucleotideBlastSelfHit)1688 BOOST_AUTO_TEST_CASE(NucleotideBlastSelfHit) {
1689     CSeq_id id("gi|555");
1690     auto_ptr<SSeqLoc> sl(
1691         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
1692 
1693     // Traditional blastn search
1694     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eBlastn));
1695     CBl2Seq blaster(*sl, *sl, *opts);
1696     TSeqAlignVector sav = blaster.Run();
1697     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1698     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
1699     testBlastHitCounts(blaster, eBlastn_555_555);
1700     testRawCutoffs(blaster, eBlastn, eBlastn_555_555);
1701 
1702     // Change the options to megablast
1703     opts.Reset(CBlastOptionsFactory::Create(eMegablast));
1704     blaster.SetOptionsHandle() = *opts;
1705     sav = blaster.Run();
1706     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
1707     sar = *(sav[0]->Get().begin());
1708     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
1709     testBlastHitCounts(blaster, eMegablast_555_555);
1710     testRawCutoffs(blaster, eMegablast, eMegablast_555_555);
1711 
1712     // Change the options to discontiguous megablast
1713     opts.Reset(CBlastOptionsFactory::Create(eDiscMegablast));
1714     blaster.SetOptionsHandle() = *opts;
1715     sav = blaster.Run();
1716     sar = *(sav[0]->Get().begin());
1717     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
1718     testBlastHitCounts(blaster, eDiscMegablast_555_555);
1719     testRawCutoffs(blaster, eDiscMegablast, eDiscMegablast_555_555);
1720 }
1721 
BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback)1722 BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback) {
1723     CSeq_id query_id("gi|2655203");
1724     auto_ptr<SSeqLoc> ql(
1725         CTestObjMgr::Instance().CreateSSeqLoc(query_id,
1726                                               eNa_strand_plus));
1727 
1728     CSeq_id subject_id("gi|200811");
1729     auto_ptr<SSeqLoc> sl(
1730         CTestObjMgr::Instance().CreateSSeqLoc(subject_id,
1731                                               eNa_strand_minus));
1732 
1733     // test a fix for a bug that corrupted the traceback
1734     // in the one hit returned from this search
1735 
1736     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
1737     opts->SetTraditionalMegablastDefaults();
1738     opts->SetMatchReward(1);
1739     opts->SetMismatchPenalty(-2);
1740     opts->SetGapOpeningCost(3);
1741     opts->SetGapExtensionCost(1);
1742     opts->SetWordSize(24);
1743     opts->SetGapExtnAlgorithm(eGreedyScoreOnly);
1744     opts->SetGapTracebackAlgorithm(eGreedyTbck);
1745 
1746     CBl2Seq blaster(*ql, *sl, *opts);
1747 
1748     CRef<CSearchResultSet> res = blaster.RunEx();
1749     BOOST_REQUIRE_EQUAL(eSequenceComparison, res->GetResultType());
1750     BOOST_REQUIRE_EQUAL(1U, res->size());
1751     CConstRef<CSeq_align_set> sas = (*res)[0].GetSeqAlign();
1752     BOOST_REQUIRE(sas.NotEmpty());
1753     BOOST_REQUIRE_EQUAL(1U, sas->Get().size());
1754     CRef<CSeq_align> sa = sas->Get().front();
1755     int score = 0;
1756     sa->GetNamedScore(CSeq_align::eScore_Score, score);
1757     BOOST_REQUIRE_EQUAL(832, score);
1758 
1759     TSeqAlignVector alignments = blaster.Run();
1760     BOOST_REQUIRE_EQUAL(1U, alignments.size());
1761     sa = alignments[0]->Get().front();
1762     sa->GetNamedScore(CSeq_align::eScore_Score, score);
1763     BOOST_REQUIRE_EQUAL(832, score);
1764 }
1765 
1766 
BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback2)1767 BOOST_AUTO_TEST_CASE(MegablastGreedyTraceback2) {
1768     CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
1769     CRef<CScope> scope(new CScope(*kObjMgr));
1770 
1771     CRef<CSeq_entry> seq_entry1;
1772     ifstream in1("data/greedy1a.fsa");
1773     if ( !in1 )
1774         throw runtime_error("Failed to open file1");
1775     if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
1776         throw runtime_error("Failed to read sequence from file1");
1777     scope->AddTopLevelSeqEntry(*seq_entry1);
1778     CRef<CSeq_loc> seqloc1(new CSeq_loc);
1779     const string kSeqIdString1("lcl|1");
1780     CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
1781     seqloc1->SetWhole(*id1);
1782     SSeqLoc ss1(seqloc1, scope);
1783 
1784     CRef<CSeq_entry> seq_entry2;
1785     ifstream in2("data/greedy1b.fsa");
1786     if ( !in2 )
1787         throw runtime_error("Failed to open file2");
1788     if ( !(seq_entry2 = CFastaReader(in2).ReadOneSeq()))
1789         throw runtime_error("Failed to read sequence from file2");
1790     scope->AddTopLevelSeqEntry(*seq_entry2);
1791     CRef<CSeq_loc> seqloc2(new CSeq_loc);
1792     const string kSeqIdString2("lcl|2");
1793     CRef<CSeq_id> id2(new CSeq_id(kSeqIdString2));
1794     seqloc2->SetWhole(*id2);
1795     SSeqLoc ss2(seqloc2, scope);
1796 
1797     CRef<CBlastNucleotideOptionsHandle> handle(new CBlastNucleotideOptionsHandle);
1798     handle->SetGapOpeningCost(0);
1799     handle->SetGapExtensionCost(0);
1800     handle->SetDustFiltering(false);
1801 
1802     // test multiple bug fixes in greedy gapped alignment
1803 
1804     CBl2Seq blaster1(ss1, ss2, *handle);
1805     TSeqAlignVector sav(blaster1.Run());
1806     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1807     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
1808 
1809     const CSeq_align& seqalign1 = *sar;
1810     BOOST_REQUIRE(seqalign1.IsSetScore());
1811     ITERATE(CSeq_align::TScore, itr, seqalign1.GetScore()) {
1812         BOOST_REQUIRE((*itr)->IsSetId());
1813         if ((*itr)->GetId().GetStr() == "score") {
1814             BOOST_REQUIRE_EQUAL(619, (*itr)->GetValue().GetInt());
1815             break;
1816         }
1817     }
1818 
1819     handle->SetMatchReward(10);
1820     handle->SetMismatchPenalty(-25);
1821     handle->SetGapXDropoff(100.0);
1822     handle->SetGapXDropoffFinal(100.0);
1823 
1824     CBl2Seq blaster2(ss1, ss2, *handle);
1825     sav = blaster2.Run();
1826     sar = *(sav[0]->Get().begin());
1827     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
1828 
1829     const CSeq_align& seqalign2 = *sar;
1830     BOOST_REQUIRE(seqalign2.IsSetScore());
1831     ITERATE(CSeq_align::TScore, itr, seqalign2.GetScore()) {
1832         BOOST_REQUIRE((*itr)->IsSetId());
1833         if ((*itr)->GetId().GetStr() == "score") {
1834             BOOST_REQUIRE_EQUAL(6035, (*itr)->GetValue().GetInt());
1835             break;
1836         }
1837     }
1838 }
1839 
BOOST_AUTO_TEST_CASE(MegablastGreedyTracebackSelfHits)1840 BOOST_AUTO_TEST_CASE(MegablastGreedyTracebackSelfHits) {
1841     // the following tests are for JIRA SB-1041, where the greedy
1842     // traceback may stop prematurely.
1843     CSeq_id query_id("gi|56384585");
1844     auto_ptr<SSeqLoc> ql(
1845         CTestObjMgr::Instance().CreateSSeqLoc(query_id,
1846                                               eNa_strand_plus));
1847 
1848     CSeq_id subject_id("gi|56384585");
1849     auto_ptr<SSeqLoc> sl(
1850         CTestObjMgr::Instance().CreateSSeqLoc(subject_id,
1851                                               eNa_strand_plus));
1852 
1853     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
1854     opts->SetTraditionalMegablastDefaults();
1855     opts->SetMatchReward(1);
1856     opts->SetMismatchPenalty(-2);
1857     opts->SetGapOpeningCost(0);
1858     opts->SetGapExtensionCost(0);
1859     opts->SetWordSize(28);
1860     opts->SetGapExtnAlgorithm(eGreedyScoreOnly);
1861     opts->SetGapTracebackAlgorithm(eGreedyTbck);
1862 
1863     CBl2Seq blaster(*ql, *sl, *opts);
1864 
1865     CRef<CSearchResultSet> res = blaster.RunEx();
1866     BOOST_REQUIRE_EQUAL(eSequenceComparison, res->GetResultType());
1867     BOOST_REQUIRE_EQUAL(1U, res->size());
1868     CConstRef<CSeq_align_set> sas = (*res)[0].GetSeqAlign();
1869     BOOST_REQUIRE(sas.NotEmpty());
1870     CRef<CSeq_align> sa = sas->Get().front();
1871     int score = 0;
1872     sa->GetNamedScore(CSeq_align::eScore_Score, score);
1873     BOOST_REQUIRE_EQUAL(3794584, score);
1874 }
1875 
BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryPlusStrand)1876 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryPlusStrand) {
1877     CSeq_id qid("gi|555");
1878     auto_ptr<SSeqLoc> query(
1879         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_plus));
1880 
1881     CSeq_id sid("gi|129295");
1882     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
1883 
1884     CBl2Seq blaster(*query, *subj, eBlastx);
1885     TSeqAlignVector sav(blaster.Run());
1886     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1887     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetStd().size());
1888 }
1889 
BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryMinusStrand)1890 BOOST_AUTO_TEST_CASE(Blastx2Seqs_QueryMinusStrand) {
1891     CSeq_id qid("gi|555");
1892     auto_ptr<SSeqLoc> query(
1893         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_minus));
1894 
1895     CSeq_id sid("gi|129295");
1896     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
1897 
1898     CBl2Seq blaster(*query, *subj, eBlastx);
1899     TSeqAlignVector sav(blaster.Run());
1900     // No hits.  Empty CSeq_align_set returned.
1901     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
1902 }
1903 
1904 
BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryBothStrands)1905 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryBothStrands) {
1906     CSeq_id id("gi|555");
1907     auto_ptr<SSeqLoc> sl(
1908         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_both));
1909 
1910     CBl2Seq blaster(*sl, *sl, eTblastx);
1911     TSeqAlignVector sav(blaster.Run());
1912     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1913     BOOST_REQUIRE_EQUAL(39, (int)sar->GetSegs().GetStd().size());
1914     testBlastHitCounts(blaster, eTblastx_555_555);
1915     testRawCutoffs(blaster, eTblastx, eTblastx_555_555);
1916 }
1917 
BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryPlusStrand)1918 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryPlusStrand) {
1919     CSeq_id id("gi|555");
1920     auto_ptr<SSeqLoc> sl(
1921         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_plus));
1922 
1923     CBl2Seq blaster(*sl, *sl, eTblastx);
1924     TSeqAlignVector sav(blaster.Run());
1925     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1926     BOOST_REQUIRE_EQUAL(11, (int)sar->GetSegs().GetStd().size());
1927 }
1928 
BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryMinusStrand)1929 BOOST_AUTO_TEST_CASE(TBlastx2Seqs_QueryMinusStrand) {
1930     CSeq_id id("gi|555");
1931     auto_ptr<SSeqLoc> sl(
1932         CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_minus));
1933 
1934     CBl2Seq blaster(*sl, *sl, eTblastx);
1935     TSeqAlignVector sav(blaster.Run());
1936     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1937     BOOST_REQUIRE_EQUAL(12, (int)sar->GetSegs().GetStd().size());
1938 }
1939 
1940 
BOOST_AUTO_TEST_CASE(TblastxManyHits)1941 BOOST_AUTO_TEST_CASE(TblastxManyHits) {
1942     const int total_num_hsps = 49;
1943     const int num_hsps_to_check = 8;
1944     const int score_array[num_hsps_to_check] =
1945         { 947, 125, 820, 113, 624, 221, 39, 778};
1946     const int sum_n_array[num_hsps_to_check] =
1947         { 2, 2, 2, 2, 3, 3, 3, 0};
1948     CSeq_id qid("gi|24719404");
1949     auto_ptr<SSeqLoc> qsl(
1950         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
1951     CSeq_id sid("gi|29807292");
1952     pair<TSeqPos, TSeqPos> range(15185000, 15195000);
1953     auto_ptr<SSeqLoc> ssl(
1954         CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
1955     CBl2Seq blaster(*qsl, *ssl, eTblastx);
1956     blaster.SetOptionsHandle().SetMaxNumHspPerSequence(total_num_hsps);
1957 
1958     TSeqAlignVector sav(blaster.Run());
1959 
1960     testBlastHitCounts(blaster, eTblastx_many_hits);
1961     testRawCutoffs(blaster, eTblastx, eTblastx_many_hits);
1962 
1963     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1964     list< CRef<CStd_seg> >& segs = sar->SetSegs().SetStd();
1965     BOOST_REQUIRE_EQUAL(total_num_hsps, (int)segs.size());
1966     int index = 0;
1967     ITERATE(list< CRef<CStd_seg> >, itr, segs) {
1968         const vector< CRef< CScore > >& score_v = (*itr)->GetScores();
1969         ITERATE(CSeq_align::TScore, sitr, score_v) {
1970             BOOST_REQUIRE((*sitr)->IsSetId());
1971             if ((*sitr)->GetId().GetStr() == "score") {
1972                 BOOST_REQUIRE_EQUAL(score_array[index],
1973                                      (*sitr)->GetValue().GetInt());
1974             } else if ((*sitr)->GetId().GetStr() == "sum_n") {
1975                 BOOST_REQUIRE_EQUAL(sum_n_array[index],
1976                                      (*sitr)->GetValue().GetInt());
1977             }
1978         }
1979         if (++index == num_hsps_to_check)
1980             break;
1981     }
1982 }
1983 
BOOST_AUTO_TEST_CASE(ProteinBlast2Seqs)1984 BOOST_AUTO_TEST_CASE(ProteinBlast2Seqs) {
1985     CSeq_id id("gi|129295");
1986     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(id));
1987 
1988     id.SetGi(GI_CONST(7662354));
1989     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(id));
1990 
1991     CBl2Seq blaster(*query, *subj, eBlastp);
1992     TSeqAlignVector sav(blaster.Run());
1993     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
1994     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
1995     testBlastHitCounts(blaster, eBlastp_129295_7662354);
1996     testRawCutoffs(blaster, eBlastp, eBlastp_129295_7662354);
1997 }
1998 
BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering_InvalidDB)1999 BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering_InvalidDB) {
2000     CSeq_id qid("gi|555");
2001     auto_ptr<SSeqLoc> query(
2002         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2003 
2004     CBlastNucleotideOptionsHandle opts;
2005     opts.SetTraditionalMegablastDefaults();
2006     const string kRepeatDb("junk");
2007     opts.SetRepeatFilteringDB(kRepeatDb.c_str());
2008     bool is_repeat_filtering_on = opts.GetRepeatFiltering();
2009     BOOST_REQUIRE(is_repeat_filtering_on);
2010     string repeat_db(opts.GetRepeatFilteringDB()
2011                      ? opts.GetRepeatFilteringDB()
2012                      : kEmptyStr);
2013     BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
2014 
2015     CBl2Seq blaster(*query, *query, opts);
2016     BOOST_CHECK_THROW((void) blaster.Run(), CBlastException);
2017     BOOST_CHECK_THROW((void) blaster.RunEx(), CBlastException);
2018 }
2019 
BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering)2020 BOOST_AUTO_TEST_CASE(BlastnWithRepeatFiltering) {
2021     CSeq_id qid("gi|555");
2022     auto_ptr<SSeqLoc> query(
2023         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2024 
2025     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2026     opts->SetTraditionalMegablastDefaults();
2027     opts->SetRepeatFiltering(true);
2028     string repeat_db(opts->GetRepeatFilteringDB()
2029                      ? opts->GetRepeatFilteringDB()
2030                      : kEmptyStr);
2031     BOOST_REQUIRE_EQUAL(string(kDefaultRepeatFilterDb), repeat_db);
2032     // it's harmless to set them both, but only the latter one will be used
2033     const string kRepeatDb("repeat/repeat_9606");
2034     opts->SetRepeatFilteringDB(kRepeatDb.c_str());
2035     repeat_db.assign(opts->GetRepeatFilteringDB()
2036                      ? opts->GetRepeatFilteringDB()
2037                      : kEmptyStr);
2038     BOOST_REQUIRE_EQUAL(kRepeatDb, repeat_db);
2039 
2040     bool is_repeat_filtering_on = opts->GetRepeatFiltering();
2041     BOOST_REQUIRE(is_repeat_filtering_on);
2042 
2043     CBl2Seq blaster(*query, *query, *opts);
2044     TSeqAlignVector sav(blaster.Run());
2045     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2046     BOOST_REQUIRE(sar.NotEmpty());
2047     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
2048 }
2049 
BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Db)2050 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Db) {
2051     CSeq_id qid("gi|555");
2052     auto_ptr<SSeqLoc> query(
2053         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2054 
2055     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2056     opts->SetTraditionalMegablastDefaults();
2057     const string kWindowMaskerDb = WindowMaskerTaxidToDb(9606);
2058     opts->SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
2059     string wmdb(opts->GetWindowMaskerDatabase()
2060                 ? opts->GetWindowMaskerDatabase() : kEmptyStr);
2061     BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
2062     BOOST_REQUIRE_EQUAL(0, opts->GetWindowMaskerTaxId());
2063     CBl2Seq blaster(*query, *query, *opts);
2064     TSeqAlignVector sav(blaster.Run());
2065     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2066     BOOST_REQUIRE(sar.NotEmpty());
2067     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
2068 }
2069 
BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Taxid)2070 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_Taxid) {
2071     CSeq_id qid("gi|555");
2072     auto_ptr<SSeqLoc> query(
2073         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2074 
2075     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2076     opts->SetTraditionalMegablastDefaults();
2077     opts->SetWindowMaskerTaxId(9606);
2078     CBl2Seq blaster(*query, *query, *opts);
2079     TSeqAlignVector sav(blaster.Run());
2080     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2081     BOOST_REQUIRE(sar.NotEmpty());
2082     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
2083 }
2084 
BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidDb)2085 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidDb) {
2086     CSeq_id qid("gi|555");
2087     auto_ptr<SSeqLoc> query(
2088         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2089 
2090     CBlastNucleotideOptionsHandle opts;
2091     opts.SetTraditionalMegablastDefaults();
2092     const string kWindowMaskerDb("Dummydb");
2093     opts.SetWindowMaskerDatabase(kWindowMaskerDb.c_str());
2094     string wmdb(opts.GetWindowMaskerDatabase()
2095                 ? opts.GetWindowMaskerDatabase() : kEmptyStr);
2096     BOOST_REQUIRE_EQUAL(kWindowMaskerDb, wmdb);
2097     CBl2Seq blaster(*query, *query, opts);
2098     BOOST_CHECK_THROW((void) blaster.Run(), CBlastException);
2099     BOOST_CHECK_THROW((void) blaster.RunEx(), CBlastException);
2100 }
2101 
BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidTaxid)2102 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_InvalidTaxid) {
2103     CSeq_id qid("gi|555");
2104     auto_ptr<SSeqLoc> query(
2105         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2106 
2107     CBlastNucleotideOptionsHandle opts;
2108     opts.SetTraditionalMegablastDefaults();
2109     const int kInvalidTaxId = -1;
2110     opts.SetWindowMaskerTaxId(kInvalidTaxId);
2111     BOOST_REQUIRE_EQUAL(kInvalidTaxId, opts.GetWindowMaskerTaxId());
2112     CBl2Seq blaster(*query, *query, opts);
2113     BOOST_CHECK_THROW((void) blaster.Run(), CBlastException);
2114     BOOST_CHECK_THROW((void) blaster.RunEx(), CBlastException);
2115 }
2116 
BOOST_AUTO_TEST_CASE(WindowMaskerPathInitInvalid)2117 BOOST_AUTO_TEST_CASE(WindowMaskerPathInitInvalid) {
2118     int rv = WindowMaskerPathInit(kEmptyStr);
2119     BOOST_CHECK_EQUAL(1, rv);
2120     rv = WindowMaskerPathInit("blast_unit_test");
2121     BOOST_CHECK_EQUAL(1, rv);
2122 }
2123 
BOOST_AUTO_TEST_CASE(WindowMaskerPathInitValid)2124 BOOST_AUTO_TEST_CASE(WindowMaskerPathInitValid) {
2125     int rv = WindowMaskerPathInit(CDir::GetCwd());
2126     BOOST_CHECK_EQUAL(0, rv);
2127     WindowMaskerPathReset();
2128 }
2129 
BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_DbAndTaxid)2130 BOOST_AUTO_TEST_CASE(BlastnWithWindowMasker_DbAndTaxid) {
2131     CSeq_id qid("gi|555");
2132     auto_ptr<SSeqLoc> query(
2133         CTestObjMgr::Instance().CreateSSeqLoc(qid));
2134 
2135     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2136     opts->SetTraditionalMegablastDefaults();
2137     // if both are set, the database name will be given preference
2138     opts->SetWindowMaskerDatabase(WindowMaskerTaxidToDb(9606).c_str());
2139     opts->SetWindowMaskerTaxId(-1);
2140     CBl2Seq blaster(*query, *query, *opts);
2141     TSeqAlignVector sav(blaster.Run());
2142     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2143     BOOST_REQUIRE(sar.NotEmpty());
2144     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
2145 }
2146 
BOOST_AUTO_TEST_CASE(ConvertTaxIdToWindowMaskerDb)2147 BOOST_AUTO_TEST_CASE(ConvertTaxIdToWindowMaskerDb) {
2148     string path;
2149     BOOST_CHECK_NO_THROW(path = WindowMaskerTaxidToDb(9606));
2150     BOOST_REQUIRE(NStr::EndsWith(path, "wmasker.obinary") ||
2151                   NStr::EndsWith(path, "wmasker.oascii") );
2152 }
2153 
2154 // Bug report from Alex Astashyn
BOOST_AUTO_TEST_CASE(Alex)2155 BOOST_AUTO_TEST_CASE(Alex) {
2156     CSeq_id qid("NG_007092.2");
2157     TSeqRange qr(0, 2311633);
2158     auto_ptr<SSeqLoc> query(
2159         CTestObjMgr::Instance().CreateSSeqLoc(qid, qr, eNa_strand_plus));
2160 
2161     CSeq_id sid("NT_007914.14");
2162     TSeqRange sr(5233652, 9849919);
2163     auto_ptr<SSeqLoc> subj(
2164         CTestObjMgr::Instance().CreateSSeqLoc(sid, sr));
2165 
2166     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2167     opts->SetTraditionalMegablastDefaults();
2168     opts->SetRepeatFiltering(true);
2169     CBl2Seq blaster(*query, *subj, *opts);
2170     TSeqAlignVector sav(blaster.Run());
2171     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2172     BOOST_REQUIRE(sar.NotEmpty());
2173     BOOST_REQUIRE(sar->GetSegs().GetDenseg().GetNumseg() >= 1);
2174 }
2175 
BOOST_AUTO_TEST_CASE(NucleotideBlast2Seqs)2176 BOOST_AUTO_TEST_CASE(NucleotideBlast2Seqs) {
2177     CSeq_id qid("gi|555");
2178     auto_ptr<SSeqLoc> query(
2179         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
2180 
2181     CSeq_id sid("gi|3090");
2182     auto_ptr<SSeqLoc> subj(
2183         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
2184 
2185     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2186     opts->SetTraditionalBlastnDefaults();
2187     CBl2Seq blaster(*query, *subj, *opts);
2188     TSeqAlignVector sav(blaster.Run());
2189     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2190     BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
2191     testBlastHitCounts(blaster, eBlastn_555_3090);
2192     testRawCutoffs(blaster, eBlastn, eBlastn_555_3090);
2193 }
2194 
BOOST_AUTO_TEST_CASE(ProteinBlastChangeQuery)2195 BOOST_AUTO_TEST_CASE(ProteinBlastChangeQuery) {
2196     CSeq_id id("gi|129295");
2197     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(id));
2198 
2199     id.SetGi(GI_CONST(7662354));
2200     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(id));
2201 
2202     // Run self hit first
2203     CBl2Seq blaster(*subj, *subj, eBlastp);
2204     TSeqAlignVector sav(blaster.Run());
2205     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2206     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2207 
2208     // Change the query sequence (recreates the lookup table)
2209     blaster.SetQuery(*query);
2210     sav = blaster.Run();
2211     sar = *(sav[0]->Get().begin());
2212     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2213 }
2214 
BOOST_AUTO_TEST_CASE(ProteinBlastChangeSubject)2215 BOOST_AUTO_TEST_CASE(ProteinBlastChangeSubject) {
2216     CSeq_id qid("gi|129295");
2217     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
2218 
2219     CSeq_id sid("gi|7662354");
2220     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
2221 
2222     // Run self hit first
2223     CBl2Seq blaster(*query, *query, eBlastp);
2224     TSeqAlignVector sav(blaster.Run());
2225     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2226     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2227 
2228     // Change the subject sequence
2229     blaster.SetSubject(*subj);
2230     sav = blaster.Run();
2231     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
2232     sar = *(sav[0]->Get().begin());
2233     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2234 }
2235 
BOOST_AUTO_TEST_CASE(NucleotideBlastChangeQuery)2236 BOOST_AUTO_TEST_CASE(NucleotideBlastChangeQuery) {
2237     CSeq_id qid("gi|555");
2238     auto_ptr<SSeqLoc> query(
2239         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
2240 
2241     CSeq_id sid("gi|3090");
2242     auto_ptr<SSeqLoc> subj(
2243         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
2244 
2245     // Run self hit first
2246     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2247     opts->SetTraditionalBlastnDefaults();
2248     CBl2Seq blaster(*subj, *subj, *opts);
2249     TSeqAlignVector sav(blaster.Run());
2250     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2251     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2252 
2253     // Change the query sequence (recreates the lookup table)
2254     blaster.SetQuery(*query);
2255     sav = blaster.Run();
2256     BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
2257     sar = *(sav[0]->Get().begin());
2258     BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
2259 }
2260 
BOOST_AUTO_TEST_CASE(NucleotideBlastChangeSubject)2261 BOOST_AUTO_TEST_CASE(NucleotideBlastChangeSubject) {
2262     CSeq_id qid("gi|555");
2263     auto_ptr<SSeqLoc> query(
2264         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
2265 
2266     CSeq_id sid("gi|3090");
2267     auto_ptr<SSeqLoc> subj(
2268         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
2269 
2270     // Run self hit first
2271     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2272     opts->SetTraditionalBlastnDefaults();
2273     CBl2Seq blaster(*query, *query, *opts);
2274     TSeqAlignVector sav(blaster.Run());
2275     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2276     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2277 
2278     // Change the subject sequence
2279     blaster.SetSubject(*subj);
2280     sav = blaster.Run();
2281     sar = *(sav[0]->Get().begin());
2282     BOOST_REQUIRE_EQUAL(3, (int)sar->GetSegs().GetDenseg().GetNumseg());
2283 }
2284 
2285 
BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueries)2286 BOOST_AUTO_TEST_CASE(ProteinBlastMultipleQueries) {
2287     TSeqLocVector sequences;
2288 
2289     CSeq_id qid("gi|129295");
2290     auto_ptr<SSeqLoc> sl1(CTestObjMgr::Instance().CreateSSeqLoc(qid));
2291     sequences.push_back(*sl1);
2292 
2293     CSeq_id sid("gi|7662354");
2294     auto_ptr<SSeqLoc> sl2(CTestObjMgr::Instance().CreateSSeqLoc(sid));
2295     sequences.push_back(*sl2);
2296 
2297     CBl2Seq blaster(sequences, sequences, eBlastp);
2298     TSeqAlignVector seqalign_v = blaster.Run();
2299 
2300     /* DEBUG OUTPUT
2301     for (size_t i = 0; i < seqalign_v.size(); i++)
2302         cerr << "\n<" << i << ">\n"
2303             << MSerial_AsnText << seqalign_v[i].GetObject() << endl;
2304     */
2305 
2306     BOOST_REQUIRE_EQUAL(4, (int)seqalign_v.size());
2307     BOOST_REQUIRE_EQUAL(2, (int)sequences.size());
2308 
2309     CRef<CSeq_align> sar;
2310 
2311     BOOST_REQUIRE_EQUAL(1U, seqalign_v[0]->Get().size());
2312     sar = *(seqalign_v[0]->Get().begin());
2313     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2314 
2315     BOOST_REQUIRE_EQUAL((size_t)2, seqalign_v[1]->Get().size());
2316     sar = *(seqalign_v[1]->Get().begin());
2317     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2318     sar = *(++(seqalign_v[1]->Get().begin()));
2319     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2320 
2321     BOOST_REQUIRE_EQUAL((size_t)3, seqalign_v[2]->Get().size());
2322     sar = *(seqalign_v[2]->Get().begin());
2323     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2324     sar = *(++(seqalign_v[2]->Get().begin()));
2325     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2326 
2327     BOOST_REQUIRE_EQUAL((size_t)1, seqalign_v[3]->Get().size());
2328     sar = *(seqalign_v[3]->Get().begin());
2329     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2330 
2331 
2332     testBlastHitCounts(blaster, eBlastp_multi_q);
2333     testRawCutoffs(blaster, eBlastp, eBlastp_multi_q);
2334 
2335     // test the order of queries and subjects:
2336     testResultAlignments(sequences.size(), sequences.size(),
2337                             seqalign_v);
2338 }
2339 
BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueries)2340 BOOST_AUTO_TEST_CASE(NucleotideBlastMultipleQueries) {
2341     TSeqLocVector sequences;
2342 
2343     CSeq_id qid("gi|555");
2344     auto_ptr<SSeqLoc> sl1(
2345         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
2346     sequences.push_back(*sl1);
2347     BOOST_REQUIRE(sl1->mask.Empty());
2348 
2349     CSeq_id sid("gi|3090");
2350     auto_ptr<SSeqLoc> sl2(
2351         CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
2352     sequences.push_back(*sl2);
2353     BOOST_REQUIRE(sl2->mask.Empty());
2354 
2355     CBl2Seq blaster(sequences, sequences, eBlastn);
2356     TSeqAlignVector seqalign_v = blaster.Run();
2357     BOOST_REQUIRE_EQUAL(2, (int)sequences.size());
2358     BOOST_REQUIRE_EQUAL(4, (int)seqalign_v.size());
2359 
2360     CRef<CSeq_align> sar = *(seqalign_v[0]->Get().begin());
2361     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2362 
2363     // in older version this was seqalign_v[1], order has changed
2364     sar = *(seqalign_v[2]->Get().begin());
2365     BOOST_REQUIRE_EQUAL((CDense_seg::TNumseg)1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2366 
2367     testBlastHitCounts(blaster, eBlastn_multi_q);
2368     testRawCutoffs(blaster, eBlastn, eBlastn_multi_q);
2369 
2370     // test the order of queries and subjects:
2371     testResultAlignments(sequences.size(), sequences.size(),
2372                             seqalign_v);
2373 }
2374 
2375 #if 0
2376 void DoSearchWordSize4(const char *file1, const char *file2) {
2377     CRef<CObjectManager> kObjMgr = CObjectManager::GetInstance();
2378     CRef<CScope> scope(new CScope(*kObjMgr));
2379 
2380     CRef<CSeq_entry> seq_entry1;
2381     ifstream in1(file1);
2382     if ( !in1 )
2383         throw runtime_error("Failed to open file1");
2384     if ( !(seq_entry1 = CFastaReader(in1).ReadOneSeq()))
2385         throw runtime_error("Failed to read sequence from file1");
2386     scope->AddTopLevelSeqEntry(*seq_entry1);
2387     CRef<CSeq_loc> seqloc1(new CSeq_loc);
2388     const string kSeqIdString1("lcl|1");
2389     CRef<CSeq_id> id1(new CSeq_id(kSeqIdString1));
2390     seqloc1->SetWhole(*id1);
2391     SSeqLoc ss1(seqloc1, scope);
2392 
2393     CRef<CSeq_entry> seq_entry2;
2394     ifstream in2(file2);
2395     if ( !in2 )
2396         throw runtime_error("Failed to open file2");
2397     if ( !(seq_entry2 = CFastaReader(in2).ReadOneSeq()))
2398         throw runtime_error("Failed to read sequence from file2");
2399     scope->AddTopLevelSeqEntry(*seq_entry2);
2400     CRef<CSeq_loc> seqloc2(new CSeq_loc);
2401     const string kSeqIdString2("lcl|2");
2402     CRef<CSeq_id> id2(new CSeq_id(kSeqIdString2));
2403     seqloc2->SetWhole(*id2);
2404     SSeqLoc ss2(seqloc2, scope);
2405 
2406     CBlastNucleotideOptionsHandle handle;
2407     handle.SetTraditionalBlastnDefaults();
2408     handle.SetWordSize(4);
2409     handle.SetDustFiltering(false);
2410     handle.SetMismatchPenalty(-1);
2411     handle.SetMatchReward(1);
2412     handle.SetEvalueThreshold(10000);
2413 
2414     CBl2Seq blaster(ss1, ss2, handle);
2415     blaster.RunWithoutSeqalignGeneration(); /* NCBI_FAKE_WARNING */
2416     BlastHSPResults *results = blaster.GetResults(); /* NCBI_FAKE_WARNING */
2417     BOOST_REQUIRE(results != NULL);
2418     BOOST_REQUIRE(results->hitlist_array[0] != NULL);
2419     BOOST_REQUIRE(results->hitlist_array[0]->hsplist_array[0] != NULL);
2420     BlastHSPList *hsp_list = results->hitlist_array[0]->hsplist_array[0];
2421     BOOST_REQUIRE(hsp_list->hspcnt > 0);
2422     BOOST_REQUIRE(hsp_list->hsp_array[0] != NULL);
2423 
2424     // verify that all hits are properly formed, and
2425     // at least as long as the word size
2426 
2427     for (int i = 0; i < hsp_list->hspcnt; i++) {
2428         BlastHSP *hsp = hsp_list->hsp_array[i];
2429         BOOST_REQUIRE(hsp != NULL);
2430         BOOST_REQUIRE(hsp->query.offset < hsp->query.end);
2431         BOOST_REQUIRE(hsp->subject.offset < hsp->subject.end);
2432         BOOST_REQUIRE(hsp->query.gapped_start >= hsp->query.offset &&
2433                        hsp->query.gapped_start < hsp->query.end);
2434         BOOST_REQUIRE(hsp->subject.gapped_start >= hsp->subject.offset &&
2435                        hsp->subject.gapped_start < hsp->subject.end);
2436         BOOST_REQUIRE(hsp->query.end - hsp->query.offset >= 4);
2437         BOOST_REQUIRE(hsp->subject.end - hsp->subject.offset >= 4);
2438     }
2439 
2440     CRef<CSearchResultSet> res = blaster.RunEx();
2441     BOOST_REQUIRE_EQUAL(eSequenceComparison, res->GetResultType());
2442     BOOST_REQUIRE_EQUAL(1, res->GetNumQueries());
2443     BOOST_REQUIRE_EQUAL(1, res->GetNumResults());
2444     BOOST_REQUIRE_EQUAL(1, res->size());
2445     CSearchResults& r = res->GetResults(0, 0);
2446     BOOST_REQUIRE(r.HasAlignments());
2447     CConstRef<CSeq_align_set> a = r.GetSeqAlign();
2448     cerr << MSerial_AsnText << * a<< endl;
2449 
2450 }
2451 
2452 BOOST_AUTO_TEST_CASE(NucleotideBlastWordSize4) {
2453     DoSearchWordSize4("data/blastn_size4a.fsa",
2454                       "data/blastn_size4b.fsa");
2455 }
2456 
2457 // test bug fix when size-4 seed falls at the end of
2458 // the subject sequence
2459 BOOST_AUTO_TEST_CASE(NucleotideBlastWordSize4_EOS) {
2460     DoSearchWordSize4("data/blastn_size4c.fsa",
2461                       "data/blastn_size4d.fsa");
2462 }
2463 #endif
2464 
BOOST_AUTO_TEST_CASE(TblastnOutOfFrame)2465 BOOST_AUTO_TEST_CASE(TblastnOutOfFrame) {
2466     CSeq_id qid("NP_647642.2"); // Protein sequence
2467     CSeq_id sid("BC042576.1");  // DNA sequence
2468 
2469     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
2470     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
2471 
2472     // Set the options
2473     CRef<CTBlastnOptionsHandle> opts(new CTBlastnOptionsHandle);
2474     opts->SetOutOfFrameMode();
2475     opts->SetFrameShiftPenalty(10);
2476     opts->SetFilterString("m;L");/* NCBI_FAKE_WARNING */
2477     opts->SetEvalueThreshold(0.01);
2478     opts->SetCompositionBasedStats(eNoCompositionBasedStats);
2479 
2480     CBl2Seq blaster(*query, *subj, *opts);
2481     TSeqAlignVector sav(blaster.Run());
2482     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
2483     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2484     BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
2485     testBlastHitCounts(blaster, eTblastn_oof);
2486     testRawCutoffs(blaster, eTblastn, eTblastn_oof);
2487 }
2488 
2489 // test multiple fixes for OOF alignments on both subject strands
BOOST_AUTO_TEST_CASE(TblastnOutOfFrame2)2490 BOOST_AUTO_TEST_CASE(TblastnOutOfFrame2) {
2491     CSeq_id qid("gi|38111923"); // Protein sequence
2492     CSeq_id sid("gi|6648925");  // DNA sequence
2493 
2494     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateSSeqLoc(qid));
2495     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
2496 
2497     // Set the options
2498     CRef<CTBlastnOptionsHandle> opts(new CTBlastnOptionsHandle);
2499     opts->SetOutOfFrameMode();
2500     opts->SetFrameShiftPenalty(5);
2501     opts->SetCompositionBasedStats(eNoCompositionBasedStats);
2502     opts->SetFilterString("L");/* NCBI_FAKE_WARNING */
2503 
2504     CBl2Seq blaster(*query, *subj, *opts);
2505     TSeqAlignVector sav(blaster.Run());
2506     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2507     BOOST_REQUIRE_EQUAL(3, (int)sav[0]->Size());
2508 
2509     // test fix for a bug generating OOF traceback
2510 
2511     const CSeq_align& seqalign = *sar;
2512     BOOST_REQUIRE(seqalign.IsSetScore());
2513     ITERATE(CSeq_align::TScore, itr, seqalign.GetScore()) {
2514         BOOST_REQUIRE((*itr)->IsSetId());
2515         if ((*itr)->GetId().GetStr() == "num_ident") {
2516             BOOST_REQUIRE_EQUAL(80, (*itr)->GetValue().GetInt());
2517             break;
2518         }
2519     }
2520 }
2521 
BOOST_AUTO_TEST_CASE(BlastxOutOfFrame)2522 BOOST_AUTO_TEST_CASE(BlastxOutOfFrame) {
2523     CSeq_id qid("BC042576.1");  // DNA sequence
2524     CSeq_id sid("NP_647642.2"); // Protein sequence
2525 
2526     auto_ptr<SSeqLoc> query(
2527         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
2528     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
2529 
2530     // Set the options
2531     CRef<CBlastxOptionsHandle> opts(new CBlastxOptionsHandle);
2532     opts->SetOutOfFrameMode();
2533     opts->SetFrameShiftPenalty(10);
2534     opts->SetCompositionBasedStats(eNoCompositionBasedStats);
2535     opts->SetFilterString("m;L");/* NCBI_FAKE_WARNING */
2536     opts->SetEvalueThreshold(0.01);
2537 
2538     CBl2Seq blaster(*query, *subj, *opts);
2539     TSeqAlignVector sav(blaster.Run());
2540     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2541     BOOST_REQUIRE_EQUAL(2, (int)sav[0]->Size());
2542     testBlastHitCounts(blaster, eBlastx_oof);
2543     testRawCutoffs(blaster, eBlastx, eBlastx_oof);
2544 }
2545 
2546 // test for a bug computing OOF sequence lengths during traceback
2547 
BOOST_AUTO_TEST_CASE(BlastxOutOfFrame_DifferentFrames)2548 BOOST_AUTO_TEST_CASE(BlastxOutOfFrame_DifferentFrames) {
2549     CSeq_id qid("gi|27486285");  // DNA sequence
2550     CSeq_id sid("gi|7331210"); // Protein sequence
2551 
2552     auto_ptr<SSeqLoc> query(
2553         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
2554     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
2555 
2556     // Set the options
2557     CRef<CBlastxOptionsHandle> opts(new CBlastxOptionsHandle);
2558     opts->SetOutOfFrameMode();
2559     opts->SetFrameShiftPenalty(10);
2560     opts->SetFilterString("L");/* NCBI_FAKE_WARNING */
2561     opts->SetCompositionBasedStats(eNoCompositionBasedStats);
2562 
2563     CBl2Seq blaster(*query, *subj, *opts);
2564     TSeqAlignVector sav(blaster.Run());
2565     BOOST_REQUIRE_EQUAL(5, (int)sav[0]->Size());
2566 }
2567 
2568 // The following 3 functions are for checking results in the strand
2569 // combinations tests.
2570 
x_TestAlignmentQuerySubjStrandCombinations(TSeqAlignVector & sav,string aligned_strands)2571 void x_TestAlignmentQuerySubjStrandCombinations(TSeqAlignVector& sav,
2572                                                 string aligned_strands) {
2573 
2574     // Starting offsets in alignment as query/subject pairs
2575     vector< pair<TSignedSeqPos, TSignedSeqPos> > starts;
2576     starts.push_back(make_pair(7685759, 10));
2577     starts.push_back(make_pair(7685758, -1));
2578     starts.push_back(make_pair(7685718, 269));
2579     starts.push_back(make_pair(7685717, -1));
2580     starts.push_back(make_pair(7685545, 309));
2581 
2582     const size_t kNumSegments(starts.size());
2583 
2584     // Lengths of the aligned regions defined in starts vector
2585     vector<TSeqPos> lengths;
2586     lengths.reserve(kNumSegments);
2587     lengths.push_back(259);
2588     lengths.push_back(1);
2589     lengths.push_back(40);
2590     lengths.push_back(1);
2591     lengths.push_back(172);
2592 
2593     // Strands of the involved aligned segments as query/subject pairs
2594     typedef vector< pair<ENa_strand, ENa_strand> > TStrandPairs;
2595     TStrandPairs strands(kNumSegments,
2596                          make_pair(eNa_strand_minus, eNa_strand_plus));
2597 
2598     // Reverse the contents of the vectors if necessary
2599     if (aligned_strands == "plus-minus") {
2600         reverse(starts.begin(), starts.end());
2601         reverse(lengths.begin(), lengths.end());
2602         NON_CONST_ITERATE(TStrandPairs, itr, strands) {
2603             swap(itr->first, itr->second);
2604         }
2605     }
2606     BOOST_REQUIRE_EQUAL(kNumSegments, lengths.size());
2607     BOOST_REQUIRE_EQUAL(kNumSegments, strands.size());
2608 
2609     // Obtain the data from the Seq-align's dense segs ...
2610     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2611     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
2612     // BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
2613     const CDense_seg& ds = sar->GetSegs().GetDenseg();
2614     // CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
2615     const size_t kNumDim(ds.GetDim());
2616     vector< TSignedSeqPos > seg_starts = ds.GetStarts();
2617     vector< TSeqPos> seg_lengths = ds.GetLens();
2618     vector< ENa_strand> seg_strands = ds.GetStrands();
2619     BOOST_REQUIRE_EQUAL(kNumSegments, seg_lengths.size());
2620     BOOST_REQUIRE_EQUAL(kNumSegments*kNumDim, seg_starts.size());
2621 
2622     // ... and compare it to what is expected
2623     for (size_t index = 0; index < kNumSegments; ++index) {
2624         ostringstream os;
2625         os << "Segment " << index << ": expected " << lengths[index]
2626            << " actual " << seg_lengths[index];
2627         BOOST_REQUIRE_MESSAGE(lengths[index] == seg_lengths[index],
2628                               os.str());
2629 
2630         os.str("");
2631         os << "Segment " << index << ": expected " << starts[index].first
2632            << " actual " << seg_starts[2*index];
2633         BOOST_REQUIRE_MESSAGE(starts[index].first == seg_starts[2*index],
2634                               os.str());
2635         os.str("");
2636         os << "Segment " << index << ": expected " << starts[index].second
2637            << " actual " << seg_starts[2*index];
2638         BOOST_REQUIRE_MESSAGE(starts[index].second == seg_starts[2*index+1],
2639                               os.str());
2640         os.str("");
2641         os << "Segment " << index << ": expected " << (int)strands[index].first
2642            << " actual " << (int)seg_strands[2*index];
2643         BOOST_REQUIRE_MESSAGE(strands[index].first == seg_strands[2*index],
2644                               os.str());
2645         os.str("");
2646         os << "Segment " << index << ": expected " << (int)strands[index].second
2647            << " actual " << (int)seg_strands[2*index];
2648         BOOST_REQUIRE_MESSAGE(strands[index].second == seg_strands[2*index+1],
2649                               os.str());
2650     }
2651 }
2652 
testIntervalWholeAlignment(TSeqAlignVector & sav)2653 static void testIntervalWholeAlignment(TSeqAlignVector& sav)
2654 {
2655     const int num_segs = 5;
2656     const int num_starts = 10;
2657     const int starts[num_starts] = { 7685759, 0, 7685758, -1, 7685718,
2658                                      269, 7685717, -1, 7685545, 309 };
2659     const int lengths[num_segs] = { 269, 1, 40, 1, 172 };
2660     int index;
2661 
2662     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2663     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
2664     CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
2665     vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
2666     vector< TSeqPos> seg_lengths = segs_itr->GetLens();
2667     vector< ENa_strand> seg_strands = segs_itr->GetStrands();
2668     BOOST_REQUIRE_EQUAL(num_segs, (int)seg_lengths.size());
2669     BOOST_REQUIRE_EQUAL(num_starts, (int)seg_starts.size());
2670     for (index = 0; index < num_segs; ++index) {
2671         BOOST_REQUIRE_EQUAL(lengths[index], (int)seg_lengths[index]);
2672         BOOST_REQUIRE_EQUAL(starts[2*index], (int)seg_starts[2*index]);
2673         BOOST_REQUIRE_EQUAL(starts[2*index+1], (int)seg_starts[2*index+1]);
2674         BOOST_REQUIRE(seg_strands[2*index] == eNa_strand_minus);
2675         BOOST_REQUIRE(seg_strands[2*index+1] == eNa_strand_plus);
2676     }
2677 }
2678 
testWholeIntervalAlignment(TSeqAlignVector & sav)2679 static void testWholeIntervalAlignment(TSeqAlignVector& sav)
2680 {
2681     const int num_segs = 5;
2682     const int num_starts = 10;
2683     const int starts[num_starts] = { 309, 7685545, -1, 7685717, 269, 7685718,
2684                                      -1, 7685758, 0, 7685759 };
2685     const int lengths[num_segs] = { 172, 1, 40, 1, 269 };
2686     int index;
2687 
2688     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
2689     BOOST_REQUIRE_EQUAL(1, (int)sav[0]->Size());
2690     CTypeIterator<CDense_seg> segs_itr(Begin(*sar));
2691     vector< TSignedSeqPos > seg_starts = segs_itr->GetStarts();
2692     vector< TSeqPos> seg_lengths = segs_itr->GetLens();
2693     vector< ENa_strand> seg_strands = segs_itr->GetStrands();
2694     BOOST_REQUIRE_EQUAL(num_segs, (int)seg_lengths.size());
2695     BOOST_REQUIRE_EQUAL(num_starts, (int)seg_starts.size());
2696     for (index = 0; index < num_segs; ++index) {
2697         BOOST_REQUIRE_EQUAL(lengths[index], (int)seg_lengths[index]);
2698         BOOST_REQUIRE_EQUAL(starts[2*index], (int)seg_starts[2*index]);
2699         BOOST_REQUIRE_EQUAL(starts[2*index+1], (int)seg_starts[2*index+1]);
2700         BOOST_REQUIRE(seg_strands[2*index] == eNa_strand_minus);
2701         BOOST_REQUIRE(seg_strands[2*index+1] == eNa_strand_plus);
2702     }
2703 }
2704 
2705 // should find alignments
BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjBothStrands)2706 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjBothStrands) {
2707     // Alignment in these sequences is from plus/minus strands
2708     CSeq_id qid("NT_004487.15");
2709     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2710     auto_ptr<SSeqLoc> query(
2711         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2712                                                eNa_strand_both));
2713 
2714     CSeq_id sid("AA441981.1");
2715     range.first = 10;
2716     range.second = 480;
2717     auto_ptr<SSeqLoc> subj(
2718         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2719                                                eNa_strand_both));
2720 
2721     CBlastNucleotideOptionsHandle* opts = new CBlastNucleotideOptionsHandle;
2722     opts->SetTraditionalBlastnDefaults();
2723     CBl2Seq blaster(*query, *subj, *opts);
2724     TSeqAlignVector sav(blaster.Run());
2725     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
2726 }
2727 
2728 // should find alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjPlusStrand)2729 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjPlusStrand) {
2730     // Alignment in these sequences is from plus/minus strands
2731     CSeq_id qid("NT_004487.15");
2732     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2733     auto_ptr<SSeqLoc> query(
2734         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2735                                                eNa_strand_both));
2736 
2737     CSeq_id sid("AA441981.1");
2738     range.first = 10;
2739     range.second = 480;
2740     auto_ptr<SSeqLoc> subj(
2741         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2742                                                eNa_strand_plus));
2743 
2744     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2745     opts->SetTraditionalBlastnDefaults();
2746     CBl2Seq blaster(*query, *subj, *opts);
2747     TSeqAlignVector sav(blaster.Run());
2748     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
2749 }
2750 
2751 // should find alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjMinusStrand)2752 BOOST_AUTO_TEST_CASE(Blastn_QueryBothStrands_SubjMinusStrand) {
2753     // Alignment in these sequences is from plus/minus strands
2754     CSeq_id qid("NT_004487.15");
2755     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2756     auto_ptr<SSeqLoc> query(
2757         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2758                                                eNa_strand_both));
2759 
2760     CSeq_id sid("AA441981.1");
2761     range.first = 10;
2762     range.second = 480;
2763     auto_ptr<SSeqLoc> subj(
2764         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2765                                                eNa_strand_minus));
2766 
2767     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2768     opts->SetTraditionalBlastnDefaults();
2769     CBl2Seq blaster(*query, *subj, *opts);
2770     TSeqAlignVector sav(blaster.Run());
2771     x_TestAlignmentQuerySubjStrandCombinations(sav, "plus-minus");
2772 }
2773 
2774 // shouldn't find an alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjPlusStrand)2775 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjPlusStrand) {
2776     // Alignment in these sequences is from plus/minus strands
2777     CSeq_id qid("NT_004487.15");
2778     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2779     auto_ptr<SSeqLoc> query(
2780         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2781                                                eNa_strand_plus));
2782 
2783     CSeq_id sid("AA441981.1");
2784     range.first = 10;
2785     range.second = 480;
2786     auto_ptr<SSeqLoc> subj(
2787         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2788                                                eNa_strand_plus));
2789 
2790     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2791     opts->SetTraditionalBlastnDefaults();
2792     CBl2Seq blaster(*query, *subj, *opts);
2793     TSeqAlignVector sav(blaster.Run());
2794     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
2795 }
2796 
2797 // should find an alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjMinusStrand)2798 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjMinusStrand) {
2799     // Alignment in these sequences is from plus/minus strands
2800     CSeq_id qid("NT_004487.15");
2801     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2802     auto_ptr<SSeqLoc> query(
2803         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2804                                                eNa_strand_plus));
2805 
2806     CSeq_id sid("AA441981.1");
2807     range.first = 10;
2808     range.second = 480;
2809     auto_ptr<SSeqLoc> subj(
2810         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2811                                                eNa_strand_minus));
2812 
2813     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2814     opts->SetTraditionalBlastnDefaults();
2815     CBl2Seq blaster(*query, *subj, *opts);
2816     TSeqAlignVector sav(blaster.Run());
2817     x_TestAlignmentQuerySubjStrandCombinations(sav, "plus-minus");
2818 }
2819 
2820 // should NOT find an alignment because we only search the plus strand of
2821 // the subject sequence
BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjBothStrands)2822 BOOST_AUTO_TEST_CASE(Blastn_QueryPlusStrand_SubjBothStrands) {
2823     // Alignment in these sequences is from plus/minus strands
2824     CSeq_id qid("NT_004487.15");
2825     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2826     auto_ptr<SSeqLoc> query(
2827         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2828                                                eNa_strand_plus));
2829 
2830     CSeq_id sid("AA441981.1");
2831     range.first = 10;
2832     range.second = 480;
2833     auto_ptr<SSeqLoc> subj(
2834         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2835                                                eNa_strand_both));
2836 
2837     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2838     opts->SetTraditionalBlastnDefaults();
2839     CBl2Seq blaster(*query, *subj, *opts);
2840     TSeqAlignVector sav(blaster.Run());
2841     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
2842 }
2843 
2844 // should not find an alignment because alignment is on opposite strands
BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjMinusStrand)2845 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjMinusStrand) {
2846     // Alignment in these sequences is from plus/minus strands
2847     CSeq_id qid("NT_004487.15");
2848     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2849     auto_ptr<SSeqLoc> query(
2850         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2851                                                eNa_strand_minus));
2852 
2853     CSeq_id sid("AA441981.1");
2854     range.first = 10;
2855     range.second = 480;
2856     auto_ptr<SSeqLoc> subj(
2857         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2858                                                eNa_strand_minus));
2859 
2860     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2861     opts->SetTraditionalBlastnDefaults();
2862     CBl2Seq blaster(*query, *subj, *opts);
2863     TSeqAlignVector sav(blaster.Run());
2864     BOOST_REQUIRE(sav[0]->IsEmpty() == true);
2865 }
2866 
2867 // should find alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjPlusStrand)2868 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjPlusStrand) {
2869     // Alignment in these sequences is from plus/minus strands
2870     CSeq_id qid("NT_004487.15");
2871     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2872     auto_ptr<SSeqLoc> query(
2873         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2874                                                eNa_strand_minus));
2875 
2876     CSeq_id sid("AA441981.1");
2877     range.first = 10;
2878     range.second = 480;
2879     auto_ptr<SSeqLoc> subj(
2880         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2881                                                eNa_strand_plus));
2882 
2883     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2884     opts->SetTraditionalBlastnDefaults();
2885     CBl2Seq blaster(*query, *subj, *opts);
2886     TSeqAlignVector sav(blaster.Run());
2887     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
2888 }
2889 
2890 // should find alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjBothStrands)2891 BOOST_AUTO_TEST_CASE(Blastn_QueryMinusStrand_SubjBothStrands) {
2892     // Alignment in these sequences is from plus/minus strands
2893     CSeq_id qid("NT_004487.15");
2894     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2895     auto_ptr<SSeqLoc> query(
2896         CTestObjMgr::Instance().CreateSSeqLoc(qid, range,
2897                                                eNa_strand_minus));
2898 
2899     CSeq_id sid("AA441981.1");
2900     range.first = 10;
2901     range.second = 480;
2902     auto_ptr<SSeqLoc> subj(
2903         CTestObjMgr::Instance().CreateSSeqLoc(sid, range,
2904                                                eNa_strand_both));
2905 
2906     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2907     opts->SetTraditionalBlastnDefaults();
2908     CBl2Seq blaster(*query, *subj, *opts);
2909     TSeqAlignVector sav(blaster.Run());
2910     x_TestAlignmentQuerySubjStrandCombinations(sav, "minus-plus");
2911 }
2912 
2913 // Should properly find alignment
BOOST_AUTO_TEST_CASE(Blastn_QueryWhole_SubjInterval)2914 BOOST_AUTO_TEST_CASE(Blastn_QueryWhole_SubjInterval)
2915 {
2916     CRef<CSeq_id> qid(new CSeq_id("AA441981.1"));
2917     auto_ptr<SSeqLoc> query(CTestObjMgr::Instance().CreateWholeSSeqLoc(*qid));
2918 
2919     CRef<CSeq_id> sid(new CSeq_id("NT_004487.15"));
2920     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2921     auto_ptr<SSeqLoc> subj(
2922         CTestObjMgr::Instance().CreateSSeqLoc(*sid, range,
2923                                                eNa_strand_both));
2924 
2925     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2926     opts->SetTraditionalBlastnDefaults();
2927     CBl2Seq blaster(*query, *subj, *opts);
2928     TSeqAlignVector sav(blaster.Run());
2929     testWholeIntervalAlignment(sav);
2930 }
2931 
BOOST_AUTO_TEST_CASE(Blastn_QueryInterval_SubjWhole)2932 BOOST_AUTO_TEST_CASE(Blastn_QueryInterval_SubjWhole)
2933 {
2934     CRef<CSeq_id> qid(new CSeq_id("NT_004487.15"));
2935     pair<TSeqPos, TSeqPos> range(7685545, 7686027);
2936     auto_ptr<SSeqLoc> query(
2937         CTestObjMgr::Instance().CreateSSeqLoc(*qid, range,
2938                                                eNa_strand_both));
2939 
2940     CRef<CSeq_id> sid(new CSeq_id("AA441981.1"));
2941     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateWholeSSeqLoc(*sid));
2942 
2943     CRef<CBlastNucleotideOptionsHandle> opts(new CBlastNucleotideOptionsHandle);
2944     opts->SetTraditionalBlastnDefaults();
2945     CBl2Seq blaster(*query, *subj, *opts);
2946     TSeqAlignVector sav(blaster.Run());
2947     testIntervalWholeAlignment(sav);
2948 }
2949 
BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs)2950 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs) {
2951     vector<TIntId> q_gis, s_gis;
2952 
2953     // Setup the queries
2954     q_gis.push_back(6);
2955     q_gis.push_back(129295);
2956     q_gis.push_back(15606659);
2957 
2958     // setup the subjects
2959     s_gis.push_back(129295);
2960     s_gis.push_back(6);
2961     s_gis.push_back(4336138); // no hits with gis 6 and 129295
2962     s_gis.push_back(15606659);
2963     s_gis.push_back(5556);
2964 
2965     TSeqLocVector queries;
2966     ITERATE(vector<TIntId>, itr, q_gis) {
2967         CRef<CSeq_loc> loc(new CSeq_loc());
2968         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
2969 
2970         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
2971         scope->AddDefaults();
2972         queries.push_back(SSeqLoc(loc, scope));
2973     }
2974 
2975     TSeqLocVector subjects;
2976     ITERATE(vector<TIntId>, itr, s_gis) {
2977         CRef<CSeq_loc> loc(new CSeq_loc());
2978         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
2979 
2980         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
2981         scope->AddDefaults();
2982         subjects.push_back(SSeqLoc(loc, scope));
2983     }
2984 
2985     size_t num_queries = queries.size();
2986     size_t num_subjects = subjects.size();
2987 
2988     // BLAST by concatenating all queries
2989     CBl2Seq blaster4all(queries, subjects, eBlastp);
2990     TSeqAlignVector sas_v = blaster4all.Run();
2991     BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
2992     testBlastHitCounts(blaster4all, eBlastp_multi_q_s);
2993     testRawCutoffs(blaster4all, eBlastp, eBlastp_multi_q_s);
2994 
2995     // test the order of queries and subjects:
2996     testResultAlignments(num_queries, num_subjects,
2997                             sas_v);
2998 }
2999 
BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_RunEx)3000 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_RunEx) {
3001     vector<TIntId> q_gis, s_gis;
3002 
3003     // Setup the queries
3004     q_gis.push_back(6);
3005     q_gis.push_back(129295);
3006     q_gis.push_back(15606659);
3007 
3008     // setup the subjects
3009     s_gis.push_back(129295);
3010     s_gis.push_back(6);
3011     s_gis.push_back(4336138); // no hits with gis 6 and 129295
3012     s_gis.push_back(15606659);
3013     s_gis.push_back(5556);
3014 
3015     TSeqLocVector queries;
3016     ITERATE(vector<TIntId>, itr, q_gis) {
3017         CRef<CSeq_loc> loc(new CSeq_loc());
3018         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
3019 
3020         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
3021         scope->AddDefaults();
3022         queries.push_back(SSeqLoc(loc, scope));
3023     }
3024 
3025     TSeqLocVector subjects;
3026     ITERATE(vector<TIntId>, itr, s_gis) {
3027         CRef<CSeq_loc> loc(new CSeq_loc());
3028         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
3029 
3030         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
3031         scope->AddDefaults();
3032         subjects.push_back(SSeqLoc(loc, scope));
3033     }
3034 
3035     size_t num_queries = queries.size();
3036     size_t num_subjects = subjects.size();
3037 
3038     // BLAST by concatenating all queries
3039     CBl2Seq blaster4all(queries, subjects, eBlastp);
3040     CRef<CSearchResultSet> results = blaster4all.RunEx();
3041     BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
3042     BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
3043                         results->GetNumResults());
3044 
3045     // build the seqalign vector from the result set
3046     TSeqAlignVector sas_v;
3047     for (size_t i = 0; i < num_queries; i++)
3048     {
3049         for (size_t j = 0; j < num_subjects; j++)
3050         {
3051             CSearchResults& res_ij = results->GetResults(i, j);
3052             CRef<CSeq_align_set> aln_set;
3053             aln_set.Reset(const_cast<CSeq_align_set*>
3054                           (res_ij.GetSeqAlign().GetPointer()));
3055             sas_v.push_back(aln_set);
3056         }
3057     }
3058 
3059     // do the rest of the tests on sas_v as in the
3060     // BlastpMultipleQueries_MultipleSubjs function:
3061 
3062     BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
3063     testBlastHitCounts(blaster4all, eBlastp_multi_q_s);
3064     testRawCutoffs(blaster4all, eBlastp, eBlastp_multi_q_s);
3065 
3066     // test the order of queries and subjects:
3067     testResultAlignments(num_queries, num_subjects,
3068                             sas_v);
3069 }
3070 
3071 // This closely resembles how the command line applications invoke bl2seq
BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_CLocalBlast)3072 BOOST_AUTO_TEST_CASE(BlastpMultipleQueries_MultipleSubjs_CLocalBlast) {
3073     vector<TIntId> q_gis, s_gis;
3074 
3075     // Setup the queries
3076     q_gis.push_back(6);
3077     q_gis.push_back(129295);
3078     q_gis.push_back(15606659);
3079 
3080     // setup the subjects
3081     s_gis.push_back(129295);
3082     s_gis.push_back(6);
3083     s_gis.push_back(4336138); // no hits with gis 6 and 129295
3084     s_gis.push_back(15606659);
3085     s_gis.push_back(5556);
3086 
3087     TSeqLocVector query_vec;
3088     ITERATE(vector<TIntId>, itr, q_gis) {
3089         CRef<CSeq_loc> loc(new CSeq_loc());
3090         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
3091 
3092         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
3093         scope->AddDefaults();
3094         query_vec.push_back(SSeqLoc(loc, scope));
3095     }
3096     CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(query_vec));
3097 
3098     CRef<CBlastOptionsHandle>
3099         opts_handle(CBlastOptionsFactory::Create(eBlastp));
3100 
3101     TSeqLocVector subj_vec;
3102     ITERATE(vector<TIntId>, itr, s_gis) {
3103         CRef<CSeq_loc> loc(new CSeq_loc());
3104         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
3105 
3106         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
3107         scope->AddDefaults();
3108         subj_vec.push_back(SSeqLoc(loc, scope));
3109     }
3110     CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
3111     CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
3112                                                        opts_handle));
3113 
3114     BOOST_REQUIRE(subjects->IsDbScanMode() == false);
3115 
3116     size_t num_queries = query_vec.size();
3117     size_t num_subjects = subj_vec.size();
3118 
3119     // BLAST by concatenating all queries
3120     CLocalBlast blaster(queries, opts_handle, subjects);
3121     CRef<CSearchResultSet> results = blaster.Run();
3122     BOOST_REQUIRE(results->GetResultType() == eSequenceComparison);
3123     BOOST_REQUIRE_EQUAL((num_queries*num_subjects),
3124                         results->GetNumResults());
3125     BOOST_REQUIRE_EQUAL((num_queries*num_subjects), results->size());
3126     BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
3127     BOOST_REQUIRE_EQUAL(num_subjects,
3128                         results->GetNumResults()/results->GetNumQueries());
3129 
3130     // build the seqalign vector from the result set
3131     TSeqAlignVector sas_v;
3132     for (size_t i = 0; i < num_queries; i++)
3133     {
3134         for (size_t j = 0; j < num_subjects; j++)
3135         {
3136             CSearchResults& res_ij = results->GetResults(i, j);
3137             CRef<CSeq_align_set> aln_set;
3138             aln_set.Reset(const_cast<CSeq_align_set*>
3139                           (res_ij.GetSeqAlign().GetPointer()));
3140             sas_v.push_back(aln_set);
3141         }
3142     }
3143 
3144     // do the rest of the tests on sas_v as in the
3145     // BlastpMultipleQueries_MultipleSubjs function:
3146     BOOST_REQUIRE_EQUAL(num_queries*num_subjects, sas_v.size());
3147 
3148     // test the order of queries and subjects:
3149     testResultAlignments(num_queries, num_subjects, sas_v);
3150 }
3151 
BOOST_AUTO_TEST_CASE(Blastp_MultipleSubjs_SearchAsSet)3152 BOOST_AUTO_TEST_CASE(Blastp_MultipleSubjs_SearchAsSet) {
3153     vector<TIntId> q_gis, s_gis;
3154 
3155     // Setup the queries
3156     q_gis.push_back(816838863);
3157 
3158     // setup the subjects
3159     s_gis.push_back(16130156);
3160     s_gis.push_back(15644111);
3161     s_gis.push_back(126699345);
3162     s_gis.push_back(504220075);
3163     s_gis.push_back(21222553);
3164     s_gis.push_back(24376189);
3165     s_gis.push_back(15598078);
3166     s_gis.push_back(15599919);
3167     s_gis.push_back(15597767);
3168     s_gis.push_back(16131833);
3169     s_gis.push_back(15599742);
3170     s_gis.push_back(15598387);
3171     s_gis.push_back(15600358);
3172     s_gis.push_back(24375949);
3173     s_gis.push_back(126698248);
3174     s_gis.push_back(24375956);
3175     s_gis.push_back(24375382);
3176     s_gis.push_back(126698598);
3177 
3178     TSeqLocVector query_vec;
3179     ITERATE(vector<TIntId>, itr, q_gis) {
3180         CRef<CSeq_loc> loc(new CSeq_loc());
3181         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
3182 
3183         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
3184         scope->AddDefaults();
3185         query_vec.push_back(SSeqLoc(loc, scope));
3186     }
3187     CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(query_vec));
3188 
3189     CRef<CBlastOptionsHandle>
3190         opts_handle(CBlastOptionsFactory::Create(eBlastp));
3191 
3192     TSeqLocVector subj_vec;
3193     ITERATE(vector<TIntId>, itr, s_gis) {
3194         CRef<CSeq_loc> loc(new CSeq_loc());
3195         loc->SetWhole().SetGi(GI_FROM(TIntId, *itr));
3196 
3197         CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
3198         scope->AddDefaults();
3199         subj_vec.push_back(SSeqLoc(loc, scope));
3200     }
3201     CRef<IQueryFactory> subj_qf(new CObjMgr_QueryFactory(subj_vec));
3202     CRef<CLocalDbAdapter> subjects(new CLocalDbAdapter(subj_qf,
3203                                                        opts_handle, true));
3204 
3205     BOOST_REQUIRE(subjects->IsDbScanMode() == true);
3206 
3207     size_t num_queries = query_vec.size();
3208     size_t num_subjects = subj_vec.size();
3209 
3210     // BLAST by concatenating all queries
3211     CLocalBlast blaster(queries, opts_handle, subjects);
3212     CRef<CSearchResultSet> results = blaster.Run();
3213     BOOST_REQUIRE(results->GetResultType() == eDatabaseSearch);
3214     BOOST_REQUIRE_EQUAL(num_queries,
3215                         results->GetNumResults());
3216     BOOST_REQUIRE_EQUAL(num_queries, results->size());
3217     BOOST_REQUIRE_EQUAL(num_queries, results->GetNumQueries());
3218 
3219     // build the seqalign vector from the result set
3220     CRef<CSeq_align_set> aln_set;
3221     CSearchResults& res = (*results)[0];
3222     aln_set.Reset(const_cast<CSeq_align_set*>
3223 	(res.GetSeqAlign().GetPointer()));
3224     // gi|15599742|ref|NP_253236.1|  has two alignments, all others have one.
3225     BOOST_REQUIRE_EQUAL(num_subjects+1, aln_set->Size());
3226 
3227 }
3228 
BOOST_AUTO_TEST_CASE(BlastOptionsEquality)3229 BOOST_AUTO_TEST_CASE(BlastOptionsEquality) {
3230     // Create options object through factory
3231     auto_ptr<CBlastOptionsHandle> megablast_options_handle(
3232         CBlastOptionsFactory::Create(eMegablast));
3233     CBlastNucleotideOptionsHandle nucl_options_handle;
3234     BOOST_REQUIRE(megablast_options_handle->GetOptions() ==
3235                    nucl_options_handle.GetOptions());
3236 }
3237 
BOOST_AUTO_TEST_CASE(BlastOptionsInequality)3238 BOOST_AUTO_TEST_CASE(BlastOptionsInequality) {
3239     CBlastProteinOptionsHandle prot_options_handle;
3240     CBlastNucleotideOptionsHandle nucl_options_handle;
3241     BOOST_REQUIRE(prot_options_handle.GetOptions() !=
3242                    nucl_options_handle.GetOptions());
3243 
3244     // Blastn and Megablast are different
3245     auto_ptr<CBlastOptionsHandle> blastn_options_handle(
3246         CBlastOptionsFactory::Create(eBlastn));
3247     BOOST_REQUIRE(blastn_options_handle->GetOptions() !=
3248                    nucl_options_handle.GetOptions());
3249 
3250     // Change the matrix and compare
3251     CBlastProteinOptionsHandle prot_options_handle2;
3252     prot_options_handle.SetMatrixName("pam30");
3253     BOOST_REQUIRE(prot_options_handle.GetOptions() !=
3254                    prot_options_handle2.GetOptions());
3255 }
3256 
BOOST_AUTO_TEST_CASE(DiscontiguousMB)3257 BOOST_AUTO_TEST_CASE(DiscontiguousMB) {
3258     CSeq_id qid("gi|408478");  // zebrafish sequence U02544
3259     CSeq_id sid("gi|1546012"); // mouse sequence U61969
3260     auto_ptr<SSeqLoc> query(
3261         CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
3262     auto_ptr<SSeqLoc> subj(CTestObjMgr::Instance().CreateSSeqLoc(sid));
3263 
3264     CBl2Seq blaster(*query, *subj, eDiscMegablast);
3265     TSeqAlignVector sav(blaster.Run());
3266     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
3267 
3268     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
3269     BOOST_REQUIRE_EQUAL(13, (int)sar->GetSegs().GetDenseg().GetNumseg());
3270     testBlastHitCounts(blaster, eDiscMegablast_U02544_U61969);
3271     testRawCutoffs(blaster, eDiscMegablast, eDiscMegablast_U02544_U61969);
3272 }
3273 
BOOST_AUTO_TEST_CASE(BlastnHumanChrom_MRNA)3274 BOOST_AUTO_TEST_CASE(BlastnHumanChrom_MRNA) {
3275     CSeq_id qid("NT_004487.16");
3276     CSeq_id sid("AA621478.1");
3277     pair<TSeqPos, TSeqPos> qrange(7868209-1, 7868602-1);
3278     pair<TSeqPos, TSeqPos> srange(2-1, 397-1);
3279     auto_ptr<SSeqLoc> query(
3280         CTestObjMgr::Instance().CreateSSeqLoc(qid,
3281                                                qrange, eNa_strand_plus));
3282     auto_ptr<SSeqLoc> subj(
3283         CTestObjMgr::Instance().CreateSSeqLoc(sid,
3284                                                srange, eNa_strand_plus));
3285 
3286     CRef<CBlastNucleotideOptionsHandle> options(new CBlastNucleotideOptionsHandle);
3287     CBl2Seq blaster(*query, *subj, *options);
3288     TSeqAlignVector sav(blaster.Run());
3289     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
3290 
3291     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
3292     BOOST_REQUIRE_EQUAL(5, (int)sar->GetSegs().GetDenseg().GetNumseg());
3293     testBlastHitCounts(blaster, eMegablast_chrom_mrna);
3294     testRawCutoffs(blaster, eMegablast, eMegablast_chrom_mrna);
3295 }
3296 
3297 // Checks that results for multiple subjects are put in correct places
3298 // in the vector of Seq-aligns
BOOST_AUTO_TEST_CASE(testOneSubjectResults2CSeqAlign)3299 BOOST_AUTO_TEST_CASE(testOneSubjectResults2CSeqAlign)
3300 {
3301     const int num_subjects = 15;
3302     const int results_size[num_subjects] =
3303         { 1, 1, 0, 1, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 1 };
3304     const int query_gi = 7274302;
3305     const int gi_diff = 28;
3306     CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, query_gi));
3307     auto_ptr<SSeqLoc> sl(
3308         CTestObjMgr::Instance().CreateSSeqLoc(*id, eNa_strand_both));
3309     TSeqLocVector query;
3310     query.push_back(*sl);
3311     TSeqLocVector subjects;
3312     int index;
3313     for (index = 0; index < num_subjects; ++index) {
3314         id.Reset(new CSeq_id(CSeq_id::e_Gi, (query_gi + gi_diff + index)));
3315         //cerr << query_gi + gi_diff + index << endl;
3316         sl.reset(CTestObjMgr::Instance().CreateSSeqLoc(*id,
3317                                                        eNa_strand_both));
3318         subjects.push_back(*sl);
3319     }
3320     CBl2Seq blaster(query, subjects, eMegablast);
3321     TSeqAlignVector seqalign_v = blaster.Run();
3322     BOOST_REQUIRE_EQUAL(num_subjects, (int)seqalign_v.size());
3323 
3324     index = 0;
3325     ITERATE(TSeqAlignVector, itr, seqalign_v)
3326     {
3327         //cerr << index << endl;
3328         BOOST_REQUIRE_EQUAL(results_size[index], (int) (*itr)->Get().size());
3329         index++;
3330     }
3331 }
3332 
BOOST_AUTO_TEST_CASE(testMultiSeqSearchSymmetry)3333 BOOST_AUTO_TEST_CASE(testMultiSeqSearchSymmetry)
3334 {
3335     const size_t num_seqs = 19;
3336     const int gi_list[num_seqs] =
3337         { 1346057, 125527, 121064, 1711551, 125412, 128337, 2507199,
3338           1170625, 1730070, 585365, 140977, 1730069, 20455504, 125206,
3339           125319, 114152, 1706450, 1706307, 125565 };
3340     const int score_cutoff = 70;
3341 
3342     TSeqLocVector seq_vec;
3343     size_t index;
3344     for (index = 0; index < num_seqs; ++index) {
3345         CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, gi_list[index]));
3346         auto_ptr<SSeqLoc> sl(
3347             CTestObjMgr::Instance().CreateSSeqLoc(*id, eNa_strand_both));
3348         seq_vec.push_back(*sl);
3349     }
3350 
3351     CRef<CBlastProteinOptionsHandle> prot_opts(new CBlastProteinOptionsHandle);
3352     prot_opts->SetSegFiltering(false);
3353     prot_opts->SetCutoffScore(score_cutoff);
3354     CBl2Seq blaster(seq_vec, seq_vec, *prot_opts);
3355 #if 0
3356     blaster.RunWithoutSeqalignGeneration(); /* NCBI_FAKE_WARNING */
3357     BlastHSPResults* results = blaster.GetResults(); /* NCBI_FAKE_WARNING */
3358 
3359     int qindex, sindex, qindex1, sindex1;
3360     for (qindex = 0; qindex < num_seqs; ++qindex) {
3361         for (sindex = 0; sindex < results->hitlist_array[qindex]->hsplist_count;
3362              ++sindex) {
3363             BlastHSPList* hsp_list1, *hsp_list2 = NULL;
3364             hsp_list1 = results->hitlist_array[qindex]->hsplist_array[sindex];
3365             qindex1 = hsp_list1->oid;
3366             BlastHitList* hitlist = results->hitlist_array[qindex1];
3367             for (sindex1 = 0; sindex1 < hitlist->hsplist_count; ++sindex1) {
3368                 if (hitlist->hsplist_array[sindex1]->oid == qindex) {
3369                     hsp_list2 = hitlist->hsplist_array[sindex1];
3370                     break;
3371                 }
3372             }
3373             BOOST_REQUIRE(hsp_list2 != NULL);
3374             int hindex;
3375             for (hindex = 0; hindex < hsp_list1->hspcnt; ++hindex) {
3376                 if (hsp_list1->hsp_array[hindex]->score <= score_cutoff)
3377                     break;
3378                 BOOST_REQUIRE(hindex < hsp_list2->hspcnt);
3379                 BOOST_REQUIRE_EQUAL(hsp_list1->hsp_array[hindex]->score,
3380                                      hsp_list2->hsp_array[hindex]->score);
3381             }
3382         }
3383     }
3384 #endif
3385 
3386     CRef<CSearchResultSet> res = blaster.RunEx();
3387     BOOST_REQUIRE_EQUAL(eSequenceComparison, res->GetResultType());
3388     BOOST_REQUIRE_EQUAL(num_seqs, res->GetNumQueries());
3389     BOOST_REQUIRE_EQUAL(num_seqs*num_seqs, res->GetNumResults());
3390     BOOST_REQUIRE_EQUAL(num_seqs*num_seqs, res->size());
3391     typedef CSearchResultSet::size_type size_type;
3392 
3393     for (size_type q = 0; q < res->GetNumQueries(); q++) {
3394         for (size_type s = 0; s < res->GetNumQueries(); s++) {
3395             const CSearchResults& results = res->GetResults(q, s);
3396             if (q == s) {   // self-hit
3397                 BOOST_REQUIRE(results.HasAlignments());
3398                 CConstRef<CSeq_align_set> sas = results.GetSeqAlign();
3399                 BOOST_REQUIRE(sas.NotEmpty());
3400                 BOOST_REQUIRE_EQUAL((size_type)1, sas->Get().size());
3401                 CConstRef<CSeq_align> sa = sas->Get().front();
3402                 const CSeq_id& qid = sa->GetSeq_id(0);
3403                 const CSeq_id& sid = sa->GetSeq_id(1);
3404                 BOOST_REQUIRE(qid.Match(sid));
3405                 int num_ident = 0;
3406                 sa->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
3407                 int seqlen = (int)sequence::GetLength(*seq_vec[q].seqloc,
3408                                                       &*seq_vec[q].scope);
3409                 BOOST_REQUIRE_EQUAL(seqlen, num_ident);
3410             }
3411             const CSearchResults& results2 = res->GetResults(s, q);
3412             BOOST_REQUIRE_EQUAL(results.HasAlignments(),
3413                                 results2.HasAlignments());
3414             if ( !results.HasAlignments() ) {
3415                 continue;
3416             }
3417 
3418             // Compare Q-S and S-Q hits
3419             CConstRef<CSeq_align_set> sas1 = results.GetSeqAlign();
3420             CConstRef<CSeq_align_set> sas2 = results2.GetSeqAlign();
3421             BOOST_REQUIRE(sas1.NotEmpty());
3422             BOOST_REQUIRE(sas2.NotEmpty());
3423             //cerr << "Alignments1 " << MSerial_AsnText << *sas2 << endl;
3424             //cerr << "Alignments2 " << MSerial_AsnText << *sas2 << endl;
3425             BOOST_REQUIRE_EQUAL(sas1->Get().size(), sas2->Get().size());
3426             CSeq_align_set::Tdata::const_iterator i1 = sas1->Get().begin(),
3427                                                   i2 = sas2->Get().begin();
3428             for (; i1 != sas1->Get().end(); ++i1, ++i2) {
3429                 CConstRef<CSeq_align> al1 = *i1;
3430                 CConstRef<CSeq_align> al2 = *i2;
3431                 BOOST_REQUIRE(al1.NotEmpty());
3432                 BOOST_REQUIRE(al2.NotEmpty());
3433                 //cerr << "Align1 " << MSerial_AsnText << *al1 << endl;
3434                 //cerr << "Align2 " << MSerial_AsnText << *al2 << endl;
3435 
3436                 int score1 = 0, score2 = 0;
3437                 al1->GetNamedScore(CSeq_align::eScore_Score, score1);
3438                 al2->GetNamedScore(CSeq_align::eScore_Score, score2);
3439                 BOOST_REQUIRE_EQUAL(score1, score2);
3440 
3441                 double bit_score1 = .0, bit_score2 = .0;
3442                 al1->GetNamedScore(CSeq_align::eScore_BitScore, bit_score1);
3443                 al2->GetNamedScore(CSeq_align::eScore_BitScore, bit_score2);
3444                 BOOST_REQUIRE_CLOSE(bit_score1, bit_score2, 10e-2);
3445 
3446                 double evalue1 = .0, evalue2 = .0;
3447                 al1->GetNamedScore(CSeq_align::eScore_EValue, evalue1);
3448                 al2->GetNamedScore(CSeq_align::eScore_EValue, evalue2);
3449                 BOOST_REQUIRE_CLOSE(evalue1, evalue2, 10e-5);
3450             }
3451         }
3452     }
3453 
3454 
3455     // Repeat the tests above using the TSeqAlignVector
3456     TSeqAlignVector alignments = blaster.Run();
3457     BOOST_REQUIRE_EQUAL(num_seqs*num_seqs, alignments.size());
3458     for (size_t q = 0; q < num_seqs; q++) {
3459         for (size_t s = 0; s < num_seqs; s++) {
3460             size_t idx1 = q*num_seqs + s;
3461             CConstRef<CSeq_align_set> al1 = alignments[idx1];
3462             if (idx1 == (q*num_seqs+q)) {  // self-hit
3463                 BOOST_REQUIRE(al1.NotEmpty());
3464                 BOOST_REQUIRE_EQUAL(1U, al1->Get().size());
3465                 CConstRef<CSeq_align> sa = al1->Get().front();
3466                 const CSeq_id& qid = sa->GetSeq_id(0);
3467                 const CSeq_id& sid = sa->GetSeq_id(1);
3468                 BOOST_REQUIRE(qid.Match(sid));
3469                 int num_ident = 0;
3470                 sa->GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident);
3471                 int seqlen = (int)sequence::GetLength(*seq_vec[q].seqloc,
3472                                                       &*seq_vec[q].scope);
3473                 BOOST_REQUIRE_EQUAL(seqlen, num_ident);
3474                 continue;
3475             }
3476             size_t idx2 = s*num_seqs + q;
3477             CConstRef<CSeq_align_set> al2 = alignments[idx2];
3478             if (al1.Empty() || al2.Empty()) {
3479                 BOOST_REQUIRE_EQUAL(al1.Empty(), al2.Empty());
3480                 continue;
3481             }
3482             BOOST_REQUIRE_EQUAL(al1.NotEmpty(), al2.NotEmpty());
3483             // compare al1 with al2
3484             {{
3485             CConstRef<CSeq_align> sa1 = al1->Get().front();
3486             CConstRef<CSeq_align> sa2 = al2->Get().front();
3487             BOOST_REQUIRE(sa1.NotEmpty());
3488             BOOST_REQUIRE(sa2.NotEmpty());
3489             //cerr << "Align1 " << MSerial_AsnText << *sa1 << endl;
3490             //cerr << "Align2 " << MSerial_AsnText << *sa2 << endl;
3491 
3492             int score1 = 0, score2 = 0;
3493             sa1->GetNamedScore(CSeq_align::eScore_Score, score1);
3494             sa2->GetNamedScore(CSeq_align::eScore_Score, score2);
3495             BOOST_REQUIRE_EQUAL(score1, score2);
3496 
3497             double bit_score1 = .0, bit_score2 = .0;
3498             sa1->GetNamedScore(CSeq_align::eScore_BitScore, bit_score1);
3499             sa2->GetNamedScore(CSeq_align::eScore_BitScore, bit_score2);
3500             BOOST_REQUIRE_CLOSE(bit_score1, bit_score2, 10e-2);
3501 
3502             double evalue1 = .0, evalue2 = .0;
3503             sa1->GetNamedScore(CSeq_align::eScore_EValue, evalue1);
3504             sa2->GetNamedScore(CSeq_align::eScore_EValue, evalue2);
3505             BOOST_REQUIRE_CLOSE(evalue1, evalue2, 10e-5);
3506             }}
3507         }
3508     }
3509 
3510 }
3511 
BOOST_AUTO_TEST_CASE(testInterruptCallbackWithNull)3512 BOOST_AUTO_TEST_CASE(testInterruptCallbackWithNull) {
3513     CSeq_id id("gi|129295");
3514     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
3515 
3516     CBl2Seq blaster(*sl, *sl, eBlastp);
3517     TInterruptFnPtr null_fnptr = 0;
3518     TInterruptFnPtr fnptr = blaster.SetInterruptCallback(null_fnptr);
3519     BOOST_REQUIRE(fnptr == NULL);
3520 
3521     TSeqAlignVector sav(blaster.Run());
3522     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
3523     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
3524 
3525     fnptr = blaster.SetInterruptCallback(interrupt_immediately);
3526     // make sure we get the previous interrupt callback
3527     BOOST_REQUIRE(fnptr == null_fnptr);
3528 
3529     fnptr = blaster.SetInterruptCallback(null_fnptr);
3530     // make sure we get the previous interrupt callback
3531     BOOST_REQUIRE(fnptr == interrupt_immediately);
3532 
3533     // Retry the search now that we've removed the interrupt callback
3534     sav = blaster.Run();
3535     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
3536     sar = *(sav[0]->Get().begin());
3537     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
3538 }
3539 
BOOST_AUTO_TEST_CASE(testInterruptCallbackDoNotInterrupt)3540 BOOST_AUTO_TEST_CASE(testInterruptCallbackDoNotInterrupt) {
3541     CSeq_id id("gi|129295");
3542     auto_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(id));
3543 
3544     CBl2Seq blaster(*sl, *sl, eBlastp);
3545     TInterruptFnPtr fnptr = blaster.SetInterruptCallback(do_not_interrupt);
3546     BOOST_REQUIRE(fnptr == NULL);
3547 
3548     TSeqAlignVector sav(blaster.Run());
3549     BOOST_REQUIRE_EQUAL(1, (int)sav.size());
3550     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
3551     BOOST_REQUIRE_EQUAL(1, (int)sar->GetSegs().GetDenseg().GetNumseg());
3552 }
3553 
3554 #if SEQLOC_MIX_QUERY_OK
BOOST_AUTO_TEST_CASE(MultiIntervalLoc)3555 BOOST_AUTO_TEST_CASE(MultiIntervalLoc) {
3556     const size_t kNumInts = 20;
3557     const size_t kStarts[kNumInts] =
3558         { 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563,
3559           20606, 21232, 22615, 23822, 27941, 29597, 30136, 31287,
3560           31786, 33315, 35402 };
3561     const size_t kEnds[kNumInts] =
3562         { 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783,
3563           20748, 21365, 22817, 24049, 28171, 29839, 30348, 31362,
3564           31911, 33485, 37952 };
3565     size_t index;
3566 
3567     CSeq_id qid("gi|3417288");
3568     CRef<CSeq_loc> qloc(new CSeq_loc());
3569     for (index = 0; index < kNumInts; ++index) {
3570         CRef<CSeq_loc> next_loc(new CSeq_loc());
3571         next_loc->SetInt().SetFrom(kStarts[index]);
3572         next_loc->SetInt().SetTo(kEnds[index]);
3573         next_loc->SetInt().SetId(qid);
3574         qloc->SetMix().Set().push_back(next_loc);
3575     }
3576 
3577     CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
3578     scope->AddDefaults();
3579 
3580     auto_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));
3581 
3582     CSeq_id sid("gi|51511732");
3583     pair<TSeqPos, TSeqPos> range(15595732, 15705419);
3584     auto_ptr<SSeqLoc> subject(
3585         CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
3586     CBl2Seq blaster(*query, *subject, eBlastn);
3587     TSeqAlignVector sav(blaster.Run());
3588     CRef<CSeq_align> sar = *(sav[0]->Get().begin());
3589     BOOST_REQUIRE_EQUAL(60, (int)sar->GetSegs().GetDisc().Get().size());
3590 }
3591 #endif
3592 
BOOST_AUTO_TEST_CASE(QueryMaskIgnoredInMiniExtension)3593 BOOST_AUTO_TEST_CASE(QueryMaskIgnoredInMiniExtension) {
3594     CRef<CSeq_loc> qloc(new CSeq_loc());
3595     qloc->SetWhole().SetGi(GI_CONST(4505696));
3596     CSeq_id sid("gi|29809252");
3597     pair<TSeqPos, TSeqPos> range(662070, 662129);
3598 
3599     CRef<CScope> scope(new CScope(CTestObjMgr::Instance().GetObjMgr()));
3600     scope->AddDefaults();
3601 
3602     auto_ptr<SSeqLoc> query(new SSeqLoc(qloc, scope));
3603     auto_ptr<SSeqLoc> subject(
3604         CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
3605 
3606     CBl2Seq blaster(*query, *subject, eMegablast);
3607     TSeqAlignVector sav(blaster.Run());
3608     CRef<CSeq_align_set> sas = sav.front();
3609     BOOST_REQUIRE(sas->Get().empty());
3610 }
3611 
3612 #endif /* SKIP_DOXYGEN_PROCESSING */
3613 
3614 BOOST_AUTO_TEST_SUITE_END()
3615