1 /*  $Id: psiblast_unit_test.cpp 568561 2018-08-07 18:29:39Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Christiam Camacho
27  *
28  */
29 
30 /** @file psiblast-cppunit.cpp
31  * Unit test module for the PSI-BLAST class
32  */
33 #define NCBI_TEST_APPLICATION
34 #include <ncbi_pch.hpp>
35 #include <corelib/test_boost.hpp>
36 #include <serial/iterator.hpp>
37 
38 // BLAST API includes
39 #include <algo/blast/api/psiblast.hpp>
40 #include <algo/blast/api/uniform_search.hpp>
41 #include <algo/blast/api/local_db_adapter.hpp>
42 #include <algo/blast/api/objmgr_query_data.hpp>
43 
44 #include <algo/blast/blastinput/blast_scope_src.hpp>
45 
46 #include <objtools/blast/seqdb_reader/seqdb.hpp>
47 
48 // Pssm engine includes
49 #include <algo/blast/api/pssm_engine.hpp>
50 #include <algo/blast/api/psi_pssm_input.hpp>
51 
52 // Object includes
53 #include <objects/scoremat/Pssm.hpp>
54 #include <objects/scoremat/PssmFinalData.hpp>
55 #include <objects/scoremat/PssmWithParameters.hpp>
56 #include <objects/scoremat/PssmIntermediateData.hpp>
57 #include <objects/seqalign/Seq_align.hpp>
58 #include "psiblast_aux_priv.hpp"        // for PsiBlastComputePssmScores
59 
60 // Auxiliary test includes
61 #include "blast_test_util.hpp"
62 //#include "psiblast_test_util.hpp"
63 
64 // SeqAlign comparison includes
65 #include "seqalign_cmp.hpp"
66 #include "seqalign_set_convert.hpp"
67 
68 #include <algo/blast/api/psiblast_iteration.hpp>
69 #include "bioseq_extract_data_priv.hpp"
70 #include "blast_objmgr_priv.hpp"
71 
72 /// Calculate the size of a static array
73 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
74 
75 using namespace ncbi;
76 using namespace ncbi::objects;
77 using namespace ncbi::blast;
78 
79 struct CPsiBlastTestFixture {
80 
81     CRef<CPSIBlastOptionsHandle> m_OptHandle;
82     CRef<CPssmWithParameters> m_Pssm;
83     CSearchDatabase* m_SearchDb;
84 
85     /// Contains a single Bioseq
86     CRef<CSeq_entry> m_SeqEntry;
87 
88     /// Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295
89     CRef<CSeq_entry> m_SeqSet;
90 
x_ReadSeqEntriesFromFileCPsiBlastTestFixture91     void x_ReadSeqEntriesFromFile() {
92         const string kSeqEntryFile("data/7450545.seqentry.asn");
93         m_SeqEntry = TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
94 
95         m_SeqSet.Reset(new CSeq_entry);
96         m_SeqSet->SetSet().SetSeq_set().push_back(m_SeqEntry);
97         BOOST_REQUIRE(m_Pssm &&
98                m_Pssm->CanGetPssm() &&
99                m_Pssm->GetPssm().CanGetQuery());
100         CRef<CSeq_entry> second_bioseq(&m_Pssm->SetPssm().SetQuery());
101         m_SeqSet->SetSet().SetSeq_set().push_back(second_bioseq);
102     }
103 
x_ReadPssmFromFileCPsiBlastTestFixture104     void x_ReadPssmFromFile() {
105         const string kPssmFile("data/pssm_freq_ratios.asn");
106         m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
107         BOOST_REQUIRE(m_Pssm->GetPssm().CanGetQuery());
108         BOOST_REQUIRE(m_Pssm->GetPssm().CanGetIntermediateData());
109         BOOST_REQUIRE(!m_Pssm->GetPssm().CanGetFinalData());
110     }
111 
CPsiBlastTestFixtureCPsiBlastTestFixture112     CPsiBlastTestFixture() {
113         m_OptHandle.Reset(new CPSIBlastOptionsHandle);
114         BOOST_REQUIRE_EQUAL(eCompositionMatrixAdjust,
115                              m_OptHandle->GetCompositionBasedStats());
116         m_SearchDb = new CSearchDatabase("data/seqp",
117                                          CSearchDatabase::eBlastDbIsProtein);
118 
119         x_ReadPssmFromFile();
120         PsiBlastComputePssmScores(m_Pssm, m_OptHandle->GetOptions());
121         BOOST_REQUIRE(m_Pssm->GetPssm().GetFinalData().CanGetScores());
122 
123         x_ReadSeqEntriesFromFile();
124     }
125 
~CPsiBlastTestFixtureCPsiBlastTestFixture126     ~CPsiBlastTestFixture() {
127         m_OptHandle.Reset();
128         m_Pssm.Reset();
129         delete m_SearchDb;
130         m_SeqEntry.Reset();
131         m_SeqSet.Reset();
132     }
133 
s_CountNumberUniqueGIsCPsiBlastTestFixture134     int s_CountNumberUniqueGIs(CConstRef<CSeq_align_set> sas)
135     {
136         int num_gis = 0;
137         TGi last_gi = INVALID_GI;
138         CSeq_id last_id;
139         ITERATE(CSeq_align_set::Tdata, itr, sas->Get()){
140             const CSeq_id& seqid = (*itr)->GetSeq_id(1);
141             if ( !CSeq_id::PreferAccessionOverGi() ) {
142                 TGi new_gi = seqid.GetGi();
143                 if (new_gi != last_gi)
144                 {
145                      num_gis++;
146                      last_gi = new_gi;
147                 }
148             }
149             else if ( !seqid.Equals(last_id) ) {
150                 num_gis++;
151                 last_id.Assign(seqid);
152             }
153         }
154         return num_gis;
155     }
156 
s_SetupSubjectCPsiBlastTestFixture157     IQueryFactory *s_SetupSubject(CConstRef<CBioseq> bioseq) {
158         TSeqLocVector subjects;
159         CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
160         CConstRef<CSeq_id> sid = (scope->AddBioseq(*bioseq)).GetSeqId();
161         CRef<CSeq_loc> sl(new CSeq_loc());
162         sl->SetWhole();
163         sl->SetId(*sid);
164         SSeqLoc ssl(*sl, *scope);
165         subjects.push_back(ssl);
166         return (IQueryFactory *)(new CObjMgr_QueryFactory(subjects));
167     }
168 
s_SetupSubjectCPsiBlastTestFixture169     IQueryFactory *s_SetupSubject(CConstRef<CBioseq_set> bioseq_set) {
170         TSeqLocVector subjects;
171         CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
172         CTypeConstIterator<CBioseq> itr(ConstBegin(*bioseq_set, eDetectLoops));
173         for (; itr; ++itr) {
174             CConstRef<CSeq_id> sid = (scope->AddBioseq(*itr)).GetSeqId();
175             CRef<CSeq_loc> sl(new CSeq_loc());
176             sl->SetWhole();
177             sl->SetId(*sid);
178             SSeqLoc ssl(*sl, *scope);
179             subjects.push_back(ssl);
180         }
181         return (IQueryFactory *)(new CObjMgr_QueryFactory(subjects));
182     }
183 
184     CRef<CPssmWithParameters>
x_ComputePssmForNextIterationCPsiBlastTestFixture185     x_ComputePssmForNextIteration(const CBioseq& query,
186                                   CConstRef<CSeq_align_set> sset,
187                                   CConstRef<CPSIBlastOptionsHandle> opts_handle,
188                                   CConstRef<CBlastAncillaryData> ancillary_data)
189     {
190         CRef<CSeqDB> db(new CSeqDB("data/seqp", CSeqDB::eProtein));
191         CBlastScopeSource blast_om(db);
192         CRef<CScope> scope(blast_om.NewScope());
193         CPSIDiagnosticsRequest diags(PSIDiagnosticsRequestNew());
194         diags->frequency_ratios = true;
195         return blast::PsiBlastComputePssmFromAlignment(query, sset, scope,
196                                                        *opts_handle,
197                                                        ancillary_data,
198                                                        diags);
199     }
200 
201 };
202 
BOOST_FIXTURE_TEST_SUITE(psiblast,CPsiBlastTestFixture)203 BOOST_FIXTURE_TEST_SUITE(psiblast, CPsiBlastTestFixture)
204 
205 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_NoCBS) {
206     m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
207     m_OptHandle->SetEvalueThreshold(1.5);
208     CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
209     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
210     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
211     CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
212 
213     CSearchResultSet results(*psiblast.Run());
214     BOOST_REQUIRE(results[0].GetErrors().empty());
215 
216     const int kNumExpectedMatchingSeqs = 3;
217     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
218 
219     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
220 
221     const size_t kNumExpectedHSPs = 3;
222     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
223 
224     // HSP # 1
225     expected_results[0].score = 64;
226     expected_results[0].evalue = 2.46806e-1;
227     expected_results[0].bit_score = 292610051e-7;
228     expected_results[0].num_ident = 18;
229     expected_results[0].starts.push_back(34);
230     expected_results[0].starts.push_back(95);
231     expected_results[0].lengths.push_back(53);
232 
233     // HSP # 2
234     {
235         int starts[] = { 203, 280, 220, -1, 226, 297, 258, -1, 265, 329 };
236         int lengths[] = { 17, 6, 32, 7, 39 };
237         expected_results[1].score = 63;
238         expected_results[1].evalue = 3.48342e-1;
239         expected_results[1].bit_score = 288758055e-7;
240         expected_results[1].num_ident = 24;
241         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
242              back_inserter(expected_results[1].starts));
243         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
244              back_inserter(expected_results[1].lengths));
245     }
246 
247     // HSP # 3
248     {
249         int starts[] = { 180, 97, 204, -1, 205, 121, -1, 127, 211, 128,
250             241, -1, 242, 158, -1, 197, 281, 201, 306, -1, 318, 226, 323,
251             -1, 327, 231 };
252         int lengths[] = { 24, 1, 6, 1, 30, 1, 39, 4, 25, 12, 5, 4, 35 };
253         expected_results[2].score = 60;
254         expected_results[2].evalue = 7.36231e-1;
255         expected_results[2].bit_score = 277202068e-7;
256         expected_results[2].num_ident = 42;
257         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
258              back_inserter(expected_results[2].starts));
259         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
260              back_inserter(expected_results[2].lengths));
261     }
262 
263     /* HSP # 4
264     {
265         int starts[] = { 43, 13, -1, 84, 114, 91, 135, -1, 136, 112,
266             149, -1, 151, 125, -1, 146, 172, 147, 177, -1, 179, 152,
267             195, -1, 200, 168, 226, -1, 228, 194 };
268         int lengths[] = { 71, 7, 21, 1, 13, 2, 21, 1, 5, 2, 16, 5, 26, 2,
269             11 };
270         expected_results[3].score = 57;
271         expected_results[3].evalue = 147345337e-8;
272         expected_results[3].bit_score = 265646081e-7;
273         expected_results[3].num_ident = 48;
274         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
275              back_inserter(expected_results[3].starts));
276         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
277              back_inserter(expected_results[3].lengths));
278     }
279     */
280 
281     qa::TSeqAlignSet actual_results;
282     qa::SeqAlignSetConvert(*sas, actual_results);
283 
284     qa::CSeqAlignCmpOpts opts;
285     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
286     string errors;
287     bool identical_results = cmp.Run(&errors);
288 
289     BOOST_REQUIRE_MESSAGE(identical_results, errors);
290 }
291 
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBS)292 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBS) {
293     m_OptHandle->SetCompositionBasedStats(eCompositionBasedStats);
294     CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
295     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
296     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
297     CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
298 
299     CSearchResultSet results(*psiblast.Run());
300     BOOST_REQUIRE(results[0].GetErrors().empty());
301 
302     const int kNumExpectedMatchingSeqs = 4;
303     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
304 
305     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
306 
307     const size_t kNumExpectedHSPs = 4;
308     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
309 
310     // HSP # 1
311     expected_results[0].score = 63;
312     expected_results[0].evalue = 2.96035e-1;
313     expected_results[0].bit_score = 288758056e-7;
314     expected_results[0].starts.push_back(34);
315     expected_results[0].starts.push_back(95);
316     expected_results[0].lengths.push_back(53);
317     expected_results[0].num_ident = 18;
318 
319     // HSP # 2
320     expected_results[1].score = 59;
321     expected_results[1].evalue = 9.29330e-1;
322     expected_results[1].bit_score = 273350073e-7;
323     expected_results[1].starts.push_back(203);
324     expected_results[1].starts.push_back(280);
325     expected_results[1].starts.push_back(220);
326     expected_results[1].starts.push_back(-1);
327     expected_results[1].starts.push_back(226);
328     expected_results[1].starts.push_back(297);
329     expected_results[1].starts.push_back(258);
330     expected_results[1].starts.push_back(-1);
331     expected_results[1].starts.push_back(265);
332     expected_results[1].starts.push_back(329);
333     expected_results[1].lengths.push_back(17);
334     expected_results[1].lengths.push_back(6);
335     expected_results[1].lengths.push_back(32);
336     expected_results[1].lengths.push_back(7);
337     expected_results[1].lengths.push_back(39);
338     expected_results[1].num_ident = 24;
339 
340     // HSP # 3
341     expected_results[2].score = 52;
342     expected_results[2].evalue = 6.67208;
343     expected_results[2].bit_score = 246386102e-7;
344     expected_results[2].starts.push_back(322);
345     expected_results[2].starts.push_back(46);
346     expected_results[2].lengths.push_back(28);
347     expected_results[2].num_ident = 10;
348 
349 
350     // HSP # 4
351     expected_results[3].score = 50;
352     expected_results[3].evalue = 6.81719;
353     expected_results[3].bit_score = 23.8682;
354     expected_results[3].starts.push_back(295);
355     expected_results[3].starts.push_back(23);
356     expected_results[3].starts.push_back(301);
357     expected_results[3].starts.push_back(-1);
358     expected_results[3].starts.push_back(304);
359     expected_results[3].starts.push_back(29);
360     expected_results[3].lengths.push_back(6);
361     expected_results[3].lengths.push_back(3);
362     expected_results[3].lengths.push_back(23);
363     expected_results[3].num_ident = 16;
364 
365     qa::TSeqAlignSet actual_results;
366     qa::SeqAlignSetConvert(*sas, actual_results);
367 
368     qa::CSeqAlignCmpOpts opts;
369     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
370     string errors;
371     bool identical_results = cmp.Run(&errors);
372 
373     BOOST_REQUIRE_MESSAGE(identical_results, errors);
374 }
375 
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSConditional)376 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSConditional) {
377     m_OptHandle->SetCompositionBasedStats(eCompositionMatrixAdjust);
378     m_OptHandle->SetEvalueThreshold(5.0);
379     CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
380     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
381     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
382     CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
383 
384     CSearchResultSet results(*psiblast.Run());
385     BOOST_REQUIRE(results[0].GetErrors().empty());
386 
387     const int kNumExpectedMatchingSeqs = 3;
388     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
389 
390     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
391 
392     const size_t kNumExpectedHSPs = 3;
393     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
394 
395     // HSP # 1
396     expected_results[0].score = 59;
397     expected_results[0].evalue = 8.66100e-1;
398     expected_results[0].bit_score = 273350073e-7;
399     expected_results[0].starts.push_back(34);
400     expected_results[0].starts.push_back(95);
401     expected_results[0].lengths.push_back(53);
402     expected_results[0].sequence_gis.SetQuery(7450545);
403     expected_results[0].sequence_gis.SetSubject(22982149);
404     expected_results[0].num_ident = 18;
405 
406     // HSP # 3
407     {
408         int starts[] = { 322 , 46 , -1 , 75 , 351 , 81 , -1 , 94 , 364 ,
409             97 , -1 , 106 , 373 , 109 };
410         int lengths[] = { 29 , 6 , 13 , 3 , 9 , 3 , 17 };
411         expected_results[1].score = 53;
412         expected_results[1].evalue = 4.15768;
413         expected_results[1].bit_score = 250238098e-7;
414         expected_results[1].sequence_gis.SetQuery(7450545);
415         expected_results[1].sequence_gis.SetSubject(43121985);
416         expected_results[1].num_ident = 19;
417         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
418              back_inserter(expected_results[1].starts));
419         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
420              back_inserter(expected_results[1].lengths));
421     }
422 
423     // HSP # 2
424     {
425         int starts[] = { 125 , 199 , 146 , -1 , 148 , 220 , -1 , 228 ,
426             156 , 233 , -1 , 250 , 173 , 252 , 179 , -1 , 181 , 258 ,
427             220 , -1 , 226 , 297 , 258 , -1 , 265 , 329 };
428         int lengths[] = { 21 , 2 , 8 , 5 , 17 , 2 , 6 , 2 , 39 , 6 , 32 ,
429             7 , 39 };
430         expected_results[2].score = 54;
431         expected_results[2].evalue = 4.40967;
432         expected_results[2].bit_score = 254090094e-7;
433         expected_results[2].sequence_gis.SetQuery(7450545);
434         expected_results[2].sequence_gis.SetSubject(13242404);
435         expected_results[2].num_ident = 39;
436         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
437              back_inserter(expected_results[2].starts));
438         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
439              back_inserter(expected_results[2].lengths));
440     }
441 
442 
443     // HSP # 4
444 /*
445     expected_results[3].score = 53;
446     expected_results[3].evalue = 5.65033;
447     expected_results[3].bit_score = 25.0238;
448     expected_results[3].sequence_gis.SetQuery(7450545);
449     expected_results[3].sequence_gis.SetSubject(15836829);
450     expected_results[3].starts.push_back(39);
451     expected_results[3].starts.push_back(304);
452     expected_results[3].lengths.push_back(41);
453     expected_results[3].lengths.push_back(41);
454     expected_results[3].num_ident = 15;
455 */
456 
457 
458     qa::TSeqAlignSet actual_results;
459     qa::SeqAlignSetConvert(*sas, actual_results);
460 
461     qa::CSeqAlignCmpOpts opts;
462     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
463     string errors;
464     bool identical_results = cmp.Run(&errors);
465 
466     BOOST_REQUIRE_MESSAGE(identical_results, errors);
467 }
468 
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSUniversal)469 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSUniversal) {
470     m_OptHandle->SetCompositionBasedStats(eCompoForceFullMatrixAdjust);
471     CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
472     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
473     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
474     CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
475 
476     CSearchResultSet results(*psiblast.Run());
477     BOOST_REQUIRE(results[0].GetErrors().empty());
478 
479     const int kNumExpectedMatchingSeqs = 6;
480     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
481 
482     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
483 
484     const size_t kNumExpectedHSPs = 6;
485     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
486 
487     // HSP # 1
488     expected_results[0].score = 59;
489     expected_results[0].evalue = 8.66100e-1;
490     expected_results[0].bit_score = 273350073e-7;
491     expected_results[0].starts.push_back(34);
492     expected_results[0].starts.push_back(95);
493     expected_results[0].lengths.push_back(53);
494     expected_results[0].sequence_gis.SetQuery(7450545);
495     expected_results[0].sequence_gis.SetSubject(22982149);
496     expected_results[0].num_ident = 18;
497 
498     // HSP # 2
499     {
500         int starts[] = { 322 , 46 , -1 , 75 , 351 , 81 , -1 , 94 , 364 ,
501             97 , -1 , 106 , 373 , 109 };
502         int lengths[] = { 29 , 6 , 13 , 3 , 9 , 3 , 17 };
503         expected_results[1].score = 53;
504         expected_results[1].evalue = 4.15768;
505         expected_results[1].bit_score = 250238098e-7;
506         expected_results[1].sequence_gis.SetQuery(7450545);
507         expected_results[1].sequence_gis.SetSubject(43121985);
508         expected_results[1].num_ident = 19;
509         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
510              back_inserter(expected_results[1].starts));
511         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
512              back_inserter(expected_results[1].lengths));
513     }
514 
515 
516     // HSP # 3
517     {
518         int starts[] = { 125 , 199 , 146 , -1 , 148 , 220 , -1 , 228 ,
519             156 , 233 , -1 , 250 , 173 , 252 , 179 , -1 , 181 , 258 ,
520             220 , -1 , 226 , 297 , 258 , -1 , 265 , 329 };
521         int lengths[] = { 21 , 2 , 8 , 5 , 17 , 2 , 6 , 2 , 39 , 6 , 32 ,
522             7 , 39 };
523         expected_results[2].score = 54;
524         expected_results[2].evalue = 4.40967;
525         expected_results[2].bit_score = 254090094e-7;
526         expected_results[2].sequence_gis.SetQuery(7450545);
527         expected_results[2].sequence_gis.SetSubject(13242404);
528         expected_results[2].num_ident = 39;
529         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
530              back_inserter(expected_results[2].starts));
531         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
532              back_inserter(expected_results[2].lengths));
533     }
534 
535     /* HSP # 4
536     expected_results[3].score = 53;
537     expected_results[3].evalue = 466014414e-8;
538     expected_results[3].bit_score = 250238098e-7;
539     expected_results[3].sequence_gis.SetQuery(7450545);
540     expected_results[3].sequence_gis.SetSubject(45683609);
541     expected_results[3].starts.push_back(39);
542     expected_results[3].starts.push_back(304);
543     expected_results[3].lengths.push_back(41);
544     */
545 
546     qa::TSeqAlignSet actual_results;
547     qa::SeqAlignSetConvert(*sas, actual_results);
548 
549     qa::CSeqAlignCmpOpts opts;
550     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
551     string errors;
552     bool identical_results = cmp.Run(&errors);
553 
554     BOOST_REQUIRE_MESSAGE(identical_results, errors);
555 }
556 
BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_ProteinAsQuery_NoCBS)557 BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_ProteinAsQuery_NoCBS) {
558     const int kNumIterations = 4;
559     const int kNumExpectedIterations = 2;
560     CPsiBlastIterationState itr(kNumIterations);
561     m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
562 
563     CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
564     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
565     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
566     CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
567 
568     int hits_below_threshold[kNumIterations] = { 0, 0, 0, 0 };
569     size_t number_hits[kNumIterations] = { 11, 14, 0, 0 };
570 
571     int iteration_counter = 0;
572     while (itr) {
573         CSearchResultSet results = *psiblast.Run();
574         BOOST_REQUIRE(results[0].GetErrors().empty());
575         CConstRef<CSeq_align_set> alignment = results[0].GetSeqAlign();
576         BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
577                              alignment->Get().size());
578 
579         CPsiBlastIterationState::TSeqIds ids;
580         CPsiBlastIterationState::GetSeqIds(alignment, m_OptHandle, ids);
581 
582         string m("On round ");
583         m += NStr::IntToString(itr.GetIterationNumber()) + " found ";
584         m += NStr::SizetToString(ids.size()) + " qualifying ids";
585         BOOST_REQUIRE_MESSAGE(
586                 hits_below_threshold[iteration_counter]==(int)ids.size(), m);
587         itr.Advance(ids);
588 
589         if (itr) {
590             CRef<CPssmWithParameters> pssm =
591                 x_ComputePssmForNextIteration(*bioseq, alignment,
592                       m_OptHandle, results[0].GetAncillaryData());
593             psiblast.SetPssm(pssm);
594         }
595         iteration_counter++;
596     }
597 
598     BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
599 }
600 
BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_NoCBS)601 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_NoCBS) {
602 
603     m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
604     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
605     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
606 
607     CSearchResultSet results(*psiblast.Run());
608     BOOST_REQUIRE(results[0].GetErrors().empty());
609 
610     const int kNumExpectedMatchingSeqs = 6;
611     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
612     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
613 
614     const size_t kNumExpectedHSPs = 7;
615     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
616 
617     // HSP # 1
618     {
619         int starts[] = { 0, 941, -1, 1093, 152, 1094 };
620         int lengths[] = { 152, 1, 80 };
621         expected_results[0].score = 595;
622         expected_results[0].evalue = 2.70189-71;
623         expected_results[0].bit_score = 233623298e-6;
624         expected_results[0].num_ident = 101;
625         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
626              back_inserter(expected_results[0].starts));
627         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
628              back_inserter(expected_results[0].lengths));
629     }
630 
631     // HSP # 2
632     {
633         int starts[] = { 0, 154, -1, 308, 154, 309 };
634         int lengths[] = { 154, 1, 24 };
635         expected_results[1].score = 424;
636         expected_results[1].evalue = 6.54598e-49;
637         expected_results[1].bit_score = 167754171e-6;
638         expected_results[1].num_ident = 73;
639         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
640              back_inserter(expected_results[1].starts));
641         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
642              back_inserter(expected_results[1].lengths));
643     }
644 
645     // HSP # 3
646     {
647         int starts[] =
648             { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
649              -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
650              -1, 374, 190, 380, 200, -1, 202, 390 };
651         int lengths[] =
652             { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
653         expected_results[2].score = 372;
654         expected_results[2].evalue = 1.67386e-43;
655         expected_results[2].bit_score = 147723793e-6;
656         expected_results[2].num_ident = 87;
657         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
658              back_inserter(expected_results[2].starts));
659         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
660              back_inserter(expected_results[2].lengths));
661     }
662 
663     // HSP # 4
664     expected_results[3].score = 53;
665     expected_results[3].evalue = 2.43336;
666     expected_results[3].bit_score = 248451288e-7;
667     expected_results[3].num_ident = 8;
668     expected_results[3].starts.push_back(206);
669     expected_results[3].starts.push_back(46);
670     expected_results[3].lengths.push_back(19);
671 
672     // HSP # 5
673     {
674         int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
675         int lengths[] = { 6, 1, 22, 10, 14 };
676         expected_results[4].score = 52;
677         expected_results[4].evalue = 3.12771;
678         expected_results[4].bit_score = 244599292e-7;
679         expected_results[4].num_ident = 11;
680         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
681              back_inserter(expected_results[4].starts));
682         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
683              back_inserter(expected_results[4].lengths));
684     }
685 
686     // HSP # 6
687     {
688         int starts[] = { 74, 181, 108, -1, 109, 215 };
689         int lengths[] = { 34, 1, 23 };
690         expected_results[5].score = 49;
691         expected_results[5].evalue = 8.37737;
692         expected_results[5].bit_score = 233043305e-7;
693         expected_results[5].num_ident = 14;
694         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
695              back_inserter(expected_results[5].starts));
696         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
697              back_inserter(expected_results[5].lengths));
698     }
699 
700     // HSP # 7
701     expected_results[6].score = 49;
702     expected_results[6].evalue = 8.62465;
703     expected_results[6].bit_score = 233043305e-7;
704     expected_results[6].num_ident = 6;
705     expected_results[6].starts.push_back(188);
706     expected_results[6].starts.push_back(709);
707     expected_results[6].lengths.push_back(30);
708 
709     qa::TSeqAlignSet actual_results;
710     qa::SeqAlignSetConvert(*sas, actual_results);
711 
712     qa::CSeqAlignCmpOpts opts;
713     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
714     string errors;
715     bool identical_results = cmp.Run(&errors);
716 
717     BOOST_REQUIRE_MESSAGE(identical_results, errors);
718 }
719 
BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBS)720 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBS) {
721 
722     m_OptHandle->SetCompositionBasedStats(eCompositionBasedStats);
723     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
724     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
725 
726     CSearchResultSet results(*psiblast.Run());
727     BOOST_REQUIRE(results[0].GetErrors().empty());
728 
729     const int kNumExpectedMatchingSeqs = 4;
730     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
731     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
732 
733     const size_t kNumExpectedHSPs = 5;
734     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
735 
736     // HSP # 1
737     {
738         int starts[] = { 0, 941, -1, 1093, 152, 1094 };
739         int lengths[] = { 152, 1, 80 };
740         expected_results[0].score = 593;
741         expected_results[0].evalue = 1.15934e-71;
742         expected_results[0].bit_score = 232843196e-6;
743         expected_results[0].sequence_gis.SetQuery(129295);
744         expected_results[0].sequence_gis.SetSubject(34878800);
745         expected_results[0].num_ident = 101;
746         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
747              back_inserter(expected_results[0].starts));
748         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
749              back_inserter(expected_results[0].lengths));
750     }
751 
752     // HSP # 2
753     {
754         int starts[] = { 0, 154, -1, 308, 154, 309 };
755         int lengths[] = { 154, 1, 24 };
756         expected_results[1].score = 417;
757         expected_results[1].evalue = 5.13954e-48;
758         expected_results[1].bit_score = 165048071e-6;
759         expected_results[1].sequence_gis.SetQuery(129295);
760         expected_results[1].sequence_gis.SetSubject(34878800);
761         expected_results[1].num_ident = 73;
762         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
763              back_inserter(expected_results[1].starts));
764         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
765              back_inserter(expected_results[1].lengths));
766     }
767 
768     // HSP # 3
769     {
770         int starts[] =
771             { 0, 190, 65, -1, 67, 255, 93, -1, 94, 281, 111, -1, 113, 298,
772              -1, 304, 119, 305, 153, -1, 154, 339, 164, -1, 165, 349,
773              -1, 378, 194, 382 };
774         int lengths[] =
775             { 65, 2, 26, 1, 17, 2, 6, 1, 34, 1, 10, 1, 29, 4, 38 };
776         expected_results[2].score = 359;
777         expected_results[2].evalue = 1.18595e-41;
778         expected_results[2].bit_score = 142706496e-6;
779         expected_results[2].sequence_gis.SetQuery(129295);
780         expected_results[2].sequence_gis.SetSubject(20092202);
781         expected_results[2].num_ident = 85;
782         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
783              back_inserter(expected_results[2].starts));
784         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
785              back_inserter(expected_results[2].lengths));
786     }
787 
788     // HSP # 4
789     expected_results[3].score = 53;
790     expected_results[3].evalue = 2.14427;
791     expected_results[3].bit_score = 248354256e-7;
792     expected_results[3].sequence_gis.SetQuery(129295);
793     expected_results[3].sequence_gis.SetSubject(44343511);
794     expected_results[3].starts.push_back(206);
795     expected_results[3].starts.push_back(46);
796     expected_results[3].lengths.push_back(19);
797     expected_results[3].num_ident = 8;
798 
799     // HSP # 5
800     expected_results[4].score = 51;
801     expected_results[4].evalue = 5.09267;
802     expected_results[4].bit_score = 240650265e-7;
803     expected_results[4].sequence_gis.SetQuery(129295);
804     expected_results[4].sequence_gis.SetSubject(23481125);
805     expected_results[4].starts.push_back(188);
806     expected_results[4].starts.push_back(709);
807     expected_results[4].lengths.push_back(30);
808     expected_results[4].num_ident = 6;
809 
810     /* HSP # 6
811     {
812         int starts[] = { 74, 181, 108, -1, 109, 215 };
813         int lengths[] = { 34, 1, 23 };
814         expected_results[5].score = 48;
815         expected_results[5].evalue = 878932376e-8;
816         expected_results[5].bit_score = 229094278e-7;
817         expected_results[5].sequence_gis.SetQuery(129295);
818         expected_results[5].sequence_gis.SetSubject(38088088);
819         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
820              back_inserter(expected_results[5].starts));
821         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
822              back_inserter(expected_results[5].lengths));
823     }
824     */
825 
826     qa::TSeqAlignSet actual_results;
827     qa::SeqAlignSetConvert(*sas, actual_results);
828 
829     qa::CSeqAlignCmpOpts opts;
830     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
831     string errors;
832     bool identical_results = cmp.Run(&errors);
833 
834     BOOST_REQUIRE_MESSAGE(identical_results, errors);
835 }
836 
837 // N.B.: these can't be done from a PSSM
838 #if 0
839 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBSConditional) {
840 
841     m_OptHandle->SetCompositionBasedStats(eCompositionMatrixAdjust);
842     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
843     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
844 
845     CSearchResultSet results(*psiblast.Run());
846     BOOST_REQUIRE(results[0].GetErrors().empty());
847 
848     const int kNumExpectedMatchingSeqs = 6;
849     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
850     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
851 
852     const size_t kNumExpectedHSPs = 7;
853     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
854 
855     // HSP # 1
856     {
857         int starts[] = { 0, 941, -1, 1093, 152, 1094 };
858         int lengths[] = { 152, 1, 80 };
859         expected_results[0].score = 595;
860         expected_results[0].evalue = 307180919e-71;
861         expected_results[0].bit_score = 233623298e-6;
862         expected_results[0].num_ident = 101;
863         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
864              back_inserter(expected_results[0].starts));
865         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
866              back_inserter(expected_results[0].lengths));
867     }
868 
869     // HSP # 2
870     {
871         int starts[] = { 0, 154, -1, 308, 154, 309 };
872         int lengths[] = { 154, 1, 24 };
873         expected_results[1].score = 424;
874         expected_results[1].evalue = 20700336e-50;
875         expected_results[1].bit_score = 167754171e-6;
876         expected_results[1].num_ident = 73;
877         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
878              back_inserter(expected_results[1].starts));
879         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
880              back_inserter(expected_results[1].lengths));
881     }
882 
883     // HSP # 3
884     {
885         int starts[] =
886             { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
887              -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
888              -1, 374, 190, 380, 200, -1, 202, 390 };
889         int lengths[] =
890             { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
891         expected_results[2].score = 372;
892         expected_results[2].evalue = 221677687e-45;
893         expected_results[2].bit_score = 147723793e-6;
894         expected_results[2].num_ident = 87;
895         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
896              back_inserter(expected_results[2].starts));
897         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
898              back_inserter(expected_results[2].lengths));
899     }
900 
901     // HSP # 4
902     expected_results[3].score = 53;
903     expected_results[3].evalue = 216713461e-8;
904     expected_results[3].bit_score = 248451288e-7;
905     expected_results[3].num_ident = 8;
906     expected_results[3].starts.push_back(206);
907     expected_results[3].starts.push_back(46);
908     expected_results[3].lengths.push_back(19);
909 
910     // HSP # 5
911     {
912         int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
913         int lengths[] = { 6, 1, 22, 10, 14 };
914         expected_results[4].score = 52;
915         expected_results[4].evalue = 283036546e-8;
916         expected_results[4].bit_score = 244599292e-7;
917         expected_results[4].num_ident = 11;
918         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
919              back_inserter(expected_results[4].starts));
920         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
921              back_inserter(expected_results[4].lengths));
922     }
923 
924     // HSP # 6
925     {
926         int starts[] = { 74, 181, 108, -1, 109, 215 };
927         int lengths[] = { 34, 1, 23 };
928         expected_results[5].score = 49;
929         expected_results[5].evalue = 630539642e-8;
930         expected_results[5].bit_score = 233043305e-7;
931         expected_results[5].num_ident = 14;
932         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
933              back_inserter(expected_results[5].starts));
934         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
935              back_inserter(expected_results[5].lengths));
936     }
937 
938     // HSP # 7
939     expected_results[6].score = 49;
940     expected_results[6].evalue = 630539642e-8;
941     expected_results[6].bit_score = 233043305e-7;
942     expected_results[6].num_ident = 6;
943     expected_results[6].starts.push_back(188);
944     expected_results[6].starts.push_back(709);
945     expected_results[6].lengths.push_back(30);
946 
947     qa::TSeqAlignSet actual_results;
948     qa::SeqAlignSetConvert(*sas, actual_results);
949 
950     qa::CSeqAlignCmpOpts opts;
951     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
952     string errors;
953     bool identical_results = cmp.Run(&errors);
954 
955     BOOST_REQUIRE_MESSAGE(identical_results, errors);
956 }
957 
958 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBSUniversal) {
959 
960     m_OptHandle->SetCompositionBasedStats(eCompoForceFullMatrixAdjust);
961     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
962     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
963 
964     CSearchResultSet results(*psiblast.Run());
965     BOOST_REQUIRE(results[0].GetErrors().empty());
966 
967     const int kNumExpectedMatchingSeqs = 6;
968     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
969     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
970 
971     const size_t kNumExpectedHSPs = 7;
972     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
973 
974     // HSP # 1
975     {
976         int starts[] = { 0, 941, -1, 1093, 152, 1094 };
977         int lengths[] = { 152, 1, 80 };
978         expected_results[0].score = 595;
979         expected_results[0].evalue = 307180919e-71;
980         expected_results[0].bit_score = 233623298e-6;
981         expected_results[0].num_ident = 101;
982         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
983              back_inserter(expected_results[0].starts));
984         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
985              back_inserter(expected_results[0].lengths));
986     }
987 
988     // HSP # 2
989     {
990         int starts[] = { 0, 154, -1, 308, 154, 309 };
991         int lengths[] = { 154, 1, 24 };
992         expected_results[1].score = 424;
993         expected_results[1].evalue = 20700336e-50;
994         expected_results[1].bit_score = 167754171e-6;
995         expected_results[1].num_ident = 73;
996         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
997              back_inserter(expected_results[1].starts));
998         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
999              back_inserter(expected_results[1].lengths));
1000     }
1001 
1002     // HSP # 3
1003     {
1004         int starts[] =
1005             { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
1006              -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
1007              -1, 374, 190, 380, 200, -1, 202, 390 };
1008         int lengths[] =
1009             { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
1010         expected_results[2].score = 372;
1011         expected_results[2].evalue = 221677687e-45;
1012         expected_results[2].bit_score = 147723793e-6;
1013         expected_results[2].num_ident = 87;
1014         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
1015              back_inserter(expected_results[2].starts));
1016         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
1017              back_inserter(expected_results[2].lengths));
1018     }
1019 
1020     // HSP # 4
1021     expected_results[3].score = 53;
1022     expected_results[3].evalue = 216713461e-8;
1023     expected_results[3].bit_score = 248451288e-7;
1024     expected_results[3].num_ident = 8;
1025     expected_results[3].starts.push_back(206);
1026     expected_results[3].starts.push_back(46);
1027     expected_results[3].lengths.push_back(19);
1028 
1029     // HSP # 5
1030     {
1031         int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
1032         int lengths[] = { 6, 1, 22, 10, 14 };
1033         expected_results[4].score = 52;
1034         expected_results[4].evalue = 283036546e-8;
1035         expected_results[4].bit_score = 244599292e-7;
1036         expected_results[4].num_ident = 11;
1037         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
1038              back_inserter(expected_results[4].starts));
1039         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
1040              back_inserter(expected_results[4].lengths));
1041     }
1042 
1043     // HSP # 6
1044     {
1045         int starts[] = { 74, 181, 108, -1, 109, 215 };
1046         int lengths[] = { 34, 1, 23 };
1047         expected_results[5].score = 49;
1048         expected_results[5].evalue = 630539642e-8;
1049         expected_results[5].bit_score = 233043305e-7;
1050         expected_results[5].num_ident = 14;
1051         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
1052              back_inserter(expected_results[5].starts));
1053         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
1054              back_inserter(expected_results[5].lengths));
1055     }
1056 
1057     // HSP # 7
1058     expected_results[6].score = 49;
1059     expected_results[6].evalue = 630539642e-8;
1060     expected_results[6].bit_score = 233043305e-7;
1061     expected_results[6].num_ident = 6;
1062     expected_results[6].starts.push_back(188);
1063     expected_results[6].starts.push_back(709);
1064     expected_results[6].lengths.push_back(30);
1065 
1066     qa::TSeqAlignSet actual_results;
1067     qa::SeqAlignSetConvert(*sas, actual_results);
1068 
1069     qa::CSeqAlignCmpOpts opts;
1070     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
1071     string errors;
1072     bool identical_results = cmp.Run(&errors);
1073 
1074     BOOST_REQUIRE_MESSAGE(identical_results, errors);
1075 }
1076 #endif
1077 
1078 // This search will converge after 2 iterations
BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_NoCBS)1079 BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_NoCBS) {
1080 
1081     const int kNumIterations = 4;
1082     const int kNumExpectedIterations = 2;
1083     CPsiBlastIterationState itr(kNumIterations);
1084     m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
1085     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1086     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1087 
1088     int hits_below_threshold[kNumIterations] = { 2, 2, 0, 0 };
1089     int number_hits[kNumIterations] = { 6, 5, 0, 0 };
1090 
1091     int iteration_counter = 0;
1092     while (itr) {
1093         CSearchResultSet results = *psiblast.Run();
1094         BOOST_REQUIRE(results[0].GetErrors().empty());
1095         CConstRef<CSeq_align_set> alignment = results[0].GetSeqAlign();
1096 
1097         BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
1098                              s_CountNumberUniqueGIs(alignment));
1099 
1100         CPsiBlastIterationState::TSeqIds ids;
1101         CPsiBlastIterationState::GetSeqIds(alignment, m_OptHandle, ids);
1102 
1103         string m("On round ");
1104         m += NStr::IntToString(itr.GetIterationNumber()) + " found ";
1105         m += NStr::SizetToString(ids.size()) + " qualifying ids";
1106         BOOST_REQUIRE_EQUAL(hits_below_threshold[iteration_counter],
1107                              (int)ids.size());
1108         itr.Advance(ids);
1109 
1110         if (itr) {
1111             const CBioseq& query = m_Pssm->GetPssm().GetQuery().GetSeq();
1112             CRef<CPssmWithParameters> pssm =
1113                 x_ComputePssmForNextIteration(query, alignment,
1114                       m_OptHandle, results[0].GetAncillaryData());
1115             psiblast.SetPssm(pssm);
1116         }
1117         iteration_counter++;
1118     }
1119 
1120     BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
1121 }
1122 
1123 // This search will converge after 2 iterations
BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_CBS)1124 BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_CBS) {
1125 
1126     const int kNumIterations = 4;
1127     const int kNumExpectedIterations = 2;
1128     CPsiBlastIterationState itr(kNumIterations);
1129     m_OptHandle->SetCompositionBasedStats(eCompositionBasedStats);
1130     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1131     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1132 
1133     int hits_below_threshold[kNumIterations] = { 2, 2, 0, 0 };
1134     int number_hits[kNumIterations] = { 4, 3, 0, 0 };
1135 
1136     int iteration_counter = 0;
1137     while (itr) {
1138         CSearchResultSet results = *psiblast.Run();
1139         BOOST_REQUIRE(results[0].GetErrors().empty());
1140         CConstRef<CSeq_align_set> alignment = results[0].GetSeqAlign();
1141         BOOST_REQUIRE(alignment.NotEmpty());
1142         BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
1143                              s_CountNumberUniqueGIs(alignment));
1144 
1145         CPsiBlastIterationState::TSeqIds ids;
1146         CPsiBlastIterationState::GetSeqIds(alignment, m_OptHandle, ids);
1147 
1148         string m("On round ");
1149         m += NStr::IntToString(itr.GetIterationNumber()) + " found ";
1150         m += NStr::SizetToString(ids.size()) + " qualifying ids";
1151         BOOST_REQUIRE_EQUAL(hits_below_threshold[iteration_counter],
1152                              (int)ids.size());
1153         itr.Advance(ids);
1154 
1155         if (itr) {
1156             const CBioseq& query = m_Pssm->GetPssm().GetQuery().GetSeq();
1157             CRef<CPssmWithParameters> pssm =
1158                 x_ComputePssmForNextIteration(query, alignment,
1159                       m_OptHandle, results[0].GetAncillaryData());
1160             psiblast.SetPssm(pssm);
1161         }
1162         iteration_counter++;
1163     }
1164 
1165     BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
1166 }
1167 
1168 // Should throw exception as only one query sequence/pssm is allowed
BOOST_AUTO_TEST_CASE(TestMultipleQueries)1169 BOOST_AUTO_TEST_CASE(TestMultipleQueries) {
1170     CConstRef<CBioseq_set> bioseq_set(&m_SeqSet->GetSet());
1171     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq_set));
1172     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1173     BOOST_REQUIRE_THROW(CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle), CBlastException);
1174 }
1175 
BOOST_AUTO_TEST_CASE(TestNullQuery)1176 BOOST_AUTO_TEST_CASE(TestNullQuery) {
1177     CRef<IQueryFactory> query_factory;
1178     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1179     BOOST_REQUIRE_THROW(CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle),
1180                         CBlastException);
1181 }
1182 
BOOST_AUTO_TEST_CASE(TestFrequencyRatiosWithAllZerosInPssm)1183 BOOST_AUTO_TEST_CASE(TestFrequencyRatiosWithAllZerosInPssm) {
1184     NON_CONST_ITERATE(CPssmIntermediateData::TFreqRatios, fr,
1185             m_Pssm->SetPssm().SetIntermediateData().SetFreqRatios()) {
1186         *fr = 0.0;
1187     }
1188 
1189     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1190     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1191     CSearchResultSet r = *psiblast.Run();
1192     TQueryMessages messages = r[0].GetErrors(eBlastSevWarning);
1193     BOOST_REQUIRE( !messages.empty() );
1194 
1195     string expected_warning("Frequency ratios for PSSM are all zeros");
1196     string warning;
1197     ITERATE(TQueryMessages, m, messages) {
1198         if (((*m)->GetSeverity() == eBlastSevWarning) &&
1199             ((*m)->GetMessage().find(expected_warning) != string::npos)) {
1200                 warning = (*m)->GetMessage();
1201                 break;
1202         }
1203     }
1204     BOOST_REQUIRE_MESSAGE(!warning.empty(), "Did not find expected warning");
1205 }
1206 
BOOST_AUTO_TEST_CASE(TestNullPssm)1207 BOOST_AUTO_TEST_CASE(TestNullPssm) {
1208     m_Pssm.Reset();
1209     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1210     BOOST_REQUIRE_THROW(CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle),
1211                         CBlastException);
1212 }
1213 
BOOST_AUTO_TEST_CASE(TestSetNullPssm)1214 BOOST_AUTO_TEST_CASE(TestSetNullPssm) {
1215     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1216     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1217     CRef<CPssmWithParameters> pssm;
1218     BOOST_REQUIRE_THROW(psiblast.SetPssm(pssm), CBlastException);
1219 }
1220 
BOOST_AUTO_TEST_CASE(TestNonExistantDb)1221 BOOST_AUTO_TEST_CASE(TestNonExistantDb) {
1222     m_SearchDb->SetDatabaseName("dummy");
1223     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1224     CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1225     BOOST_REQUIRE_THROW(psiblast.Run(),CSeqDBException);
1226 }
1227 
BOOST_AUTO_TEST_CASE(TestNullOptions)1228 BOOST_AUTO_TEST_CASE(TestNullOptions) {
1229     m_OptHandle.Reset();
1230     CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
1231     CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
1232     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1233     BOOST_REQUIRE_THROW(CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle),
1234                         CBlastException);
1235 }
1236 
1237 BOOST_AUTO_TEST_SUITE_END()
1238