1 /*  $Id: delta_unit_test.cpp 531968 2017-03-30 17:34:38Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Christiam Camacho, Greg Boratyn
27  *
28  */
29 
30 /** @file delta_unit_test.cpp
31  * Unit test module for deltablast.
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 // Serial library includes
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39 
40 // Object includes
41 #include <objects/seqloc/Seq_id.hpp>
42 #include <objects/seqalign/Score.hpp>
43 #include <objects/seqalign/Dense_seg.hpp>
44 #include <objects/seqalign/Seq_align.hpp>
45 #include <objects/seqalign/Seq_align_set.hpp>
46 
47 #include <objmgr/scope.hpp>
48 
49 // PSSM includes
50 #include <objects/scoremat/Pssm.hpp>
51 #include <objects/scoremat/PssmWithParameters.hpp>
52 
53 // BLAST includes
54 #include <algo/blast/api/deltablast.hpp>
55 #include <algo/blast/api/objmgr_query_data.hpp>
56 #include <algo/blast/api/deltablast_options.hpp>
57 #include <algo/blast/api/uniform_search.hpp>
58 #include <algo/blast/api/local_db_adapter.hpp>
59 #include <algo/blast/api/blast_rps_options.hpp>
60 
61 // SeqAlign comparison includes
62 #include "seqalign_cmp.hpp"
63 #include "seqalign_set_convert.hpp"
64 
65 // Unit test auxiliary includes
66 #include "blast_test_util.hpp"
67 #include "test_objmgr.hpp"
68 
69 /// Calculate the size of a static array
70 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
71 
72 using namespace std;
73 using namespace ncbi;
74 using namespace ncbi::objects;
75 using namespace ncbi::blast;
76 
77 struct CDeltaBlastTestFixture {
78 
79     CRef<CDeltaBlastOptionsHandle> m_OptHandle;
80     CRef<CSearchDatabase> m_SearchDb;
81     CRef<CSearchDatabase> m_DomainDb;
82 
83     // This is needed only to get sequence for gi 129295 from asn1 file
84     CRef<CPssmWithParameters> m_Pssm;
85 
86     /// Contains a single Bioseq
87     CRef<CSeq_entry> m_SeqEntry;
88 
89     /// Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295
90     CRef<CSeq_entry> m_SeqSet;
91 
92     /// Scope
93     CRef<CScope> m_Scope;
94 
95     /// Seq-locs for creating instances of CObjMgr_QueryFactory
96     vector< CRef<CSeq_loc> > m_Seq_locs;
97 
x_ReadSeqEntriesFromFileCDeltaBlastTestFixture98     void x_ReadSeqEntriesFromFile() {
99 
100         const string kPssmFile("data/pssm_freq_ratios.asn");
101         m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
102         BOOST_REQUIRE(m_Pssm->GetPssm().CanGetQuery());
103         m_SeqEntry.Reset(&m_Pssm->SetPssm().SetQuery());
104         BOOST_REQUIRE(m_SeqEntry->IsSeq());
105 
106         CRef<CSeq_id> id(
107                     const_cast<CSeq_id*>(m_SeqEntry->SetSeq().GetFirstId()));
108         CRef<CSeq_loc> seqloc(new CSeq_loc(CSeq_loc::e_Whole));
109         seqloc->SetId(*id);
110         m_Seq_locs.push_back(seqloc);
111 
112 
113         const string kSeqEntryFile("data/7450545.seqentry.asn");
114         CRef<CSeq_entry> seq_entry =
115             TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
116 
117         m_SeqSet.Reset(new CSeq_entry);
118         m_SeqSet->SetSet().SetSeq_set().push_back(m_SeqEntry);
119         m_SeqSet->SetSet().SetSeq_set().push_back(seq_entry);
120         BOOST_REQUIRE(m_SeqEntry->IsSeq());
121 
122         id.Reset(const_cast<CSeq_id*>(seq_entry->SetSeq().GetFirstId()));
123         seqloc.Reset(new CSeq_loc(CSeq_loc::e_Whole));
124         seqloc->SetId(*id);
125         m_Seq_locs.push_back(seqloc);
126     }
127 
x_InitScopeCDeltaBlastTestFixture128     void x_InitScope() {
129         m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));
130         m_Scope->AddTopLevelSeqEntry(*m_SeqSet);
131     }
132 
CDeltaBlastTestFixtureCDeltaBlastTestFixture133     CDeltaBlastTestFixture() {
134         m_OptHandle.Reset(new CDeltaBlastOptionsHandle);
135         BOOST_REQUIRE_EQUAL(eCompositionBasedStats,
136                              m_OptHandle->GetCompositionBasedStats());
137         m_SearchDb.Reset(new CSearchDatabase("data/seqp",
138                                      CSearchDatabase::eBlastDbIsProtein));
139         m_DomainDb.Reset(new CSearchDatabase("data/deltatest",
140                                      CSearchDatabase::eBlastDbIsProtein));
141 
142         x_ReadSeqEntriesFromFile();
143         x_InitScope();
144     }
145 
~CDeltaBlastTestFixtureCDeltaBlastTestFixture146     ~CDeltaBlastTestFixture() {
147         m_OptHandle.Reset();
148         m_SearchDb.Reset();
149         m_DomainDb.Reset();
150         m_SeqEntry.Reset();
151         m_SeqSet.Reset();
152         m_Seq_locs.clear();
153         m_Scope.Reset();
154     }
155 
x_CountNumberUniqueIdsCDeltaBlastTestFixture156     int x_CountNumberUniqueIds(CConstRef<CSeq_align_set> sas)
157     {
158         int num_ids = 0;
159         TGi last_id = INVALID_GI;
160         string last_acc;
161         CTextseq_id::TVersion last_ver = -1;
162         ITERATE(CSeq_align_set::Tdata, itr, sas->Get()) {
163             const CSeq_id& seqid = (*itr)->GetSeq_id(1);
164             if ( !CSeq_id::PreferAccessionOverGi() ) {
165                 BOOST_REQUIRE(seqid.IsGi() || seqid.IsGeneral());
166             }
167             else {
168                 BOOST_REQUIRE(seqid.IsOther() ||
169                     seqid.IsGeneral() ||
170                     seqid.IsGenbank());
171             }
172 
173             const CTextseq_id* txt_id = seqid.GetTextseq_Id();
174             if (CSeq_id::PreferAccessionOverGi() && txt_id) {
175                 string acc = txt_id->GetAccession();
176                 CTextseq_id::TVersion ver = txt_id->IsSetVersion() ?
177                     txt_id->GetVersion() : -1;
178                 if (acc != last_acc || ver != last_ver) {
179                     num_ids++;
180                     last_acc = acc;
181                     last_ver = ver;
182                 }
183                 last_id = INVALID_GI;
184             }
185             else if (!CSeq_id::PreferAccessionOverGi() && seqid.IsGi()) {
186                 TGi new_gi = seqid.GetGi();
187 
188                 if (new_gi != last_id) {
189                         num_ids++;
190                         last_id = new_gi;
191                 }
192                 last_acc.clear();
193                 last_ver = -1;
194             }
195             else {
196                 BOOST_REQUIRE(seqid.GetGeneral().IsSetTag());
197                 int new_tag = seqid.GetGeneral().GetTag().GetId();
198 
199                 if (new_tag != GI_TO(int, last_id)) {
200                     num_ids++;
201                     last_id = GI_FROM(int, new_tag);
202                 }
203                 last_acc.clear();
204                 last_ver = -1;
205             }
206 
207         }
208         return num_ids;
209     }
210 };
211 
212 
BOOST_FIXTURE_TEST_SUITE(deltablast,CDeltaBlastTestFixture)213 BOOST_FIXTURE_TEST_SUITE(deltablast, CDeltaBlastTestFixture)
214 
215 BOOST_AUTO_TEST_CASE(TestSingleQuery_CBS)
216 {
217     TSeqLocVector query;
218     query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
219     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
220 
221 
222     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
223     CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
224 
225     // create rpsblast options hanlde and set composition based statistics
226     CRef<CBlastRPSOptionsHandle> rps_opts(new CBlastRPSOptionsHandle);
227     rps_opts->SetCompositionBasedStats(true);
228     rps_opts->SetEvalueThreshold(m_OptHandle->GetDomainInclusionThreshold());
229     rps_opts->SetFilterString("F");
230 
231     CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
232                            m_OptHandle, rps_opts);
233 
234     CSearchResultSet results(*deltablast.Run());
235 
236 
237     // check intermediate results: alignments with domains
238     CRef<CSearchResultSet> domain_results = deltablast.GetDomainResults();
239     BOOST_REQUIRE((*domain_results)[0].GetErrors().empty());
240 
241     const int kNumExpectedMatchingDomains = 3;
242     CConstRef<CSeq_align_set> domain_sas = (*domain_results)[0].GetSeqAlign();
243 
244     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
245                         x_CountNumberUniqueIds(domain_sas));
246 
247     const size_t kNumExpectedDomainHSPs = 3;
248     qa::TSeqAlignSet expected_domain_results(kNumExpectedDomainHSPs);
249 
250     // HSP # 1
251     {
252         expected_domain_results[0].score = 742;
253         expected_domain_results[0].evalue = 5.21679e-102;
254         expected_domain_results[0].bit_score = 2.89507e+02;
255         expected_domain_results[0].num_ident = 111;
256         int starts[] = {1, 139, 80, -1, 81, 218};
257         int lengths[] = {79, 1, 151};
258         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
259              back_inserter(expected_domain_results[0].starts));
260         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
261              back_inserter(expected_domain_results[0].lengths));
262     }
263 
264     // HSP # 2
265     {
266         expected_domain_results[1].score = 713;
267         expected_domain_results[1].evalue = 1.33296e-97;
268         expected_domain_results[1].bit_score = 2.78295e+02;
269         expected_domain_results[1].num_ident = 107;
270         int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
271                         213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
272                         202, 329};
273         int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
274         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
275              back_inserter(expected_domain_results[1].starts));
276         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
277              back_inserter(expected_domain_results[1].lengths));
278     }
279 
280     // HSP # 3
281     {
282         expected_domain_results[2].score = 673;
283         expected_domain_results[2].evalue = 1.63233e-91;
284         expected_domain_results[2].bit_score = 2.62959e+02;
285         expected_domain_results[2].num_ident = 106;
286         int starts[] = {0, 137, 115, -1, 117, 252 };
287         int lengths[] = {115, 2, 112};
288         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
289              back_inserter(expected_domain_results[2].starts));
290         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
291              back_inserter(expected_domain_results[2].lengths));
292     }
293 
294 
295     qa::TSeqAlignSet actual_domain_results;
296     qa::SeqAlignSetConvert(*domain_sas, actual_domain_results);
297 
298     qa::CSeqAlignCmpOpts opts;
299     qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
300     string errors;
301     bool identical_results = cmp_d.Run(&errors);
302 
303     BOOST_REQUIRE_MESSAGE(identical_results, errors);
304 
305     // check PSSM
306 
307     CRef<CPssmWithParameters> pssm = deltablast.GetPssm();
308     BOOST_REQUIRE(pssm->HasQuery());
309     BOOST_REQUIRE(pssm->GetQuery().GetSeq().IsSetInst());
310     BOOST_REQUIRE_EQUAL(pssm->GetQuery().GetSeq().GetFirstId()->GetGi(),
311                         GI_CONST(129295));
312 
313     // check alignments from sequence search results
314 
315     BOOST_REQUIRE(results[0].GetErrors().empty());
316 
317     BOOST_REQUIRE_EQUAL(
318                 results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
319                 GI_CONST(129295));
320 
321     const int kNumExpectedMatchingSeqs = 8;
322     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
323 
324     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
325 
326     const size_t kNumExpectedHSPs = 9;
327     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
328 
329     // HSP # 1
330     {
331         expected_results[0].score = 861;
332         expected_results[0].evalue = 2.80386e-109;
333         expected_results[0].bit_score = 3.35656e+02;
334         expected_results[0].num_ident = 101;
335         int starts[] = {0, 941, -1, 1094, 153, 1095};
336         int lengths[] = {153, 1, 79};
337         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
338              back_inserter(expected_results[0].starts));
339         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
340              back_inserter(expected_results[0].lengths));
341     }
342 
343     // HSP # 2
344     {
345         expected_results[1].score = 633;
346         expected_results[1].evalue = 3.86624e-77;
347         expected_results[1].bit_score = 2.47830e+02;
348         expected_results[1].num_ident = 73;
349         int starts[] = {0, 154, -1, 307, 153, 308};
350         int lengths[] = {153, 1, 25};
351         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
352              back_inserter(expected_results[1].starts));
353         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
354              back_inserter(expected_results[1].lengths));
355     }
356 
357     // HSP # 3
358     {
359         expected_results[2].score = 645;
360         expected_results[2].evalue = 6.62145e-84;
361         expected_results[2].bit_score = 2.52452e+02;
362         expected_results[2].num_ident = 80;
363         int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
364                         119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
365                         367, 183, 371};
366         int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 21, 4, 49};
367         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
368              back_inserter(expected_results[2].starts));
369         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
370              back_inserter(expected_results[2].lengths));
371     }
372 
373     // HSP #4
374     {
375         expected_results[3].score = 53;
376         expected_results[3].evalue = 3.83197e+00;
377         expected_results[3].bit_score = 244103049e-7;
378         expected_results[3].num_ident = 7;
379         int starts[] = {127, 104, 132, -1, 134, 109};
380         int lengths[] = {5, 2, 15};
381         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
382              back_inserter(expected_results[3].starts));
383         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
384              back_inserter(expected_results[3].lengths));
385     }
386 
387 
388     // HSP # 5
389     {
390         expected_results[4].score = 51;
391         expected_results[4].evalue = 5.55808;
392         expected_results[4].bit_score = 23.6440;
393         expected_results[4].num_ident = 5;
394         int starts[] = {137, 20, 151, -1, 156, 34};
395         int lengths[] = {14, 5, 17};
396         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
397              back_inserter(expected_results[4].starts));
398         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
399              back_inserter(expected_results[4].lengths));
400     }
401 
402     // HSP # 6
403     {
404         expected_results[5].score = 51;
405         expected_results[5].evalue = 6.14178;
406         expected_results[5].bit_score = 23.6440;
407         expected_results[5].num_ident = 8;
408         int starts[] = {153, 102, -1, 122, 173, 127};
409         int lengths[] = {20, 5, 12};
410         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
411              back_inserter(expected_results[5].starts));
412         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
413              back_inserter(expected_results[5].lengths));
414     }
415 
416     // HSP # 7
417     {
418         expected_results[6].score = 51;
419         expected_results[6].evalue = 6.33109;
420         expected_results[6].bit_score = 23.6440;
421         expected_results[6].num_ident = 7;
422         int starts[] = {172, 305, 179, -1, 182, 312};
423         int lengths[] = {7, 3, 33};
424         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
425              back_inserter(expected_results[6].starts));
426         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
427              back_inserter(expected_results[6].lengths));
428     }
429 
430 
431     // HSP # 8
432     {
433         expected_results[7].score = 48;
434         expected_results[7].evalue = 7.91913;
435         expected_results[7].bit_score = 22.4884;
436         expected_results[7].num_ident = 5;
437         int starts[] = {155, 78};
438         int lengths[] = {18};
439         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
440              back_inserter(expected_results[7].starts));
441         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
442              back_inserter(expected_results[7].lengths));
443     }
444 
445 
446     // HSP # 9
447     {
448         expected_results[8].score = 49;
449         expected_results[8].evalue = 8.65824;
450         expected_results[8].bit_score = 22.8736;
451         expected_results[8].num_ident = 4;
452         int starts[] = {175, 11};
453         int lengths[] = {14};
454         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
455              back_inserter(expected_results[8].starts));
456         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
457              back_inserter(expected_results[8].lengths));
458     }
459 
460 
461     qa::TSeqAlignSet actual_results;
462     qa::SeqAlignSetConvert(*sas, actual_results);
463 
464     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
465     identical_results = cmp.Run(&errors);
466 
467     BOOST_REQUIRE_MESSAGE(identical_results, errors);
468 }
469 
470 
BOOST_AUTO_TEST_CASE(TestSingleQuery_NoCBS)471 BOOST_AUTO_TEST_CASE(TestSingleQuery_NoCBS)
472 {
473     m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
474     // no cbs for rpsblast
475     CRef<CBlastRPSOptionsHandle> rps_opts(new CBlastRPSOptionsHandle());
476     rps_opts->SetEvalueThreshold(m_OptHandle->GetDomainInclusionThreshold());
477     rps_opts->SetCompositionBasedStats(false);
478 
479     m_OptHandle->SetEvalueThreshold(5);
480 
481     TSeqLocVector query;
482     query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
483     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
484 
485     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
486 
487     // use CDD database that does not have freq ratios file,
488     // for no CBS option rpsblast does not need the '.freq' file
489     m_DomainDb.Reset(new CSearchDatabase("data/deltatest_nocbs",
490                                          CSearchDatabase::eBlastDbIsProtein));
491     CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
492     CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
493                            m_OptHandle, rps_opts);
494 
495     CSearchResultSet results(*deltablast.Run());
496 
497 
498     // check intermediate results: alignments with domains
499     CRef<CSearchResultSet> domain_results = deltablast.GetDomainResults();
500     BOOST_REQUIRE((*domain_results)[0].GetErrors().empty());
501 
502     const int kNumExpectedMatchingDomains = 3;
503     CConstRef<CSeq_align_set> domain_sas = (*domain_results)[0].GetSeqAlign();
504 
505     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
506                         x_CountNumberUniqueIds(domain_sas));
507 
508     const size_t kNumExpectedDomainHSPs = 3;
509     qa::TSeqAlignSet expected_domain_results(kNumExpectedDomainHSPs);
510 
511     // HSP # 1
512     {
513         expected_domain_results[0].score = 728;
514         expected_domain_results[0].evalue = 6.61511e-100;
515         expected_domain_results[0].bit_score = 2841082571e-7;
516         expected_domain_results[0].num_ident = 111;
517         int starts[] = {1, 139, 80, -1, 81, 218, 162, -1, 163, 299};
518         int lengths[] = {79, 1, 81, 1, 69};
519         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
520              back_inserter(expected_domain_results[0].starts));
521         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
522              back_inserter(expected_domain_results[0].lengths));
523     }
524 
525     // HSP # 2
526     {
527         expected_domain_results[1].score = 698;
528         expected_domain_results[1].evalue = 2.17510e-95;
529         expected_domain_results[1].bit_score = 2725169055e-7;
530         expected_domain_results[1].num_ident = 107;
531         int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
532                         213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
533                         202, 329};
534         int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
535         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
536              back_inserter(expected_domain_results[1].starts));
537         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
538              back_inserter(expected_domain_results[1].lengths));
539     }
540 
541     // HSP # 3
542     {
543         expected_domain_results[2].score = 661;
544         expected_domain_results[2].evalue = 9.15785e-90;
545         expected_domain_results[2].bit_score = 2583366987e-7;
546         expected_domain_results[2].num_ident = 106;
547         int starts[] = {0, 137, 115, -1, 117, 252 };
548         int lengths[] = {115, 2, 112};
549         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
550              back_inserter(expected_domain_results[2].starts));
551         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
552              back_inserter(expected_domain_results[2].lengths));
553     }
554 
555 
556     qa::TSeqAlignSet actual_domain_results;
557     qa::SeqAlignSetConvert(*domain_sas, actual_domain_results);
558 
559     qa::CSeqAlignCmpOpts opts;
560     qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
561     string errors;
562     bool identical_results = cmp_d.Run(&errors);
563 
564     BOOST_REQUIRE_MESSAGE(identical_results, errors);
565 
566     // check PSSM
567 
568     CRef<CPssmWithParameters> pssm = deltablast.GetPssm();
569     BOOST_REQUIRE(pssm->HasQuery());
570     BOOST_REQUIRE(pssm->GetQuery().GetSeq().IsSetInst());
571     BOOST_REQUIRE_EQUAL(pssm->GetQuery().GetSeq().GetFirstId()->GetGi(),
572                         GI_CONST(129295));
573 
574     // check alignments from sequence search results
575 
576     BOOST_REQUIRE(results[0].GetErrors().empty());
577 
578     BOOST_REQUIRE_EQUAL(
579                 results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
580                 GI_CONST(129295));
581 
582     const int kNumExpectedMatchingSeqs = 5;
583     CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
584 
585     BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
586 
587     const size_t kNumExpectedHSPs = 6;
588     qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
589 
590     // HSP # 1
591     {
592         expected_results[0].score = 876;
593         expected_results[0].evalue = 2.04885e-111;
594         expected_results[0].bit_score = 3414303038e-7;
595         expected_results[0].num_ident = 101;
596         int starts[] = {0, 941, -1, 1094, 153, 1095};
597         int lengths[] = {153, 1, 79};
598         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
599              back_inserter(expected_results[0].starts));
600         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
601              back_inserter(expected_results[0].lengths));
602     }
603 
604 
605     // HSP # 2
606     {
607         expected_results[1].score = 642;
608         expected_results[1].evalue = 2.54740e-78;
609         expected_results[1].bit_score = 2512936031e-7;
610         expected_results[1].num_ident = 73;
611         int starts[] = {0, 154, -1, 307, 153, 308};
612         int lengths[] = {153, 1, 25};
613         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
614              back_inserter(expected_results[1].starts));
615         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
616              back_inserter(expected_results[1].lengths));
617     }
618 
619     // HSP # 3
620     {
621         expected_results[2].score = 736;
622         expected_results[2].evalue = 1.10324e-97;
623         expected_results[2].bit_score = 2875023632e-7;
624         expected_results[2].num_ident = 83;
625         int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
626                         119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
627                         374, 190, 378};
628         int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 28, 4, 42};
629         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
630              back_inserter(expected_results[2].starts));
631         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
632              back_inserter(expected_results[2].lengths));
633     }
634 
635     // HSP # 4
636     {
637         expected_results[3].score = 53;
638         expected_results[3].evalue = 3.45771;
639         expected_results[3].bit_score = 2441105291e-8;
640         expected_results[3].num_ident = 4;
641         int starts[] = {139, 22, 151, -1, 156, 34};
642         int lengths[] = {12, 5, 17};
643         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
644              back_inserter(expected_results[3].starts));
645         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
646              back_inserter(expected_results[3].lengths));
647     }
648 
649 
650     // HSP # 5
651     {
652         expected_results[4].score = 52;
653         expected_results[4].evalue = 4.61874;
654         expected_results[4].bit_score = 2402585333e-8;
655         expected_results[4].num_ident = 7;
656         int starts[] = {172, 305, 179, -1, 182, 312};
657         int lengths[] = {7, 3, 33};
658         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
659              back_inserter(expected_results[4].starts));
660         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
661              back_inserter(expected_results[4].lengths));
662     }
663 
664     // HSP # 6
665     {
666         expected_results[5].score = 52;
667         expected_results[5].evalue = 4.62283;
668         expected_results[5].bit_score = 2402585333e-8;
669         expected_results[5].num_ident = 7;
670         int starts[] = {127, 104, 132, -1, 134, 109};
671         int lengths[] = {5, 2, 15};
672         copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
673              back_inserter(expected_results[5].starts));
674         copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
675              back_inserter(expected_results[5].lengths));
676     }
677 
678     qa::TSeqAlignSet actual_results;
679     qa::SeqAlignSetConvert(*sas, actual_results);
680 
681     qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
682     identical_results = cmp.Run(&errors);
683 
684     BOOST_REQUIRE_MESSAGE(identical_results, errors);
685 }
686 
687 
BOOST_AUTO_TEST_CASE(TestMultipleQueries)688 BOOST_AUTO_TEST_CASE(TestMultipleQueries)
689 {
690     TSeqLocVector queries;
691     ITERATE (vector< CRef<CSeq_loc> >, it, m_Seq_locs) {
692         queries.push_back(SSeqLoc(**it, *m_Scope));
693     }
694     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
695 
696     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
697     CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
698     CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
699                            m_OptHandle);
700 
701     CSearchResultSet results(*deltablast.Run());
702 
703     // check results
704     BOOST_REQUIRE(results[0].GetErrors().empty());
705     BOOST_REQUIRE(results[1].GetErrors().empty());
706 
707     // verify query id in Seq_aligns
708     BOOST_REQUIRE_EQUAL(
709                  results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
710                  GI_CONST(129295));
711 
712     BOOST_REQUIRE_EQUAL(
713      results[1].GetSeqAlign()->Get().front()->GetSeq_id(0).GetPir().GetName(),
714      "H70430");
715 
716 
717     // verify query id in PSSMs
718     BOOST_REQUIRE_EQUAL(
719            deltablast.GetPssm(0)->GetQuery().GetSeq().GetFirstId()->GetGi(),
720            GI_CONST(129295));
721 
722     BOOST_REQUIRE_EQUAL(
723      deltablast.GetPssm(1)->GetQuery().GetSeq().GetFirstId()->GetPir().GetName(),
724      "H70430");
725 }
726 
727 // Verify that null inputs result in exceptions
BOOST_AUTO_TEST_CASE(TestNullQuery)728 BOOST_AUTO_TEST_CASE(TestNullQuery)
729 {
730     CRef<IQueryFactory> query_factory;
731     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
732     CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
733     BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
734                                                domain_dbadapter, m_OptHandle),
735                         CBlastException);
736 }
737 
738 
BOOST_AUTO_TEST_CASE(TestNullOptions)739 BOOST_AUTO_TEST_CASE(TestNullOptions)
740 {
741     m_OptHandle.Reset();
742     TSeqLocVector query;
743     query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
744     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
745     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
746     CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
747     BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
748                                                domain_dbadapter, m_OptHandle),
749                         CBlastException);
750 }
751 
752 
BOOST_AUTO_TEST_CASE(TestNullDatabase)753 BOOST_AUTO_TEST_CASE(TestNullDatabase)
754 {
755     TSeqLocVector query;
756     query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
757     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
758     CRef<CLocalDbAdapter> dbadapter;
759     CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
760     BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
761                                                domain_dbadapter, m_OptHandle),
762                         CBlastException);
763 }
764 
765 
BOOST_AUTO_TEST_CASE(TestNullDomainDatabase)766 BOOST_AUTO_TEST_CASE(TestNullDomainDatabase)
767 {
768     TSeqLocVector query;
769     query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
770     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
771     CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
772     CRef<CLocalDbAdapter> domain_dbadapter;
773     BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
774                                                domain_dbadapter, m_OptHandle),
775                         CBlastException);
776 }
777 
778 
779 BOOST_AUTO_TEST_SUITE_END()
780 
781