1 /* $Id: delta_unit_test.cpp 531968 2017-03-30 17:34:38Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho, Greg Boratyn
27 *
28 */
29
30 /** @file delta_unit_test.cpp
31 * Unit test module for deltablast.
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35
36 // Serial library includes
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39
40 // Object includes
41 #include <objects/seqloc/Seq_id.hpp>
42 #include <objects/seqalign/Score.hpp>
43 #include <objects/seqalign/Dense_seg.hpp>
44 #include <objects/seqalign/Seq_align.hpp>
45 #include <objects/seqalign/Seq_align_set.hpp>
46
47 #include <objmgr/scope.hpp>
48
49 // PSSM includes
50 #include <objects/scoremat/Pssm.hpp>
51 #include <objects/scoremat/PssmWithParameters.hpp>
52
53 // BLAST includes
54 #include <algo/blast/api/deltablast.hpp>
55 #include <algo/blast/api/objmgr_query_data.hpp>
56 #include <algo/blast/api/deltablast_options.hpp>
57 #include <algo/blast/api/uniform_search.hpp>
58 #include <algo/blast/api/local_db_adapter.hpp>
59 #include <algo/blast/api/blast_rps_options.hpp>
60
61 // SeqAlign comparison includes
62 #include "seqalign_cmp.hpp"
63 #include "seqalign_set_convert.hpp"
64
65 // Unit test auxiliary includes
66 #include "blast_test_util.hpp"
67 #include "test_objmgr.hpp"
68
69 /// Calculate the size of a static array
70 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
71
72 using namespace std;
73 using namespace ncbi;
74 using namespace ncbi::objects;
75 using namespace ncbi::blast;
76
77 struct CDeltaBlastTestFixture {
78
79 CRef<CDeltaBlastOptionsHandle> m_OptHandle;
80 CRef<CSearchDatabase> m_SearchDb;
81 CRef<CSearchDatabase> m_DomainDb;
82
83 // This is needed only to get sequence for gi 129295 from asn1 file
84 CRef<CPssmWithParameters> m_Pssm;
85
86 /// Contains a single Bioseq
87 CRef<CSeq_entry> m_SeqEntry;
88
89 /// Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295
90 CRef<CSeq_entry> m_SeqSet;
91
92 /// Scope
93 CRef<CScope> m_Scope;
94
95 /// Seq-locs for creating instances of CObjMgr_QueryFactory
96 vector< CRef<CSeq_loc> > m_Seq_locs;
97
x_ReadSeqEntriesFromFileCDeltaBlastTestFixture98 void x_ReadSeqEntriesFromFile() {
99
100 const string kPssmFile("data/pssm_freq_ratios.asn");
101 m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
102 BOOST_REQUIRE(m_Pssm->GetPssm().CanGetQuery());
103 m_SeqEntry.Reset(&m_Pssm->SetPssm().SetQuery());
104 BOOST_REQUIRE(m_SeqEntry->IsSeq());
105
106 CRef<CSeq_id> id(
107 const_cast<CSeq_id*>(m_SeqEntry->SetSeq().GetFirstId()));
108 CRef<CSeq_loc> seqloc(new CSeq_loc(CSeq_loc::e_Whole));
109 seqloc->SetId(*id);
110 m_Seq_locs.push_back(seqloc);
111
112
113 const string kSeqEntryFile("data/7450545.seqentry.asn");
114 CRef<CSeq_entry> seq_entry =
115 TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
116
117 m_SeqSet.Reset(new CSeq_entry);
118 m_SeqSet->SetSet().SetSeq_set().push_back(m_SeqEntry);
119 m_SeqSet->SetSet().SetSeq_set().push_back(seq_entry);
120 BOOST_REQUIRE(m_SeqEntry->IsSeq());
121
122 id.Reset(const_cast<CSeq_id*>(seq_entry->SetSeq().GetFirstId()));
123 seqloc.Reset(new CSeq_loc(CSeq_loc::e_Whole));
124 seqloc->SetId(*id);
125 m_Seq_locs.push_back(seqloc);
126 }
127
x_InitScopeCDeltaBlastTestFixture128 void x_InitScope() {
129 m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));
130 m_Scope->AddTopLevelSeqEntry(*m_SeqSet);
131 }
132
CDeltaBlastTestFixtureCDeltaBlastTestFixture133 CDeltaBlastTestFixture() {
134 m_OptHandle.Reset(new CDeltaBlastOptionsHandle);
135 BOOST_REQUIRE_EQUAL(eCompositionBasedStats,
136 m_OptHandle->GetCompositionBasedStats());
137 m_SearchDb.Reset(new CSearchDatabase("data/seqp",
138 CSearchDatabase::eBlastDbIsProtein));
139 m_DomainDb.Reset(new CSearchDatabase("data/deltatest",
140 CSearchDatabase::eBlastDbIsProtein));
141
142 x_ReadSeqEntriesFromFile();
143 x_InitScope();
144 }
145
~CDeltaBlastTestFixtureCDeltaBlastTestFixture146 ~CDeltaBlastTestFixture() {
147 m_OptHandle.Reset();
148 m_SearchDb.Reset();
149 m_DomainDb.Reset();
150 m_SeqEntry.Reset();
151 m_SeqSet.Reset();
152 m_Seq_locs.clear();
153 m_Scope.Reset();
154 }
155
x_CountNumberUniqueIdsCDeltaBlastTestFixture156 int x_CountNumberUniqueIds(CConstRef<CSeq_align_set> sas)
157 {
158 int num_ids = 0;
159 TGi last_id = INVALID_GI;
160 string last_acc;
161 CTextseq_id::TVersion last_ver = -1;
162 ITERATE(CSeq_align_set::Tdata, itr, sas->Get()) {
163 const CSeq_id& seqid = (*itr)->GetSeq_id(1);
164 if ( !CSeq_id::PreferAccessionOverGi() ) {
165 BOOST_REQUIRE(seqid.IsGi() || seqid.IsGeneral());
166 }
167 else {
168 BOOST_REQUIRE(seqid.IsOther() ||
169 seqid.IsGeneral() ||
170 seqid.IsGenbank());
171 }
172
173 const CTextseq_id* txt_id = seqid.GetTextseq_Id();
174 if (CSeq_id::PreferAccessionOverGi() && txt_id) {
175 string acc = txt_id->GetAccession();
176 CTextseq_id::TVersion ver = txt_id->IsSetVersion() ?
177 txt_id->GetVersion() : -1;
178 if (acc != last_acc || ver != last_ver) {
179 num_ids++;
180 last_acc = acc;
181 last_ver = ver;
182 }
183 last_id = INVALID_GI;
184 }
185 else if (!CSeq_id::PreferAccessionOverGi() && seqid.IsGi()) {
186 TGi new_gi = seqid.GetGi();
187
188 if (new_gi != last_id) {
189 num_ids++;
190 last_id = new_gi;
191 }
192 last_acc.clear();
193 last_ver = -1;
194 }
195 else {
196 BOOST_REQUIRE(seqid.GetGeneral().IsSetTag());
197 int new_tag = seqid.GetGeneral().GetTag().GetId();
198
199 if (new_tag != GI_TO(int, last_id)) {
200 num_ids++;
201 last_id = GI_FROM(int, new_tag);
202 }
203 last_acc.clear();
204 last_ver = -1;
205 }
206
207 }
208 return num_ids;
209 }
210 };
211
212
BOOST_FIXTURE_TEST_SUITE(deltablast,CDeltaBlastTestFixture)213 BOOST_FIXTURE_TEST_SUITE(deltablast, CDeltaBlastTestFixture)
214
215 BOOST_AUTO_TEST_CASE(TestSingleQuery_CBS)
216 {
217 TSeqLocVector query;
218 query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
219 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
220
221
222 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
223 CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
224
225 // create rpsblast options hanlde and set composition based statistics
226 CRef<CBlastRPSOptionsHandle> rps_opts(new CBlastRPSOptionsHandle);
227 rps_opts->SetCompositionBasedStats(true);
228 rps_opts->SetEvalueThreshold(m_OptHandle->GetDomainInclusionThreshold());
229 rps_opts->SetFilterString("F");
230
231 CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
232 m_OptHandle, rps_opts);
233
234 CSearchResultSet results(*deltablast.Run());
235
236
237 // check intermediate results: alignments with domains
238 CRef<CSearchResultSet> domain_results = deltablast.GetDomainResults();
239 BOOST_REQUIRE((*domain_results)[0].GetErrors().empty());
240
241 const int kNumExpectedMatchingDomains = 3;
242 CConstRef<CSeq_align_set> domain_sas = (*domain_results)[0].GetSeqAlign();
243
244 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
245 x_CountNumberUniqueIds(domain_sas));
246
247 const size_t kNumExpectedDomainHSPs = 3;
248 qa::TSeqAlignSet expected_domain_results(kNumExpectedDomainHSPs);
249
250 // HSP # 1
251 {
252 expected_domain_results[0].score = 742;
253 expected_domain_results[0].evalue = 5.21679e-102;
254 expected_domain_results[0].bit_score = 2.89507e+02;
255 expected_domain_results[0].num_ident = 111;
256 int starts[] = {1, 139, 80, -1, 81, 218};
257 int lengths[] = {79, 1, 151};
258 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
259 back_inserter(expected_domain_results[0].starts));
260 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
261 back_inserter(expected_domain_results[0].lengths));
262 }
263
264 // HSP # 2
265 {
266 expected_domain_results[1].score = 713;
267 expected_domain_results[1].evalue = 1.33296e-97;
268 expected_domain_results[1].bit_score = 2.78295e+02;
269 expected_domain_results[1].num_ident = 107;
270 int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
271 213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
272 202, 329};
273 int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
274 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
275 back_inserter(expected_domain_results[1].starts));
276 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
277 back_inserter(expected_domain_results[1].lengths));
278 }
279
280 // HSP # 3
281 {
282 expected_domain_results[2].score = 673;
283 expected_domain_results[2].evalue = 1.63233e-91;
284 expected_domain_results[2].bit_score = 2.62959e+02;
285 expected_domain_results[2].num_ident = 106;
286 int starts[] = {0, 137, 115, -1, 117, 252 };
287 int lengths[] = {115, 2, 112};
288 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
289 back_inserter(expected_domain_results[2].starts));
290 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
291 back_inserter(expected_domain_results[2].lengths));
292 }
293
294
295 qa::TSeqAlignSet actual_domain_results;
296 qa::SeqAlignSetConvert(*domain_sas, actual_domain_results);
297
298 qa::CSeqAlignCmpOpts opts;
299 qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
300 string errors;
301 bool identical_results = cmp_d.Run(&errors);
302
303 BOOST_REQUIRE_MESSAGE(identical_results, errors);
304
305 // check PSSM
306
307 CRef<CPssmWithParameters> pssm = deltablast.GetPssm();
308 BOOST_REQUIRE(pssm->HasQuery());
309 BOOST_REQUIRE(pssm->GetQuery().GetSeq().IsSetInst());
310 BOOST_REQUIRE_EQUAL(pssm->GetQuery().GetSeq().GetFirstId()->GetGi(),
311 GI_CONST(129295));
312
313 // check alignments from sequence search results
314
315 BOOST_REQUIRE(results[0].GetErrors().empty());
316
317 BOOST_REQUIRE_EQUAL(
318 results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
319 GI_CONST(129295));
320
321 const int kNumExpectedMatchingSeqs = 8;
322 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
323
324 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
325
326 const size_t kNumExpectedHSPs = 9;
327 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
328
329 // HSP # 1
330 {
331 expected_results[0].score = 861;
332 expected_results[0].evalue = 2.80386e-109;
333 expected_results[0].bit_score = 3.35656e+02;
334 expected_results[0].num_ident = 101;
335 int starts[] = {0, 941, -1, 1094, 153, 1095};
336 int lengths[] = {153, 1, 79};
337 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
338 back_inserter(expected_results[0].starts));
339 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
340 back_inserter(expected_results[0].lengths));
341 }
342
343 // HSP # 2
344 {
345 expected_results[1].score = 633;
346 expected_results[1].evalue = 3.86624e-77;
347 expected_results[1].bit_score = 2.47830e+02;
348 expected_results[1].num_ident = 73;
349 int starts[] = {0, 154, -1, 307, 153, 308};
350 int lengths[] = {153, 1, 25};
351 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
352 back_inserter(expected_results[1].starts));
353 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
354 back_inserter(expected_results[1].lengths));
355 }
356
357 // HSP # 3
358 {
359 expected_results[2].score = 645;
360 expected_results[2].evalue = 6.62145e-84;
361 expected_results[2].bit_score = 2.52452e+02;
362 expected_results[2].num_ident = 80;
363 int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
364 119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
365 367, 183, 371};
366 int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 21, 4, 49};
367 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
368 back_inserter(expected_results[2].starts));
369 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
370 back_inserter(expected_results[2].lengths));
371 }
372
373 // HSP #4
374 {
375 expected_results[3].score = 53;
376 expected_results[3].evalue = 3.83197e+00;
377 expected_results[3].bit_score = 244103049e-7;
378 expected_results[3].num_ident = 7;
379 int starts[] = {127, 104, 132, -1, 134, 109};
380 int lengths[] = {5, 2, 15};
381 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
382 back_inserter(expected_results[3].starts));
383 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
384 back_inserter(expected_results[3].lengths));
385 }
386
387
388 // HSP # 5
389 {
390 expected_results[4].score = 51;
391 expected_results[4].evalue = 5.55808;
392 expected_results[4].bit_score = 23.6440;
393 expected_results[4].num_ident = 5;
394 int starts[] = {137, 20, 151, -1, 156, 34};
395 int lengths[] = {14, 5, 17};
396 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
397 back_inserter(expected_results[4].starts));
398 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
399 back_inserter(expected_results[4].lengths));
400 }
401
402 // HSP # 6
403 {
404 expected_results[5].score = 51;
405 expected_results[5].evalue = 6.14178;
406 expected_results[5].bit_score = 23.6440;
407 expected_results[5].num_ident = 8;
408 int starts[] = {153, 102, -1, 122, 173, 127};
409 int lengths[] = {20, 5, 12};
410 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
411 back_inserter(expected_results[5].starts));
412 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
413 back_inserter(expected_results[5].lengths));
414 }
415
416 // HSP # 7
417 {
418 expected_results[6].score = 51;
419 expected_results[6].evalue = 6.33109;
420 expected_results[6].bit_score = 23.6440;
421 expected_results[6].num_ident = 7;
422 int starts[] = {172, 305, 179, -1, 182, 312};
423 int lengths[] = {7, 3, 33};
424 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
425 back_inserter(expected_results[6].starts));
426 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
427 back_inserter(expected_results[6].lengths));
428 }
429
430
431 // HSP # 8
432 {
433 expected_results[7].score = 48;
434 expected_results[7].evalue = 7.91913;
435 expected_results[7].bit_score = 22.4884;
436 expected_results[7].num_ident = 5;
437 int starts[] = {155, 78};
438 int lengths[] = {18};
439 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
440 back_inserter(expected_results[7].starts));
441 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
442 back_inserter(expected_results[7].lengths));
443 }
444
445
446 // HSP # 9
447 {
448 expected_results[8].score = 49;
449 expected_results[8].evalue = 8.65824;
450 expected_results[8].bit_score = 22.8736;
451 expected_results[8].num_ident = 4;
452 int starts[] = {175, 11};
453 int lengths[] = {14};
454 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
455 back_inserter(expected_results[8].starts));
456 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
457 back_inserter(expected_results[8].lengths));
458 }
459
460
461 qa::TSeqAlignSet actual_results;
462 qa::SeqAlignSetConvert(*sas, actual_results);
463
464 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
465 identical_results = cmp.Run(&errors);
466
467 BOOST_REQUIRE_MESSAGE(identical_results, errors);
468 }
469
470
BOOST_AUTO_TEST_CASE(TestSingleQuery_NoCBS)471 BOOST_AUTO_TEST_CASE(TestSingleQuery_NoCBS)
472 {
473 m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
474 // no cbs for rpsblast
475 CRef<CBlastRPSOptionsHandle> rps_opts(new CBlastRPSOptionsHandle());
476 rps_opts->SetEvalueThreshold(m_OptHandle->GetDomainInclusionThreshold());
477 rps_opts->SetCompositionBasedStats(false);
478
479 m_OptHandle->SetEvalueThreshold(5);
480
481 TSeqLocVector query;
482 query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
483 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
484
485 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
486
487 // use CDD database that does not have freq ratios file,
488 // for no CBS option rpsblast does not need the '.freq' file
489 m_DomainDb.Reset(new CSearchDatabase("data/deltatest_nocbs",
490 CSearchDatabase::eBlastDbIsProtein));
491 CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
492 CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
493 m_OptHandle, rps_opts);
494
495 CSearchResultSet results(*deltablast.Run());
496
497
498 // check intermediate results: alignments with domains
499 CRef<CSearchResultSet> domain_results = deltablast.GetDomainResults();
500 BOOST_REQUIRE((*domain_results)[0].GetErrors().empty());
501
502 const int kNumExpectedMatchingDomains = 3;
503 CConstRef<CSeq_align_set> domain_sas = (*domain_results)[0].GetSeqAlign();
504
505 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
506 x_CountNumberUniqueIds(domain_sas));
507
508 const size_t kNumExpectedDomainHSPs = 3;
509 qa::TSeqAlignSet expected_domain_results(kNumExpectedDomainHSPs);
510
511 // HSP # 1
512 {
513 expected_domain_results[0].score = 728;
514 expected_domain_results[0].evalue = 6.61511e-100;
515 expected_domain_results[0].bit_score = 2841082571e-7;
516 expected_domain_results[0].num_ident = 111;
517 int starts[] = {1, 139, 80, -1, 81, 218, 162, -1, 163, 299};
518 int lengths[] = {79, 1, 81, 1, 69};
519 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
520 back_inserter(expected_domain_results[0].starts));
521 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
522 back_inserter(expected_domain_results[0].lengths));
523 }
524
525 // HSP # 2
526 {
527 expected_domain_results[1].score = 698;
528 expected_domain_results[1].evalue = 2.17510e-95;
529 expected_domain_results[1].bit_score = 2725169055e-7;
530 expected_domain_results[1].num_ident = 107;
531 int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
532 213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
533 202, 329};
534 int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
535 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
536 back_inserter(expected_domain_results[1].starts));
537 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
538 back_inserter(expected_domain_results[1].lengths));
539 }
540
541 // HSP # 3
542 {
543 expected_domain_results[2].score = 661;
544 expected_domain_results[2].evalue = 9.15785e-90;
545 expected_domain_results[2].bit_score = 2583366987e-7;
546 expected_domain_results[2].num_ident = 106;
547 int starts[] = {0, 137, 115, -1, 117, 252 };
548 int lengths[] = {115, 2, 112};
549 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
550 back_inserter(expected_domain_results[2].starts));
551 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
552 back_inserter(expected_domain_results[2].lengths));
553 }
554
555
556 qa::TSeqAlignSet actual_domain_results;
557 qa::SeqAlignSetConvert(*domain_sas, actual_domain_results);
558
559 qa::CSeqAlignCmpOpts opts;
560 qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
561 string errors;
562 bool identical_results = cmp_d.Run(&errors);
563
564 BOOST_REQUIRE_MESSAGE(identical_results, errors);
565
566 // check PSSM
567
568 CRef<CPssmWithParameters> pssm = deltablast.GetPssm();
569 BOOST_REQUIRE(pssm->HasQuery());
570 BOOST_REQUIRE(pssm->GetQuery().GetSeq().IsSetInst());
571 BOOST_REQUIRE_EQUAL(pssm->GetQuery().GetSeq().GetFirstId()->GetGi(),
572 GI_CONST(129295));
573
574 // check alignments from sequence search results
575
576 BOOST_REQUIRE(results[0].GetErrors().empty());
577
578 BOOST_REQUIRE_EQUAL(
579 results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
580 GI_CONST(129295));
581
582 const int kNumExpectedMatchingSeqs = 5;
583 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
584
585 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
586
587 const size_t kNumExpectedHSPs = 6;
588 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
589
590 // HSP # 1
591 {
592 expected_results[0].score = 876;
593 expected_results[0].evalue = 2.04885e-111;
594 expected_results[0].bit_score = 3414303038e-7;
595 expected_results[0].num_ident = 101;
596 int starts[] = {0, 941, -1, 1094, 153, 1095};
597 int lengths[] = {153, 1, 79};
598 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
599 back_inserter(expected_results[0].starts));
600 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
601 back_inserter(expected_results[0].lengths));
602 }
603
604
605 // HSP # 2
606 {
607 expected_results[1].score = 642;
608 expected_results[1].evalue = 2.54740e-78;
609 expected_results[1].bit_score = 2512936031e-7;
610 expected_results[1].num_ident = 73;
611 int starts[] = {0, 154, -1, 307, 153, 308};
612 int lengths[] = {153, 1, 25};
613 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
614 back_inserter(expected_results[1].starts));
615 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
616 back_inserter(expected_results[1].lengths));
617 }
618
619 // HSP # 3
620 {
621 expected_results[2].score = 736;
622 expected_results[2].evalue = 1.10324e-97;
623 expected_results[2].bit_score = 2875023632e-7;
624 expected_results[2].num_ident = 83;
625 int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
626 119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
627 374, 190, 378};
628 int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 28, 4, 42};
629 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
630 back_inserter(expected_results[2].starts));
631 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
632 back_inserter(expected_results[2].lengths));
633 }
634
635 // HSP # 4
636 {
637 expected_results[3].score = 53;
638 expected_results[3].evalue = 3.45771;
639 expected_results[3].bit_score = 2441105291e-8;
640 expected_results[3].num_ident = 4;
641 int starts[] = {139, 22, 151, -1, 156, 34};
642 int lengths[] = {12, 5, 17};
643 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
644 back_inserter(expected_results[3].starts));
645 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
646 back_inserter(expected_results[3].lengths));
647 }
648
649
650 // HSP # 5
651 {
652 expected_results[4].score = 52;
653 expected_results[4].evalue = 4.61874;
654 expected_results[4].bit_score = 2402585333e-8;
655 expected_results[4].num_ident = 7;
656 int starts[] = {172, 305, 179, -1, 182, 312};
657 int lengths[] = {7, 3, 33};
658 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
659 back_inserter(expected_results[4].starts));
660 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
661 back_inserter(expected_results[4].lengths));
662 }
663
664 // HSP # 6
665 {
666 expected_results[5].score = 52;
667 expected_results[5].evalue = 4.62283;
668 expected_results[5].bit_score = 2402585333e-8;
669 expected_results[5].num_ident = 7;
670 int starts[] = {127, 104, 132, -1, 134, 109};
671 int lengths[] = {5, 2, 15};
672 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
673 back_inserter(expected_results[5].starts));
674 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
675 back_inserter(expected_results[5].lengths));
676 }
677
678 qa::TSeqAlignSet actual_results;
679 qa::SeqAlignSetConvert(*sas, actual_results);
680
681 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
682 identical_results = cmp.Run(&errors);
683
684 BOOST_REQUIRE_MESSAGE(identical_results, errors);
685 }
686
687
BOOST_AUTO_TEST_CASE(TestMultipleQueries)688 BOOST_AUTO_TEST_CASE(TestMultipleQueries)
689 {
690 TSeqLocVector queries;
691 ITERATE (vector< CRef<CSeq_loc> >, it, m_Seq_locs) {
692 queries.push_back(SSeqLoc(**it, *m_Scope));
693 }
694 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
695
696 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
697 CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
698 CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
699 m_OptHandle);
700
701 CSearchResultSet results(*deltablast.Run());
702
703 // check results
704 BOOST_REQUIRE(results[0].GetErrors().empty());
705 BOOST_REQUIRE(results[1].GetErrors().empty());
706
707 // verify query id in Seq_aligns
708 BOOST_REQUIRE_EQUAL(
709 results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
710 GI_CONST(129295));
711
712 BOOST_REQUIRE_EQUAL(
713 results[1].GetSeqAlign()->Get().front()->GetSeq_id(0).GetPir().GetName(),
714 "H70430");
715
716
717 // verify query id in PSSMs
718 BOOST_REQUIRE_EQUAL(
719 deltablast.GetPssm(0)->GetQuery().GetSeq().GetFirstId()->GetGi(),
720 GI_CONST(129295));
721
722 BOOST_REQUIRE_EQUAL(
723 deltablast.GetPssm(1)->GetQuery().GetSeq().GetFirstId()->GetPir().GetName(),
724 "H70430");
725 }
726
727 // Verify that null inputs result in exceptions
BOOST_AUTO_TEST_CASE(TestNullQuery)728 BOOST_AUTO_TEST_CASE(TestNullQuery)
729 {
730 CRef<IQueryFactory> query_factory;
731 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
732 CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
733 BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
734 domain_dbadapter, m_OptHandle),
735 CBlastException);
736 }
737
738
BOOST_AUTO_TEST_CASE(TestNullOptions)739 BOOST_AUTO_TEST_CASE(TestNullOptions)
740 {
741 m_OptHandle.Reset();
742 TSeqLocVector query;
743 query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
744 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
745 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
746 CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
747 BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
748 domain_dbadapter, m_OptHandle),
749 CBlastException);
750 }
751
752
BOOST_AUTO_TEST_CASE(TestNullDatabase)753 BOOST_AUTO_TEST_CASE(TestNullDatabase)
754 {
755 TSeqLocVector query;
756 query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
757 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
758 CRef<CLocalDbAdapter> dbadapter;
759 CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
760 BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
761 domain_dbadapter, m_OptHandle),
762 CBlastException);
763 }
764
765
BOOST_AUTO_TEST_CASE(TestNullDomainDatabase)766 BOOST_AUTO_TEST_CASE(TestNullDomainDatabase)
767 {
768 TSeqLocVector query;
769 query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
770 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
771 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
772 CRef<CLocalDbAdapter> domain_dbadapter;
773 BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
774 domain_dbadapter, m_OptHandle),
775 CBlastException);
776 }
777
778
779 BOOST_AUTO_TEST_SUITE_END()
780
781