1 /* $Id: psiblast_unit_test.cpp 568561 2018-08-07 18:29:39Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 */
29
30 /** @file psiblast-cppunit.cpp
31 * Unit test module for the PSI-BLAST class
32 */
33 #define NCBI_TEST_APPLICATION
34 #include <ncbi_pch.hpp>
35 #include <corelib/test_boost.hpp>
36 #include <serial/iterator.hpp>
37
38 // BLAST API includes
39 #include <algo/blast/api/psiblast.hpp>
40 #include <algo/blast/api/uniform_search.hpp>
41 #include <algo/blast/api/local_db_adapter.hpp>
42 #include <algo/blast/api/objmgr_query_data.hpp>
43
44 #include <algo/blast/blastinput/blast_scope_src.hpp>
45
46 #include <objtools/blast/seqdb_reader/seqdb.hpp>
47
48 // Pssm engine includes
49 #include <algo/blast/api/pssm_engine.hpp>
50 #include <algo/blast/api/psi_pssm_input.hpp>
51
52 // Object includes
53 #include <objects/scoremat/Pssm.hpp>
54 #include <objects/scoremat/PssmFinalData.hpp>
55 #include <objects/scoremat/PssmWithParameters.hpp>
56 #include <objects/scoremat/PssmIntermediateData.hpp>
57 #include <objects/seqalign/Seq_align.hpp>
58 #include "psiblast_aux_priv.hpp" // for PsiBlastComputePssmScores
59
60 // Auxiliary test includes
61 #include "blast_test_util.hpp"
62 //#include "psiblast_test_util.hpp"
63
64 // SeqAlign comparison includes
65 #include "seqalign_cmp.hpp"
66 #include "seqalign_set_convert.hpp"
67
68 #include <algo/blast/api/psiblast_iteration.hpp>
69 #include "bioseq_extract_data_priv.hpp"
70 #include "blast_objmgr_priv.hpp"
71
72 /// Calculate the size of a static array
73 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
74
75 using namespace ncbi;
76 using namespace ncbi::objects;
77 using namespace ncbi::blast;
78
79 struct CPsiBlastTestFixture {
80
81 CRef<CPSIBlastOptionsHandle> m_OptHandle;
82 CRef<CPssmWithParameters> m_Pssm;
83 CSearchDatabase* m_SearchDb;
84
85 /// Contains a single Bioseq
86 CRef<CSeq_entry> m_SeqEntry;
87
88 /// Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295
89 CRef<CSeq_entry> m_SeqSet;
90
x_ReadSeqEntriesFromFileCPsiBlastTestFixture91 void x_ReadSeqEntriesFromFile() {
92 const string kSeqEntryFile("data/7450545.seqentry.asn");
93 m_SeqEntry = TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
94
95 m_SeqSet.Reset(new CSeq_entry);
96 m_SeqSet->SetSet().SetSeq_set().push_back(m_SeqEntry);
97 BOOST_REQUIRE(m_Pssm &&
98 m_Pssm->CanGetPssm() &&
99 m_Pssm->GetPssm().CanGetQuery());
100 CRef<CSeq_entry> second_bioseq(&m_Pssm->SetPssm().SetQuery());
101 m_SeqSet->SetSet().SetSeq_set().push_back(second_bioseq);
102 }
103
x_ReadPssmFromFileCPsiBlastTestFixture104 void x_ReadPssmFromFile() {
105 const string kPssmFile("data/pssm_freq_ratios.asn");
106 m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
107 BOOST_REQUIRE(m_Pssm->GetPssm().CanGetQuery());
108 BOOST_REQUIRE(m_Pssm->GetPssm().CanGetIntermediateData());
109 BOOST_REQUIRE(!m_Pssm->GetPssm().CanGetFinalData());
110 }
111
CPsiBlastTestFixtureCPsiBlastTestFixture112 CPsiBlastTestFixture() {
113 m_OptHandle.Reset(new CPSIBlastOptionsHandle);
114 BOOST_REQUIRE_EQUAL(eCompositionMatrixAdjust,
115 m_OptHandle->GetCompositionBasedStats());
116 m_SearchDb = new CSearchDatabase("data/seqp",
117 CSearchDatabase::eBlastDbIsProtein);
118
119 x_ReadPssmFromFile();
120 PsiBlastComputePssmScores(m_Pssm, m_OptHandle->GetOptions());
121 BOOST_REQUIRE(m_Pssm->GetPssm().GetFinalData().CanGetScores());
122
123 x_ReadSeqEntriesFromFile();
124 }
125
~CPsiBlastTestFixtureCPsiBlastTestFixture126 ~CPsiBlastTestFixture() {
127 m_OptHandle.Reset();
128 m_Pssm.Reset();
129 delete m_SearchDb;
130 m_SeqEntry.Reset();
131 m_SeqSet.Reset();
132 }
133
s_CountNumberUniqueGIsCPsiBlastTestFixture134 int s_CountNumberUniqueGIs(CConstRef<CSeq_align_set> sas)
135 {
136 int num_gis = 0;
137 TGi last_gi = INVALID_GI;
138 CSeq_id last_id;
139 ITERATE(CSeq_align_set::Tdata, itr, sas->Get()){
140 const CSeq_id& seqid = (*itr)->GetSeq_id(1);
141 if ( !CSeq_id::PreferAccessionOverGi() ) {
142 TGi new_gi = seqid.GetGi();
143 if (new_gi != last_gi)
144 {
145 num_gis++;
146 last_gi = new_gi;
147 }
148 }
149 else if ( !seqid.Equals(last_id) ) {
150 num_gis++;
151 last_id.Assign(seqid);
152 }
153 }
154 return num_gis;
155 }
156
s_SetupSubjectCPsiBlastTestFixture157 IQueryFactory *s_SetupSubject(CConstRef<CBioseq> bioseq) {
158 TSeqLocVector subjects;
159 CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
160 CConstRef<CSeq_id> sid = (scope->AddBioseq(*bioseq)).GetSeqId();
161 CRef<CSeq_loc> sl(new CSeq_loc());
162 sl->SetWhole();
163 sl->SetId(*sid);
164 SSeqLoc ssl(*sl, *scope);
165 subjects.push_back(ssl);
166 return (IQueryFactory *)(new CObjMgr_QueryFactory(subjects));
167 }
168
s_SetupSubjectCPsiBlastTestFixture169 IQueryFactory *s_SetupSubject(CConstRef<CBioseq_set> bioseq_set) {
170 TSeqLocVector subjects;
171 CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
172 CTypeConstIterator<CBioseq> itr(ConstBegin(*bioseq_set, eDetectLoops));
173 for (; itr; ++itr) {
174 CConstRef<CSeq_id> sid = (scope->AddBioseq(*itr)).GetSeqId();
175 CRef<CSeq_loc> sl(new CSeq_loc());
176 sl->SetWhole();
177 sl->SetId(*sid);
178 SSeqLoc ssl(*sl, *scope);
179 subjects.push_back(ssl);
180 }
181 return (IQueryFactory *)(new CObjMgr_QueryFactory(subjects));
182 }
183
184 CRef<CPssmWithParameters>
x_ComputePssmForNextIterationCPsiBlastTestFixture185 x_ComputePssmForNextIteration(const CBioseq& query,
186 CConstRef<CSeq_align_set> sset,
187 CConstRef<CPSIBlastOptionsHandle> opts_handle,
188 CConstRef<CBlastAncillaryData> ancillary_data)
189 {
190 CRef<CSeqDB> db(new CSeqDB("data/seqp", CSeqDB::eProtein));
191 CBlastScopeSource blast_om(db);
192 CRef<CScope> scope(blast_om.NewScope());
193 CPSIDiagnosticsRequest diags(PSIDiagnosticsRequestNew());
194 diags->frequency_ratios = true;
195 return blast::PsiBlastComputePssmFromAlignment(query, sset, scope,
196 *opts_handle,
197 ancillary_data,
198 diags);
199 }
200
201 };
202
BOOST_FIXTURE_TEST_SUITE(psiblast,CPsiBlastTestFixture)203 BOOST_FIXTURE_TEST_SUITE(psiblast, CPsiBlastTestFixture)
204
205 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_NoCBS) {
206 m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
207 m_OptHandle->SetEvalueThreshold(1.5);
208 CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
209 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
210 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
211 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
212
213 CSearchResultSet results(*psiblast.Run());
214 BOOST_REQUIRE(results[0].GetErrors().empty());
215
216 const int kNumExpectedMatchingSeqs = 3;
217 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
218
219 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
220
221 const size_t kNumExpectedHSPs = 3;
222 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
223
224 // HSP # 1
225 expected_results[0].score = 64;
226 expected_results[0].evalue = 2.46806e-1;
227 expected_results[0].bit_score = 292610051e-7;
228 expected_results[0].num_ident = 18;
229 expected_results[0].starts.push_back(34);
230 expected_results[0].starts.push_back(95);
231 expected_results[0].lengths.push_back(53);
232
233 // HSP # 2
234 {
235 int starts[] = { 203, 280, 220, -1, 226, 297, 258, -1, 265, 329 };
236 int lengths[] = { 17, 6, 32, 7, 39 };
237 expected_results[1].score = 63;
238 expected_results[1].evalue = 3.48342e-1;
239 expected_results[1].bit_score = 288758055e-7;
240 expected_results[1].num_ident = 24;
241 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
242 back_inserter(expected_results[1].starts));
243 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
244 back_inserter(expected_results[1].lengths));
245 }
246
247 // HSP # 3
248 {
249 int starts[] = { 180, 97, 204, -1, 205, 121, -1, 127, 211, 128,
250 241, -1, 242, 158, -1, 197, 281, 201, 306, -1, 318, 226, 323,
251 -1, 327, 231 };
252 int lengths[] = { 24, 1, 6, 1, 30, 1, 39, 4, 25, 12, 5, 4, 35 };
253 expected_results[2].score = 60;
254 expected_results[2].evalue = 7.36231e-1;
255 expected_results[2].bit_score = 277202068e-7;
256 expected_results[2].num_ident = 42;
257 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
258 back_inserter(expected_results[2].starts));
259 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
260 back_inserter(expected_results[2].lengths));
261 }
262
263 /* HSP # 4
264 {
265 int starts[] = { 43, 13, -1, 84, 114, 91, 135, -1, 136, 112,
266 149, -1, 151, 125, -1, 146, 172, 147, 177, -1, 179, 152,
267 195, -1, 200, 168, 226, -1, 228, 194 };
268 int lengths[] = { 71, 7, 21, 1, 13, 2, 21, 1, 5, 2, 16, 5, 26, 2,
269 11 };
270 expected_results[3].score = 57;
271 expected_results[3].evalue = 147345337e-8;
272 expected_results[3].bit_score = 265646081e-7;
273 expected_results[3].num_ident = 48;
274 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
275 back_inserter(expected_results[3].starts));
276 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
277 back_inserter(expected_results[3].lengths));
278 }
279 */
280
281 qa::TSeqAlignSet actual_results;
282 qa::SeqAlignSetConvert(*sas, actual_results);
283
284 qa::CSeqAlignCmpOpts opts;
285 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
286 string errors;
287 bool identical_results = cmp.Run(&errors);
288
289 BOOST_REQUIRE_MESSAGE(identical_results, errors);
290 }
291
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBS)292 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBS) {
293 m_OptHandle->SetCompositionBasedStats(eCompositionBasedStats);
294 CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
295 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
296 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
297 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
298
299 CSearchResultSet results(*psiblast.Run());
300 BOOST_REQUIRE(results[0].GetErrors().empty());
301
302 const int kNumExpectedMatchingSeqs = 4;
303 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
304
305 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
306
307 const size_t kNumExpectedHSPs = 4;
308 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
309
310 // HSP # 1
311 expected_results[0].score = 63;
312 expected_results[0].evalue = 2.96035e-1;
313 expected_results[0].bit_score = 288758056e-7;
314 expected_results[0].starts.push_back(34);
315 expected_results[0].starts.push_back(95);
316 expected_results[0].lengths.push_back(53);
317 expected_results[0].num_ident = 18;
318
319 // HSP # 2
320 expected_results[1].score = 59;
321 expected_results[1].evalue = 9.29330e-1;
322 expected_results[1].bit_score = 273350073e-7;
323 expected_results[1].starts.push_back(203);
324 expected_results[1].starts.push_back(280);
325 expected_results[1].starts.push_back(220);
326 expected_results[1].starts.push_back(-1);
327 expected_results[1].starts.push_back(226);
328 expected_results[1].starts.push_back(297);
329 expected_results[1].starts.push_back(258);
330 expected_results[1].starts.push_back(-1);
331 expected_results[1].starts.push_back(265);
332 expected_results[1].starts.push_back(329);
333 expected_results[1].lengths.push_back(17);
334 expected_results[1].lengths.push_back(6);
335 expected_results[1].lengths.push_back(32);
336 expected_results[1].lengths.push_back(7);
337 expected_results[1].lengths.push_back(39);
338 expected_results[1].num_ident = 24;
339
340 // HSP # 3
341 expected_results[2].score = 52;
342 expected_results[2].evalue = 6.67208;
343 expected_results[2].bit_score = 246386102e-7;
344 expected_results[2].starts.push_back(322);
345 expected_results[2].starts.push_back(46);
346 expected_results[2].lengths.push_back(28);
347 expected_results[2].num_ident = 10;
348
349
350 // HSP # 4
351 expected_results[3].score = 50;
352 expected_results[3].evalue = 6.81719;
353 expected_results[3].bit_score = 23.8682;
354 expected_results[3].starts.push_back(295);
355 expected_results[3].starts.push_back(23);
356 expected_results[3].starts.push_back(301);
357 expected_results[3].starts.push_back(-1);
358 expected_results[3].starts.push_back(304);
359 expected_results[3].starts.push_back(29);
360 expected_results[3].lengths.push_back(6);
361 expected_results[3].lengths.push_back(3);
362 expected_results[3].lengths.push_back(23);
363 expected_results[3].num_ident = 16;
364
365 qa::TSeqAlignSet actual_results;
366 qa::SeqAlignSetConvert(*sas, actual_results);
367
368 qa::CSeqAlignCmpOpts opts;
369 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
370 string errors;
371 bool identical_results = cmp.Run(&errors);
372
373 BOOST_REQUIRE_MESSAGE(identical_results, errors);
374 }
375
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSConditional)376 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSConditional) {
377 m_OptHandle->SetCompositionBasedStats(eCompositionMatrixAdjust);
378 m_OptHandle->SetEvalueThreshold(5.0);
379 CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
380 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
381 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
382 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
383
384 CSearchResultSet results(*psiblast.Run());
385 BOOST_REQUIRE(results[0].GetErrors().empty());
386
387 const int kNumExpectedMatchingSeqs = 3;
388 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
389
390 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
391
392 const size_t kNumExpectedHSPs = 3;
393 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
394
395 // HSP # 1
396 expected_results[0].score = 59;
397 expected_results[0].evalue = 8.66100e-1;
398 expected_results[0].bit_score = 273350073e-7;
399 expected_results[0].starts.push_back(34);
400 expected_results[0].starts.push_back(95);
401 expected_results[0].lengths.push_back(53);
402 expected_results[0].sequence_gis.SetQuery(7450545);
403 expected_results[0].sequence_gis.SetSubject(22982149);
404 expected_results[0].num_ident = 18;
405
406 // HSP # 3
407 {
408 int starts[] = { 322 , 46 , -1 , 75 , 351 , 81 , -1 , 94 , 364 ,
409 97 , -1 , 106 , 373 , 109 };
410 int lengths[] = { 29 , 6 , 13 , 3 , 9 , 3 , 17 };
411 expected_results[1].score = 53;
412 expected_results[1].evalue = 4.15768;
413 expected_results[1].bit_score = 250238098e-7;
414 expected_results[1].sequence_gis.SetQuery(7450545);
415 expected_results[1].sequence_gis.SetSubject(43121985);
416 expected_results[1].num_ident = 19;
417 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
418 back_inserter(expected_results[1].starts));
419 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
420 back_inserter(expected_results[1].lengths));
421 }
422
423 // HSP # 2
424 {
425 int starts[] = { 125 , 199 , 146 , -1 , 148 , 220 , -1 , 228 ,
426 156 , 233 , -1 , 250 , 173 , 252 , 179 , -1 , 181 , 258 ,
427 220 , -1 , 226 , 297 , 258 , -1 , 265 , 329 };
428 int lengths[] = { 21 , 2 , 8 , 5 , 17 , 2 , 6 , 2 , 39 , 6 , 32 ,
429 7 , 39 };
430 expected_results[2].score = 54;
431 expected_results[2].evalue = 4.40967;
432 expected_results[2].bit_score = 254090094e-7;
433 expected_results[2].sequence_gis.SetQuery(7450545);
434 expected_results[2].sequence_gis.SetSubject(13242404);
435 expected_results[2].num_ident = 39;
436 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
437 back_inserter(expected_results[2].starts));
438 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
439 back_inserter(expected_results[2].lengths));
440 }
441
442
443 // HSP # 4
444 /*
445 expected_results[3].score = 53;
446 expected_results[3].evalue = 5.65033;
447 expected_results[3].bit_score = 25.0238;
448 expected_results[3].sequence_gis.SetQuery(7450545);
449 expected_results[3].sequence_gis.SetSubject(15836829);
450 expected_results[3].starts.push_back(39);
451 expected_results[3].starts.push_back(304);
452 expected_results[3].lengths.push_back(41);
453 expected_results[3].lengths.push_back(41);
454 expected_results[3].num_ident = 15;
455 */
456
457
458 qa::TSeqAlignSet actual_results;
459 qa::SeqAlignSetConvert(*sas, actual_results);
460
461 qa::CSeqAlignCmpOpts opts;
462 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
463 string errors;
464 bool identical_results = cmp.Run(&errors);
465
466 BOOST_REQUIRE_MESSAGE(identical_results, errors);
467 }
468
BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSUniversal)469 BOOST_AUTO_TEST_CASE(TestSingleIteration_ProteinAsQuery_CBSUniversal) {
470 m_OptHandle->SetCompositionBasedStats(eCompoForceFullMatrixAdjust);
471 CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
472 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
473 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
474 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
475
476 CSearchResultSet results(*psiblast.Run());
477 BOOST_REQUIRE(results[0].GetErrors().empty());
478
479 const int kNumExpectedMatchingSeqs = 6;
480 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
481
482 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
483
484 const size_t kNumExpectedHSPs = 6;
485 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
486
487 // HSP # 1
488 expected_results[0].score = 59;
489 expected_results[0].evalue = 8.66100e-1;
490 expected_results[0].bit_score = 273350073e-7;
491 expected_results[0].starts.push_back(34);
492 expected_results[0].starts.push_back(95);
493 expected_results[0].lengths.push_back(53);
494 expected_results[0].sequence_gis.SetQuery(7450545);
495 expected_results[0].sequence_gis.SetSubject(22982149);
496 expected_results[0].num_ident = 18;
497
498 // HSP # 2
499 {
500 int starts[] = { 322 , 46 , -1 , 75 , 351 , 81 , -1 , 94 , 364 ,
501 97 , -1 , 106 , 373 , 109 };
502 int lengths[] = { 29 , 6 , 13 , 3 , 9 , 3 , 17 };
503 expected_results[1].score = 53;
504 expected_results[1].evalue = 4.15768;
505 expected_results[1].bit_score = 250238098e-7;
506 expected_results[1].sequence_gis.SetQuery(7450545);
507 expected_results[1].sequence_gis.SetSubject(43121985);
508 expected_results[1].num_ident = 19;
509 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
510 back_inserter(expected_results[1].starts));
511 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
512 back_inserter(expected_results[1].lengths));
513 }
514
515
516 // HSP # 3
517 {
518 int starts[] = { 125 , 199 , 146 , -1 , 148 , 220 , -1 , 228 ,
519 156 , 233 , -1 , 250 , 173 , 252 , 179 , -1 , 181 , 258 ,
520 220 , -1 , 226 , 297 , 258 , -1 , 265 , 329 };
521 int lengths[] = { 21 , 2 , 8 , 5 , 17 , 2 , 6 , 2 , 39 , 6 , 32 ,
522 7 , 39 };
523 expected_results[2].score = 54;
524 expected_results[2].evalue = 4.40967;
525 expected_results[2].bit_score = 254090094e-7;
526 expected_results[2].sequence_gis.SetQuery(7450545);
527 expected_results[2].sequence_gis.SetSubject(13242404);
528 expected_results[2].num_ident = 39;
529 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
530 back_inserter(expected_results[2].starts));
531 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
532 back_inserter(expected_results[2].lengths));
533 }
534
535 /* HSP # 4
536 expected_results[3].score = 53;
537 expected_results[3].evalue = 466014414e-8;
538 expected_results[3].bit_score = 250238098e-7;
539 expected_results[3].sequence_gis.SetQuery(7450545);
540 expected_results[3].sequence_gis.SetSubject(45683609);
541 expected_results[3].starts.push_back(39);
542 expected_results[3].starts.push_back(304);
543 expected_results[3].lengths.push_back(41);
544 */
545
546 qa::TSeqAlignSet actual_results;
547 qa::SeqAlignSetConvert(*sas, actual_results);
548
549 qa::CSeqAlignCmpOpts opts;
550 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
551 string errors;
552 bool identical_results = cmp.Run(&errors);
553
554 BOOST_REQUIRE_MESSAGE(identical_results, errors);
555 }
556
BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_ProteinAsQuery_NoCBS)557 BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_ProteinAsQuery_NoCBS) {
558 const int kNumIterations = 4;
559 const int kNumExpectedIterations = 2;
560 CPsiBlastIterationState itr(kNumIterations);
561 m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
562
563 CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
564 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
565 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
566 CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle);
567
568 int hits_below_threshold[kNumIterations] = { 0, 0, 0, 0 };
569 size_t number_hits[kNumIterations] = { 11, 14, 0, 0 };
570
571 int iteration_counter = 0;
572 while (itr) {
573 CSearchResultSet results = *psiblast.Run();
574 BOOST_REQUIRE(results[0].GetErrors().empty());
575 CConstRef<CSeq_align_set> alignment = results[0].GetSeqAlign();
576 BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
577 alignment->Get().size());
578
579 CPsiBlastIterationState::TSeqIds ids;
580 CPsiBlastIterationState::GetSeqIds(alignment, m_OptHandle, ids);
581
582 string m("On round ");
583 m += NStr::IntToString(itr.GetIterationNumber()) + " found ";
584 m += NStr::SizetToString(ids.size()) + " qualifying ids";
585 BOOST_REQUIRE_MESSAGE(
586 hits_below_threshold[iteration_counter]==(int)ids.size(), m);
587 itr.Advance(ids);
588
589 if (itr) {
590 CRef<CPssmWithParameters> pssm =
591 x_ComputePssmForNextIteration(*bioseq, alignment,
592 m_OptHandle, results[0].GetAncillaryData());
593 psiblast.SetPssm(pssm);
594 }
595 iteration_counter++;
596 }
597
598 BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
599 }
600
BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_NoCBS)601 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_NoCBS) {
602
603 m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
604 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
605 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
606
607 CSearchResultSet results(*psiblast.Run());
608 BOOST_REQUIRE(results[0].GetErrors().empty());
609
610 const int kNumExpectedMatchingSeqs = 6;
611 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
612 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
613
614 const size_t kNumExpectedHSPs = 7;
615 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
616
617 // HSP # 1
618 {
619 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
620 int lengths[] = { 152, 1, 80 };
621 expected_results[0].score = 595;
622 expected_results[0].evalue = 2.70189-71;
623 expected_results[0].bit_score = 233623298e-6;
624 expected_results[0].num_ident = 101;
625 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
626 back_inserter(expected_results[0].starts));
627 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
628 back_inserter(expected_results[0].lengths));
629 }
630
631 // HSP # 2
632 {
633 int starts[] = { 0, 154, -1, 308, 154, 309 };
634 int lengths[] = { 154, 1, 24 };
635 expected_results[1].score = 424;
636 expected_results[1].evalue = 6.54598e-49;
637 expected_results[1].bit_score = 167754171e-6;
638 expected_results[1].num_ident = 73;
639 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
640 back_inserter(expected_results[1].starts));
641 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
642 back_inserter(expected_results[1].lengths));
643 }
644
645 // HSP # 3
646 {
647 int starts[] =
648 { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
649 -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
650 -1, 374, 190, 380, 200, -1, 202, 390 };
651 int lengths[] =
652 { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
653 expected_results[2].score = 372;
654 expected_results[2].evalue = 1.67386e-43;
655 expected_results[2].bit_score = 147723793e-6;
656 expected_results[2].num_ident = 87;
657 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
658 back_inserter(expected_results[2].starts));
659 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
660 back_inserter(expected_results[2].lengths));
661 }
662
663 // HSP # 4
664 expected_results[3].score = 53;
665 expected_results[3].evalue = 2.43336;
666 expected_results[3].bit_score = 248451288e-7;
667 expected_results[3].num_ident = 8;
668 expected_results[3].starts.push_back(206);
669 expected_results[3].starts.push_back(46);
670 expected_results[3].lengths.push_back(19);
671
672 // HSP # 5
673 {
674 int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
675 int lengths[] = { 6, 1, 22, 10, 14 };
676 expected_results[4].score = 52;
677 expected_results[4].evalue = 3.12771;
678 expected_results[4].bit_score = 244599292e-7;
679 expected_results[4].num_ident = 11;
680 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
681 back_inserter(expected_results[4].starts));
682 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
683 back_inserter(expected_results[4].lengths));
684 }
685
686 // HSP # 6
687 {
688 int starts[] = { 74, 181, 108, -1, 109, 215 };
689 int lengths[] = { 34, 1, 23 };
690 expected_results[5].score = 49;
691 expected_results[5].evalue = 8.37737;
692 expected_results[5].bit_score = 233043305e-7;
693 expected_results[5].num_ident = 14;
694 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
695 back_inserter(expected_results[5].starts));
696 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
697 back_inserter(expected_results[5].lengths));
698 }
699
700 // HSP # 7
701 expected_results[6].score = 49;
702 expected_results[6].evalue = 8.62465;
703 expected_results[6].bit_score = 233043305e-7;
704 expected_results[6].num_ident = 6;
705 expected_results[6].starts.push_back(188);
706 expected_results[6].starts.push_back(709);
707 expected_results[6].lengths.push_back(30);
708
709 qa::TSeqAlignSet actual_results;
710 qa::SeqAlignSetConvert(*sas, actual_results);
711
712 qa::CSeqAlignCmpOpts opts;
713 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
714 string errors;
715 bool identical_results = cmp.Run(&errors);
716
717 BOOST_REQUIRE_MESSAGE(identical_results, errors);
718 }
719
BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBS)720 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBS) {
721
722 m_OptHandle->SetCompositionBasedStats(eCompositionBasedStats);
723 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
724 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
725
726 CSearchResultSet results(*psiblast.Run());
727 BOOST_REQUIRE(results[0].GetErrors().empty());
728
729 const int kNumExpectedMatchingSeqs = 4;
730 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
731 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
732
733 const size_t kNumExpectedHSPs = 5;
734 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
735
736 // HSP # 1
737 {
738 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
739 int lengths[] = { 152, 1, 80 };
740 expected_results[0].score = 593;
741 expected_results[0].evalue = 1.15934e-71;
742 expected_results[0].bit_score = 232843196e-6;
743 expected_results[0].sequence_gis.SetQuery(129295);
744 expected_results[0].sequence_gis.SetSubject(34878800);
745 expected_results[0].num_ident = 101;
746 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
747 back_inserter(expected_results[0].starts));
748 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
749 back_inserter(expected_results[0].lengths));
750 }
751
752 // HSP # 2
753 {
754 int starts[] = { 0, 154, -1, 308, 154, 309 };
755 int lengths[] = { 154, 1, 24 };
756 expected_results[1].score = 417;
757 expected_results[1].evalue = 5.13954e-48;
758 expected_results[1].bit_score = 165048071e-6;
759 expected_results[1].sequence_gis.SetQuery(129295);
760 expected_results[1].sequence_gis.SetSubject(34878800);
761 expected_results[1].num_ident = 73;
762 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
763 back_inserter(expected_results[1].starts));
764 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
765 back_inserter(expected_results[1].lengths));
766 }
767
768 // HSP # 3
769 {
770 int starts[] =
771 { 0, 190, 65, -1, 67, 255, 93, -1, 94, 281, 111, -1, 113, 298,
772 -1, 304, 119, 305, 153, -1, 154, 339, 164, -1, 165, 349,
773 -1, 378, 194, 382 };
774 int lengths[] =
775 { 65, 2, 26, 1, 17, 2, 6, 1, 34, 1, 10, 1, 29, 4, 38 };
776 expected_results[2].score = 359;
777 expected_results[2].evalue = 1.18595e-41;
778 expected_results[2].bit_score = 142706496e-6;
779 expected_results[2].sequence_gis.SetQuery(129295);
780 expected_results[2].sequence_gis.SetSubject(20092202);
781 expected_results[2].num_ident = 85;
782 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
783 back_inserter(expected_results[2].starts));
784 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
785 back_inserter(expected_results[2].lengths));
786 }
787
788 // HSP # 4
789 expected_results[3].score = 53;
790 expected_results[3].evalue = 2.14427;
791 expected_results[3].bit_score = 248354256e-7;
792 expected_results[3].sequence_gis.SetQuery(129295);
793 expected_results[3].sequence_gis.SetSubject(44343511);
794 expected_results[3].starts.push_back(206);
795 expected_results[3].starts.push_back(46);
796 expected_results[3].lengths.push_back(19);
797 expected_results[3].num_ident = 8;
798
799 // HSP # 5
800 expected_results[4].score = 51;
801 expected_results[4].evalue = 5.09267;
802 expected_results[4].bit_score = 240650265e-7;
803 expected_results[4].sequence_gis.SetQuery(129295);
804 expected_results[4].sequence_gis.SetSubject(23481125);
805 expected_results[4].starts.push_back(188);
806 expected_results[4].starts.push_back(709);
807 expected_results[4].lengths.push_back(30);
808 expected_results[4].num_ident = 6;
809
810 /* HSP # 6
811 {
812 int starts[] = { 74, 181, 108, -1, 109, 215 };
813 int lengths[] = { 34, 1, 23 };
814 expected_results[5].score = 48;
815 expected_results[5].evalue = 878932376e-8;
816 expected_results[5].bit_score = 229094278e-7;
817 expected_results[5].sequence_gis.SetQuery(129295);
818 expected_results[5].sequence_gis.SetSubject(38088088);
819 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
820 back_inserter(expected_results[5].starts));
821 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
822 back_inserter(expected_results[5].lengths));
823 }
824 */
825
826 qa::TSeqAlignSet actual_results;
827 qa::SeqAlignSetConvert(*sas, actual_results);
828
829 qa::CSeqAlignCmpOpts opts;
830 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
831 string errors;
832 bool identical_results = cmp.Run(&errors);
833
834 BOOST_REQUIRE_MESSAGE(identical_results, errors);
835 }
836
837 // N.B.: these can't be done from a PSSM
838 #if 0
839 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBSConditional) {
840
841 m_OptHandle->SetCompositionBasedStats(eCompositionMatrixAdjust);
842 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
843 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
844
845 CSearchResultSet results(*psiblast.Run());
846 BOOST_REQUIRE(results[0].GetErrors().empty());
847
848 const int kNumExpectedMatchingSeqs = 6;
849 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
850 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
851
852 const size_t kNumExpectedHSPs = 7;
853 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
854
855 // HSP # 1
856 {
857 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
858 int lengths[] = { 152, 1, 80 };
859 expected_results[0].score = 595;
860 expected_results[0].evalue = 307180919e-71;
861 expected_results[0].bit_score = 233623298e-6;
862 expected_results[0].num_ident = 101;
863 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
864 back_inserter(expected_results[0].starts));
865 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
866 back_inserter(expected_results[0].lengths));
867 }
868
869 // HSP # 2
870 {
871 int starts[] = { 0, 154, -1, 308, 154, 309 };
872 int lengths[] = { 154, 1, 24 };
873 expected_results[1].score = 424;
874 expected_results[1].evalue = 20700336e-50;
875 expected_results[1].bit_score = 167754171e-6;
876 expected_results[1].num_ident = 73;
877 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
878 back_inserter(expected_results[1].starts));
879 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
880 back_inserter(expected_results[1].lengths));
881 }
882
883 // HSP # 3
884 {
885 int starts[] =
886 { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
887 -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
888 -1, 374, 190, 380, 200, -1, 202, 390 };
889 int lengths[] =
890 { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
891 expected_results[2].score = 372;
892 expected_results[2].evalue = 221677687e-45;
893 expected_results[2].bit_score = 147723793e-6;
894 expected_results[2].num_ident = 87;
895 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
896 back_inserter(expected_results[2].starts));
897 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
898 back_inserter(expected_results[2].lengths));
899 }
900
901 // HSP # 4
902 expected_results[3].score = 53;
903 expected_results[3].evalue = 216713461e-8;
904 expected_results[3].bit_score = 248451288e-7;
905 expected_results[3].num_ident = 8;
906 expected_results[3].starts.push_back(206);
907 expected_results[3].starts.push_back(46);
908 expected_results[3].lengths.push_back(19);
909
910 // HSP # 5
911 {
912 int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
913 int lengths[] = { 6, 1, 22, 10, 14 };
914 expected_results[4].score = 52;
915 expected_results[4].evalue = 283036546e-8;
916 expected_results[4].bit_score = 244599292e-7;
917 expected_results[4].num_ident = 11;
918 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
919 back_inserter(expected_results[4].starts));
920 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
921 back_inserter(expected_results[4].lengths));
922 }
923
924 // HSP # 6
925 {
926 int starts[] = { 74, 181, 108, -1, 109, 215 };
927 int lengths[] = { 34, 1, 23 };
928 expected_results[5].score = 49;
929 expected_results[5].evalue = 630539642e-8;
930 expected_results[5].bit_score = 233043305e-7;
931 expected_results[5].num_ident = 14;
932 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
933 back_inserter(expected_results[5].starts));
934 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
935 back_inserter(expected_results[5].lengths));
936 }
937
938 // HSP # 7
939 expected_results[6].score = 49;
940 expected_results[6].evalue = 630539642e-8;
941 expected_results[6].bit_score = 233043305e-7;
942 expected_results[6].num_ident = 6;
943 expected_results[6].starts.push_back(188);
944 expected_results[6].starts.push_back(709);
945 expected_results[6].lengths.push_back(30);
946
947 qa::TSeqAlignSet actual_results;
948 qa::SeqAlignSetConvert(*sas, actual_results);
949
950 qa::CSeqAlignCmpOpts opts;
951 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
952 string errors;
953 bool identical_results = cmp.Run(&errors);
954
955 BOOST_REQUIRE_MESSAGE(identical_results, errors);
956 }
957
958 BOOST_AUTO_TEST_CASE(TestSingleIteration_PssmAsQuery_CBSUniversal) {
959
960 m_OptHandle->SetCompositionBasedStats(eCompoForceFullMatrixAdjust);
961 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
962 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
963
964 CSearchResultSet results(*psiblast.Run());
965 BOOST_REQUIRE(results[0].GetErrors().empty());
966
967 const int kNumExpectedMatchingSeqs = 6;
968 CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
969 BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, s_CountNumberUniqueGIs(sas));
970
971 const size_t kNumExpectedHSPs = 7;
972 qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
973
974 // HSP # 1
975 {
976 int starts[] = { 0, 941, -1, 1093, 152, 1094 };
977 int lengths[] = { 152, 1, 80 };
978 expected_results[0].score = 595;
979 expected_results[0].evalue = 307180919e-71;
980 expected_results[0].bit_score = 233623298e-6;
981 expected_results[0].num_ident = 101;
982 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
983 back_inserter(expected_results[0].starts));
984 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
985 back_inserter(expected_results[0].lengths));
986 }
987
988 // HSP # 2
989 {
990 int starts[] = { 0, 154, -1, 308, 154, 309 };
991 int lengths[] = { 154, 1, 24 };
992 expected_results[1].score = 424;
993 expected_results[1].evalue = 20700336e-50;
994 expected_results[1].bit_score = 167754171e-6;
995 expected_results[1].num_ident = 73;
996 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
997 back_inserter(expected_results[1].starts));
998 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
999 back_inserter(expected_results[1].lengths));
1000 }
1001
1002 // HSP # 3
1003 {
1004 int starts[] =
1005 { 0, 190, 65, -1, 67, 255, 91, -1, 92, 279, 111, -1, 113, 298,
1006 -1, 304, 119, 305, 151, -1, 152, 337, 163, -1, 164, 348,
1007 -1, 374, 190, 380, 200, -1, 202, 390 };
1008 int lengths[] =
1009 { 65, 2, 24, 1, 19, 2, 6, 1, 32, 1, 11, 1, 26, 6, 10, 2, 30 };
1010 expected_results[2].score = 372;
1011 expected_results[2].evalue = 221677687e-45;
1012 expected_results[2].bit_score = 147723793e-6;
1013 expected_results[2].num_ident = 87;
1014 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
1015 back_inserter(expected_results[2].starts));
1016 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
1017 back_inserter(expected_results[2].lengths));
1018 }
1019
1020 // HSP # 4
1021 expected_results[3].score = 53;
1022 expected_results[3].evalue = 216713461e-8;
1023 expected_results[3].bit_score = 248451288e-7;
1024 expected_results[3].num_ident = 8;
1025 expected_results[3].starts.push_back(206);
1026 expected_results[3].starts.push_back(46);
1027 expected_results[3].lengths.push_back(19);
1028
1029 // HSP # 5
1030 {
1031 int starts[] = { 177, 100, -1, 106, 183, 107, 205, -1, 215, 129 };
1032 int lengths[] = { 6, 1, 22, 10, 14 };
1033 expected_results[4].score = 52;
1034 expected_results[4].evalue = 283036546e-8;
1035 expected_results[4].bit_score = 244599292e-7;
1036 expected_results[4].num_ident = 11;
1037 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
1038 back_inserter(expected_results[4].starts));
1039 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
1040 back_inserter(expected_results[4].lengths));
1041 }
1042
1043 // HSP # 6
1044 {
1045 int starts[] = { 74, 181, 108, -1, 109, 215 };
1046 int lengths[] = { 34, 1, 23 };
1047 expected_results[5].score = 49;
1048 expected_results[5].evalue = 630539642e-8;
1049 expected_results[5].bit_score = 233043305e-7;
1050 expected_results[5].num_ident = 14;
1051 copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
1052 back_inserter(expected_results[5].starts));
1053 copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
1054 back_inserter(expected_results[5].lengths));
1055 }
1056
1057 // HSP # 7
1058 expected_results[6].score = 49;
1059 expected_results[6].evalue = 630539642e-8;
1060 expected_results[6].bit_score = 233043305e-7;
1061 expected_results[6].num_ident = 6;
1062 expected_results[6].starts.push_back(188);
1063 expected_results[6].starts.push_back(709);
1064 expected_results[6].lengths.push_back(30);
1065
1066 qa::TSeqAlignSet actual_results;
1067 qa::SeqAlignSetConvert(*sas, actual_results);
1068
1069 qa::CSeqAlignCmpOpts opts;
1070 qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
1071 string errors;
1072 bool identical_results = cmp.Run(&errors);
1073
1074 BOOST_REQUIRE_MESSAGE(identical_results, errors);
1075 }
1076 #endif
1077
1078 // This search will converge after 2 iterations
BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_NoCBS)1079 BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_NoCBS) {
1080
1081 const int kNumIterations = 4;
1082 const int kNumExpectedIterations = 2;
1083 CPsiBlastIterationState itr(kNumIterations);
1084 m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
1085 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1086 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1087
1088 int hits_below_threshold[kNumIterations] = { 2, 2, 0, 0 };
1089 int number_hits[kNumIterations] = { 6, 5, 0, 0 };
1090
1091 int iteration_counter = 0;
1092 while (itr) {
1093 CSearchResultSet results = *psiblast.Run();
1094 BOOST_REQUIRE(results[0].GetErrors().empty());
1095 CConstRef<CSeq_align_set> alignment = results[0].GetSeqAlign();
1096
1097 BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
1098 s_CountNumberUniqueGIs(alignment));
1099
1100 CPsiBlastIterationState::TSeqIds ids;
1101 CPsiBlastIterationState::GetSeqIds(alignment, m_OptHandle, ids);
1102
1103 string m("On round ");
1104 m += NStr::IntToString(itr.GetIterationNumber()) + " found ";
1105 m += NStr::SizetToString(ids.size()) + " qualifying ids";
1106 BOOST_REQUIRE_EQUAL(hits_below_threshold[iteration_counter],
1107 (int)ids.size());
1108 itr.Advance(ids);
1109
1110 if (itr) {
1111 const CBioseq& query = m_Pssm->GetPssm().GetQuery().GetSeq();
1112 CRef<CPssmWithParameters> pssm =
1113 x_ComputePssmForNextIteration(query, alignment,
1114 m_OptHandle, results[0].GetAncillaryData());
1115 psiblast.SetPssm(pssm);
1116 }
1117 iteration_counter++;
1118 }
1119
1120 BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
1121 }
1122
1123 // This search will converge after 2 iterations
BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_CBS)1124 BOOST_AUTO_TEST_CASE(TestMultipleIterationsAndConvergence_PssmAsQuery_CBS) {
1125
1126 const int kNumIterations = 4;
1127 const int kNumExpectedIterations = 2;
1128 CPsiBlastIterationState itr(kNumIterations);
1129 m_OptHandle->SetCompositionBasedStats(eCompositionBasedStats);
1130 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1131 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1132
1133 int hits_below_threshold[kNumIterations] = { 2, 2, 0, 0 };
1134 int number_hits[kNumIterations] = { 4, 3, 0, 0 };
1135
1136 int iteration_counter = 0;
1137 while (itr) {
1138 CSearchResultSet results = *psiblast.Run();
1139 BOOST_REQUIRE(results[0].GetErrors().empty());
1140 CConstRef<CSeq_align_set> alignment = results[0].GetSeqAlign();
1141 BOOST_REQUIRE(alignment.NotEmpty());
1142 BOOST_REQUIRE_EQUAL(number_hits[iteration_counter],
1143 s_CountNumberUniqueGIs(alignment));
1144
1145 CPsiBlastIterationState::TSeqIds ids;
1146 CPsiBlastIterationState::GetSeqIds(alignment, m_OptHandle, ids);
1147
1148 string m("On round ");
1149 m += NStr::IntToString(itr.GetIterationNumber()) + " found ";
1150 m += NStr::SizetToString(ids.size()) + " qualifying ids";
1151 BOOST_REQUIRE_EQUAL(hits_below_threshold[iteration_counter],
1152 (int)ids.size());
1153 itr.Advance(ids);
1154
1155 if (itr) {
1156 const CBioseq& query = m_Pssm->GetPssm().GetQuery().GetSeq();
1157 CRef<CPssmWithParameters> pssm =
1158 x_ComputePssmForNextIteration(query, alignment,
1159 m_OptHandle, results[0].GetAncillaryData());
1160 psiblast.SetPssm(pssm);
1161 }
1162 iteration_counter++;
1163 }
1164
1165 BOOST_REQUIRE_EQUAL(kNumExpectedIterations, iteration_counter);
1166 }
1167
1168 // Should throw exception as only one query sequence/pssm is allowed
BOOST_AUTO_TEST_CASE(TestMultipleQueries)1169 BOOST_AUTO_TEST_CASE(TestMultipleQueries) {
1170 CConstRef<CBioseq_set> bioseq_set(&m_SeqSet->GetSet());
1171 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq_set));
1172 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1173 BOOST_REQUIRE_THROW(CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle), CBlastException);
1174 }
1175
BOOST_AUTO_TEST_CASE(TestNullQuery)1176 BOOST_AUTO_TEST_CASE(TestNullQuery) {
1177 CRef<IQueryFactory> query_factory;
1178 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1179 BOOST_REQUIRE_THROW(CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle),
1180 CBlastException);
1181 }
1182
BOOST_AUTO_TEST_CASE(TestFrequencyRatiosWithAllZerosInPssm)1183 BOOST_AUTO_TEST_CASE(TestFrequencyRatiosWithAllZerosInPssm) {
1184 NON_CONST_ITERATE(CPssmIntermediateData::TFreqRatios, fr,
1185 m_Pssm->SetPssm().SetIntermediateData().SetFreqRatios()) {
1186 *fr = 0.0;
1187 }
1188
1189 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1190 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1191 CSearchResultSet r = *psiblast.Run();
1192 TQueryMessages messages = r[0].GetErrors(eBlastSevWarning);
1193 BOOST_REQUIRE( !messages.empty() );
1194
1195 string expected_warning("Frequency ratios for PSSM are all zeros");
1196 string warning;
1197 ITERATE(TQueryMessages, m, messages) {
1198 if (((*m)->GetSeverity() == eBlastSevWarning) &&
1199 ((*m)->GetMessage().find(expected_warning) != string::npos)) {
1200 warning = (*m)->GetMessage();
1201 break;
1202 }
1203 }
1204 BOOST_REQUIRE_MESSAGE(!warning.empty(), "Did not find expected warning");
1205 }
1206
BOOST_AUTO_TEST_CASE(TestNullPssm)1207 BOOST_AUTO_TEST_CASE(TestNullPssm) {
1208 m_Pssm.Reset();
1209 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1210 BOOST_REQUIRE_THROW(CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle),
1211 CBlastException);
1212 }
1213
BOOST_AUTO_TEST_CASE(TestSetNullPssm)1214 BOOST_AUTO_TEST_CASE(TestSetNullPssm) {
1215 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1216 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1217 CRef<CPssmWithParameters> pssm;
1218 BOOST_REQUIRE_THROW(psiblast.SetPssm(pssm), CBlastException);
1219 }
1220
BOOST_AUTO_TEST_CASE(TestNonExistantDb)1221 BOOST_AUTO_TEST_CASE(TestNonExistantDb) {
1222 m_SearchDb->SetDatabaseName("dummy");
1223 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1224 CPsiBlast psiblast(m_Pssm, dbadapter, m_OptHandle);
1225 BOOST_REQUIRE_THROW(psiblast.Run(),CSeqDBException);
1226 }
1227
BOOST_AUTO_TEST_CASE(TestNullOptions)1228 BOOST_AUTO_TEST_CASE(TestNullOptions) {
1229 m_OptHandle.Reset();
1230 CConstRef<CBioseq> bioseq(&m_SeqEntry->GetSeq());
1231 CRef<IQueryFactory> query_factory(s_SetupSubject(bioseq));
1232 CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
1233 BOOST_REQUIRE_THROW(CPsiBlast psiblast(query_factory, dbadapter, m_OptHandle),
1234 CBlastException);
1235 }
1236
1237 BOOST_AUTO_TEST_SUITE_END()
1238