1 /* $Id: remote_blast_unit_test.cpp 600218 2020-01-14 17:23:41Z fongah2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 * File Description:
29 * Unit test module to test the remote BLAST API
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/test_boost.hpp>
36 #include <objtools/blast/services/blast_services.hpp>
37 #include <algo/blast/api/remote_blast.hpp>
38 #include "test_objmgr.hpp"
39 #include <objects/seqloc/Seq_id.hpp>
40 #include <objects/seqset/Bioseq_set.hpp>
41 #include <objects/seqalign/Seq_align.hpp>
42 #include <algo/blast/api/blast_options.hpp>
43 #include <algo/blast/api/blast_rps_options.hpp>
44 #include <algo/blast/api/blast_nucl_options.hpp>
45 #include <algo/blast/api/disc_nucl_options.hpp>
46 #include <algo/blast/api/objmgr_query_data.hpp>
47 #include <algo/blast/blastinput/blast_fasta_input.hpp>
48 #include <algo/blast/blastinput/blast_input_aux.hpp>
49 #include <serial/serial.hpp>
50 #include <serial/objostr.hpp>
51 #include <serial/exception.hpp>
52 #include <util/range.hpp>
53
54 using namespace std;
55 using namespace ncbi;
56 using namespace ncbi::objects;
57 using namespace ncbi::blast;
58
NCBITEST_INIT_TREE()59 NCBITEST_INIT_TREE()
60 {
61 CNcbiEnvironment env;
62 const string& under_valgrind = env.Get("NCBI_RUN_UNDER_VALGRIND");
63 if ( !under_valgrind.empty() ) {
64 try {
65 if (NStr::StringToBool(under_valgrind)) {
66 NCBITEST_DISABLE(CheckPrimerBlastRID);
67 }
68 } catch (const exception&) {
69 ERR_POST(Warning << "Unsupported NCBI_RUN_UNDER_VALGRIND value "
70 << under_valgrind << "; treating as false.");
71 }
72 }
73 }
74
75 static int
x_CountHits(const string & rid)76 x_CountHits(const string & rid)
77 {
78 // Another preserved query, with mixed ID types.
79
80 CRemoteBlast rb(rid);
81
82 TSeqAlignVector sav(rb.GetSeqAlignSets());
83
84 int total = 0;
85
86 for(unsigned i = 0; i < sav.size(); i++) {
87 total += sav[i]->Get().size();
88 }
89
90 return total;
91 }
92
// Append the pair (a, b) to the end of rids.
//
// rids: vector that receives the new element.
// a:    value stored as the first member of the appended pair.
// b:    value stored as the second member of the appended pair.
static void
x_PushPairVec(vector< pair<string,string> > & rids, string a, string b)
{
    // Construct the pair directly from both values rather than
    // default-constructing it and assigning each member afterwards.
    rids.push_back(pair<string,string>(a, b));
}
102
103 template<class TOBJ>
x_Stringify(TOBJ & obj)104 string x_Stringify(TOBJ & obj)
105 {
106 CNcbiOstrstream oss;
107
108 auto_ptr<CObjectOStream>
109 outpstr(CObjectOStream::Open(eSerial_AsnText, oss));
110
111 *outpstr << obj;
112
113 return CNcbiOstrstreamToString(oss);
114 }
115
116 BOOST_AUTO_TEST_SUITE(remote_blast)
117
// Submits a two-query remote blastn search against UniVec with explicitly
// supplied query masks, then checks how the same masks are converted to the
// network (Blast4) representation by CRemoteBlast::ConvertToRemoteMasks.
BOOST_AUTO_TEST_CASE(MaskedQueryRegions) {
    const EBlastProgramType kProgram = eBlastTypeBlastn;
    CRef<CBlastOptionsHandle> opts_handle(
        CBlastOptionsFactory::Create(eBlastn, CBlastOptions::eRemote));
    opts_handle->SetDbLength(5000000);
    // only our masking specified below should be used
    opts_handle->SetFilterString("F");/* NCBI_FAKE_WARNING */

    CRemoteBlast remote_search(opts_handle);
    //remote_search.SetVerbose();
    remote_search.SetDatabase("UniVec");

    const size_t kNumQueries(2);
    CRemoteBlast::TSeqLocList queries(kNumQueries);
    TSeqLocInfoVector local_masks(kNumQueries);

    // First query: GI 555 with one mask on [50, 100], frame +1
    {
        CRef<CSeq_id> gi(new CSeq_id(CSeq_id::e_Gi, 555));
        queries.front().Reset(new CSeq_loc);
        queries.front()->SetWhole(*gi);

        CRef<CSeq_interval> region(new CSeq_interval(*gi, 50, 100));
        CRef<CSeqLocInfo> region_info(
            new CSeqLocInfo(region, CSeqLocInfo::eFramePlus1));
        local_masks.front().push_back(region_info);
    }

    // Second query: GI 556 with two masks
    {
        CRef<CSeq_id> gi(new CSeq_id(CSeq_id::e_Gi, 556));
        queries.back().Reset(new CSeq_loc);
        queries.back()->SetWhole(*gi);

        // this mask should be ignored... and a warning issued
        // NOTE(review): the conversion checks at the end of this test still
        // expect this interval (200..100) to be present locally - confirm
        // where the "ignored" behavior is supposed to take place.
        CRef<CSeq_interval> reversed_region(
            new CSeq_interval(*gi, 200, 100));
        CRef<CSeqLocInfo> reversed_info(
            new CSeqLocInfo(reversed_region, CSeqLocInfo::eFrameMinus1));
        local_masks.back().push_back(reversed_info);

        CRef<CSeq_interval> forward_region(
            new CSeq_interval(*gi, 200, 300));
        CRef<CSeqLocInfo> forward_info(
            new CSeqLocInfo(forward_region, CSeqLocInfo::eFramePlus1));
        local_masks.back().push_back(forward_info);
    }

    const string kClientId("remote_blast_unit_test.cpp");
    remote_search.SetClientId(kClientId);

    remote_search.SetQueries(queries, local_masks);
    BOOST_REQUIRE_EQUAL(true, remote_search.Submit());
    BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Pending,
                        remote_search.CheckStatus());
    BOOST_REQUIRE_EQUAL(false, remote_search.GetRID().empty());
    BOOST_REQUIRE_EQUAL(kClientId, remote_search.GetClientId());

    BOOST_REQUIRE(remote_search.GetErrors().empty());

    // Convert the locally built masks and inspect the result per query
    vector<string> conversion_warnings;
    const CBlast4_get_search_results_reply::TMasks& network_masks =
        CRemoteBlast::ConvertToRemoteMasks(local_masks, kProgram,
                                           &conversion_warnings);
    BOOST_REQUIRE_EQUAL(kNumQueries, network_masks.size());

    // Masks of the first query: a single packed-int with one interval
    CRef<CBlast4_mask> net_mask = network_masks.front();
    BOOST_REQUIRE_EQUAL(static_cast<size_t>(1),
                        net_mask->GetLocations().size());
    CRef<CSeq_loc> mask_loc = net_mask->GetLocations().front();
    BOOST_REQUIRE(mask_loc->IsPacked_int());
    BOOST_REQUIRE(mask_loc->GetPacked_int().Get().size() == 1);

    CRef<CSeq_interval> first_interval =
        mask_loc->GetPacked_int().Get().front();
    BOOST_REQUIRE_EQUAL(static_cast<TSeqPos>(50), first_interval->GetFrom());
    BOOST_REQUIRE_EQUAL(static_cast<TSeqPos>(100), first_interval->GetTo());

    // Masks of the second query: a single packed-int with two intervals
    net_mask = network_masks.back();
    BOOST_REQUIRE_EQUAL(static_cast<size_t>(1),
                        net_mask->GetLocations().size());
    mask_loc = net_mask->GetLocations().front();
    BOOST_REQUIRE(mask_loc->IsPacked_int());
    BOOST_REQUIRE(mask_loc->GetPacked_int().Get().size() == 2);

    first_interval = mask_loc->GetPacked_int().Get().front();
    BOOST_REQUIRE_EQUAL(static_cast<TSeqPos>(200), first_interval->GetFrom());
    BOOST_REQUIRE_EQUAL(static_cast<TSeqPos>(100), first_interval->GetTo());
}
199
200 // Note that no CRemoteBlast constructor takes a CBlastRPSOptionsHandle, so
201 // the constructor which takes a CBlastOptionsHandle will be invoked
BOOST_AUTO_TEST_CASE(CheckRemoteRPSBlastOptionsHandle)202 BOOST_AUTO_TEST_CASE(CheckRemoteRPSBlastOptionsHandle) {
203 CBlastRPSOptionsHandle rps_opts(CBlastOptions::eRemote);
204
205 CRemoteBlast rmt_blaster(&rps_opts);
206 rmt_blaster.SetDatabase("cdd");
207 CRemoteBlast::TSeqLocList query_seqlocs;
208 CRef<CSeq_loc> sl(new CSeq_loc);
209 sl->SetWhole().SetGi(GI_CONST(129295));
210 query_seqlocs.push_back(sl);
211 rmt_blaster.SetQueries(query_seqlocs);
212
213 BOOST_REQUIRE_EQUAL(true, rmt_blaster.Submit());
214 }
215
216 // Search of GIs 555 and 3090 against ecoli
BOOST_AUTO_TEST_CASE(CheckBlastnMasks)217 BOOST_AUTO_TEST_CASE(CheckBlastnMasks) {
218 const string rid("BW3U058R01R");
219 CRemoteBlast rmt_blaster(rid);
220
221 BOOST_REQUIRE_EQUAL(rid, rmt_blaster.GetRID());
222 BOOST_REQUIRE_EQUAL(true, rmt_blaster.CheckDone());
223 BOOST_REQUIRE_EQUAL(kEmptyStr, rmt_blaster.GetErrors());
224 BOOST_REQUIRE(rmt_blaster.GetDbFilteringAlgorithmId() == -1);
225 BOOST_REQUIRE(rmt_blaster.GetDbFilteringAlgorithmKey() == kEmptyStr);
226
227 const EBlastProgramType prog =
228 NetworkProgram2BlastProgramType(rmt_blaster.GetProgram(),
229 rmt_blaster.GetService());
230
231 TSeqLocInfoVector masks = rmt_blaster.GetMasks();
232 vector<string> warnings;
233 const CBlast4_get_search_results_reply::TMasks& network_masks =
234 CRemoteBlast::ConvertToRemoteMasks(masks, prog, &warnings);
235 BOOST_REQUIRE(!masks.empty());
236 BOOST_REQUIRE(!network_masks.empty());
237 BOOST_REQUIRE(warnings.empty());
238 const size_t kNumQueries = 2;
239 BOOST_REQUIRE_EQUAL(kNumQueries, masks.size());
240 BOOST_REQUIRE_EQUAL(kNumQueries, network_masks.size());
241
242 size_t index = 0;
243 vector<TSeqRange> expected_masks;
244 expected_masks.push_back(TSeqRange(78, 89));
245 BOOST_REQUIRE_EQUAL(expected_masks.size(), masks.front().size());
246
247 ITERATE(TMaskedQueryRegions, seqlocinfo, masks.front()) {
248 BOOST_REQUIRE(seqlocinfo->NotEmpty());
249 BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
250 (*seqlocinfo)->GetInterval().GetFrom());
251 BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
252 (*seqlocinfo)->GetInterval().GetTo());
253 BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet,
254 (*seqlocinfo)->GetFrame());
255 index++;
256 }
257 index = 0;
258 BOOST_REQUIRE_EQUAL(eBlast4_frame_type_plus1,
259 network_masks.front()->GetFrame());
260 CBlast4_mask::TLocations const* net_masks =
261 &network_masks.front()->GetLocations();
262 BOOST_REQUIRE_EQUAL((size_t)1, net_masks->size());
263 ITERATE(CPacked_seqint::Tdata, seqint,
264 net_masks->front()->GetPacked_int().Get()) {
265 BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
266 (*seqint)->GetFrom());
267 BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
268 (*seqint)->GetTo());
269 index++;
270 }
271
272 index = 0;
273 expected_masks.clear();
274 expected_masks.push_back(TSeqRange(25, 31));
275 expected_masks.push_back(TSeqRange(35, 101));
276 expected_masks.push_back(TSeqRange(116, 123));
277 expected_masks.push_back(TSeqRange(131, 195));
278 expected_masks.push_back(TSeqRange(2022, 2337));
279 BOOST_REQUIRE_EQUAL(expected_masks.size(), masks.back().size());
280 ITERATE(TMaskedQueryRegions, seqlocinfo, masks.back()) {
281 BOOST_REQUIRE(seqlocinfo->NotEmpty());
282 BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
283 (*seqlocinfo)->GetInterval().GetFrom());
284 BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
285 (*seqlocinfo)->GetInterval().GetTo());
286 BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet,
287 (*seqlocinfo)->GetFrame());
288 index++;
289 }
290
291 index = 0;
292 BOOST_REQUIRE_EQUAL(eBlast4_frame_type_plus1,
293 network_masks.back()->GetFrame());
294 net_masks = &network_masks.back()->GetLocations();
295 BOOST_REQUIRE_EQUAL((size_t)1, net_masks->size());
296 ITERATE(CPacked_seqint::Tdata, seqint,
297 net_masks->front()->GetPacked_int().Get()) {
298 BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
299 (*seqint)->GetFrom());
300 BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
301 (*seqint)->GetTo());
302 index++;
303 }
304 }
305
// Retrieves a stored protein-search RID and verifies the query masks both in
// their local form (TSeqLocInfoVector) and after conversion to network masks.
BOOST_AUTO_TEST_CASE(CheckBlastpMasks) {
    const string rid("BD4H4FMN014");
    CRemoteBlast rmt_blaster(rid);

    BOOST_REQUIRE_MESSAGE(rid == rmt_blaster.GetRID(), "RID=" << rid);
    BOOST_REQUIRE_MESSAGE(rmt_blaster.CheckDone(), "RID=" << rid);
    BOOST_REQUIRE_MESSAGE(kEmptyStr == rmt_blaster.GetErrors(), "RID=" << rid);

    const EBlastProgramType prog =
        NetworkProgram2BlastProgramType(rmt_blaster.GetProgram(),
                                        rmt_blaster.GetService());

    TSeqLocInfoVector masks = rmt_blaster.GetMasks();
    vector<string> warnings;
    const CBlast4_get_search_results_reply::TMasks& network_masks =
        CRemoteBlast::ConvertToRemoteMasks(masks, prog, &warnings);
    BOOST_REQUIRE(!masks.empty());
    BOOST_REQUIRE(!network_masks.empty());
    BOOST_REQUIRE(warnings.empty());
    const size_t kNumQueries = 2;
    BOOST_REQUIRE_EQUAL(kNumQueries, masks.size());
    BOOST_REQUIRE_EQUAL(kNumQueries, network_masks.size());

    // ---- First query: local masks ----
    size_t index = 0;
    vector<TSeqRange> expected_masks;
    expected_masks.push_back(TSeqRange(95, 119));
    expected_masks.push_back(TSeqRange(196, 207));
    BOOST_REQUIRE_EQUAL(expected_masks.size(), masks.front().size());

    ITERATE(TMaskedQueryRegions, seqlocinfo, masks.front()) {
        BOOST_REQUIRE(seqlocinfo->NotEmpty());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
                            (*seqlocinfo)->GetInterval().GetFrom());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
                            (*seqlocinfo)->GetInterval().GetTo());
        BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet,
                            (*seqlocinfo)->GetFrame());
        index++;
    }

    // ---- First query: network masks ----
    index = 0;
    BOOST_REQUIRE_EQUAL(eBlast4_frame_type_notset,
                        network_masks.front()->GetFrame());
    CBlast4_mask::TLocations const* net_masks =
        &network_masks.front()->GetLocations();
    BOOST_REQUIRE_EQUAL((size_t)1, net_masks->size());
    ITERATE(CPacked_seqint::Tdata, seqint,
            net_masks->front()->GetPacked_int().Get()) {
        // The number of network intervals is not checked beforehand, so make
        // sure we never index past the end of the expected values.
        BOOST_REQUIRE(index < expected_masks.size());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
                            (*seqint)->GetFrom());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
                            (*seqint)->GetTo());
        index++;
    }

    // ---- Second query: local masks ----
    index = 0;
    expected_masks.clear();
    expected_masks.push_back(TSeqRange(91, 103));
    expected_masks.push_back(TSeqRange(270, 289));
    BOOST_REQUIRE_EQUAL(expected_masks.size(), masks.back().size());
    ITERATE(TMaskedQueryRegions, seqlocinfo, masks.back()) {
        BOOST_REQUIRE(seqlocinfo->NotEmpty());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
                            (*seqlocinfo)->GetInterval().GetFrom());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
                            (*seqlocinfo)->GetInterval().GetTo());
        BOOST_REQUIRE_EQUAL((int)CSeqLocInfo::eFrameNotSet,
                            (*seqlocinfo)->GetFrame());
        index++;
    }

    // ---- Second query: network masks ----
    index = 0;
    BOOST_REQUIRE_EQUAL(eBlast4_frame_type_notset,
                        network_masks.back()->GetFrame());
    net_masks = &network_masks.back()->GetLocations();
    BOOST_REQUIRE_EQUAL((size_t)1, net_masks->size());
    ITERATE(CPacked_seqint::Tdata, seqint,
            net_masks->front()->GetPacked_int().Get()) {
        // Same bound guard as for the first query.
        BOOST_REQUIRE(index < expected_masks.size());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetFrom(),
                            (*seqint)->GetFrom());
        BOOST_REQUIRE_EQUAL(expected_masks[index].GetTo(),
                            (*seqint)->GetTo());
        index++;
    }
}
389
390 // Search of GIs 555 and 3090 against ecoli
BOOST_AUTO_TEST_CASE(CheckBlastxMasks)391 BOOST_AUTO_TEST_CASE(CheckBlastxMasks) {
392 const string rid("BW41NPVB014");
393 CRemoteBlast rmt_blaster(rid);
394
395 BOOST_REQUIRE_EQUAL(rid, rmt_blaster.GetRID());
396 BOOST_REQUIRE_EQUAL(true, rmt_blaster.CheckDone());
397 BOOST_REQUIRE_EQUAL(kEmptyStr, rmt_blaster.GetErrors());
398
399 const EBlastProgramType prog =
400 NetworkProgram2BlastProgramType(rmt_blaster.GetProgram(),
401 rmt_blaster.GetService());
402
403 TSeqLocInfoVector masks = rmt_blaster.GetMasks();
404 vector<string> warnings;
405 const CBlast4_get_search_results_reply::TMasks& network_masks =
406 CRemoteBlast::ConvertToRemoteMasks(masks, prog, &warnings);
407 BOOST_REQUIRE(!masks.empty());
408 BOOST_REQUIRE(!network_masks.empty());
409 const size_t kNumQueries = 2;
410 const size_t kNumNetMasks = 7;
411 BOOST_REQUIRE_EQUAL(kNumQueries, masks.size());
412 BOOST_REQUIRE_EQUAL(kNumNetMasks, network_masks.size());
413
414 TMaskedQueryRegions query1_masks = masks.front();
415 size_t index = 0;
416 typedef pair<TSeqRange, CSeqLocInfo::ETranslationFrame> TMask;
417 typedef vector<TMask> TQueryMasks;
418 TQueryMasks expected_masks;
419 expected_masks.push_back(make_pair(TSeqRange(114, 155),
420 CSeqLocInfo::eFrameMinus1));
421 BOOST_REQUIRE_EQUAL(expected_masks.size(), query1_masks.size());
422
423 ITERATE(TMaskedQueryRegions, seqlocinfo, query1_masks) {
424 BOOST_REQUIRE(seqlocinfo->NotEmpty());
425 const TMask& mask = expected_masks[index++];
426 BOOST_REQUIRE_EQUAL(mask.first.GetFrom(),
427 (*seqlocinfo)->GetInterval().GetFrom());
428 BOOST_REQUIRE_EQUAL(mask.first.GetTo(),
429 (*seqlocinfo)->GetInterval().GetTo());
430 BOOST_REQUIRE_EQUAL((int)mask.second, (*seqlocinfo)->GetFrame());
431 }
432 index = 0;
433 BOOST_REQUIRE_EQUAL(eBlast4_frame_type_minus1,
434 network_masks.front()->GetFrame());
435 CBlast4_mask::TLocations const* net_masks =
436 &network_masks.front()->GetLocations();
437 BOOST_REQUIRE_EQUAL((size_t)1, net_masks->size());
438 ITERATE(CPacked_seqint::Tdata, seqint,
439 net_masks->front()->GetPacked_int().Get()) {
440 const TMask& mask = expected_masks[index++];
441 BOOST_REQUIRE_EQUAL(mask.first.GetFrom(),
442 (*seqint)->GetFrom());
443 BOOST_REQUIRE_EQUAL(mask.first.GetTo(),
444 (*seqint)->GetTo());
445 }
446
447 TMaskedQueryRegions query2_masks = masks.back();
448
449 index = 0;
450 expected_masks.clear();
451 expected_masks.push_back(make_pair(TSeqRange(36, 66), CSeqLocInfo::eFramePlus1));
452 expected_masks.push_back(make_pair(TSeqRange(129, 240), CSeqLocInfo::eFramePlus1));
453 expected_masks.push_back(make_pair(TSeqRange(363, 393), CSeqLocInfo::eFramePlus1));
454 expected_masks.push_back(make_pair(TSeqRange(423, 471), CSeqLocInfo::eFramePlus1));
455 expected_masks.push_back(make_pair(TSeqRange(933, 972), CSeqLocInfo::eFramePlus1));
456 expected_masks.push_back(make_pair(TSeqRange(1092, 1137), CSeqLocInfo::eFramePlus1));
457 expected_masks.push_back(make_pair(TSeqRange(1158, 1206), CSeqLocInfo::eFramePlus1));
458 expected_masks.push_back(make_pair(TSeqRange(1224, 1260), CSeqLocInfo::eFramePlus1));
459 expected_masks.push_back(make_pair(TSeqRange(1665, 1734), CSeqLocInfo::eFramePlus1));
460 expected_masks.push_back(make_pair(TSeqRange(1842, 1899), CSeqLocInfo::eFramePlus1));
461 expected_masks.push_back(make_pair(TSeqRange(1971, 2010), CSeqLocInfo::eFramePlus1));
462 expected_masks.push_back(make_pair(TSeqRange(2058, 2226), CSeqLocInfo::eFramePlus1));
463 expected_masks.push_back(make_pair(TSeqRange(2256, 2334), CSeqLocInfo::eFramePlus1));
464 expected_masks.push_back(make_pair(TSeqRange(37, 64), CSeqLocInfo::eFramePlus2));
465 expected_masks.push_back(make_pair(TSeqRange(607, 652), CSeqLocInfo::eFramePlus2));
466 expected_masks.push_back(make_pair(TSeqRange(1153, 1192), CSeqLocInfo::eFramePlus2));
467 expected_masks.push_back(make_pair(TSeqRange(1702, 1744), CSeqLocInfo::eFramePlus2));
468 expected_masks.push_back(make_pair(TSeqRange(2014, 2092), CSeqLocInfo::eFramePlus2));
469 expected_masks.push_back(make_pair(TSeqRange(2104, 2194), CSeqLocInfo::eFramePlus2));
470 expected_masks.push_back(make_pair(TSeqRange(2251, 2278), CSeqLocInfo::eFramePlus2));
471 expected_masks.push_back(make_pair(TSeqRange(2305, 2335), CSeqLocInfo::eFramePlus2));
472 expected_masks.push_back(make_pair(TSeqRange(35, 56), CSeqLocInfo::eFramePlus3));
473 expected_masks.push_back(make_pair(TSeqRange(92, 173), CSeqLocInfo::eFramePlus3));
474 expected_masks.push_back(make_pair(TSeqRange(239, 275), CSeqLocInfo::eFramePlus3));
475 expected_masks.push_back(make_pair(TSeqRange(359, 398), CSeqLocInfo::eFramePlus3));
476 expected_masks.push_back(make_pair(TSeqRange(1679, 1733), CSeqLocInfo::eFramePlus3));
477 expected_masks.push_back(make_pair(TSeqRange(2072, 2135), CSeqLocInfo::eFramePlus3));
478 expected_masks.push_back(make_pair(TSeqRange(2159, 2294), CSeqLocInfo::eFramePlus3));
479 expected_masks.push_back(make_pair(TSeqRange(2309, 2333), CSeqLocInfo::eFramePlus3));
480 expected_masks.push_back(make_pair(TSeqRange(2311, 2337), CSeqLocInfo::eFrameMinus1));
481 expected_masks.push_back(make_pair(TSeqRange(2221, 2280), CSeqLocInfo::eFrameMinus1));
482 expected_masks.push_back(make_pair(TSeqRange(2155, 2202), CSeqLocInfo::eFrameMinus1));
483 expected_masks.push_back(make_pair(TSeqRange(2035, 2148), CSeqLocInfo::eFrameMinus1));
484 expected_masks.push_back(make_pair(TSeqRange(1816, 1857), CSeqLocInfo::eFrameMinus1));
485 expected_masks.push_back(make_pair(TSeqRange(1684, 1761), CSeqLocInfo::eFrameMinus1));
486 expected_masks.push_back(make_pair(TSeqRange(1348, 1389), CSeqLocInfo::eFrameMinus1));
487 expected_masks.push_back(make_pair(TSeqRange(1249, 1287), CSeqLocInfo::eFrameMinus1));
488 expected_masks.push_back(make_pair(TSeqRange(982, 1014), CSeqLocInfo::eFrameMinus1));
489 expected_masks.push_back(make_pair(TSeqRange(613, 654), CSeqLocInfo::eFrameMinus1));
490 expected_masks.push_back(make_pair(TSeqRange(514, 552), CSeqLocInfo::eFrameMinus1));
491 expected_masks.push_back(make_pair(TSeqRange(256, 279), CSeqLocInfo::eFrameMinus1));
492 expected_masks.push_back(make_pair(TSeqRange(121, 174), CSeqLocInfo::eFrameMinus1));
493 expected_masks.push_back(make_pair(TSeqRange(22, 84), CSeqLocInfo::eFrameMinus1));
494 expected_masks.push_back(make_pair(TSeqRange(2274, 2336), CSeqLocInfo::eFrameMinus2));
495 expected_masks.push_back(make_pair(TSeqRange(2004, 2261), CSeqLocInfo::eFrameMinus2));
496 expected_masks.push_back(make_pair(TSeqRange(222, 242), CSeqLocInfo::eFrameMinus2));
497 expected_masks.push_back(make_pair(TSeqRange(183, 203), CSeqLocInfo::eFrameMinus2));
498 expected_masks.push_back(make_pair(TSeqRange(132, 164), CSeqLocInfo::eFrameMinus2));
499 expected_masks.push_back(make_pair(TSeqRange(30, 83), CSeqLocInfo::eFrameMinus2));
500 expected_masks.push_back(make_pair(TSeqRange(2255, 2335), CSeqLocInfo::eFrameMinus3));
501 expected_masks.push_back(make_pair(TSeqRange(2192, 2239), CSeqLocInfo::eFrameMinus3));
502 expected_masks.push_back(make_pair(TSeqRange(2060, 2185), CSeqLocInfo::eFrameMinus3));
503 expected_masks.push_back(make_pair(TSeqRange(1964, 2011), CSeqLocInfo::eFrameMinus3));
504 expected_masks.push_back(make_pair(TSeqRange(1850, 1888), CSeqLocInfo::eFrameMinus3));
505 expected_masks.push_back(make_pair(TSeqRange(1673, 1741), CSeqLocInfo::eFrameMinus3));
506 expected_masks.push_back(make_pair(TSeqRange(1226, 1261), CSeqLocInfo::eFrameMinus3));
507 expected_masks.push_back(make_pair(TSeqRange(1166, 1204), CSeqLocInfo::eFrameMinus3));
508 expected_masks.push_back(make_pair(TSeqRange(1097, 1135), CSeqLocInfo::eFrameMinus3));
509 expected_masks.push_back(make_pair(TSeqRange(431, 469), CSeqLocInfo::eFrameMinus3));
510 expected_masks.push_back(make_pair(TSeqRange(365, 394), CSeqLocInfo::eFrameMinus3));
511 expected_masks.push_back(make_pair(TSeqRange(242, 289), CSeqLocInfo::eFrameMinus3));
512 expected_masks.push_back(make_pair(TSeqRange(131, 208), CSeqLocInfo::eFrameMinus3));
513 expected_masks.push_back(make_pair(TSeqRange(38, 70), CSeqLocInfo::eFrameMinus3));
514
515 BOOST_REQUIRE_EQUAL(expected_masks.size(), query2_masks.size());
516 ITERATE(TMaskedQueryRegions, seqlocinfo, masks.back()) {
517 BOOST_REQUIRE(seqlocinfo->NotEmpty());
518 const TMask& mask = expected_masks[index++];
519 BOOST_REQUIRE_EQUAL(mask.first.GetFrom(),
520 (*seqlocinfo)->GetInterval().GetFrom());
521 BOOST_REQUIRE_EQUAL(mask.first.GetTo(),
522 (*seqlocinfo)->GetInterval().GetTo());
523 BOOST_REQUIRE_EQUAL((int)mask.second, (*seqlocinfo)->GetFrame());
524 }
525 }
526
527 // This tests some of the functionality in get_filter_options.[hc]pp
BOOST_AUTO_TEST_CASE(SetFilteringOptions)528 BOOST_AUTO_TEST_CASE(SetFilteringOptions) {
529 CBlastProteinOptionsHandle prot_opts(CBlastOptions::eRemote);
530 prot_opts.SetSegFiltering(false);
531 {
532 TAutoCharPtr tmp = prot_opts.GetFilterString();/* NCBI_FAKE_WARNING */
533 BOOST_REQUIRE_EQUAL(string("F"), string(tmp.get()));
534 }
535
536 CRemoteBlast rmt_blaster(&prot_opts);
537 rmt_blaster.SetDatabase("nr");
538
539 CRemoteBlast::TSeqLocList query_seqloc(1);
540 // use gi with low complexity regions
541 CRef<CSeq_id> id(new CSeq_id("ABI82289.1"));
542 query_seqloc.front().Reset(new CSeq_loc);
543 query_seqloc.front()->SetWhole(*id);
544 rmt_blaster.SetQueries(query_seqloc);
545
546 BOOST_REQUIRE_EQUAL(true, rmt_blaster.Submit());
547
548 TSeqLocInfoVector masks = rmt_blaster.GetMasks();
549 BOOST_REQUIRE(masks.size() == 1);
550 BOOST_REQUIRE(masks.front().empty());
551 }
552
// Setting an empty database name must raise CBlastException.
BOOST_AUTO_TEST_CASE(SubmitNullDatabase) {
    CBlastProteinOptionsHandle protein_handle(CBlastOptions::eRemote);
    CRemoteBlast remote_search(&protein_handle);

    BOOST_REQUIRE_THROW(remote_search.SetDatabase(""), CBlastException);
}
559
// Passing a default-constructed (unset) CRef<CBioseq_set> as the query set
// must raise CBlastException.
BOOST_AUTO_TEST_CASE(SubmitNullQueries) {
    CBlastProteinOptionsHandle protein_handle(CBlastOptions::eRemote);
    CRemoteBlast remote_search(&protein_handle);

    CRef<CBioseq_set> empty_query_set;
    BOOST_REQUIRE_THROW(remote_search.SetQueries(empty_query_set),
                        CBlastException);
}
568
// Retrieval of a stored Primer-BLAST RID; the test case is registered with a
// timeout value of 45.
BOOST_AUTO_TEST_CASE_TIMEOUT(CheckPrimerBlastRID, 45);
BOOST_AUTO_TEST_CASE(CheckPrimerBlastRID) {
    // Permanent RID provided by Jian
    const string kRid("TNNF2YHZ016");
    CRemoteBlast remote_search(kRid);

    BOOST_REQUIRE_EQUAL(kRid, remote_search.GetRID());
    BOOST_REQUIRE_EQUAL(true, remote_search.CheckDone());
    BOOST_REQUIRE_EQUAL(kEmptyStr, remote_search.GetErrors());
    BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Done,
                        remote_search.CheckStatus());

    // The search must come back with an alignment set
    CRef<CSeq_align_set> alignments = remote_search.GetAlignments();
    BOOST_REQUIRE(alignments.NotEmpty());
}
584
// Retrieval of a stored RID: the search must be done, error-free, and must
// return an alignment set.
BOOST_AUTO_TEST_CASE(CheckRID) {
    // Permanent RID provided by Yan
    const string kRid("5VPB2NH1014");
    CRemoteBlast remote_search(kRid);

    BOOST_REQUIRE_EQUAL(kRid, remote_search.GetRID());
    BOOST_REQUIRE_EQUAL(true, remote_search.CheckDone());
    BOOST_REQUIRE_EQUAL(kEmptyStr, remote_search.GetErrors());
    BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Done,
                        remote_search.CheckStatus());

    CRef<CSeq_align_set> alignments = remote_search.GetAlignments();
    BOOST_REQUIRE(alignments.NotEmpty());
}
598
// Retrieval of a stored RID, checking both the merged alignment set and the
// per-query alignment vector.
BOOST_AUTO_TEST_CASE(CheckColoRID) {
    // Colo RID that is preserved permanently
    const string kRid("953V6EF901N");
    CRemoteBlast remote_search(kRid);
    //remote_search.SetVerbose();

    BOOST_REQUIRE_EQUAL(kRid, remote_search.GetRID());
    BOOST_REQUIRE_EQUAL(true, remote_search.CheckDone());
    BOOST_REQUIRE_EQUAL(kEmptyStr, remote_search.GetErrors());

    CRef<CSeq_align_set> alignments = remote_search.GetAlignments();
    BOOST_REQUIRE(alignments.NotEmpty());

    // At least one per-query alignment set, and the first one must be set
    TSeqAlignVector per_query_alignments = remote_search.GetSeqAlignSets();
    BOOST_REQUIRE(!per_query_alignments.empty());
    BOOST_REQUIRE(per_query_alignments[0].NotEmpty());
}
616
// Retrieval of a stored RID whose search failed: no alignments, no warnings,
// an error text mentioning the CPU limit, and a Failed status are expected.
BOOST_AUTO_TEST_CASE(GetErrorsFromFailedRID) {
    // Uncomment to redirect to test system
    //CAutoEnvironmentVariable tmp_env("BLAST4_CONN_SERVICE_NAME", "blast4_test");
    const string kRid("HU0G064A013");
    CRemoteBlast remote_search(kRid);
    //remote_search.SetVerbose();

    // Alignments are requested first; none must be returned
    CRef<CSeq_align_set> alignments = remote_search.GetAlignments();
    BOOST_REQUIRE(alignments.Empty());

    BOOST_REQUIRE_EQUAL(kRid, remote_search.GetRID());
    BOOST_REQUIRE_EQUAL(true, remote_search.CheckDone());
    BOOST_REQUIRE_EQUAL(kEmptyStr, remote_search.GetWarnings());

    // The error text is matched case-insensitively
    const string kExpectedError("CPU usage limit was exceeded");
    BOOST_REQUIRE(!remote_search.GetErrors().empty());
    BOOST_REQUIRE(NStr::FindNoCase(remote_search.GetErrors(),
                                   kExpectedError) != NPOS);
    BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Failed,
                        remote_search.CheckStatus());
}
637
638 // This tests an expired/invalid RID
BOOST_AUTO_TEST_CASE(RetrieveInvalidRID)639 BOOST_AUTO_TEST_CASE(RetrieveInvalidRID) {
640 // Uncomment to redirect to test system
641 //CAutoEnvironmentVariable tmp_env("BLAST4_CONN_SERVICE_NAME", "blast4_test");
642 const string non_existent_rid("1068741992-11111-263425.BLASTQ3");
643 CRemoteBlast rmt_blaster(non_existent_rid);
644 //rmt_blaster.SetVerbose();
645
646 BOOST_REQUIRE_EQUAL(non_existent_rid, rmt_blaster.GetRID());
647 // make sure error is something like: RID not found
648 BOOST_REQUIRE_EQUAL(false, rmt_blaster.CheckDone());
649 //cerr << "Errors: '" << rmt_blaster.GetErrors() << "'" << endl;
650 BOOST_REQUIRE(rmt_blaster.GetErrors() != kEmptyStr);
651 BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Unknown,
652 rmt_blaster.CheckStatus());
653 }
654
// Retrieval of a stored RID that ended with an error: the error text must
// contain the SIGXCPU message and the status must be Failed.
BOOST_AUTO_TEST_CASE(RetrieveRIDWithError) {
    // Uncomment to redirect to test system
    //CAutoEnvironmentVariable tmp_env("BLAST4_CONN_SERVICE_NAME", "blast4_test");
    const string kRid("HTXY8N45013");
    CRemoteBlast remote_search(kRid);
    //remote_search.SetVerbose();

    BOOST_REQUIRE_EQUAL(kRid, remote_search.GetRID());
    BOOST_REQUIRE_EQUAL(true, remote_search.CheckDone());

    // Case-sensitive substring match on the reported error text
    const SIZE_TYPE error_pos =
        NStr::Find(remote_search.GetErrors(),
                   "CPU usage limit was exceeded, resulting in SIGXCPU");
    BOOST_REQUIRE_MESSAGE(error_pos != NPOS, "RID=" << kRid);
    BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Failed,
                        remote_search.CheckStatus());
}
669
// Retrieval of a stored RID that ended with an error: the error text must
// contain the CPU-limit message and the status must be Failed.
BOOST_AUTO_TEST_CASE(RetrieveRIDWithSIGXCPU) {
    // Uncomment to redirect to test system
    //CAutoEnvironmentVariable tmp_env("BLAST4_CONN_SERVICE_NAME", "blast4_test");
    const string kRid("HTSZX47V013");
    CRemoteBlast remote_search(kRid);
    //remote_search.SetVerbose();

    BOOST_REQUIRE_EQUAL(kRid, remote_search.GetRID());
    BOOST_REQUIRE_EQUAL(true, remote_search.CheckDone());
    //cerr << "Errors: '" << remote_search.GetErrors() << "'" << endl;

    // Case-sensitive substring match on the reported error text
    const SIZE_TYPE error_pos =
        NStr::Find(remote_search.GetErrors(),
                   "Error: CPU usage limit was exceeded");
    BOOST_REQUIRE_MESSAGE(error_pos != NPOS, "RID=" << kRid);
    BOOST_REQUIRE_EQUAL(CRemoteBlast::eStatus_Failed,
                        remote_search.CheckStatus());
}
686
687
688 // BOOST_AUTO_TEST_CASE(SubmitNonExistentDatabase) {
689 // CBlastProteinOptionsHandle prot_opts(CBlastOptions::eRemote);
690 //
691 // CRemoteBlast rmt_blaster(& prot_opts);
692 // rmt_blaster.SetDatabase("non_existent_database");
693 // BOOST_REQUIRE_EQUAL(true, rmt_blaster.Submit());
694 // }
695
// Exercises the word-size setter/getter on a remote nucleotide options
// handle.  If the getter throws CBlastException, its message must be exactly
// the "not available" text; a getter that does not throw is accepted too.
BOOST_AUTO_TEST_CASE(CheckRemoteNuclOptionsHandle) {
    CBlastNucleotideOptionsHandle nucl_handle(CBlastOptions::eRemote);
    // These should not produce errors, although some of them would not have the desired
    // effect either, because there are no remote name-value pairs corresponding to these
    // options.
    nucl_handle.SetWordSize(23);
    try {
        nucl_handle.GetWordSize();
    } catch (const CBlastException& blast_error) {
        const string kExpectedMsg("Error: GetWordSize() not available.");
        BOOST_REQUIRE(kExpectedMsg == blast_error.GetMsg());
    }
}
709
// Checks the Blast4 algorithm parameters produced by a remote discontiguous
// megablast options handle after the word size has been set to 12:
// WordSize, MBTemplateLength, MBTemplateType and WindowSize.
BOOST_AUTO_TEST_CASE(CheckRemoteDiscNuclOptionsHandle) {
    CDiscNucleotideOptionsHandle nucl_opts(CBlastOptions::eRemote);
    CBlastOptions& opts = nucl_opts.SetOptions();
    const int kWordSize = 12;
    nucl_opts.SetWordSize(kWordSize);

    typedef ncbi::objects::CBlast4_parameters TBlast4Opts;
    TBlast4Opts* blast4_opts = opts.GetBlast4AlgoOpts();
    // The parameter set is handed back as a raw pointer; fail the test
    // cleanly instead of dereferencing a null pointer below.
    BOOST_REQUIRE(blast4_opts != NULL);

    BOOST_REQUIRE_EQUAL(kWordSize,
        blast4_opts->GetParamByName("WordSize")->GetValue().GetInteger());
    BOOST_REQUIRE_EQUAL(18,
        blast4_opts->GetParamByName("MBTemplateLength")->GetValue().GetInteger());
    BOOST_REQUIRE_EQUAL(0,
        blast4_opts->GetParamByName("MBTemplateType")->GetValue().GetInteger());
    BOOST_REQUIRE_EQUAL(BLAST_WINDOW_SIZE_DISC,
        blast4_opts->GetParamByName("WindowSize")->GetValue().GetInteger());

    /*
    ITERATE(ncbi::objects::CBlast4_parameters_Base::Tdata,
            it, blast4_opts->Set())
    {
        cerr << '\n';
        cerr << (*it)->GetName() << '\n';
    }
    */
}
737
// Retrieves a stored multi-query RID and checks that the label of the first
// Seq-id of the first alignment in each per-query set matches the expected
// query, in order.
BOOST_AUTO_TEST_CASE(RetrieveMultipleQueryResults)
{
    // A preserved query of 129295, 129296, and 129297.

    string rid("1112991234-9646-26841459756.BLASTQ3");
    CRemoteBlast rb(rid);

    TSeqAlignVector sav(rb.GetSeqAlignSets());

    BOOST_REQUIRE_EQUAL(3, (int)sav.size());

    vector<string> ids;
    for (size_t i = 0; i < sav.size(); i++) {
        // front() on an empty alignment list is undefined, so require a
        // populated set before looking at its first alignment.
        BOOST_REQUIRE(sav[i].NotEmpty());
        BOOST_REQUIRE(!sav[i]->Get().empty());

        string label;
        sav[i]->Get().front()->GetSeq_id(0).GetLabel(&label);
        ids.push_back(label);
    }

    BOOST_REQUIRE_EQUAL(string("gi|129295"), ids[0]);
    BOOST_REQUIRE_EQUAL(string("gi|129296"), ids[1]);
    BOOST_REQUIRE_EQUAL(string("gi|129297"), ids[2]);
}
760
// Retrieves the alignments stored under a preserved RID whose queries use
// mixed identifier types, and checks the first Seq-id label of each result.
BOOST_AUTO_TEST_CASE(RetrieveQuerySet)
{
    // Another preserved query, with mixed ID types.

    const string rid("BWX50RMX016"); // GIs 104501, 129295, and FASTA for 400260645 vs swissprot

    CRemoteBlast rb(rid);

    TSeqAlignVector sav(rb.GetSeqAlignSets());

    BOOST_REQUIRE_EQUAL(3, (int)sav.size());

    vector<string> ids;
    for (size_t i = 0; i < sav.size(); i++) {
        // Calling front() on an empty container is undefined behaviour, so
        // fail the test cleanly if a query came back without alignments.
        BOOST_REQUIRE(sav[i].NotEmpty());
        BOOST_REQUIRE( !sav[i]->Get().empty() );

        string label;
        sav[i]->Get().front()->GetSeq_id(0).GetLabel(&label);
        ids.push_back(label);
    }

    BOOST_REQUIRE_EQUAL(string("gi|104501"), ids[0]);
    BOOST_REQUIRE_EQUAL(string("gi|129295"), ids[1]);
    BOOST_REQUIRE_EQUAL(string("lcl|47622"), ids[2]);
}
784
// Checks the request metadata (database, program, service, creator and
// query representation) that CRemoteBlast reports for a preserved RID.
BOOST_AUTO_TEST_CASE(GetRequestInfo)
{
    const string kRid("1138040498-4204-115424753375.BLASTQ4");
    CRemoteBlast remote(kRid);

    const string kExpectedDbName("nr");

    // Database name and residue type.
    CRef<CBlast4_database> database = remote.GetDatabases();
    BOOST_REQUIRE_EQUAL(database->GetName(), kExpectedDbName);
    BOOST_REQUIRE_EQUAL(database->GetType(), eBlast4_residue_type_nucleotide);

    // Program, service and the creator recorded for the request.
    BOOST_REQUIRE_EQUAL(remote.GetProgram(), string("blastn"));
    BOOST_REQUIRE_EQUAL(remote.GetService(), string("megablast"));
    BOOST_REQUIRE_EQUAL(remote.GetCreatedBy(), string("newblast"));

    // The queries are expected to be stored as a list of Seq-locs.
    CRef<CBlast4_queries> stored_queries = remote.GetQueries();
    BOOST_REQUIRE_EQUAL(stored_queries->Which(),
                        CBlast4_queries::e_Seq_loc_list);
}
807
// Fetches the sequence data for the first query of a preserved RID through
// CBlastServices::GetSequences() and checks the length of the result.
BOOST_AUTO_TEST_CASE(FetchQuerySequence)
{
    // Uncomment to redirect to test system
    //CAutoEnvironmentVariable autoenv("BLAST4_CONN_SERVICE_NAME", "blast4_test");

    // This RID refers to a search by Seq-loc - this tests the
    // ability of the CRemoteBlast class to fetch the query
    // sequence (data) associated with the described sequence,
    // and checks that the length is correct.

    string rid("BWY0XPAV014"); // P38398.2 and 129295 vs swissprot

    CRemoteBlast rb(rid);

    // Get queries - expected to be a list of Seq-locs (verified below).

    CRef<CBlast4_queries> queries = rb.GetQueries();

    // Get databases

    CRef<CBlast4_database> dbs = rb.GetDatabases();

    // And database type

    char db_type;
    string db_name;

    if (dbs->GetType() == eBlast4_residue_type_nucleotide) {
        db_type = 'n';
        db_name = "nucl_dbs";
    } else {
        db_type = 'p';
        db_name = "prot_dbs";
    }

    // Get first query Seq-loc.
    // SetSeq_loc_list() is a mutating accessor: on a choice object holding
    // a different variant it would re-select the variant instead of failing.
    // Verify the variant (and that the list is non-empty) before using it.

    BOOST_REQUIRE(queries.NotEmpty());
    BOOST_REQUIRE(queries->IsSeq_loc_list());
    BOOST_REQUIRE( !queries->GetSeq_loc_list().empty() );

    CRef<CSeq_loc> query1(queries->SetSeq_loc_list().front());

    // The Seq-loc must be a "whole" Seq-loc; SetWhole() would otherwise
    // replace its contents rather than report the mismatch.

    BOOST_REQUIRE(query1.NotEmpty());
    BOOST_REQUIRE(query1->IsWhole());

    CRef<CSeq_id> seqid(&query1->SetWhole());

    CBlastServices::TSeqIdVector getseq_queries;
    getseq_queries.push_back(seqid);

    // Now fetch the sequence.

    string warnings, errors;
    CBlastServices::TBioseqVector results;

    CBlastServices::GetSequences(getseq_queries,
                                 db_name,
                                 db_type,
                                 results,   // out
                                 errors,    // out
                                 warnings); // out

    BOOST_REQUIRE(results.size());
    BOOST_REQUIRE(results[0].NotEmpty());
    BOOST_REQUIRE(results[0]->CanGetInst());

    int length = results[0]->GetInst().GetLength();
    BOOST_REQUIRE_EQUAL(length, 1863);
    //length = results[1]->GetInst().GetLength();
    //BOOST_REQUIRE_EQUAL(length, 232);
}
875
// Requests GI 129295 from database "nr" with sequence type 'n' and expects
// GetSequences() to return no Bioseq and to report the failure (including
// the GI) through the error string, with no warnings.
BOOST_AUTO_TEST_CASE(FetchQuerySequence_NotFound)
{
    // Uncomment to redirect to test system
    //CAutoEnvironmentVariable autoenv("BLAST4_CONN_SERVICE_NAME", "blast4_test");
    const int kGi(129295);

    CBlastServices::TSeqIdVector ids_to_fetch;
    ids_to_fetch.push_back(CRef<CSeq_id>(new CSeq_id(CSeq_id::e_Gi, kGi)));

    CBlastServices::TBioseqVector fetched;
    string warnings, errors;

    CBlastServices::GetSequences(ids_to_fetch, "nr", 'n',
                                 fetched,    // out
                                 errors,     // out
                                 warnings);  // out

    // Nothing is returned ...
    BOOST_REQUIRE(fetched.empty());

    // ... and the error text names both the failure and the GI.
    BOOST_REQUIRE( !errors.empty() );
    BOOST_REQUIRE( errors.find("Failed to fetch sequence") != NPOS );
    BOOST_REQUIRE( errors.find(NStr::IntToString(kGi)) != NPOS );

    BOOST_REQUIRE(warnings.empty());
}
900
// Reconstructs the search options from several preserved RIDs and checks
// selected option values as well as the program/service pair.
BOOST_AUTO_TEST_CASE(SearchOptionsFromRID)
{
    // --- Nucleotide search ---
    {
        const string nucl_rid("5VPRD45W015");
        CRemoteBlast remote(nucl_rid);

        CRef<CBlastOptionsHandle> handle = remote.GetSearchOptions();
        BOOST_REQUIRE(handle.NotEmpty());

        BOOST_REQUIRE_EQUAL((Int8) 0, (Int8) handle->GetDbLength());
        BOOST_REQUIRE_EQUAL((Int8) 0, (Int8) handle->GetEffectiveSearchSpace());
        BOOST_REQUIRE_EQUAL(10.0, handle->GetEvalueThreshold());
        {
            // Scoped so the returned filter string is released right away.
            TAutoCharPtr filter = handle->GetFilterString();/* NCBI_FAKE_WARNING */
            BOOST_REQUIRE_EQUAL(string("L;R -d repeat/repeat_9606;m;"),
                                string(filter.get()));
        }
        BOOST_REQUIRE_EQUAL(100, handle->GetHitlistSize());
        BOOST_REQUIRE_EQUAL(0.0, handle->GetPercentIdentity());
        BOOST_REQUIRE_EQUAL(true, handle->GetGappedMode());
        BOOST_REQUIRE_EQUAL(0, handle->GetWindowSize());
    }

    // --- Protein search ---
    {
        const string prot_rid("BD4HUK3X014");
        CRemoteBlast remote(prot_rid);

        CRef<CBlastOptionsHandle> handle = remote.GetSearchOptions();
        BOOST_REQUIRE(handle.NotEmpty());

        BOOST_REQUIRE_EQUAL((Int8) 0, (Int8) handle->GetDbLength());
        BOOST_REQUIRE_EQUAL((Int8) 0, (Int8) handle->GetEffectiveSearchSpace());
        BOOST_REQUIRE_EQUAL(13.0, handle->GetEvalueThreshold());
        {
            TAutoCharPtr filter = handle->GetFilterString();/* NCBI_FAKE_WARNING */
            BOOST_REQUIRE_EQUAL(string("L;"), string(filter.get()));
        }
        BOOST_REQUIRE_EQUAL(500, handle->GetHitlistSize());
        BOOST_REQUIRE_EQUAL(0.0, handle->GetPercentIdentity());
        BOOST_REQUIRE_EQUAL(true, handle->GetGappedMode());
        BOOST_REQUIRE_EQUAL(40, handle->GetWindowSize());
    }

    // --- One RID per program/service combination ---
    {
        typedef pair<string, string> TRidAndKind;   // (RID, "program/service")
        typedef vector<TRidAndKind>  TRidList;
        TRidList rids;

        x_PushPairVec(rids, "1126029035-8294-165438177459.BLASTQ3", "blastp/plain");
        x_PushPairVec(rids, "1125682249-11093-192188840277.BLASTQ3", "blastn/plain");
        x_PushPairVec(rids, "1125679472-29663-68767107779.BLASTQ3", "tblastn/plain");
        x_PushPairVec(rids, "1125682851-24545-80609495337.BLASTQ3", "tblastx/plain");
        x_PushPairVec(rids, "1125682308-9604-184897235466.BLASTQ3", "blastx/plain");
        x_PushPairVec(rids, "BD4GD2RE016", "blastn/megablast");

        ITERATE(TRidList, entry, rids) {
            CRemoteBlast remote(entry->first);
            CRef<CBlastOptionsHandle> handle = remote.GetSearchOptions();

            BOOST_REQUIRE(handle.NotEmpty());

            string program_and_service =
                remote.GetProgram() + "/" + remote.GetService();
            BOOST_REQUIRE_EQUAL(program_and_service, entry->second);
        }
    }
}
970
// Counts the hits stored under two preserved RIDs: one expected to yield
// 22 hits and one expected to yield none.
BOOST_AUTO_TEST_CASE(CheckLongLifeHits)
{
    const string kRidWithHits("1154969303-04718-55159010680.BLASTQ4");
    const string kRidWithoutHits("1154969303-04728-192386478174.BLASTQ4");

    BOOST_REQUIRE_EQUAL(22, x_CountHits(kRidWithHits));
    BOOST_REQUIRE_EQUAL(0, x_CountHits(kRidWithoutHits));
}
979
// Each of these short-format RIDs is expected to yield the same hit count.
BOOST_AUTO_TEST_CASE(CheckShortRIDs)
{
    const int kExpectedHits = 102;

    BOOST_REQUIRE_EQUAL(kExpectedHits, x_CountHits("BD4HYZYB014"));
    BOOST_REQUIRE_EQUAL(kExpectedHits, x_CountHits("15ASW73R015"));
    BOOST_REQUIRE_EQUAL(kExpectedHits, x_CountHits("15AU5834013"));
    BOOST_REQUIRE_EQUAL(kExpectedHits, x_CountHits("BD4ENAS7014"));
}
987
// Sets the word size repeatedly on a remote protein options handle and
// checks that the Blast4 algorithm options end up with exactly one
// "WordSize" parameter carrying the last value assigned.
BOOST_AUTO_TEST_CASE(CheckDuplicateOptions)
{
    typedef ncbi::objects::CBlast4_parameter TParam;
    typedef list< CRef<TParam> >             TParamList;

    CRef<CBlastProteinOptionsHandle> prot_handle
        (new CBlastProteinOptionsHandle(CBlastOptions::eRemote));

    // Assign 10, 11, 12, 13 and finally 14.
    for (int word_size = 10; word_size <= 14; ++word_size) {
        prot_handle->SetWordSize(word_size);
    }

    ncbi::objects::CBlast4_parameters* algo_params =
        prot_handle->SetOptions().GetBlast4AlgoOpts();

    int word_size_entries = 0;   // number of "WordSize" parameters seen
    int last_word_size = 0;      // value of the most recent one

    ITERATE(TParamList, it, algo_params->Set()) {
        const TParam& param = **it;
        if (param.GetName() != "WordSize") {
            continue;
        }

        BOOST_REQUIRE(param.CanGetValue());
        BOOST_REQUIRE(param.GetValue().IsInteger());

        ++word_size_entries;
        last_word_size = param.GetValue().GetInteger();
    }

    BOOST_REQUIRE_EQUAL(1, word_size_entries);
    BOOST_REQUIRE_EQUAL(14, last_word_size);
}
1022
1023 // Test that when a query with a range restriction is NOT provided, no
1024 // RequiredEnd and RequiredStart fields are sent over the network
BOOST_AUTO_TEST_CASE(GetSearchStrategy_FullQuery)
{
    // Query: GI 555 with no range restriction.
    CRef<CSeq_id> query_id(new CSeq_id(CSeq_id::e_Gi, 555));
    auto_ptr<blast::SSeqLoc> query_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(*query_id));
    TSeqLocVector queries(1, *query_loc);
    CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));

    const string kDbName("nt");
    const CSearchDatabase target_db(kDbName,
                                    CSearchDatabase::eBlastDbIsNucleotide);

    CRef<CBlastOptionsHandle> opts
        (CBlastOptionsFactory::Create(eBlastn, CBlastOptions::eRemote));

    CRemoteBlast rmt_blast(query_factory, opts, target_db);
    CRef<CBlast4_request> strategy = rmt_blast.GetSearchStrategy();
    BOOST_REQUIRE(strategy.NotEmpty());

    const CBlast4_request_body& body = strategy->GetBody();
    BOOST_REQUIRE(body.IsQueue_search());
    const CBlast4_queue_search_request& qsr = body.GetQueue_search();

    // Names of the parameters that must NOT appear anywhere in the request.
    vector<string> range_params;
    range_params.push_back(CBlast4Field::GetName(eBlastOpt_RequiredStart));
    range_params.push_back(CBlast4Field::GetName(eBlastOpt_RequiredEnd));

    bool range_param_seen = false;

    // Look through the program options first ...
    if (qsr.CanGetProgram_options()) {
        const CBlast4_parameters& prog_options = qsr.GetProgram_options();
        ITERATE(vector<string>, name, range_params) {
            if (prog_options.GetParamByName(*name).NotEmpty()) {
                range_param_seen = true;
                break;
            }
        }
    }
    BOOST_REQUIRE( !range_param_seen );

    // ... and then the algorithm options, in case the parameters ever get
    // misplaced there.
    if (qsr.CanGetAlgorithm_options()) {
        const CBlast4_parameters& algo_options = qsr.GetAlgorithm_options();
        ITERATE(vector<string>, name, range_params) {
            if (algo_options.GetParamByName(*name).NotEmpty()) {
                range_param_seen = true;
                break;
            }
        }
    }
    BOOST_REQUIRE( !range_param_seen );

    // As a bonus, confirm the target database recorded in the request.
    BOOST_REQUIRE(qsr.CanGetSubject());
    BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
}
1082
1083 // Test that when a query with a range restriction is provided, the appropriate
1084 // RequiredEnd and RequiredStart fields are sent over the network
BOOST_AUTO_TEST_CASE(GetSearchStrategy_QueryWithRange)
{
    // Query: GI 555 restricted to the range [1, 200].
    CRef<CSeq_id> query_id(new CSeq_id(CSeq_id::e_Gi, 555));
    TSeqRange query_range(1,200);
    auto_ptr<blast::SSeqLoc> query_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(*query_id, query_range));
    TSeqLocVector queries(1, *query_loc);
    CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));

    const string kDbName("nt");
    const CSearchDatabase target_db(kDbName,
                                    CSearchDatabase::eBlastDbIsNucleotide);

    CRef<CBlastOptionsHandle> opts
        (CBlastOptionsFactory::Create(eBlastn, CBlastOptions::eRemote));

    CRemoteBlast rmt_blast(query_factory, opts, target_db);
    CRef<CBlast4_request> strategy = rmt_blast.GetSearchStrategy();
    BOOST_REQUIRE(strategy.NotEmpty());

    const CBlast4_request_body& body = strategy->GetBody();
    BOOST_REQUIRE(body.IsQueue_search());
    const CBlast4_queue_search_request& qsr = body.GetQueue_search();

    // Names of the two range parameters being looked for.
    const string kStartName(CBlast4Field::GetName(eBlastOpt_RequiredStart));
    const string kEndName(CBlast4Field::GetName(eBlastOpt_RequiredEnd));
    vector<string> range_params;
    range_params.push_back(kStartName);
    range_params.push_back(kEndName);

    // The program options must carry the range, with matching endpoints.
    bool range_param_seen = false;
    if (qsr.CanGetProgram_options()) {
        const CBlast4_parameters& prog_options = qsr.GetProgram_options();
        ITERATE(vector<string>, name, range_params) {
            CRef<CBlast4_parameter> param = prog_options.GetParamByName(*name);
            if (param.Empty()) {
                continue;
            }

            BOOST_REQUIRE(param->CanGetValue());
            range_param_seen = true;

            if (*name == kStartName) {
                BOOST_REQUIRE_EQUAL((int)query_range.GetFrom(),
                                    (int)param->GetValue().GetInteger());
            }
            if (*name == kEndName) {
                BOOST_REQUIRE_EQUAL((int)query_range.GetTo(),
                                    (int)param->GetValue().GetInteger());
            }
        }
    }
    BOOST_REQUIRE(range_param_seen);

    // The algorithm options, on the other hand, must NOT mention the range.
    range_param_seen = false;
    if (qsr.CanGetAlgorithm_options()) {
        const CBlast4_parameters& algo_options = qsr.GetAlgorithm_options();
        ITERATE(vector<string>, name, range_params) {
            if (algo_options.GetParamByName(*name).NotEmpty()) {
                range_param_seen = true;
                break;
            }
        }
    }
    BOOST_REQUIRE( !range_param_seen );

    // As a bonus, confirm the target database recorded in the request.
    BOOST_REQUIRE(qsr.CanGetSubject());
    BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
}
1153
1154 // Test that when no identifier is provided for the sequence data, a Bioseq
1155 // should be submitted
BOOST_AUTO_TEST_CASE(GetSearchStrategy_QueryWithLocalIds)
{
    // Read the query Seq-entry from the test data file.  Fail right away,
    // with a clear assertion, if the file cannot be opened.
    CSeq_entry seq_entry;
    ifstream in("data/seq_entry_lcl_id.asn");
    BOOST_REQUIRE(in);
    in >> MSerial_AsnText >> seq_entry;
    in.close();

    // GetFirstId() returns a pointer; make sure there is an id to
    // dereference before building the query location from it.
    const CSeq_id* first_id = seq_entry.GetSeq().GetFirstId();
    BOOST_REQUIRE(first_id != NULL);
    CSeq_id& id = const_cast<CSeq_id&>(*first_id);

    CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));
    scope->AddTopLevelSeqEntry(seq_entry);
    CRef<CSeq_loc> sl(new CSeq_loc(id, (TSeqPos)0, (TSeqPos)11));
    TSeqLocVector query_loc(1, SSeqLoc(sl, scope));
    CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query_loc));
    const string kDbName("nt");
    const CSearchDatabase target_db(kDbName,
                                    CSearchDatabase::eBlastDbIsNucleotide);

    CRef<CBlastOptionsHandle> opts
        (CBlastOptionsFactory::Create(eBlastn, CBlastOptions::eRemote));

    CRemoteBlast rmt_blast(qf, opts, target_db);
    CRef<CBlast4_request> ss = rmt_blast.GetSearchStrategy();
    BOOST_REQUIRE(ss.NotEmpty());

    const CBlast4_request_body& body = ss->GetBody();
    BOOST_REQUIRE(body.IsQueue_search());
    const CBlast4_queue_search_request& qsr = body.GetQueue_search();
    BOOST_REQUIRE(qsr.CanGetQueries());

    // The queries must have been submitted as a Bioseq-set, not as a PSSM
    // or a list of Seq-locs.
    const CBlast4_queries& b4_queries = qsr.GetQueries();
    BOOST_REQUIRE_EQUAL(query_loc.size(), b4_queries.GetNumQueries());
    BOOST_REQUIRE(b4_queries.IsBioseq_set());
    BOOST_REQUIRE( !b4_queries.IsPssm() );
    BOOST_REQUIRE( !b4_queries.IsSeq_loc_list() );

    // just as a bonus, check the database
    BOOST_REQUIRE(qsr.CanGetSubject());
    BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
}
1195
1196 // Test that when GIs are provided as the queries, no bioseq
1197 // should be submitted, instead a list of seqlocs should be sent
BOOST_AUTO_TEST_CASE(GetSearchStrategy_QueryWithGIs)
{
    CRef<CScope> scope(new CScope(*CObjectManager::GetInstance()));

    // Each entry pairs a GI with the end coordinate used for its Seq-loc.
    typedef pair<int, int> TGiLength;
    vector<TGiLength> gis;
    gis.push_back(TGiLength(555, 624));
    gis.push_back(TGiLength(556, 310));

    // Fail right away, with a clear assertion, if the data file holding the
    // Seq-entries cannot be opened.
    ifstream in("data/seq_entry_gis.asn");
    BOOST_REQUIRE(in);
    TSeqLocVector query_loc;

    // One Seq-entry is read from the file per GI, added to the scope, and a
    // query Seq-loc is built for it.
    ITERATE(vector<TGiLength>, gi, gis) {
        CRef<CSeq_entry> seq_entry(new CSeq_entry);
        in >> MSerial_AsnText >> *seq_entry;
        scope->AddTopLevelSeqEntry(*seq_entry);
        CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, gi->first));
        CRef<CSeq_loc> sl(new CSeq_loc(*id, 0, gi->second));
        query_loc.push_back(SSeqLoc(sl, scope));
    }
    in.close();

    CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query_loc));
    const string kDbName("nt");
    const CSearchDatabase target_db(kDbName,
                                    CSearchDatabase::eBlastDbIsNucleotide);

    CRef<CBlastOptionsHandle> opts
        (CBlastOptionsFactory::Create(eBlastn, CBlastOptions::eRemote));

    CRemoteBlast rmt_blast(qf, opts, target_db);
    CRef<CBlast4_request> ss = rmt_blast.GetSearchStrategy();
    BOOST_REQUIRE(ss.NotEmpty());

    const CBlast4_request_body& body = ss->GetBody();
    BOOST_REQUIRE(body.IsQueue_search());
    const CBlast4_queue_search_request& qsr = body.GetQueue_search();
    BOOST_REQUIRE(qsr.CanGetQueries());

    // The queries must have been submitted as a list of Seq-locs, not as a
    // Bioseq-set or a PSSM.
    const CBlast4_queries& b4_queries = qsr.GetQueries();
    BOOST_REQUIRE_EQUAL(query_loc.size(), b4_queries.GetNumQueries());
    BOOST_REQUIRE( !b4_queries.IsBioseq_set() );
    BOOST_REQUIRE( !b4_queries.IsPssm() );
    BOOST_REQUIRE( b4_queries.IsSeq_loc_list() );

    // just as a bonus, check the database
    BOOST_REQUIRE(qsr.CanGetSubject());
    BOOST_REQUIRE(qsr.GetSubject().GetDatabase() == kDbName);
}
1245
// Reads a search strategy from data/ss.asn and checks its program and
// service names.
BOOST_AUTO_TEST_CASE(ReadSearchStrategy_TextAsn1)
{
    ifstream strategy_file("data/ss.asn");
    BOOST_REQUIRE(strategy_file);

    CRef<CBlast4_request> request = ExtractBlast4Request(strategy_file);
    BOOST_REQUIRE(request.NotEmpty());

    const CBlast4_queue_search_request& search =
        request->GetBody().GetQueue_search();
    BOOST_REQUIRE(search.GetProgram() == "blastn");
    BOOST_REQUIRE(search.GetService() == "megablast");
}
1258
// Reads a search strategy from data/ss.xml and checks its program and
// service names.
BOOST_AUTO_TEST_CASE(ReadSearchStrategy_Xml)
{
    ifstream strategy_file("data/ss.xml");
    BOOST_REQUIRE(strategy_file);

    CRef<CBlast4_request> request = ExtractBlast4Request(strategy_file);
    BOOST_REQUIRE(request.NotEmpty());

    const CBlast4_queue_search_request& search =
        request->GetBody().GetQueue_search();
    BOOST_REQUIRE(search.GetProgram() == "blastn");
    BOOST_REQUIRE(search.GetService() == "plain");
}
1271
// Feeds ExtractBlast4Request() a file that does not contain a search
// strategy (data/seq_entry_gis.asn) and expects a CSerialException.
BOOST_AUTO_TEST_CASE(ReadSearchStrategy_Invalid)
{
    ifstream not_a_strategy("data/seq_entry_gis.asn");
    BOOST_REQUIRE(not_a_strategy);

    CRef<CBlast4_request> request;
    BOOST_REQUIRE_THROW(request = ExtractBlast4Request(not_a_strategy),
                        CSerialException);
}
1281
// Loads a BLAST archive from data/archive.asn and checks the request
// metadata, the database, and the repeat-filtering database of the
// reconstructed nucleotide options.
BOOST_AUTO_TEST_CASE(ReadArchiveFormat)
{
    const char* fname = "data/archive.asn";
    ifstream in(fname);
    // Fail with a clear assertion if the data file cannot be opened.
    BOOST_REQUIRE(in);
    CRemoteBlast rb(in);
    rb.LoadFromArchive();
    BOOST_REQUIRE(rb.GetProgram() == "blastn");
    BOOST_REQUIRE(rb.GetService() == "megablast");
    BOOST_REQUIRE(rb.GetCreatedBy() == "tom");
    CRef<CBlast4_database> blast_db = rb.GetDatabases();
    BOOST_REQUIRE(blast_db->GetName() == "refseq_rna");
    BOOST_REQUIRE(rb.GetDbFilteringAlgorithmId() == -1);
    BOOST_REQUIRE(rb.GetDbFilteringAlgorithmKey() == kEmptyStr);

    // Keep a reference to the options handle for as long as the raw,
    // down-cast pointer is in use, and make sure the cast succeeded before
    // dereferencing it.
    CRef<CBlastOptionsHandle> search_opts = rb.GetSearchOptions();
    BOOST_REQUIRE(search_opts.NotEmpty());
    CBlastNucleotideOptionsHandle* opts_handle =
        dynamic_cast<CBlastNucleotideOptionsHandle*>(search_opts.GetPointer());
    BOOST_REQUIRE(opts_handle != NULL);
    BOOST_REQUIRE(string(opts_handle->GetRepeatFilteringDB()) == "repeat/repeat_9606");
}
1299
// Constructing a CRemoteBlast from a file that is not a BLAST archive
// (data/selenocysteines.fsa) must throw a CBlastException.
BOOST_AUTO_TEST_CASE(ReadBadArchiveFormat)
{
    const char* fname = "data/selenocysteines.fsa";
    ifstream in(fname);
    // Make sure the file was actually opened, so that the expected exception
    // is attributable to the file's content rather than to a missing file.
    BOOST_REQUIRE(in);
    BOOST_REQUIRE_THROW(CRemoteBlast rb(in), CBlastException);
}
1307
// Loads the archive in data/archive.bl2seq.asn and checks the program,
// service and creator it reports.
BOOST_AUTO_TEST_CASE(ReadBl2seqArchiveFormat)
{
    const char* fname = "data/archive.bl2seq.asn";
    ifstream in(fname);
    // Fail with a clear assertion if the data file cannot be opened.
    BOOST_REQUIRE(in);
    CRemoteBlast rb(in);
    rb.LoadFromArchive();
    BOOST_REQUIRE(rb.GetProgram() == "blastn");
    BOOST_REQUIRE(rb.GetService() == "megablast");
    BOOST_REQUIRE(rb.GetCreatedBy() == "tom");
}
1318
// Loads the archive in data/archive.multiple_queries.asn and checks its
// metadata, database name, and that the result set holds three queries
// and three results.
BOOST_AUTO_TEST_CASE(ReadArchiveFormatMultipleQueries)
{
    const char* fname = "data/archive.multiple_queries.asn";
    ifstream in(fname);
    // Fail with a clear assertion if the data file cannot be opened.
    BOOST_REQUIRE(in);
    CRemoteBlast rb(in);
    rb.LoadFromArchive();
    BOOST_REQUIRE(rb.GetProgram() == "blastn");
    BOOST_REQUIRE(rb.GetService() == "plain");
    BOOST_REQUIRE(rb.GetCreatedBy() == "tom");
    CRef<CBlast4_database> blast_db = rb.GetDatabases();
    BOOST_REQUIRE(blast_db->GetName() == "nt");
    CRef<CSearchResultSet> result_set = rb.GetResultSet();
    BOOST_REQUIRE(result_set->GetNumQueries() == 3);
    BOOST_REQUIRE(result_set->GetNumResults() == 3);
}
1334
1335 class CDiagLevelGuard {
1336 public:
CDiagLevelGuard(EDiagSev target)1337 CDiagLevelGuard(EDiagSev target) { m_Orig = SetDiagPostLevel(target); }
~CDiagLevelGuard()1338 ~CDiagLevelGuard() { SetDiagPostLevel(m_Orig); }
1339 private:
1340 EDiagSev m_Orig;
1341 };
1342
// Looks up several option indices through CBlast4Field::Get() and checks
// whether each one resolves to a named field or to the "-" placeholder.
BOOST_AUTO_TEST_CASE(GetBlast4Parameters)
{
    const string kUnknown("-");

    // These two options are expected to resolve to a real field name.
    CBlast4Field field = CBlast4Field::Get(eBlastOpt_Web_ExclModels);
    BOOST_REQUIRE(field.GetName() != kUnknown);

    field = CBlast4Field::Get(eBlastOpt_MbIndexName);
    BOOST_REQUIRE(field.GetName() != kUnknown);

    // These shouldn't be found, suppress diagnostics.  The guard stays in
    // scope until the end of the test, so the final lookup below also runs
    // with the post level set to eDiag_Error.
    CDiagLevelGuard quiet(eDiag_Error);

    field = CBlast4Field::Get(eBlastOpt_MaxValue);
    BOOST_REQUIRE(field.GetName() == kUnknown);

    field = CBlast4Field::Get(eBlastOpt_Program);
    BOOST_REQUIRE(field.GetName() == kUnknown);

    // And one more option that is expected to be known.
    field = CBlast4Field::Get(eBlastOpt_QueryCovHspPerc);
    BOOST_REQUIRE(field.GetName() != kUnknown);
}
1361 BOOST_AUTO_TEST_SUITE_END()
1362