1 /* $Id: split_query_unit_test.cpp 607143 2020-04-30 13:01:21Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 * File Description:
29 * Unit test module for code to split query sequences
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 #include "test_objmgr.hpp"
36
37 #include <blast_objmgr_priv.hpp>
38 #include <algo/blast/core/split_query.h>
39 #include "blast_aux_priv.hpp"
40 #include "split_query_aux_priv.hpp"
41 #include <algo/blast/api/blast_options_handle.hpp>
42 #include "split_query.hpp"
43 #include <algo/blast/api/objmgr_query_data.hpp>
44 #include <algo/blast/api/local_blast.hpp>
45 #include <util/random_gen.hpp>
46 #include <objtools/simple/simple_om.hpp>
47
48 /* IMPORTANT NOTE: If you have made changes to the query splitting code, the
49 * data in the configuration file (split_query.ini) might need to be updated.
50 * To aid in this, the xblast library supports tracing messages that output the
51 * internal data structure's contents to facilitate updating this file. To
52 * enable this, please run the unit_test application with the DIAG_TRACE
53 * environment variable set.
54 */
55
56 typedef vector<vector<Uint4> > TSplitQueryChunkMap;
57
58 using namespace std;
59 using namespace ncbi;
60 using namespace ncbi::objects;
61 using namespace ncbi::blast;
62
63 /// Calculate and assign the maximum length field in the BlastQueryInfo
64 /// structure
s_CalculateMaxLength(BlastQueryInfo * query_info)65 static void s_CalculateMaxLength(BlastQueryInfo* query_info)
66 {
67 query_info->max_length = 0;
68 for (int i = query_info->first_context; i <= query_info->last_context; i++)
69 {
70 BOOST_REQUIRE(query_info->contexts[i].query_length >= 0);
71 query_info->max_length =
72 max<Uint4>(query_info->max_length,
73 query_info->contexts[i].query_length);
74 }
75 }
76
77 /// Pair for gis and their length (in that order)
78 typedef pair<TIntId, size_t> TGiLenPair;
79 /// Vector containing pairs of gis and their length
80 typedef vector<TGiLenPair> TGiLengthVector;
81
82 /// Convert a vector of GIs with its lengths into a TSeqLocVector
83 /// @param gi_length vector of TGiLenPair containing GIs and their length [in]
84 /// @param retval the return value of this function [out]
85 /// @param tot_length total length of sequence data contained in gi_length
86 /// (optional) [in]
87 /// @param strands vector of strands to use (optional), if provided it must
88 /// match the size of the gi_length vector [in]
89 /// @param masks vector of masks (optional), if provided it must match the size
90 /// of the gi_length vector [in]
91 static void
s_ConvertToBlastQueries(const TGiLengthVector & gi_length,TSeqLocVector & retval,size_t * tot_length=NULL,vector<ENa_strand> * strands=NULL,const TSeqLocInfoVector * masks=NULL)92 s_ConvertToBlastQueries(const TGiLengthVector& gi_length,
93 TSeqLocVector& retval,
94 size_t* tot_length = NULL,
95 vector<ENa_strand>* strands = NULL,
96 const TSeqLocInfoVector* masks = NULL)
97 {
98 if (tot_length) {
99 *tot_length = 0;
100 }
101 retval.clear();
102 retval.reserve(gi_length.size());
103
104 if (strands) {
105 BOOST_REQUIRE(strands->size() == gi_length.size());
106 }
107 if (masks) {
108 BOOST_REQUIRE(masks->size() == gi_length.size());
109 }
110
111 for (size_t i = 0; i < gi_length.size(); i++) {
112 CRef<CSeq_loc> loc(new CSeq_loc());
113 if (strands) {
114 CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, gi_length[i].first));
115 loc->SetInt().SetFrom(0);
116 loc->SetInt().SetTo(gi_length[i].second-1);
117 loc->SetId(*id);
118 loc->SetStrand((*strands)[i]);
119 } else {
120 loc->SetWhole().SetGi(GI_FROM(TIntId, gi_length[i].first));
121 }
122 CRef<CScope> scope(CSimpleOM::NewScope());
123 retval.push_back(SSeqLoc(loc, &*scope));
124 if (tot_length) {
125 *tot_length += gi_length[i].second;
126 }
127 }
128
129 if (masks == NULL) {
130 return;
131 }
132
133 for (size_t i = 0; i < masks->size(); i++) {
134 const TMaskedQueryRegions& single_query_masks = (*masks)[i];
135 // FIXME: don't make the distinction between single and multiple masks
136 CRef<CSeq_loc> m(new CSeq_loc);
137
138 if (single_query_masks.size() == 1) {
139 const CSeq_interval& interval =
140 single_query_masks.front()->GetInterval();
141 m->SetInt(const_cast<CSeq_interval&>(interval));
142 } else {
143 ITERATE(TMaskedQueryRegions, mask, single_query_masks) {
144 const CSeq_interval& interval = (*mask)->GetInterval();
145 m->SetPacked_int().AddInterval(interval);
146 }
147 }
148 BOOST_REQUIRE(m->IsInt() || m->IsPacked_int());
149 retval[i].mask = m;
150 }
151 }
152
153 class CSplitQueryTestFixture {
154 public:
155 /// This represents the split_query.ini configuration file
156 CRef<CNcbiRegistry> m_Config;
157 /// Default value used when a field is not present in the config file
158 static const int kDefaultIntValue = -1;
159
CSplitQueryTestFixture()160 CSplitQueryTestFixture() {
161 // read the configuration file if it hasn't been read yet
162 if (m_Config.Empty()) {
163 const IRegistry::TFlags flags =
164 IRegistry::fNoOverride |
165 IRegistry::fTransient |
166 IRegistry::fNotJustCore |
167 IRegistry::fTruncate;
168
169 const string fname("data/split_query.ini");
170 ifstream config_file(fname.c_str());
171 m_Config.Reset(new CNcbiRegistry(config_file, flags));
172
173 if (m_Config->Empty()) {
174 throw runtime_error("Failed to read configuration file" +
175 fname);
176 }
177 }
178 }
179
/// Verify on teardown that the configuration is still loaded.
/// BOOST_CHECK is used instead of BOOST_REQUIRE because a failed
/// BOOST_REQUIRE aborts the test case by throwing, and an exception must not
/// escape from a destructor.
~CSplitQueryTestFixture() {
    BOOST_CHECK(m_Config.NotEmpty());
}
183
184 /// Populate a BLAST_SequenceBlk and BlastQueryInfo structures out of an
185 /// array of GIs
186 /// @param gis array of GIs, last element must be -1 indicating the end of
187 /// the array [in]
188 /// @param program program for which the query data will be created [in]
189 /// @param seq_blk BLAST_SequenceBlk structure to populate [out]
190 /// @param qinfo BlastQueryInfo structure to populate [out]
191 /// @param strand strand to use (optional) [in]
x_PrepareBlastQueryStructures(TIntId gis[],EProgram program,BLAST_SequenceBlk ** seq_blk,BlastQueryInfo ** qinfo,ENa_strand * strand=NULL)192 void x_PrepareBlastQueryStructures(TIntId gis[],
193 EProgram program,
194 BLAST_SequenceBlk** seq_blk,
195 BlastQueryInfo** qinfo,
196 ENa_strand* strand = NULL)
197 {
198 BOOST_REQUIRE(seq_blk);
199 BOOST_REQUIRE(qinfo);
200 TSeqLocVector queries;
201
202 for (int i = 0; gis[i] != -1; i++) {
203 CRef<CSeq_loc> loc(new CSeq_loc());
204 loc->SetWhole().SetGi(GI_FROM(TIntId, gis[i]));
205 CScope* scope = new CScope(CTestObjMgr::Instance().GetObjMgr());
206 scope->AddDefaults();
207 queries.push_back(SSeqLoc(loc, scope));
208 }
209
210 CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(program));
211
212 TSearchMessages msgs;
213
214 const CBlastOptions& kOpts = opts->GetOptions();
215 EBlastProgramType prog = kOpts.GetProgramType();
216 ENa_strand strand_opt = (strand != NULL)
217 ? *strand : kOpts.GetStrandOption();
218
219 SetupQueryInfo(queries, prog, strand_opt, qinfo);
220 SetupQueries(queries, *qinfo, seq_blk,
221 prog, strand_opt, msgs);
222 BOOST_REQUIRE(msgs.HasMessages() == false);
223 }
224
x_TestCContextTranslator(TGiLengthVector & gi_length,size_t chunk_size,size_t num_chunks,blast::EProgram program,vector<vector<int>> & starting_chunks,vector<vector<int>> & absolute_contexts,vector<vector<size_t>> * context_offsets,ENa_strand strand,vector<ENa_strand> * query_strands=NULL)225 void x_TestCContextTranslator(TGiLengthVector& gi_length,
226 size_t chunk_size,
227 size_t num_chunks,
228 blast::EProgram program,
229 vector< vector<int> >& starting_chunks,
230 vector< vector<int> >& absolute_contexts,
231 vector< vector<size_t> >* context_offsets,
232 ENa_strand strand,
233 vector<ENa_strand>* query_strands = NULL) {
234
235 if (query_strands) {
236 BOOST_REQUIRE_EQUAL(gi_length.size(), query_strands->size());
237 }
238
239 size_t tot_length;
240 TSeqLocVector queries;
241 s_ConvertToBlastQueries(gi_length, queries, &tot_length, query_strands);
242
243 size_t nc = SplitQuery_CalculateNumChunks(
244 EProgramToEBlastProgramType(program),
245 &chunk_size, tot_length, queries.size());
246 BOOST_REQUIRE_EQUAL(num_chunks, nc);
247
248 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
249 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(program));
250 CRef<CBlastOptions> opts(&opts_h->SetOptions());
251 if ( !query_strands ) {
252 opts->SetStrandOption(strand);
253 }
254 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
255
256 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE",
257 NStr::SizetToString(chunk_size,
258 NStr::fConvErr_NoThrow));
259 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
260 CRef<CSplitQueryBlk> sqb = splitter->Split();
261
262 BOOST_REQUIRE_EQUAL((size_t)splitter->GetNumberOfChunks(), num_chunks);
263
264 CContextTranslator ctx_translator(*sqb);
265
266 ostringstream os;
267 for (size_t chunk_num = 0; chunk_num < num_chunks; chunk_num++) {
268 // Test the starting chunks
269 vector<int>& st_chunks = starting_chunks[chunk_num];
270 for (size_t context_in_chunk = 0;
271 context_in_chunk < st_chunks.size();
272 context_in_chunk++) {
273 os.str("");
274 os << "Starting chunks: ";
275 os << "Chunk " << chunk_num << ", context " << context_in_chunk;
276 int sc = ctx_translator.GetStartingChunk(chunk_num,
277 context_in_chunk);
278 BOOST_REQUIRE_MESSAGE(st_chunks[context_in_chunk]==sc,os.str());
279 }
280
281 // Test the absolute contexts
282 vector<int>& abs_ctxts = absolute_contexts[chunk_num];
283 for (size_t context_in_chunk = 0;
284 context_in_chunk < abs_ctxts.size();
285 context_in_chunk++) {
286 os.str("");
287 os << "Absolute contexts: ";
288 os << "Chunk " << chunk_num << ", context " << context_in_chunk;
289 int abs_ctx =
290 ctx_translator.GetAbsoluteContext(chunk_num,
291 context_in_chunk);
292 BOOST_REQUIRE_MESSAGE(abs_ctxts[context_in_chunk]==abs_ctx,os.str());
293 }
294 }
295
296 // Check the context offsets
297 if ( !context_offsets ) {
298 return;
299 }
300
301 const BLAST_SequenceBlk* global_seq = query_data->GetSequenceBlk();
302 const BlastQueryInfo* global_qinfo = query_data->GetQueryInfo();
303 CRef<CSplitQueryBlk> split_query_blk = splitter->m_SplitBlk;
304 for (size_t chunk_num = 0; chunk_num < num_chunks; chunk_num++) {
305 vector<size_t> test_ctx_off =
306 split_query_blk->GetContextOffsets(chunk_num);
307 const vector<size_t>& ref_ctx_off = (*context_offsets)[chunk_num];
308
309 os.str("");
310 os << "Number of context offsets in chunk " << chunk_num;
311 BOOST_REQUIRE_MESSAGE(ref_ctx_off.size()==test_ctx_off.size(),os.str());
312
313 CRef<IQueryFactory> chunk_qf =
314 splitter->GetQueryFactoryForChunk(chunk_num);
315 CRef<ILocalQueryData> chunk_qd(chunk_qf->MakeLocalQueryData(opts));
316 const BLAST_SequenceBlk* chunk_seq = chunk_qd->GetSequenceBlk();
317 const BlastQueryInfo* chunk_qinfo = chunk_qd->GetQueryInfo();
318
319 for (size_t i = 0; i < ref_ctx_off.size(); i++) {
320 size_t correction = ref_ctx_off[i];
321 os.str("");
322 os << "Context correction in chunk " << chunk_num
323 << ", context " << i << " value now " << test_ctx_off[i]
324 << " not " << correction;
325 BOOST_REQUIRE_MESSAGE(correction==test_ctx_off[i],os.str());
326
327 int absolute_context =
328 ctx_translator.GetAbsoluteContext(chunk_num, i);
329 if (absolute_context == kInvalidContext) {
330 continue;
331 }
332
333 int global_offset =
334 global_qinfo->contexts[absolute_context].query_offset +
335 correction;
336 int chunk_offset = chunk_qinfo->contexts[i].query_offset;
337 int num_bases2compare =
338 min(10, chunk_qinfo->contexts[i].query_length);
339
340 os.str("");
341 os << "Sequence data in chunk " << chunk_num
342 << ", context " << i;
343 bool rv =
344 x_CmpSequenceData(&global_seq->sequence[global_offset],
345 &chunk_seq->sequence[chunk_offset],
346 num_bases2compare);
347 BOOST_REQUIRE_MESSAGE(rv,os.str());
348 }
349
350 }
351 }
352
/** Auxiliary function that compares bytes of sequence data to validate the
 * context offset corrections
 * @param global global query sequence data [in]
 * @param chunk sequence data for chunk [in]
 * @param len number of bytes to compare; nothing is read when it is 0 [in]
 * @return true if the first len bytes of both buffers are identical, false
 * otherwise
 */
bool x_CmpSequenceData(const Uint1* global, const Uint1* chunk, size_t len)
{
    const Uint1* const kGlobalEnd = global + len;
    while (global != kGlobalEnd) {
        if (*global != *chunk) {
            return false;
        }
        ++global;
        ++chunk;
    }
    return true;
}
369
QuerySplitter_BlastnSingleQueryMultiChunk(const string & kTestName,ENa_strand strand)370 void QuerySplitter_BlastnSingleQueryMultiChunk(const string& kTestName,
371 ENa_strand strand)
372 {
373 CBlastQueryVector query;
374 CSeq_id id(CSeq_id::e_Gi, 112422322); // 122347 bases long
375 query.AddQuery(CTestObjMgr::Instance().CreateBlastSearchQuery(id));
376
377 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query));
378 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastn));
379 CRef<CBlastOptions> opts(&opts_h->SetOptions());
380 opts->SetStrandOption(strand);
381 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
382
383 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
384 CRef<CSplitQueryBlk> sqb = splitter->Split();
385
386 CQuerySplitter::TSplitQueryVector split_query_vector;
387 x_ReadQueryBoundsPerChunk(kTestName, sqb, split_query_vector);
388 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
389
390 x_ValidateChunkBounds(splitter->GetChunkSize(),
391 query_data->GetSumOfSequenceLengths(),
392 *sqb, opts->GetProgramType());
393
394 const size_t kNumChunks = (size_t)m_Config->GetInt(kTestName,
395 "NumChunks",
396 kDefaultIntValue);
397 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
398 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
399
400 vector< vector<size_t> > queries_per_chunk;
401 x_ReadVectorOfVectorsForTest(kTestName, "Queries", queries_per_chunk);
402 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
403
404 vector< vector<int> > ctxs_per_chunk;
405 x_ReadVectorOfVectorsForTest(kTestName, "Contexts", ctxs_per_chunk);
406 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
407
408 vector< vector<size_t> > ctx_offsets_per_chunk;
409 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
410 ctx_offsets_per_chunk);
411 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
412
413 vector<BlastQueryInfo*> split_query_info;
414 x_ReadSplitQueryInfoForTest(kTestName, opts->GetProgramType(),
415 split_query_info);
416 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
417 NON_CONST_ITERATE(vector<BlastQueryInfo*>, itr, split_query_info) {
418 *itr = BlastQueryInfoFree(*itr);
419 }
420 }
421
QuerySplitter_BlastnMultiQueryMultiChunk(const string & kTestName,ENa_strand strand,vector<ENa_strand> * query_strands=NULL)422 void QuerySplitter_BlastnMultiQueryMultiChunk(const string& kTestName,
423 ENa_strand strand,
424 vector<ENa_strand>*
425 query_strands = NULL)
426 {
427 TGiLengthVector gi_length;
428 gi_length.push_back(make_pair<int, size_t>(112258880, 362959));
429 gi_length.push_back(make_pair<int, size_t>(112253843, 221853));
430 gi_length.push_back(make_pair<int, size_t>(112193060, 194837));
431 gi_length.push_back(make_pair<int, size_t>(112193059, 204796));
432 if (query_strands) {
433 BOOST_REQUIRE_EQUAL(gi_length.size(), query_strands->size());
434 }
435
436 size_t tot_length;
437 TSeqLocVector queries;
438 s_ConvertToBlastQueries(gi_length, queries, &tot_length, query_strands);
439
440 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
441 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastn));
442 CRef<CBlastOptions> opts(&opts_h->SetOptions());
443 if ( !query_strands ) {
444 opts->SetStrandOption(strand);
445 }
446 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
447
448 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
449 CRef<CSplitQueryBlk> sqb = splitter->Split();
450
451 CQuerySplitter::TSplitQueryVector split_query_vector;
452 x_ReadQueryBoundsPerChunk(kTestName, sqb, split_query_vector);
453 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
454
455 x_ValidateChunkBounds(splitter->GetChunkSize(),
456 query_data->GetSumOfSequenceLengths(),
457 *sqb, opts->GetProgramType());
458
459 const size_t kNumChunks = (size_t)m_Config->GetInt(kTestName,
460 "NumChunks",
461 kDefaultIntValue);
462 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
463 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
464
465 vector< vector<size_t> > queries_per_chunk;
466 x_ReadVectorOfVectorsForTest(kTestName, "Queries", queries_per_chunk);
467 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
468
469 vector< vector<int> > ctxs_per_chunk;
470 x_ReadVectorOfVectorsForTest(kTestName, "Contexts", ctxs_per_chunk);
471 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
472
473 vector< vector<size_t> > ctx_offsets_per_chunk;
474 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
475 ctx_offsets_per_chunk);
476 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
477
478 vector<BlastQueryInfo*> split_query_info;
479 x_ReadSplitQueryInfoForTest(kTestName, opts->GetProgramType(),
480 split_query_info);
481 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
482 NON_CONST_ITERATE(vector<BlastQueryInfo*>, itr, split_query_info) {
483 *itr = BlastQueryInfoFree(*itr);
484 }
485 }
486
QuerySplitter_BlastxSingleQueryMultiChunk(const string & kTestName,ENa_strand strand)487 void QuerySplitter_BlastxSingleQueryMultiChunk(const string& kTestName,
488 ENa_strand strand)
489 {
490 const size_t kLength = 122347; // length of the sequence below
491 CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 63122693));
492 TSeqRange range(0, kLength);
493 TSeqLocVector query;
494 query.push_back(*CTestObjMgr::Instance().
495 CreateSSeqLoc(*id, range, strand));
496
497 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query));
498 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastx));
499 CRef<CBlastOptions> opts(&opts_h->SetOptions());
500 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
501
502 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
503 CRef<CSplitQueryBlk> sqb = splitter->Split();
504
505 BOOST_REQUIRE_EQUAL(m_Config->GetInt(kTestName, "ChunkSize",
506 kDefaultIntValue),
507 (int)splitter->GetChunkSize());
508
509 x_ValidateChunkBounds(splitter->GetChunkSize(),
510 query_data->GetSumOfSequenceLengths(),
511 *sqb, opts->GetProgramType());
512
513 const size_t kNumChunks = (size_t)m_Config->GetInt(kTestName,
514 "NumChunks",
515 kDefaultIntValue);
516 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
517 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
518
519 vector< vector<size_t> > queries_per_chunk;
520 x_ReadVectorOfVectorsForTest(kTestName, "Queries", queries_per_chunk);
521 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
522
523 vector< vector<int> > ctxs_per_chunk;
524 x_ReadVectorOfVectorsForTest(kTestName, "Contexts", ctxs_per_chunk);
525 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
526
527 vector< vector<size_t> > ctx_offsets_per_chunk;
528 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
529 ctx_offsets_per_chunk);
530 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
531
532 vector<BlastQueryInfo*> split_query_info;
533 x_ReadSplitQueryInfoForTest(kTestName, opts->GetProgramType(),
534 split_query_info);
535 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
536 NON_CONST_ITERATE(vector<BlastQueryInfo*>, itr, split_query_info) {
537 *itr = BlastQueryInfoFree(*itr);
538 }
539 }
540
QuerySplitter_BlastxMultiQueryMultiChunk(const string & kTestName,ENa_strand strand,vector<ENa_strand> * query_strands=NULL)541 void QuerySplitter_BlastxMultiQueryMultiChunk(const string& kTestName,
542 ENa_strand strand,
543 vector<ENa_strand>*
544 query_strands = NULL)
545 {
546 TGiLengthVector gi_length;
547 gi_length.push_back(make_pair<int, size_t>(112817621, 5567));
548 gi_length.push_back(make_pair<int, size_t>(112585373, 5987));
549 gi_length.push_back(make_pair<int, size_t>(112585216, 5531));
550 gi_length.push_back(make_pair<int, size_t>(112585119, 5046));
551 if (query_strands) {
552 BOOST_REQUIRE_EQUAL(gi_length.size(), query_strands->size());
553 }
554
555 size_t tot_length;
556 TSeqLocVector queries;
557 s_ConvertToBlastQueries(gi_length, queries, &tot_length, query_strands);
558
559 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
560 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastx));
561 CRef<CBlastOptions> opts(&opts_h->SetOptions());
562 if ( !query_strands ) {
563 opts->SetStrandOption(strand);
564 }
565 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
566
567 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
568 CRef<CSplitQueryBlk> sqb = splitter->Split();
569
570 BOOST_REQUIRE_EQUAL(m_Config->GetInt(kTestName, "ChunkSize",
571 kDefaultIntValue),
572 (int)splitter->GetChunkSize());
573
574 BOOST_REQUIRE_EQUAL(tot_length, query_data->GetSumOfSequenceLengths());
575 x_ValidateChunkBounds(splitter->GetChunkSize(),
576 query_data->GetSumOfSequenceLengths(),
577 *sqb, opts->GetProgramType());
578
579 const size_t kNumChunks = (size_t)m_Config->GetInt(kTestName,
580 "NumChunks",
581 kDefaultIntValue);
582 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
583 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
584
585 vector< vector<size_t> > queries_per_chunk;
586 x_ReadVectorOfVectorsForTest(kTestName, "Queries", queries_per_chunk);
587 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
588
589 vector< vector<int> > ctxs_per_chunk;
590 x_ReadVectorOfVectorsForTest(kTestName, "Contexts", ctxs_per_chunk);
591 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
592
593 vector< vector<size_t> > ctx_offsets_per_chunk;
594 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
595 ctx_offsets_per_chunk);
596 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
597
598 vector<BlastQueryInfo*> split_query_info;
599 x_ReadSplitQueryInfoForTest(kTestName, opts->GetProgramType(),
600 split_query_info);
601 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
602 NON_CONST_ITERATE(vector<BlastQueryInfo*>, itr, split_query_info) {
603 *itr = BlastQueryInfoFree(*itr);
604 }
605 }
606
607 /************ Auxiliary functions **********************************/
608
609 /// Incrementally compute the query chunk bounds. This will have a direct
610 /// impact on the success of x_ValidateChunkBounds. This function assumes
611 /// that the chunk size doesn't vary between each invocation and that the
612 /// first time this function is called, the chunk_range is initialized with
613 /// its default constructor (e.g.: TChunkRange::GetEmpty())
614 /// @param chunk_range range of the query chunk [in|out]
615 /// @param chunk_size size of the chunk [in]
616 /// @param concatenated_query_length length of the full query [in]
617 /// @param overlap length of the overlap region between each chunk [in]
x_ComputeQueryChunkBounds(TChunkRange & chunk_range,size_t chunk_size,size_t concatenated_query_length,size_t overlap)618 void x_ComputeQueryChunkBounds(TChunkRange& chunk_range,
619 size_t chunk_size,
620 size_t concatenated_query_length,
621 size_t overlap)
622 {
623 if (chunk_range == TChunkRange::GetEmpty()) {
624 chunk_range.SetFrom(0);
625 chunk_range.SetLength(chunk_size);
626 } else {
627 const TSeqPos kIncrement = chunk_size - overlap;
628 chunk_range.SetFrom(chunk_range.GetFrom() + kIncrement);
629 chunk_range.SetToOpen(chunk_range.GetToOpen() + kIncrement);
630 }
631 BOOST_REQUIRE(chunk_range.NotEmpty());
632
633 if (chunk_range.GetToOpen() > concatenated_query_length) {
634 chunk_range.SetToOpen(concatenated_query_length);
635 }
636 }
637
638 /// This function reads values in the split_query.ini file with the format
639 /// ChunkNQueryM (where N is the chunk number and M is the query number).
640 /// Each of these entries should have 3 comma-separeted elements: the
641 /// query's starting offset, ending offset, and its strand's enumeration
642 /// value.
643 /// @param kTestName name of the test to read data for [in]
644 /// @param sqb CSplitQueryBlk object from which to get query indices for
645 /// each chunk [in]
646 /// @param split_query_vector query vector where the data from config file
647 /// will be read [out]
x_ReadQueryBoundsPerChunk(const string & kTestName,CConstRef<CSplitQueryBlk> sqb,CQuerySplitter::TSplitQueryVector & split_query_vector)648 void x_ReadQueryBoundsPerChunk(const string& kTestName,
649 CConstRef<CSplitQueryBlk> sqb,
650 CQuerySplitter::TSplitQueryVector& split_query_vector)
651 {
652 CRef<CScope> scope(CSimpleOM::NewScope());
653 TMaskedQueryRegions empty_mask;
654 split_query_vector.clear();
655
656 ostringstream os;
657
658 const int kNumChunks = m_Config->GetInt(kTestName, "NumChunks",
659 kDefaultIntValue);
660 if (kNumChunks == kDefaultIntValue) {
661 throw runtime_error("Invalid number of chunks in " + kTestName);
662 }
663
664 split_query_vector.assign(kNumChunks, CRef<CBlastQueryVector>());
665
666 for (int i = 0; i < kNumChunks; i++) {
667 os.str("");
668 os << "Chunk" << i;
669 const vector<size_t> kQueryIndices = sqb->GetQueryIndices(i);
670
671 BOOST_REQUIRE( !kQueryIndices.empty() );
672 split_query_vector[i].Reset(new CBlastQueryVector);
673
674 ITERATE(vector<size_t>, itr, kQueryIndices) {
675 ostringstream out;
676 out << "Query" << *itr;
677
678 const string& value = m_Config->Get(kTestName,
679 os.str() + out.str());
680 // This data corresponds to entries in split_query.ini of the
681 // form ChunkNQueryM, and each line should contain 3 elements:
682 // the start and stop for each query in each chunk and the
683 // strand's enumeration value
684 vector<size_t> query_data;
685 x_ParseConfigLine(value, query_data);
686 BOOST_REQUIRE_MESSAGE((size_t)3==query_data.size(),os.str() + out.str());
687
688 CRef<CSeq_loc> sl(new CSeq_loc);
689 sl->SetInt().SetFrom(query_data[0]);
690 sl->SetInt().SetTo(query_data[1]);
691 sl->SetStrand(static_cast<ENa_strand>(query_data[2]));
692 CRef<CBlastSearchQuery> bsq(new CBlastSearchQuery(*sl,
693 *scope,
694 empty_mask));
695 split_query_vector[i]->AddQuery(bsq);
696 }
697 }
698 }
699
700 /// Compare the query data (start, stop, strand) for each chunk computed by
701 /// the splitter vs. the data read from the split_query.ini file
702 /// @param splitter object which performs query splitting [in]
703 /// @param split_query_vector data instantiated from what was read from the
704 /// split_query.ini file
705 /// @param splitter CQuerySplitter object to test [in]
706 /// @param split_query_vector data read from config file to test against
707 /// [in]
x_ValidateQuerySeqLocsPerChunk(CRef<CQuerySplitter> splitter,const CQuerySplitter::TSplitQueryVector & split_query_vector)708 void x_ValidateQuerySeqLocsPerChunk(CRef<CQuerySplitter> splitter,
709 const CQuerySplitter::TSplitQueryVector& split_query_vector)
710 {
711 if (split_query_vector.empty()) {
712 return;
713 }
714
715 ostringstream os;
716 os << "Different split query vector sizes";
717
718 BOOST_REQUIRE_MESSAGE(split_query_vector.size()==(size_t)splitter->m_NumChunks,os.str());
719
720 for (size_t i = 0; i < splitter->m_NumChunks; i++) {
721 CRef<CBlastQueryVector> ref_qvector = split_query_vector[i];
722 CRef<CBlastQueryVector> test_qvector =
723 splitter->m_SplitQueriesInChunk[i];
724
725 os.str("");
726 os << "Different split query vector sizes for chunk " << i;
727 BOOST_REQUIRE_MESSAGE(ref_qvector->Size()==test_qvector->Size(),os.str());
728
729 for (size_t j = 0; j < ref_qvector->Size(); j++) {
730 CConstRef<CSeq_loc> ref_qloc = ref_qvector->GetQuerySeqLoc(j);
731 CConstRef<CSeq_loc> test_qloc = test_qvector->GetQuerySeqLoc(j);
732 CSeq_loc::TRange ref_query_range = ref_qloc->GetTotalRange();
733 CSeq_loc::TRange test_query_range = test_qloc->GetTotalRange();
734
735 os.str("");
736 os << "Starting offset for query " << j << " in chunk " << i << " is now " << test_query_range.GetFrom() << " and not " << ref_query_range.GetFrom();
737 BOOST_REQUIRE_MESSAGE(ref_query_range.GetFrom()==test_query_range.GetFrom(),os.str());
738 os.str("");
739 os << "Ending offset for query " << j << " in chunk " << i << " is now " << test_query_range.GetToOpen() << " and not " << ref_query_range.GetTo();
740 BOOST_REQUIRE_MESSAGE(ref_query_range.GetTo()==test_query_range.GetToOpen(),os.str());
741 os.str("");
742 os << "Strand for query " << j << " in chunk " << i << " is now "
743 << (int)test_qloc->GetStrand() << " and not " << (int)ref_qloc->GetStrand();
744 BOOST_REQUIRE_MESSAGE(ref_qloc->GetStrand()==test_qloc->GetStrand(),os.str());
745 }
746 }
747 }
748
749 /// Reads data to populate multiple BlastQueryInfo structures. This data is
750 /// formatted in the config file as
751 /// BlastQueryInfoN.X[.Y] where N is the chunk number, X is the field of
752 /// the BlastQueryInfo structure and Y is the field of the BlastContextInfo
753 /// structure (only applicable if X has the value contextM, where M is the
754 /// context number)
755 /// @param kTestName name of the test to read data for [in]
756 /// @param program blast program [in]
757 /// @param retval vector of BlastQueryInfo structures, there will be as
758 /// many elements as there are chunks for this test. Caller is
759 /// responsible for deallocating the contents of this vector [out]
x_ReadSplitQueryInfoForTest(const string & kTestName,EBlastProgramType program,vector<BlastQueryInfo * > & retval)760 void x_ReadSplitQueryInfoForTest(const string& kTestName,
761 EBlastProgramType program,
762 vector<BlastQueryInfo*>& retval)
763 {
764 ostringstream os, errors;
765
766 const int kNumChunks = m_Config->GetInt(kTestName, "NumChunks",
767 kDefaultIntValue);
768 if (kNumChunks == kDefaultIntValue) {
769 throw runtime_error("Invalid number of chunks in " + kTestName);
770 }
771
772 retval.clear();
773 retval.reserve(kNumChunks);
774 retval.assign(kNumChunks, static_cast<BlastQueryInfo*>(0));
775
776 for (int i = 0; i < kNumChunks; i++) {
777 os.str("");
778 os << "BlastQueryInfo" << i << ".";
779 const string kPrefix(os.str());
780 errors.str("Chunk ");
781 errors << i << ": ";
782 const int kNumQueries = m_Config->GetInt(kTestName,
783 kPrefix + "num_queries",
784 kDefaultIntValue);
785 if (kNumQueries == kDefaultIntValue) {
786 string msg("Invalid BlastQueryInfo::num_queries in ");
787 msg += kTestName + " or value not specified";
788 return; // FIXME
789 //throw runtime_error(msg);
790 }
791
792 retval[i] = BlastQueryInfoNew(program, kNumQueries);
793 errors << "Failed to allocate BlastQueryInfo structure"
794 << " (Number of queries=" << kNumQueries << ")";
795 BOOST_REQUIRE_MESSAGE(retval[i],errors.str());
796
797 retval[i]->first_context = m_Config->GetInt(kTestName,
798 kPrefix +
799 "first_context",
800 kDefaultIntValue);
801 errors.str("Chunk ");
802 errors << i;
803 BOOST_REQUIRE_MESSAGE(retval[i]->first_context >= 0,errors.str());
804
805 retval[i]->last_context = m_Config->GetInt(kTestName,
806 kPrefix +
807 "last_context",
808 kDefaultIntValue);
809 BOOST_REQUIRE_MESSAGE(retval[i]->last_context >= 0,errors.str());
810 BOOST_REQUIRE_MESSAGE(retval[i]->first_context <= retval[i]->last_context,errors.str());
811
812 for (int c = retval[i]->first_context;
813 c <= retval[i]->last_context;
814 c++) {
815
816 errors.str("");
817 errors << "Chunk " << i << ", BlastQueryInfo::context " << c;
818
819 ostringstream ctx;
820 ctx << kPrefix << "context" << c << ".";
821
822 retval[i]->contexts[c].query_offset =
823 m_Config->GetInt(kTestName, ctx.str() +
824 "query_offset", kDefaultIntValue);
825 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].query_offset >= 0,
826 errors.str() + " query_offset >= 0");
827
828 retval[i]->contexts[c].query_length =
829 m_Config->GetInt(kTestName, ctx.str() +
830 "query_length", kDefaultIntValue);
831 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].query_length >= 0,
832 errors.str() + " query_length >= 0");
833
834 retval[i]->contexts[c].eff_searchsp =
835 m_Config->GetInt(kTestName, ctx.str() +
836 "eff_searchsp", kDefaultIntValue);
837 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].eff_searchsp >= 0,
838 errors.str() + " eff_searchsp >= 0");
839
840 retval[i]->contexts[c].length_adjustment =
841 m_Config->GetInt(kTestName, ctx.str() +
842 "length_adjustment", kDefaultIntValue);
843 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].length_adjustment >= 0,
844 errors.str() + " length_adjustment >= 0");
845
846 retval[i]->contexts[c].query_index =
847 m_Config->GetInt(kTestName, ctx.str() +
848 "query_index", kDefaultIntValue);
849 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].query_index >= 0,
850 errors.str() + " query_index");
851
852 retval[i]->contexts[c].frame =
853 m_Config->GetInt(kTestName, ctx.str() +
854 "frame", kDefaultIntValue);
855 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].frame == 1
856 || retval[i]->contexts[c].frame == 2
857 || retval[i]->contexts[c].frame == 3
858 || retval[i]->contexts[c].frame == -1
859 || retval[i]->contexts[c].frame == -2
860 || retval[i]->contexts[c].frame == -3
861 || retval[i]->contexts[c].frame == 0,
862 errors.str() + " frame");
863
864 retval[i]->contexts[c].is_valid =
865 m_Config->GetBool(kTestName, ctx.str() +
866 "is_valid", false);
867 BOOST_REQUIRE_MESSAGE(retval[i]->contexts[c].is_valid,
868 errors.str() + " is_valid");
869 }
870 s_CalculateMaxLength(retval[i]);
871 }
872 }
873
874 /// This method reads entries in the config file of the format
875 /// ChunkNX, here N is the chunk number and X is the value of data_to_read
876 /// @param kTestName name of the test to read data for [in]
877 /// @param data_to_read data for a chunk to read [in]
878 /// @param retval vector of vectors where the data will be returned. The
879 /// first vector will contain as many elements are there are chunks, and
880 /// the contained vectors will contain as many elements as there are items
881 /// on the config file (comma separated values) [out]
882 template <class T>
x_ReadVectorOfVectorsForTest(const string & kTestName,const char * data_to_read,vector<vector<T>> & retval)883 void x_ReadVectorOfVectorsForTest(const string& kTestName,
884 const char* data_to_read,
885 vector< vector<T> >& retval)
886 {
887 ostringstream os;
888
889 const int kNumChunks = m_Config->GetInt(kTestName, "NumChunks",
890 kDefaultIntValue);
891 if (kNumChunks == kDefaultIntValue) {
892 throw runtime_error("Invalid number of chunks in " + kTestName);
893 }
894
895 retval.clear();
896 retval.resize(kNumChunks);
897
898 for (int i = 0; i < kNumChunks; i++) {
899 os.str("");
900 os << "Chunk" << i << data_to_read;
901
902 const string& value = m_Config->Get(kTestName, os.str());
903 x_ParseConfigLine(value, retval[i]);
904 }
905 }
906
907 /// Tokenizes a string containing comma-separated values into a vector of
908 /// values
909 /// @param input string to tokenize [in]
910 /// @param retval vector containing elements found in input string [out]
911 template <class T>
x_ParseConfigLine(const string & input,vector<T> & retval)912 void x_ParseConfigLine(const string& input, vector<T>& retval)
913 {
914 retval.clear();
915 vector<string> tokens;
916 NStr::Split(input, ",", tokens);
917 retval.reserve(tokens.size());
918 ITERATE(vector<string>, token, tokens) {
919 retval.push_back(NStr::StringToInt(NStr::TruncateSpaces(*token)));
920 }
921 }
922
923 /***************** Generic validation methods ****************************/
924
925 /// Auxiliary method to validate the chunk bounds calculated by the
926 /// CSplitQueryBlk object and the x_ComputeQueryChunkBounds method
927 /// @param kChunkSize size of the chunk [in]
928 /// @param kQuerySize size of the full query [in]
929 /// @param sqb the CSplitQueryBlk object to test [in]
930 /// @param p the program type [in]
x_ValidateChunkBounds(size_t kChunkSize,size_t kQuerySize,const CSplitQueryBlk & sqb,EBlastProgramType p)931 void x_ValidateChunkBounds(size_t kChunkSize,
932 size_t kQuerySize,
933 const CSplitQueryBlk& sqb,
934 EBlastProgramType p)
935 {
936 const size_t kNumChunks(sqb.GetNumChunks());
937 const size_t kQueryChunkOverlapSize = SplitQuery_GetOverlapChunkSize(p);
938
939 TChunkRange expected_chunk_range(TChunkRange::GetEmpty());
940 for (size_t i = 0; i < kNumChunks; i++) {
941 x_ComputeQueryChunkBounds(expected_chunk_range, kChunkSize,
942 kQuerySize, kQueryChunkOverlapSize);
943 TChunkRange chunk_range = sqb.GetChunkBounds(i);
944 BOOST_REQUIRE_EQUAL(expected_chunk_range.GetFrom(),
945 chunk_range.GetFrom());
946 BOOST_REQUIRE_EQUAL(expected_chunk_range.GetToOpen(),
947 chunk_range.GetToOpen());
948 TSeqPos chunk_start = i*kChunkSize - (i*kQueryChunkOverlapSize);
949 TSeqPos chunk_end = chunk_start + kChunkSize > kQuerySize
950 ? kQuerySize
951 : chunk_start + kChunkSize;
952 BOOST_REQUIRE_EQUAL(expected_chunk_range.GetFrom(), chunk_start);
953 BOOST_REQUIRE_EQUAL(expected_chunk_range.GetToOpen(), chunk_end);
954 TSeqPos chunk_length = chunk_end - chunk_start;
955 BOOST_REQUIRE_EQUAL(chunk_length,
956 expected_chunk_range.GetLength());
957 }
958 }
959
960 /// Validates the query sequences (by index) assigned to all the chunks
961 /// This compares the data calculated by the sqb parameter to the data read
962 /// from the config file in queries_per_chunk
963 /// @param sqb CSplitQueryBlk object to test [in]
964 /// @param queries_per_chunk data read from config file [in]
x_ValidateQueriesPerChunkAssignment(const CSplitQueryBlk & sqb,const vector<vector<size_t>> & queries_per_chunk)965 void x_ValidateQueriesPerChunkAssignment(const CSplitQueryBlk& sqb,
966 const vector< vector<size_t> >&
967 queries_per_chunk)
968 {
969 const size_t kNumChunks = sqb.GetNumChunks();
970 BOOST_REQUIRE_EQUAL(kNumChunks, queries_per_chunk.size());
971
972 for (size_t i = 0; i < kNumChunks; i++) {
973 ostringstream os;
974 os << "Chunk number " << i << " has an invalid number of queries";
975
976 vector<size_t> data2test = sqb.GetQueryIndices(i);
977 BOOST_REQUIRE_MESSAGE(queries_per_chunk[i].size()==data2test.size(),os.str());
978
979 for (size_t j = 0; j < data2test.size(); j++) {
980 os.str("");
981 os << "Query index mismatch in chunk number " << i
982 << " entry number " << j;
983 BOOST_REQUIRE_MESSAGE(queries_per_chunk[i][j]==data2test[j],os.str());
984 }
985 }
986 }
987
988 /// Validates the query contexts assigned to all the chunks
989 /// @param sqb CSplitQueryBlk object to test [in]
990 /// @param contexts_per_chunk data read from config file [in]
x_ValidateQueryContextsPerChunkAssignment(const CSplitQueryBlk & sqb,const vector<vector<int>> & contexts_per_chunk)991 void x_ValidateQueryContextsPerChunkAssignment(const CSplitQueryBlk& sqb,
992 const vector< vector<int> >&
993 contexts_per_chunk)
994 {
995 const size_t kNumChunks = sqb.GetNumChunks();
996
997 BOOST_REQUIRE_EQUAL(kNumChunks, contexts_per_chunk.size());
998 for (size_t i = 0; i < kNumChunks; i++) {
999 ostringstream os;
1000 os << "Chunk number " << i << " has an invalid number of contexts";
1001
1002 vector<int> data2test = sqb.GetQueryContexts(i);
1003 BOOST_REQUIRE_MESSAGE(contexts_per_chunk[i].size()==data2test.size(),os.str());
1004
1005 for (size_t j = 0; j < data2test.size(); j++) {
1006 os.str("");
1007 os << "Context index mismatch in chunk number " << i
1008 << " entry number " << j;
1009 BOOST_REQUIRE_MESSAGE(contexts_per_chunk[i][j]==data2test[j],os.str());
1010 }
1011 }
1012 }
1013
1014 /// Validates the context offsets assigned to all the chunks
1015 /// @param sqb CSplitQueryBlk object to test [in]
1016 /// @param contexts_offsets_per_chunk data read from config file [in]
x_ValidateContextOffsetsPerChunkAssignment(const CSplitQueryBlk & sqb,const vector<vector<size_t>> & contexts_offsets_per_chunk)1017 void x_ValidateContextOffsetsPerChunkAssignment(const CSplitQueryBlk& sqb,
1018 const vector< vector<size_t> >&
1019 contexts_offsets_per_chunk)
1020 {
1021 const size_t kNumChunks(sqb.GetNumChunks());
1022 BOOST_REQUIRE_EQUAL(kNumChunks, contexts_offsets_per_chunk.size());
1023 for (size_t i = 0; i < kNumChunks; i++) {
1024 ostringstream os;
1025 os << "Chunk number " << i
1026 << " has an invalid number of context offsets";
1027
1028 vector<size_t> data2test = sqb.GetContextOffsets(i);
1029 BOOST_REQUIRE_MESSAGE(contexts_offsets_per_chunk[i].size()==data2test.size(),os.str());
1030
1031 for (size_t j = 0; j < data2test.size(); j++) {
1032 os.str("");
1033 os << "Context offset mismatch in chunk number " << i
1034 << " entry number " << j << " value now " << data2test[j]
1035 << " not " << contexts_offsets_per_chunk[i][j];
1036 // TLM cerr << "data2test " << data2test[j] << " ";
1037 BOOST_REQUIRE_MESSAGE(contexts_offsets_per_chunk[i][j]==data2test[j],os.str());
1038 }
1039 // TLM cerr << endl;
1040 }
1041 }
1042
1043 /// Validate the query info structure generated (test) against the expected
1044 /// one (reference) (N.B.: this is called from x_ValidateLocalQueryData)
1045 /// @param reference The "good" BlastQueryInfo structure [in]
1046 /// @param test the BlastQueryInfo structure to test [in]
1047 /// @param the chunk number being tested, this is needed for error
1048 /// reporting purposes [in]
x_ValidateQueryInfoForChunk(const BlastQueryInfo * reference,const BlastQueryInfo * test,size_t chunk_num)1049 void x_ValidateQueryInfoForChunk(const BlastQueryInfo* reference,
1050 const BlastQueryInfo* test,
1051 size_t chunk_num)
1052 {
1053 ostringstream os;
1054
1055 os << "Chunk " << chunk_num << ": BlastQueryInfo::first_context";
1056 BOOST_REQUIRE_MESSAGE(reference->first_context==test->first_context,os.str());
1057
1058 os.str("");
1059 os << "Chunk " << chunk_num << ": BlastQueryInfo::last_context";
1060 BOOST_REQUIRE_MESSAGE(reference->last_context==test->last_context,os.str());
1061
1062 os.str("");
1063 os << "Chunk " << chunk_num << ": BlastQueryInfo::num_queries";
1064 BOOST_REQUIRE_MESSAGE(reference->num_queries==test->num_queries,os.str());
1065
1066 os.str("");
1067 os << "Chunk " << chunk_num << ": BlastQueryInfo::max_length";
1068 BOOST_REQUIRE_MESSAGE(reference->max_length==test->max_length,os.str());
1069
1070 os.str("");
1071 os << "Chunk " << chunk_num << ": BlastQueryInfo::pattern_info";
1072 BOOST_REQUIRE_MESSAGE(reference->pattern_info==test->pattern_info,os.str());
1073
1074 for (Int4 ctx = reference->first_context;
1075 ctx <= reference->last_context;
1076 ctx++) {
1077
1078 os.str("");
1079 os << "Chunk " << chunk_num << ", context " << ctx;
1080 BOOST_REQUIRE_MESSAGE(reference->contexts[ctx].query_offset==test->contexts[ctx].query_offset,
1081 os.str() + " query_offset");
1082 BOOST_REQUIRE_MESSAGE(reference->contexts[ctx].query_length==test->contexts[ctx].query_length,
1083 os.str() + " query_length");
1084 BOOST_REQUIRE_MESSAGE(reference->contexts[ctx].eff_searchsp==test->contexts[ctx].eff_searchsp,
1085 os.str() + " eff_searchsp");
1086 BOOST_REQUIRE_MESSAGE(reference->contexts[ctx].query_index==test->contexts[ctx].query_index,
1087 os.str() + " query_index");
1088 BOOST_REQUIRE_MESSAGE((int)reference->contexts[ctx].frame==(int)test->contexts[ctx].frame,
1089 os.str() + " frame");
1090 BOOST_REQUIRE_MESSAGE(reference->contexts[ctx].is_valid==test->contexts[ctx].is_valid,
1091 os.str() + " is_valid");
1092
1093 }
1094 }
1095
1096 /// Validate the local query data for all chunks, comparing data produced
1097 /// by the CQuerySplitter object and the BlastQueryInfo structures read
1098 /// from the config file (BLAST_SequenceBlk's are not tested)
1099 /// @param splitter object to test [in]
1100 /// @param options BLAST options [in]
1101 /// @param split_query_info_structs the data to compare to (reference) [in]
x_ValidateLocalQueryData(CRef<CQuerySplitter> splitter,const CBlastOptions * options,vector<BlastQueryInfo * > split_query_info_structs)1102 void x_ValidateLocalQueryData(CRef<CQuerySplitter> splitter,
1103 const CBlastOptions* options,
1104 vector<BlastQueryInfo*>
1105 split_query_info_structs)
1106 {
1107 ostringstream os;
1108 BOOST_REQUIRE(options);
1109 const size_t kNumChunks(splitter->GetNumberOfChunks());
1110
1111 CRef<CSplitQueryBlk> sqb = splitter->Split();
1112 BOOST_REQUIRE_EQUAL(kNumChunks, split_query_info_structs.size());
1113
1114 for (size_t i = 0; i < kNumChunks; i++) {
1115 os.str("");
1116 os << "Chunk " << i << ": ";
1117 CRef<IQueryFactory> qf = splitter->GetQueryFactoryForChunk(i);
1118 BOOST_REQUIRE_MESSAGE(qf.NotEmpty(),os.str() + "NULL query factory");
1119 CRef<ILocalQueryData> qd = qf->MakeLocalQueryData(options);
1120 BOOST_REQUIRE_MESSAGE(qd.NotEmpty(),os.str() + "NULL local query data");
1121
1122 os << "Different number of queries";
1123 BOOST_REQUIRE_MESSAGE((size_t)sqb->GetNumQueriesForChunk(i)==(size_t)qd->GetNumQueries(),os.str());
1124
1125 // FIXME: turned off for now
1126 // Validate the query info structure
1127 //x_ValidateQueryInfoForChunk(split_query_info_structs[i],
1128 // qd->GetQueryInfo(), i);
1129
1130 //x_ValidateSequenceBlkForChunk();
1131
1132 // Validate that query in this chunk is indeed valid
1133 //for (int qindex = 0; qindex < qd->GetNumQueries(); qindex++) {
1134 // os.str("Chunk ");
1135 // os << i << ": query " << qindex << " is invalid";
1136 // BOOST_REQUIRE_MESSAGE(qd->IsValidQuery(qindex),os.str());
1137 //}
1138
1139 }
1140
1141 }
1142 };
1143
BOOST_FIXTURE_TEST_SUITE(split_query,CSplitQueryTestFixture)1144 BOOST_FIXTURE_TEST_SUITE(split_query, CSplitQueryTestFixture)
1145
1146 /*********** Actual unit tests ***************************************/
1147 BOOST_AUTO_TEST_CASE(SplitQueriesIn1Chunk) {
1148 CRef<CSplitQueryBlk> sqb(new CSplitQueryBlk(1));
1149 Int2 rv;
1150
1151 rv = SplitQueryBlk_AddQueryToChunk(sqb->GetCStruct(), 41, 2);
1152 BOOST_REQUIRE_EQUAL(kBadParameter, rv);
1153
1154 /// This will be reused for both query indices and contexts
1155 vector<Int4> query_indices_expected;
1156 query_indices_expected.push_back(45);
1157 query_indices_expected.push_back(0);
1158 query_indices_expected.push_back(7);
1159
1160 ITERATE(vector<Int4>, qi, query_indices_expected) {
1161 rv = SplitQueryBlk_AddQueryToChunk(sqb->GetCStruct(), *qi, 0);
1162 BOOST_REQUIRE_EQUAL((Int2)0, rv);
1163 rv = SplitQueryBlk_AddContextToChunk(sqb->GetCStruct(), *qi, 0);
1164 BOOST_REQUIRE_EQUAL((Int2)0, rv);
1165 }
1166
1167 Uint4* query_indices = NULL;
1168 rv = SplitQueryBlk_GetQueryIndicesForChunk(sqb->GetCStruct(), 0,
1169 &query_indices);
1170 BOOST_REQUIRE_EQUAL((Int2)0, rv);
1171 for (int i = 0; query_indices[i] != UINT4_MAX; i++) {
1172 BOOST_REQUIRE_EQUAL(query_indices_expected[i],
1173 (Int4)query_indices[i]);
1174 }
1175 sfree(query_indices);
1176
1177 Int4* query_contexts = NULL;
1178 Uint4 num_query_contexts = 0;
1179 rv = SplitQueryBlk_GetQueryContextsForChunk(sqb->GetCStruct(), 0,
1180 &query_contexts,
1181 &num_query_contexts);
1182 BOOST_REQUIRE_EQUAL((Int2)0, rv);
1183 for (Uint4 i = 0; i < num_query_contexts; i++) {
1184 BOOST_REQUIRE_EQUAL(query_indices_expected[i], query_contexts[i]);
1185 }
1186 sfree(query_contexts);
1187
1188 size_t num_queries(0);
1189 rv = SplitQueryBlk_GetNumQueriesForChunk(sqb->GetCStruct(), 0,
1190 &num_queries);
1191 BOOST_REQUIRE_EQUAL((Int2)0, rv);
1192 BOOST_REQUIRE_EQUAL(query_indices_expected.size(), num_queries);
1193 }
1194
/// Tests the assignment of a random number of queries to a random number
/// of chunks. The random number generator is seeded from the current time,
/// so the seed is logged to allow a failing run to be reproduced
BOOST_AUTO_TEST_CASE(SplitQueriesRandomly) {
    const CRandom::TValue kSeed = (CRandom::TValue)time(0);
    // Only shown when the Boost.Test log level includes messages
    BOOST_TEST_MESSAGE("SplitQueriesRandomly random seed: " << kSeed);
    CRandom random(kSeed);

    const Uint4 kNumChunks(random.GetRand(1, 100));
    TSplitQueryChunkMap map;
    map.resize(kNumChunks);
    Uint4 query_index = 0;

    // Set up the artificial data: consecutive query indices are handed out
    // to the chunks in order, a random number of them per chunk
    for (Uint4 chunk_num = 0; chunk_num < kNumChunks; chunk_num++) {
        const Uint4 kQueriesPerChunk(random.GetRand(1, 365));
        for (Uint4 i = 0; i < kQueriesPerChunk; i++) {
            map[chunk_num].push_back(query_index++);
        }
    }

    // Set up the SplitQueryBlk structure
    CRef<CSplitQueryBlk> sqb(new CSplitQueryBlk(kNumChunks));
    for (size_t chunk_num = 0; chunk_num < map.size(); chunk_num++) {
        ITERATE(vector<Uint4>, qi, map[chunk_num]) {
            Int2 rv = SplitQueryBlk_AddQueryToChunk(sqb->GetCStruct(), *qi,
                                                    chunk_num);
            BOOST_REQUIRE_EQUAL((Int2)0, rv);
        }
    }

    // Read the data back and compare it to what was assigned
    for (Uint4 chunk_num = 0; chunk_num < kNumChunks; chunk_num++) {
        vector<Uint4> query_indices_expected = map[chunk_num];

        Uint4* query_indices = NULL;
        Int2 rv = SplitQueryBlk_GetQueryIndicesForChunk(sqb->GetCStruct(),
                                                        chunk_num,
                                                        &query_indices);
        BOOST_REQUIRE_EQUAL((Int2)0, rv);
        BOOST_REQUIRE(query_indices != NULL);

        size_t i;
        for (i = 0; i < query_indices_expected.size(); i++) {
            BOOST_REQUIRE_EQUAL(query_indices_expected[i],
                                query_indices[i]);
        }
        // The returned array must end with UINT4_MAX right after the last
        // expected element
        BOOST_REQUIRE_EQUAL((Uint4)UINT4_MAX, query_indices[i]);
        sfree(query_indices);

        size_t num_queries(0);
        rv = SplitQueryBlk_GetNumQueriesForChunk(sqb->GetCStruct(), chunk_num,
                                                 &num_queries);
        BOOST_REQUIRE_EQUAL((Int2)0, rv);
        BOOST_REQUIRE_EQUAL(query_indices_expected.size(), num_queries);
    }
}
1245
/// Tests a fixed assignment of 4 queries to 3 chunks: queries 0 and 1 go
/// to chunk 0, query 2 to chunk 1 and query 3 to chunk 2
BOOST_AUTO_TEST_CASE(Split4QueriesIn3Chunks) {
    const Uint4 kNumChunks = 3;

    TSplitQueryChunkMap assignments;
    assignments.resize(kNumChunks);
    assignments[0].push_back(0);
    assignments[0].push_back(1);
    assignments[1].push_back(2);
    assignments[2].push_back(3);

    // Populate the object under test
    CRef<CSplitQueryBlk> sqb(new CSplitQueryBlk(kNumChunks));
    for (Uint4 chunk = 0; chunk < assignments.size(); chunk++) {
        const vector<Uint4>& chunk_queries = assignments[chunk];
        for (vector<Uint4>::const_iterator query = chunk_queries.begin();
             query != chunk_queries.end(); ++query) {
            Int2 rv = SplitQueryBlk_AddQueryToChunk(sqb->GetCStruct(),
                                                    *query, chunk);
            BOOST_REQUIRE_EQUAL((Int2)0, rv);
        }
    }

    // Read the data back and compare it to what was assigned
    for (Uint4 chunk = 0; chunk < kNumChunks; chunk++) {
        vector<Uint4> query_indices_expected = assignments[chunk];

        Uint4* query_indices = NULL;
        Int2 rv = SplitQueryBlk_GetQueryIndicesForChunk(sqb->GetCStruct(),
                                                        chunk,
                                                        &query_indices);
        BOOST_REQUIRE_EQUAL((Int2)0, rv);
        BOOST_REQUIRE(query_indices != NULL);

        size_t i = 0;
        while (i < query_indices_expected.size()) {
            BOOST_REQUIRE_EQUAL(query_indices_expected[i],
                                query_indices[i]);
            i++;
        }
        // The returned array must end with UINT4_MAX right after the last
        // expected element
        BOOST_REQUIRE_EQUAL((Uint4)UINT4_MAX, query_indices[i]);
        sfree(query_indices);

        size_t num_queries(0);
        rv = SplitQueryBlk_GetNumQueriesForChunk(sqb->GetCStruct(), chunk,
                                                 &num_queries);
        BOOST_REQUIRE_EQUAL((Int2)0, rv);
        BOOST_REQUIRE_EQUAL(query_indices_expected.size(), num_queries);
    }
}
1290
1291 /// Tests query splitting for blastn of both strands of a single query into
1292 /// multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnSingleQueryMultiChunk_BothStrands)1293 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnSingleQueryMultiChunk_BothStrands) {
1294 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1295 const string
1296 kTestName("QuerySplitter_BlastnSingleQueryMultiChunk_BothStrands");
1297
1298 QuerySplitter_BlastnSingleQueryMultiChunk(kTestName, eNa_strand_both);
1299 }
1300
1301 /// Tests query splitting for blastn of the plus strands of a single query
1302 /// into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnSingleQueryMultiChunk_PlusStrand)1303 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnSingleQueryMultiChunk_PlusStrand) {
1304 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1305 const string
1306 kTestName("QuerySplitter_BlastnSingleQueryMultiChunk_PlusStrand");
1307
1308 QuerySplitter_BlastnSingleQueryMultiChunk(kTestName, eNa_strand_plus);
1309 }
1310
1311 /// Tests query splitting for blastn of the minus strands of a single query
1312 /// into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnSingleQueryMultiChunk_MinusStrand)1313 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnSingleQueryMultiChunk_MinusStrand) {
1314 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1315 const string
1316 kTestName("QuerySplitter_BlastnSingleQueryMultiChunk_MinusStrand");
1317
1318 QuerySplitter_BlastnSingleQueryMultiChunk(kTestName, eNa_strand_minus);
1319 }
1320
1321 /// Tests query splitting for blastn of the plus strands of multiple queries
1322 /// into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_PlusStrand)1323 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_PlusStrand) {
1324 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1325 const string
1326 kTestName("QuerySplitter_BlastnMultiQueryMultiChunk_PlusStrand");
1327
1328 QuerySplitter_BlastnMultiQueryMultiChunk(kTestName, eNa_strand_plus);
1329 }
1330
1331 /// Tests query splitting for blastn of the minus strands of multiple
1332 /// queries into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_MinusStrand)1333 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_MinusStrand) {
1334 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1335 const string
1336 kTestName("QuerySplitter_BlastnMultiQueryMultiChunk_MinusStrand");
1337
1338 QuerySplitter_BlastnMultiQueryMultiChunk(kTestName, eNa_strand_minus);
1339 }
1340
1341 /// Tests query splitting for blastn of both strands of multiple
1342 /// queries into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_BothStrands)1343 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_BothStrands) {
1344 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1345 const string
1346 kTestName("QuerySplitter_BlastnMultiQueryMultiChunk_BothStrands");
1347 QuerySplitter_BlastnMultiQueryMultiChunk(kTestName, eNa_strand_both);
1348 }
1349
1350 /// Tests query splitting for blastn with multiple queries in multiple
1351 /// chunks with each query using different strands
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_MixedStrands)1352 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastnMultiQueryMultiChunk_MixedStrands) {
1353 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1354 const string
1355 kTestName("QuerySplitter_BlastnMultiQueryMultiChunk_MixedStrands");
1356 vector<ENa_strand> query_strands;
1357 query_strands.reserve(4);
1358 query_strands.push_back(eNa_strand_plus);
1359 query_strands.push_back(eNa_strand_both);
1360 query_strands.push_back(eNa_strand_minus);
1361 query_strands.push_back(eNa_strand_unknown);
1362
1363 QuerySplitter_BlastnMultiQueryMultiChunk(kTestName,
1364 eNa_strand_unknown,
1365 &query_strands);
1366 }
1367
/********* This functionality has not been implemented **************/
// N.B.: everything between this #if 0 and the matching #endif is excluded
// from compilation, so none of the blastx test cases below are built or run
#if 0
/// Tests blastx of both strands of a single query into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxSingleQueryMultiChunk_BothStrands) {
    const string
        kTestName("QuerySplitter_BlastxSingleQueryMultiChunk_BothStrands");

    QuerySplitter_BlastxSingleQueryMultiChunk(kTestName, eNa_strand_both);
}

/// Tests blastx of the plus strand of a single query into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxSingleQueryMultiChunk_PlusStrand) {
    const string
        kTestName("QuerySplitter_BlastxSingleQueryMultiChunk_PlusStrand");

    QuerySplitter_BlastxSingleQueryMultiChunk(kTestName, eNa_strand_plus);
}

/// Tests blastx of the minus strand of a single query into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxSingleQueryMultiChunk_MinusStrand) {
    const string
        kTestName("QuerySplitter_BlastxSingleQueryMultiChunk_MinusStrand");

    QuerySplitter_BlastxSingleQueryMultiChunk(kTestName, eNa_strand_minus);
}


/// Tests blastx of the plus strand of multiple queries into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxMultiQueryMultiChunk_PlusStrand) {
    const string
        kTestName("QuerySplitter_BlastxMultiQueryMultiChunk_PlusStrand");

    QuerySplitter_BlastxMultiQueryMultiChunk(kTestName, eNa_strand_plus);
}

/// Tests blastx of the minus strand of multiple queries into multiple
/// chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxMultiQueryMultiChunk_MinusStrand) {
    const string
        kTestName("QuerySplitter_BlastxMultiQueryMultiChunk_MinusStrand");

    QuerySplitter_BlastxMultiQueryMultiChunk(kTestName, eNa_strand_minus);
}

/// Tests blastx of both strands of multiple queries into multiple
/// chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxMultiQueryMultiChunk_BothStrands) {
    const string
        kTestName("QuerySplitter_BlastxMultiQueryMultiChunk_BothStrands");

    QuerySplitter_BlastxMultiQueryMultiChunk(kTestName, eNa_strand_both);
}

/// Tests blastx of multiple queries into multiple chunks with a different
/// strand given for each query (unknown, plus, both and minus, in that
/// order)
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastxMultiQueryMultiChunk_MixedStrands) {
    const string
        kTestName("QuerySplitter_BlastxMultiQueryMultiChunk_MixedStrands");
    vector<ENa_strand> query_strands;
    query_strands.reserve(4);
    query_strands.push_back(eNa_strand_unknown);
    query_strands.push_back(eNa_strand_plus);
    query_strands.push_back(eNa_strand_both);
    query_strands.push_back(eNa_strand_minus);

    QuerySplitter_BlastxMultiQueryMultiChunk(kTestName, eNa_strand_unknown,
                                             &query_strands);
}

#endif
1436
1437 /// Tests blastp of a single query into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastpSingleQueryMultiChunk)1438 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastpSingleQueryMultiChunk) {
1439 const string kTestName("QuerySplitter_BlastpSingleQueryMultiChunk");
1440
1441 const size_t kLength = 33423; // query length
1442 CBlastQueryVector query;
1443 CSeq_id id(CSeq_id::e_Gi, 110349719);
1444 query.AddQuery(CTestObjMgr::Instance().CreateBlastSearchQuery(id));
1445
1446 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query));
1447 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastp));
1448 CRef<CBlastOptions> opts(&opts_h->SetOptions());
1449 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
1450
1451 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
1452 CRef<CSplitQueryBlk> sqb = splitter->Split();
1453
1454 BOOST_REQUIRE_EQUAL(m_Config->GetInt(kTestName, "ChunkSize",
1455 kDefaultIntValue),
1456 (int)splitter->GetChunkSize());
1457
1458 CQuerySplitter::TSplitQueryVector split_query_vector;
1459 x_ReadQueryBoundsPerChunk(kTestName, sqb, split_query_vector);
1460 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
1461
1462 BOOST_REQUIRE_EQUAL(kLength, query_data->GetSumOfSequenceLengths());
1463 x_ValidateChunkBounds(splitter->GetChunkSize(),
1464 query_data->GetSumOfSequenceLengths(),
1465 *sqb, opts->GetProgramType());
1466
1467 const size_t kNumChunks = (size_t)m_Config->GetInt(kTestName,
1468 "NumChunks",
1469 kDefaultIntValue);
1470 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
1471 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
1472
1473 vector< vector<size_t> > queries_per_chunk;
1474 x_ReadVectorOfVectorsForTest(kTestName, "Queries", queries_per_chunk);
1475 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
1476
1477 vector< vector<int> > ctxs_per_chunk;
1478 x_ReadVectorOfVectorsForTest(kTestName, "Contexts", ctxs_per_chunk);
1479 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
1480
1481 vector< vector<size_t> > ctx_offsets_per_chunk;
1482 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1483 ctx_offsets_per_chunk);
1484 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
1485
1486 vector<BlastQueryInfo*> split_query_info;
1487 x_ReadSplitQueryInfoForTest(kTestName, opts->GetProgramType(),
1488 split_query_info);
1489 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
1490 NON_CONST_ITERATE(vector<BlastQueryInfo*>, itr, split_query_info) {
1491 *itr = BlastQueryInfoFree(*itr);
1492 }
1493 }
1494
1495 /// Tests blastp of multiple queries into multiple chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_BlastpMultiQueryMultiChunk)1496 BOOST_AUTO_TEST_CASE(QuerySplitter_BlastpMultiQueryMultiChunk) {
1497 const string kTestName("QuerySplitter_BlastpMultiQueryMultiChunk");
1498
1499 TGiLengthVector gi_length;
1500 gi_length.push_back(make_pair<int, size_t>(33624848, 6883));
1501 gi_length.push_back(make_pair<int, size_t>(4758794, 6669));
1502 gi_length.push_back(make_pair<int, size_t>(66821305, 6061));
1503 gi_length.push_back(make_pair<int, size_t>(109075552, 5007));
1504
1505 size_t tot_length;
1506 TSeqLocVector queries;
1507 s_ConvertToBlastQueries(gi_length, queries, &tot_length);
1508
1509 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
1510 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastp));
1511 CRef<CBlastOptions> opts(&opts_h->SetOptions());
1512 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
1513
1514 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
1515 CRef<CSplitQueryBlk> sqb = splitter->Split();
1516
1517 BOOST_REQUIRE_EQUAL(m_Config->GetInt(kTestName, "ChunkSize",
1518 kDefaultIntValue),
1519 (int)splitter->GetChunkSize());
1520
1521 CQuerySplitter::TSplitQueryVector split_query_vector;
1522 x_ReadQueryBoundsPerChunk(kTestName, sqb, split_query_vector);
1523 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
1524
1525 BOOST_REQUIRE_EQUAL(tot_length, query_data->GetSumOfSequenceLengths());
1526 x_ValidateChunkBounds(splitter->GetChunkSize(),
1527 query_data->GetSumOfSequenceLengths(),
1528 *sqb, opts->GetProgramType());
1529
1530 const size_t kNumChunks = (size_t)m_Config->GetInt(kTestName,
1531 "NumChunks",
1532 kDefaultIntValue);
1533 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
1534 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
1535
1536 vector< vector<size_t> > queries_per_chunk;
1537 x_ReadVectorOfVectorsForTest(kTestName, "Queries", queries_per_chunk);
1538 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
1539
1540 vector< vector<int> > ctxs_per_chunk;
1541 x_ReadVectorOfVectorsForTest(kTestName, "Contexts", ctxs_per_chunk);
1542 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
1543
1544 vector< vector<size_t> > ctx_offsets_per_chunk;
1545 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1546 ctx_offsets_per_chunk);
1547 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
1548
1549 vector<BlastQueryInfo*> split_query_info;
1550 x_ReadSplitQueryInfoForTest(kTestName, opts->GetProgramType(),
1551 split_query_info);
1552 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
1553 NON_CONST_ITERATE(vector<BlastQueryInfo*>, itr, split_query_info) {
1554 *itr = BlastQueryInfoFree(*itr);
1555 }
1556 }
1557
1558 /// Tests the CContextTranslator class for blastn of both strands of
1559 /// multiple queries
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastnMultiQuery_BothStrands)1560 BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastnMultiQuery_BothStrands) {
1561 const string
1562 kTestName("TestCContextTranslator_BlastnMultiQuery_BothStrands");
1563 TGiLengthVector gi_length;
1564 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1565 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1566 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1567
1568 const size_t chunk_size = 500;
1569 const size_t num_chunks = 9;
1570
1571 vector< vector<int> > starting_chunks(num_chunks);
1572 vector< vector<int> > absolute_contexts(num_chunks);
1573 vector< vector<size_t> > context_offset_corrections(num_chunks);
1574
1575 x_ReadVectorOfVectorsForTest(kTestName, "StartingChunks",
1576 starting_chunks);
1577 x_ReadVectorOfVectorsForTest(kTestName, "AbsoluteContexts",
1578 absolute_contexts);
1579 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1580 context_offset_corrections);
1581
1582 x_TestCContextTranslator(gi_length, chunk_size, num_chunks, eBlastn,
1583 starting_chunks, absolute_contexts,
1584 &context_offset_corrections,
1585 eNa_strand_both);
1586 }
1587
1588 /// Tests the CContextTranslator class for blastn of the plus strand of
1589 /// multiple queries
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastnMultiQuery_PlusStrand)1590 BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastnMultiQuery_PlusStrand) {
1591 const string
1592 kTestName("TestCContextTranslator_BlastnMultiQuery_PlusStrand");
1593 TGiLengthVector gi_length;
1594 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1595 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1596 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1597
1598 const size_t chunk_size = 500;
1599 const size_t num_chunks = 9;
1600
1601 vector< vector<int> > starting_chunks(num_chunks);
1602 vector< vector<int> > absolute_contexts(num_chunks);
1603 vector< vector<size_t> > context_offset_corrections(num_chunks);
1604
1605 x_ReadVectorOfVectorsForTest(kTestName, "StartingChunks",
1606 starting_chunks);
1607 x_ReadVectorOfVectorsForTest(kTestName, "AbsoluteContexts",
1608 absolute_contexts);
1609 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1610 context_offset_corrections);
1611
1612 x_TestCContextTranslator(gi_length, chunk_size, num_chunks, eBlastn,
1613 starting_chunks, absolute_contexts,
1614 &context_offset_corrections,
1615 eNa_strand_plus);
1616 }
1617
1618 /// Tests the CContextTranslator class for blastn of the minus strand of
1619 /// multiple queries
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastnMultiQuery_MinusStrand)1620 BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastnMultiQuery_MinusStrand) {
1621 const string
1622 kTestName("TestCContextTranslator_BlastnMultiQuery_MinusStrand");
1623 TGiLengthVector gi_length;
1624 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1625 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1626 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1627
1628 const size_t chunk_size = 500;
1629 const size_t num_chunks = 9;
1630
1631 vector< vector<int> > starting_chunks(num_chunks);
1632 vector< vector<int> > absolute_contexts(num_chunks);
1633 vector< vector<size_t> > context_offset_corrections(num_chunks);
1634
1635 x_ReadVectorOfVectorsForTest(kTestName, "StartingChunks",
1636 starting_chunks);
1637 x_ReadVectorOfVectorsForTest(kTestName, "AbsoluteContexts",
1638 absolute_contexts);
1639 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1640 context_offset_corrections);
1641
1642 x_TestCContextTranslator(gi_length, chunk_size, num_chunks, eBlastn,
1643 starting_chunks, absolute_contexts,
1644 &context_offset_corrections,
1645 eNa_strand_minus);
1646 }
1647
1648 /// Tests the CContextTranslator class for blastx of both strands of
1649 /// a single query with length divisible by CODON_LENGTH
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxSingleQuery_BothStrands_0)1650 BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxSingleQuery_BothStrands_0) {
1651 const string
1652 kTestName("TestCContextTranslator_BlastxSingleQuery_BothStrands_0");
1653 TGiLengthVector gi_length;
1654 gi_length.push_back(make_pair<int, size_t>(116001669, 33));
1655
1656 const size_t chunk_size = 15;
1657 const size_t num_chunks = 3;
1658 CAutoEnvironmentVariable tmp_env("OVERLAP_CHUNK_SIZE", "6");
1659
1660 vector< vector<int> > starting_chunks(num_chunks);
1661 vector< vector<int> > absolute_contexts(num_chunks);
1662 vector< vector<size_t> > context_offset_corrections(num_chunks);
1663
1664 x_ReadVectorOfVectorsForTest(kTestName, "StartingChunks",
1665 starting_chunks);
1666 x_ReadVectorOfVectorsForTest(kTestName, "AbsoluteContexts",
1667 absolute_contexts);
1668 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1669 context_offset_corrections);
1670
1671 x_TestCContextTranslator(gi_length, chunk_size, num_chunks, eBlastx,
1672 starting_chunks, absolute_contexts,
1673 &context_offset_corrections,
1674 eNa_strand_both);
1675 }
1676
1677 /// Tests the CContextTranslator class for blastx of both strands of
1678 /// a single query with length not divisible by CODON_LENGTH, instead, the
1679 /// (query length % CODON_LENGTH == 1)
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxSingleQuery_BothStrands_1)1680 BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxSingleQuery_BothStrands_1) {
1681 const string
1682 kTestName("TestCContextTranslator_BlastxSingleQuery_BothStrands_1");
1683 TGiLengthVector gi_length;
1684 gi_length.push_back(make_pair<int, size_t>(116001673, 34));
1685
1686 const size_t chunk_size = 15;
1687 const size_t num_chunks = 3;
1688 CAutoEnvironmentVariable tmp_env("OVERLAP_CHUNK_SIZE", "6");
1689
1690 vector< vector<int> > starting_chunks(num_chunks);
1691 vector< vector<int> > absolute_contexts(num_chunks);
1692 vector< vector<size_t> > context_offset_corrections(num_chunks);
1693
1694 x_ReadVectorOfVectorsForTest(kTestName, "StartingChunks",
1695 starting_chunks);
1696 x_ReadVectorOfVectorsForTest(kTestName, "AbsoluteContexts",
1697 absolute_contexts);
1698 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1699 context_offset_corrections);
1700
1701 x_TestCContextTranslator(gi_length, chunk_size, num_chunks, eBlastx,
1702 starting_chunks, absolute_contexts,
1703 &context_offset_corrections,
1704 eNa_strand_both);
1705 }
1706
1707 /// Tests the CContextTranslator class for blastx of both strands of
1708 /// a single query with length not divisible by CODON_LENGTH, instead, the
1709 /// (query length % CODON_LENGTH == 2)
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxSingleQuery_BothStrands_2)1710 BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxSingleQuery_BothStrands_2) {
1711 const string
1712 kTestName("TestCContextTranslator_BlastxSingleQuery_BothStrands_2");
1713 TGiLengthVector gi_length;
1714 gi_length.push_back(make_pair<int, size_t>(116001668, 35));
1715
1716 const size_t chunk_size = 15;
1717 const size_t kNumChunks = m_Config->GetInt(kTestName, "NumChunks",
1718 kDefaultIntValue);
1719 CAutoEnvironmentVariable tmp_env("OVERLAP_CHUNK_SIZE", "6");
1720
1721 vector< vector<int> > starting_chunks(kNumChunks);
1722 vector< vector<int> > absolute_contexts(kNumChunks);
1723 vector< vector<size_t> > context_offset_corrections(kNumChunks);
1724
1725 x_ReadVectorOfVectorsForTest(kTestName, "StartingChunks",
1726 starting_chunks);
1727 x_ReadVectorOfVectorsForTest(kTestName, "AbsoluteContexts",
1728 absolute_contexts);
1729 x_ReadVectorOfVectorsForTest(kTestName, "ContextOffsets",
1730 context_offset_corrections);
1731
1732 x_TestCContextTranslator(gi_length, chunk_size, kNumChunks, eBlastx,
1733 starting_chunks, absolute_contexts,
1734 &context_offset_corrections,
1735 eNa_strand_both);
1736 }
1737
/********* This functionality has not been implemented **************/
// The three blastx multi-query tests below are excluded from compilation.
#if 0

/// Disabled: CContextTranslator for blastx, both strands of several queries.
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxMultiQuery_BothStrands) {
    const string
        kSection("TestCContextTranslator_BlastxMultiQuery_BothStrands");
    const size_t kChunkSize = 501;
    const size_t kChunkCount = 10;

    // Query set, described as (gi, length) pairs
    TGiLengthVector query_specs;
    query_specs.push_back(pair<int, size_t>(107784911, 1000));
    query_specs.push_back(pair<int, size_t>(115354032, 250));
    query_specs.push_back(pair<int, size_t>(115381005, 2551));

    // Every expectation table starts out with one (empty) row per chunk
    vector< vector<int> > expected_starting_chunks(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "StartingChunks",
                                 expected_starting_chunks);

    vector< vector<int> > expected_absolute_contexts(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "AbsoluteContexts",
                                 expected_absolute_contexts);

    vector< vector<size_t> > expected_context_offsets(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "ContextOffsets",
                                 expected_context_offsets);

    x_TestCContextTranslator(query_specs, kChunkSize, kChunkCount, eBlastx,
                             expected_starting_chunks,
                             expected_absolute_contexts,
                             &expected_context_offsets,
                             eNa_strand_both);
}

/// Disabled: CContextTranslator for blastx, plus strand of several queries.
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxMultiQuery_PlusStrand) {
    const string
        kSection("TestCContextTranslator_BlastxMultiQuery_PlusStrand");
    const size_t kChunkSize = 500;
    const size_t kChunkCount = 10;

    // Query set, described as (gi, length) pairs
    TGiLengthVector query_specs;
    query_specs.push_back(pair<int, size_t>(107784911, 1000));
    query_specs.push_back(pair<int, size_t>(115354032, 250));
    query_specs.push_back(pair<int, size_t>(115381005, 2551));

    // Every expectation table starts out with one (empty) row per chunk
    vector< vector<int> > expected_starting_chunks(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "StartingChunks",
                                 expected_starting_chunks);

    vector< vector<int> > expected_absolute_contexts(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "AbsoluteContexts",
                                 expected_absolute_contexts);

    vector< vector<size_t> > expected_context_offsets(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "ContextOffsets",
                                 expected_context_offsets);

    x_TestCContextTranslator(query_specs, kChunkSize, kChunkCount, eBlastx,
                             expected_starting_chunks,
                             expected_absolute_contexts,
                             &expected_context_offsets,
                             eNa_strand_plus);
}

/// Disabled: CContextTranslator for blastx, minus strand of several queries.
BOOST_AUTO_TEST_CASE(TestCContextTranslator_BlastxMultiQuery_MinusStrand) {
    const string
        kSection("TestCContextTranslator_BlastxMultiQuery_MinusStrand");
    const size_t kChunkSize = 500;
    const size_t kChunkCount = 10;

    // Query set, described as (gi, length) pairs
    TGiLengthVector query_specs;
    query_specs.push_back(pair<int, size_t>(107784911, 1000));
    query_specs.push_back(pair<int, size_t>(115354032, 250));
    query_specs.push_back(pair<int, size_t>(115381005, 2551));

    // Every expectation table starts out with one (empty) row per chunk
    vector< vector<int> > expected_starting_chunks(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "StartingChunks",
                                 expected_starting_chunks);

    vector< vector<int> > expected_absolute_contexts(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "AbsoluteContexts",
                                 expected_absolute_contexts);

    vector< vector<size_t> > expected_context_offsets(kChunkCount);
    x_ReadVectorOfVectorsForTest(kSection, "ContextOffsets",
                                 expected_context_offsets);

    x_TestCContextTranslator(query_specs, kChunkSize, kChunkCount, eBlastx,
                             expected_starting_chunks,
                             expected_absolute_contexts,
                             &expected_context_offsets,
                             eNa_strand_minus);
}
#endif
1825
1826
1827 /// Tests the CQuerySplitter class when no splitting should occur
BOOST_AUTO_TEST_CASE(QuerySplitter_NoSplit)1828 BOOST_AUTO_TEST_CASE(QuerySplitter_NoSplit) {
1829 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "40000");
1830 const string kTestName("QuerySplitter_NoSplit");
1831 CBlastQueryVector query;
1832 CSeq_id id(CSeq_id::e_Gi, 555);
1833 query.AddQuery(CTestObjMgr::Instance().CreateBlastSearchQuery(id));
1834
1835 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query));
1836 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastn));
1837 CRef<CBlastOptions> opts(&opts_h->SetOptions());
1838
1839 const size_t kNumChunks = m_Config->GetInt(kTestName, "NumChunks",
1840 kDefaultIntValue);
1841 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
1842
1843 BOOST_REQUIRE_EQUAL(false, splitter->IsQuerySplit());
1844 BOOST_REQUIRE_EQUAL(m_Config->GetInt(kTestName, "ChunkSize",
1845 kDefaultIntValue),
1846 (int)splitter->GetChunkSize());
1847 BOOST_REQUIRE_EQUAL(kNumChunks, (size_t)splitter->GetNumberOfChunks());
1848
1849 CRef<CSplitQueryBlk> sqb = splitter->Split();
1850 BOOST_REQUIRE_EQUAL(false, splitter->IsQuerySplit());
1851 BOOST_REQUIRE_EQUAL(kNumChunks, sqb->GetNumChunks());
1852
1853 try {
1854 // try passing an out-of-range index
1855 (void)sqb->GetNumQueriesForChunk(kNumChunks + 8);
1856 BOOST_REQUIRE(false);
1857 } catch (const runtime_error&) {
1858 BOOST_REQUIRE(true);
1859 }
1860
1861 CRef<IQueryFactory> chunk_query_factory =
1862 splitter->GetQueryFactoryForChunk(0);
1863 BOOST_REQUIRE_EQUAL(qf, chunk_query_factory);
1864 }
1865
1866 /// Tests the CQuerySplitter class for retrieval of IQueryFactory objects
1867 /// for given chunks
BOOST_AUTO_TEST_CASE(QuerySplitter_ValidateQueryFactoriesBlastn)1868 BOOST_AUTO_TEST_CASE(QuerySplitter_ValidateQueryFactoriesBlastn) {
1869 CAutoEnvironmentVariable tmp_env("CHUNK_SIZE", "30000");
1870 TGiLengthVector gi_length;
1871 gi_length.push_back(make_pair<int, size_t>(95116755, 35000));
1872 gi_length.push_back(make_pair<int, size_t>(112123020, 35580));
1873
1874 TSeqLocVector queries;
1875 s_ConvertToBlastQueries(gi_length, queries);
1876
1877 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(queries));
1878 CRef<CBlastOptionsHandle> opts_h(CBlastOptionsFactory::Create(eBlastn));
1879 CRef<CBlastOptions> opts(&opts_h->SetOptions());
1880
1881 CRef<CQuerySplitter> splitter(new CQuerySplitter(qf, &*opts));
1882 const size_t kNumChunks(2);
1883
1884 try {
1885 (void)splitter->GetQueryFactoryForChunk(kNumChunks);
1886 BOOST_REQUIRE(false);
1887 } catch (const out_of_range& ) {
1888 BOOST_REQUIRE(true);
1889 }
1890
1891 CRef<IQueryFactory> chunk_0 = splitter->GetQueryFactoryForChunk(0);
1892 CRef<IQueryFactory> chunk_1 = splitter->GetQueryFactoryForChunk(1);
1893
1894 BOOST_REQUIRE(chunk_0 != qf);
1895 BOOST_REQUIRE(chunk_1 != qf);
1896
1897 BOOST_REQUIRE(chunk_0.NotEmpty());
1898 BOOST_REQUIRE(chunk_1.NotEmpty());
1899 }
1900
/// Checks SplitQuery_CalculateNumChunks for blastx, starting from a chunk
/// size of 10002 and always with a single query, against four concatenated
/// query lengths.
BOOST_AUTO_TEST_CASE(CalculateNumberChunks)
{
    const EBlastProgramType kProgram = eBlastTypeBlastx;
    const size_t kNumQueries = 1;
    // Passed by address on every call, so it stays non-const and each
    // length below is computed from its current value right before the call.
    // NOTE(review): presumably the callee may adjust it - confirm.
    size_t chunk_size = 10002;
    Uint4 num_chunks = 0;

    // Fixed, long concatenated query
    num_chunks = SplitQuery_CalculateNumChunks(kProgram, &chunk_size,
                                               10240000, kNumQueries);
    BOOST_REQUIRE_EQUAL(1055, num_chunks);

    // Half a chunk
    const size_t half_chunk = chunk_size/2;
    num_chunks = SplitQuery_CalculateNumChunks(kProgram, &chunk_size,
                                               half_chunk, kNumQueries);
    BOOST_REQUIRE_EQUAL(1, num_chunks);

    // Three chunk sizes minus two overlaps
    const size_t three_chunks_less_overlaps =
        3*chunk_size-2*SplitQuery_GetOverlapChunkSize(kProgram);
    num_chunks = SplitQuery_CalculateNumChunks(kProgram, &chunk_size,
                                               three_chunks_less_overlaps,
                                               kNumQueries);
    BOOST_REQUIRE_EQUAL(3, num_chunks);

    // One more than two chunk sizes plus one overlap
    const size_t two_chunks_plus_overlap =
        1+2*chunk_size+SplitQuery_GetOverlapChunkSize(kProgram);
    num_chunks = SplitQuery_CalculateNumChunks(kProgram, &chunk_size,
                                               two_chunks_plus_overlap,
                                               kNumQueries);
    BOOST_REQUIRE_EQUAL(2, num_chunks);
}
1926
/// Verifies that SplitQuery_GetChunkSize throws CBlastException for blastx
/// when the CHUNK_SIZE environment variable is set to "40000".
/// NOTE(review): presumably rejected because 40000 is not a multiple of
/// CODON_LENGTH - confirm against SplitQuery_GetChunkSize.
BOOST_AUTO_TEST_CASE(InvalidChunkSizeBlastx)
{
    CAutoEnvironmentVariable chunk_size_override("CHUNK_SIZE", "40000");
    BOOST_REQUIRE_THROW(SplitQuery_GetChunkSize(blast::eBlastx),
                        CBlastException);
}
1932
/// Verifies that SplitQuery_GetChunkSize throws CBlastException for tblastx
/// when the CHUNK_SIZE environment variable is set to "40000".
/// NOTE(review): presumably rejected because 40000 is not a multiple of
/// CODON_LENGTH - confirm against SplitQuery_GetChunkSize.
BOOST_AUTO_TEST_CASE(InvalidChunkSizeTblastx)
{
    CAutoEnvironmentVariable chunk_size_override("CHUNK_SIZE", "40000");
    BOOST_REQUIRE_THROW(SplitQuery_GetChunkSize(blast::eTblastx),
                        CBlastException);
}
1938
1939 BOOST_AUTO_TEST_SUITE_END()
1940