1 /* $Id: test_csra_loader.cpp 562583 2018-04-24 16:07:23Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description:
29 * Unit tests for CSRA data loader
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <sra/readers/sra/exception.hpp>
36 #include <sra/data_loaders/csra/csraloader.hpp>
37 #include <sra/readers/ncbi_traces_path.hpp>
38 #include <objmgr/scope.hpp>
39 #include <objmgr/bioseq_handle.hpp>
40 #include <objmgr/align_ci.hpp>
41 #include <objmgr/graph_ci.hpp>
42 #include <objtools/data_loaders/genbank/gbloader.hpp>
43 #include <objects/seqalign/seqalign__.hpp>
44 #include <objects/seqres/seqres__.hpp>
45 #include <corelib/ncbi_system.hpp>
46 #include <objtools/readers/idmapper.hpp>
47 #include <serial/iterator.hpp>
48
49 #include <corelib/test_boost.hpp>
50
51 #include <common/test_data_path.h>
52 #include <common/test_assert.h> /* This header must go last */
53
54 #include <objmgr/impl/scope_info.hpp>
55 #include <objmgr/impl/data_source.hpp>
56
57 USING_NCBI_SCOPE;
58 USING_SCOPE(objects);
59
60 #define PILEUP_NAME_SUFFIX " pileup graphs"
61
sx_GetOM(void)62 CRef<CObjectManager> sx_GetOM(void)
63 {
64 SetDiagPostLevel(eDiag_Info);
65 CRef<CObjectManager> om = CObjectManager::GetInstance();
66 CObjectManager::TRegisteredNames names;
67 om->GetRegisteredNames(names);
68 ITERATE ( CObjectManager::TRegisteredNames, it, names ) {
69 om->RevokeDataLoader(*it);
70 }
71 return om;
72 }
73
sx_CheckNames(CScope & scope,const CSeq_loc & loc,const string & name)74 void sx_CheckNames(CScope& scope, const CSeq_loc& loc,
75 const string& name)
76 {
77 SAnnotSelector sel;
78 sel.SetSearchUnresolved();
79 sel.SetCollectNames();
80 CAnnotTypes_CI it(CSeq_annot::C_Data::e_not_set, scope, loc, &sel);
81 CAnnotTypes_CI::TAnnotNames names = it.GetAnnotNames();
82 ITERATE ( CAnnotTypes_CI::TAnnotNames, i, names ) {
83 if ( i->IsNamed() ) {
84 NcbiCout << "Named annot: " << i->GetName()
85 << NcbiEndl;
86 }
87 else {
88 NcbiCout << "Unnamed annot"
89 << NcbiEndl;
90 }
91 }
92 //NcbiCout << "Checking for name: " << name << NcbiEndl;
93 BOOST_CHECK_EQUAL(names.count(name), 1u);
94 if ( names.size() > 1 ) {
95 BOOST_CHECK_EQUAL(names.count(name+PILEUP_NAME_SUFFIX), 1u);
96 }
97 else {
98 BOOST_CHECK_EQUAL(names.size(), 1u);
99 }
100 }
101
sx_GetPath(const string & dir)102 string sx_GetPath(const string& dir)
103 {
104 vector<string> reps;
105 NStr::Split("traces02:traces04", ":", reps);
106 ITERATE ( vector<string>, it, reps ) {
107 string path = CFile::MakePath(CFile::MakePath(NCBI_GetTestDataPath(), *it), dir);
108 if ( CDirEntry(path).Exists() ) {
109 return path;
110 }
111 }
112 return dir;
113 }
114
sx_ReportCSraLoaderName(const string & name)115 void sx_ReportCSraLoaderName(const string& name)
116 {
117 NcbiCout << "CSRA loader: " << name << NcbiEndl;
118 CDataLoader* loader = CObjectManager::GetInstance()->FindDataLoader(name);
119 CCSRADataLoader* csra_loader = dynamic_cast<CCSRADataLoader*>(loader);
120 BOOST_REQUIRE(csra_loader);
121 CCSRADataLoader::TAnnotNames names = csra_loader->GetPossibleAnnotNames();
122 ITERATE ( CCSRADataLoader::TAnnotNames, it, names ) {
123 NcbiCout << " annot name: " << it->GetName() << NcbiEndl;
124 }
125 }
126
sx_CheckSeq(CScope & scope,const CSeq_id_Handle & main_idh,const CSeq_id & id)127 void sx_CheckSeq(CScope& scope,
128 const CSeq_id_Handle& main_idh,
129 const CSeq_id& id)
130 {
131 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(id);
132 if ( idh == main_idh ) {
133 return;
134 }
135 const CBioseq_Handle::TId& ids = scope.GetIds(idh);
136 BOOST_REQUIRE(!ids.empty());
137 //BOOST_CHECK_EQUAL(ids[0], idh);
138 CBioseq_Handle bh = scope.GetBioseqHandle(idh);
139 BOOST_REQUIRE(bh);
140 }
141
BOOST_AUTO_TEST_CASE(FetchSeq1)142 BOOST_AUTO_TEST_CASE(FetchSeq1)
143 {
144 CRef<CObjectManager> om = sx_GetOM();
145
146 CCSRADataLoader::SLoaderParams params;
147 string csra_name, id;
148 TSeqPos from, to, align_count;
149
150 {
151 //string path = NCBI_TRACES01_PATH "/compress/DATA/ASW/NA19909.mapped.illumina.mosaik.ASW.exome.20110411";
152 string path = NCBI_TRACES01_PATH "/compress/1KG/ASW/NA19909";
153 params.m_DirPath = sx_GetPath(path);
154 csra_name = "exome.ILLUMINA.MOSAIK.csra";
155 id = "NC_000001.10";
156 from = 0;
157 to = 100000;
158 align_count = 37; // 36 if mapq>=1
159 }
160 csra_name = CFile::MakePath(params.m_DirPath, csra_name);
161 params.m_DirPath.erase();
162 params.m_CSRAFiles.push_back(csra_name);
163 string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
164 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
165
166 string loader_name =
167 CCSRADataLoader::RegisterInObjectManager(*om, params,
168 CObjectManager::eDefault)
169 .GetLoader()->GetName();
170 sx_ReportCSraLoaderName(loader_name);
171 CScope scope(*om);
172 scope.AddDefaults();
173
174 CRef<CSeq_id> seqid(new CSeq_id(id));
175 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
176 CRef<CSeq_loc> loc(new CSeq_loc);
177 loc->SetInt().SetId(*seqid);
178 loc->SetInt().SetFrom(from);
179 loc->SetInt().SetTo(to);
180 sx_CheckNames(scope, *loc, annot_name);
181 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
182 sel.SetSearchUnresolved();
183 sel.ExcludeNamedAnnots(pileup_name);
184
185 if ( 1 ) {
186 CGraph_CI git(scope, *loc, sel);
187 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
188 }
189
190 if ( 1 ) {
191 CAlign_CI it(scope, *loc, sel);
192 if ( it ) {
193 cout << "Align count: "<<it.GetSize()<<endl;
194 if ( it.GetAnnot().IsNamed() ) {
195 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
196 }
197 }
198 BOOST_CHECK_EQUAL(align_count, it.GetSize());
199
200 for ( ; it; ++it ) {
201 const CSeq_align& align = *it;
202 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
203 sx_CheckSeq(scope, idh, **j);
204 }
205 }
206 }
207
208 if ( 1 ) {
209 sel.ResetAnnotsNames();
210 sel.AddNamedAnnots(pileup_name);
211 CGraph_CI git(scope, *loc, sel);
212 BOOST_CHECK(git.GetSize() % 6 == 0);
213 if ( 0 ) {
214 for ( ; git; ++git ) {
215 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
216 }
217 }
218 }
219 }
220
BOOST_AUTO_TEST_CASE(FetchSeq2)221 BOOST_AUTO_TEST_CASE(FetchSeq2)
222 {
223 CRef<CObjectManager> om = sx_GetOM();
224
225 CCSRADataLoader::SLoaderParams params;
226 string csra_name, id;
227 TSeqPos from, to, align_count;
228
229 {
230 //string path = NCBI_TRACES01_PATH "/compress/DATA/ASW/NA19909.mapped.illumina.mosaik.ASW.exome.20110411";
231 string path = NCBI_TRACES01_PATH "/compress/1KG/ASW/NA19909";
232 params.m_DirPath = sx_GetPath(path);
233 csra_name = "exome.ILLUMINA.MOSAIK.csra";
234 id = "NC_000001.10";
235 if ( 1 ) {
236 CNcbiIfstream mapfile("mapfile");
237 BOOST_CHECK(mapfile);
238 params.m_IdMapper.reset(new CIdMapperConfig(mapfile, "", false));
239 id = "89161185";
240 //id = "NC_000001.9";
241 }
242 from = 0;
243 to = 100000;
244 align_count = 37; // 36 if mapq>=1
245 }
246 params.m_CSRAFiles.push_back(csra_name);
247 string annot_name = "csra_name"; //CDirEntry(csra_name).GetBase();
248 params.m_AnnotName = "csra_name";
249 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
250
251 string loader_name =
252 CCSRADataLoader::RegisterInObjectManager(*om, params,
253 CObjectManager::eDefault)
254 .GetLoader()->GetName();
255 sx_ReportCSraLoaderName(loader_name);
256 CScope scope(*om);
257 scope.AddDefaults();
258
259 CRef<CSeq_id> seqid(new CSeq_id(id));
260 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
261 CRef<CSeq_loc> loc(new CSeq_loc);
262 loc->SetInt().SetId(*seqid);
263 loc->SetInt().SetFrom(from);
264 loc->SetInt().SetTo(to);
265 sx_CheckNames(scope, *loc, annot_name);
266 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
267 sel.SetSearchUnresolved();
268 sel.ExcludeNamedAnnots(pileup_name);
269
270 if ( 1 ) {
271 CGraph_CI git(scope, *loc, sel);
272 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
273 }
274
275 if ( 1 ) {
276 CAlign_CI it(scope, *loc, sel);
277 if ( it ) {
278 cout << "Align count: "<<it.GetSize()<<endl;
279 if ( it.GetAnnot().IsNamed() ) {
280 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
281 }
282 }
283 BOOST_CHECK_EQUAL(align_count, it.GetSize());
284
285 for ( ; it; ++it ) {
286 const CSeq_align& align = *it;
287 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
288 sx_CheckSeq(scope, idh, **j);
289 }
290 }
291 }
292
293 if ( 1 ) {
294 sel.ResetAnnotsNames();
295 sel.AddNamedAnnots(pileup_name);
296 CGraph_CI git(scope, *loc, sel);
297 BOOST_CHECK(git.GetSize() % 6 == 0);
298 if ( 0 ) {
299 for ( ; git; ++git ) {
300 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
301 }
302 }
303 }
304 }
305
BOOST_AUTO_TEST_CASE(FetchSeq3)306 BOOST_AUTO_TEST_CASE(FetchSeq3)
307 {
308 CRef<CObjectManager> om = sx_GetOM();
309
310 CCSRADataLoader::SLoaderParams params;
311 string csra_name, id;
312 TSeqPos from, to, align_count;
313
314 {
315 string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12043";
316 params.m_DirPath = sx_GetPath(path);
317 csra_name = "genome.LS454.SSAHA2.csra";
318 id = "NC_000001.10";
319 if ( 0 ) {
320 CNcbiIfstream mapfile("mapfile");
321 BOOST_CHECK(mapfile);
322 params.m_IdMapper.reset(new CIdMapperConfig(mapfile, "", false));
323 id = "89161185";
324 //id = "NC_000001.9";
325 }
326 from = 0;
327 to = 100000;
328 align_count = 617; // 95 if mapq>=1
329 }
330 params.m_CSRAFiles.push_back(csra_name);
331 string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
332 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
333
334 string loader_name =
335 CCSRADataLoader::RegisterInObjectManager(*om, params,
336 CObjectManager::eDefault)
337 .GetLoader()->GetName();
338 sx_ReportCSraLoaderName(loader_name);
339 CScope scope(*om);
340 scope.AddDefaults();
341
342 CRef<CSeq_id> seqid(new CSeq_id(id));
343 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
344 CRef<CSeq_loc> loc(new CSeq_loc);
345 loc->SetInt().SetId(*seqid);
346 loc->SetInt().SetFrom(from);
347 loc->SetInt().SetTo(to);
348 sx_CheckNames(scope, *loc, annot_name);
349 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
350 sel.SetSearchUnresolved();
351 sel.ExcludeNamedAnnots(pileup_name);
352
353 if ( 1 ) {
354 CGraph_CI git(scope, *loc, sel);
355 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
356 }
357
358 if ( 1 ) {
359 CAlign_CI it(scope, *loc, sel);
360 if ( it ) {
361 cout << "Align count: "<<it.GetSize()<<endl;
362 if ( it.GetAnnot().IsNamed() ) {
363 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
364 }
365 }
366 BOOST_CHECK_EQUAL(align_count, it.GetSize());
367
368 for ( ; it; ++it ) {
369 const CSeq_align& align = *it;
370 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
371 sx_CheckSeq(scope, idh, **j);
372 }
373 }
374 }
375
376 if ( 1 ) {
377 sel.ResetAnnotsNames();
378 sel.AddNamedAnnots(pileup_name);
379 CGraph_CI git(scope, *loc, sel);
380 BOOST_CHECK(git.GetSize() % 6 == 0);
381 if ( 0 ) {
382 for ( ; git; ++git ) {
383 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
384 }
385 }
386 }
387 }
388
BOOST_AUTO_TEST_CASE(FetchSeq4)389 BOOST_AUTO_TEST_CASE(FetchSeq4)
390 {
391 CRef<CObjectManager> om = sx_GetOM();
392
393 CCSRADataLoader::SLoaderParams params;
394 string csra_name, id;
395 TSeqPos from, to, align_count;
396
397 {
398 string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
399 params.m_DirPath = sx_GetPath(path);
400 //csra_name = "genome.ILLUMINA.BWA.csra";
401 csra_name = "exome.ILLUMINA.MOSAIK.csra";
402 id = "NC_000023.10";
403 if ( 0 ) {
404 CNcbiIfstream mapfile("mapfile");
405 BOOST_CHECK(mapfile);
406 params.m_IdMapper.reset(new CIdMapperConfig(mapfile, "", false));
407 id = "89161185";
408 //id = "NC_000001.9";
409 }
410 from = 31137345;
411 to = 31157726;
412 align_count = 730;
413 //to = 33357726;
414 //align_count = 129595;
415 }
416 params.m_CSRAFiles.push_back(csra_name);
417 string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
418 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
419
420 CStopWatch sw(CStopWatch::eStart);
421 string loader_name =
422 CCSRADataLoader::RegisterInObjectManager(*om, params,
423 CObjectManager::eDefault)
424 .GetLoader()->GetName();
425 sx_ReportCSraLoaderName(loader_name);
426 CScope scope(*om);
427 scope.AddDefaults();
428
429 CRef<CSeq_id> seqid(new CSeq_id(id));
430 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
431 CRef<CSeq_loc> loc(new CSeq_loc);
432 loc->SetInt().SetId(*seqid);
433 loc->SetInt().SetFrom(from);
434 loc->SetInt().SetTo(to);
435 sx_CheckNames(scope, *loc, annot_name);
436 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
437 sel.SetSearchUnresolved();
438 sel.ExcludeNamedAnnots(pileup_name);
439
440 if ( 1 ) {
441 CGraph_CI git(scope, *loc, sel);
442 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
443 }
444
445 if ( 1 ) {
446 CAlign_CI it(scope, *loc, sel);
447 if ( it ) {
448 cout << "Align count: "<<it.GetSize()<<endl;
449 if ( it.GetAnnot().IsNamed() ) {
450 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
451 }
452 }
453 BOOST_CHECK_EQUAL(align_count, it.GetSize());
454 NcbiCout << "Elapsed: " << sw.Elapsed() << NcbiEndl;
455
456 for ( ; it; ++it ) {
457 const CSeq_align& align = it.GetOriginalSeq_align();
458 //NcbiCout << MSerial_AsnText << align;
459 if ( 1 ) {
460 ITERATE( CDense_seg::TIds, j,
461 align.GetSegs().GetDenseg().GetIds() ) {
462 sx_CheckSeq(scope, idh, **j);
463 CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle(**j);
464 if ( id2 != idh ) {
465 CBioseq_Handle bh = scope.GetBioseqHandle(id2);
466 BOOST_CHECK(bh);
467 //NcbiCout << MSerial_AsnText << *bh.GetCompleteBioseq();
468 }
469 }
470 }
471 }
472 NcbiCout << "Elapsed: " << sw.Elapsed() << NcbiEndl;
473 }
474
475 if ( 1 ) {
476 sel.ResetAnnotsNames();
477 sel.AddNamedAnnots(pileup_name);
478 CGraph_CI git(scope, *loc, sel);
479 BOOST_CHECK(git.GetSize() % 6 == 0);
480 if ( 0 ) {
481 for ( ; git; ++git ) {
482 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
483 }
484 }
485 NcbiCout << "Elapsed: " << sw.Elapsed() << NcbiEndl;
486 }
487 }
488
489
BOOST_AUTO_TEST_CASE(FetchSeq4l)490 BOOST_AUTO_TEST_CASE(FetchSeq4l)
491 {
492 CRef<CObjectManager> om = sx_GetOM();
493
494 CCSRADataLoader::SLoaderParams params;
495 string csra_name, id;
496
497 {
498 string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
499 path = sx_GetPath(path);
500 csra_name = CDirEntry::MakePath(path, "exome.ILLUMINA.MOSAIK.csra");
501 id = csra_name;
502 replace(id.begin(), id.end(), '/', '\\');
503 id = "gnl|SRA|"+id+".6.1";
504 }
505 params.m_CSRAFiles.push_back(csra_name);
506 params.m_PathInId = true;
507 params.m_QualityGraphs = true;
508
509 CStopWatch sw(CStopWatch::eStart);
510 string loader_name =
511 CCSRADataLoader::RegisterInObjectManager(*om, params,
512 CObjectManager::eDefault)
513 .GetLoader()->GetName();
514 sx_ReportCSraLoaderName(loader_name);
515
516 for ( int t = 0; t < 2; ++t ) {
517 CScope scope(*om);
518 scope.AddDefaults();
519
520 CRef<CSeq_id> seqid(new CSeq_id(id));
521 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
522 CBioseq_Handle bh = scope.GetBioseqHandle(idh);
523 BOOST_REQUIRE(bh);
524 BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
525 CGraph_CI git(bh);
526 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
527 }
528 }
529
530
BOOST_AUTO_TEST_CASE(FetchSeq4sx)531 BOOST_AUTO_TEST_CASE(FetchSeq4sx)
532 {
533 CRef<CObjectManager> om = sx_GetOM();
534
535 CCSRADataLoader::SLoaderParams params;
536 string csra_name, id;
537
538 {
539 string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
540 path = sx_GetPath(path);
541 csra_name = CDirEntry::MakePath(path, "exome.ILLUMINA.MOSAIK.csra");
542 id = csra_name;
543 replace(id.begin(), id.end(), '/', '\\');
544 id = "gnl|SRA|"+id+".6.1";
545 }
546 params.m_CSRAFiles.push_back(csra_name);
547 params.m_PathInId = true;
548 params.m_QualityGraphs = true;
549
550 CStopWatch sw(CStopWatch::eStart);
551 string loader_name =
552 CCSRADataLoader::RegisterInObjectManager(*om, params,
553 CObjectManager::eDefault)
554 .GetLoader()->GetName();
555 sx_ReportCSraLoaderName(loader_name);
556
557 for ( int t = 0; t < 2; ++t ) {
558 CScope scope(*om);
559 scope.AddDefaults();
560
561 CRef<CSeq_id> seqid(new CSeq_id(id));
562 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
563 CBioseq_Handle bh = scope.GetBioseqHandle(idh);
564 BOOST_REQUIRE(bh);
565 BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
566 CGraph_CI git(bh);
567 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
568 }
569 }
570
571
BOOST_AUTO_TEST_CASE(FetchSeq4sd)572 BOOST_AUTO_TEST_CASE(FetchSeq4sd)
573 {
574 CRef<CObjectManager> om = sx_GetOM();
575
576 CCSRADataLoader::SLoaderParams params;
577 string csra_name, id;
578
579 {
580 string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
581 string file;
582 CDirEntry::SplitPath(sx_GetPath(path), ¶ms.m_DirPath, &file);
583 csra_name = CDirEntry::MakePath(file, "exome.ILLUMINA.MOSAIK.csra");
584 id = CDirEntry::MakePath(file, "exome.ILLUMINA.MOSAIK");
585 replace(id.begin(), id.end(), '/', '\\');
586 id = "gnl|SRA|"+id+".6.1";
587 }
588 params.m_CSRAFiles.push_back(csra_name);
589 params.m_PathInId = false;
590 params.m_QualityGraphs = true;
591
592 CStopWatch sw(CStopWatch::eStart);
593 string loader_name =
594 CCSRADataLoader::RegisterInObjectManager(*om, params,
595 CObjectManager::eDefault)
596 .GetLoader()->GetName();
597 sx_ReportCSraLoaderName(loader_name);
598
599 for ( int t = 0; t < 2; ++t ) {
600 CScope scope(*om);
601 scope.AddDefaults();
602
603 CRef<CSeq_id> seqid(new CSeq_id(id));
604 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
605 CBioseq_Handle bh = scope.GetBioseqHandle(idh);
606 BOOST_REQUIRE(bh);
607 BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
608 CGraph_CI git(bh);
609 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
610 }
611 }
612
613
BOOST_AUTO_TEST_CASE(FetchSeq4s)614 BOOST_AUTO_TEST_CASE(FetchSeq4s)
615 {
616 CRef<CObjectManager> om = sx_GetOM();
617
618 CCSRADataLoader::SLoaderParams params;
619 string csra_name, id;
620
621 {
622 string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
623 params.m_DirPath = sx_GetPath(path);
624 csra_name = "exome.ILLUMINA.MOSAIK.csra";
625 id = "gnl|SRA|exome.ILLUMINA.MOSAIK.6.1";
626 }
627 params.m_CSRAFiles.push_back(csra_name);
628 params.m_PathInId = false;
629 params.m_QualityGraphs = true;
630
631 CStopWatch sw(CStopWatch::eStart);
632 string loader_name =
633 CCSRADataLoader::RegisterInObjectManager(*om, params,
634 CObjectManager::eDefault)
635 .GetLoader()->GetName();
636 sx_ReportCSraLoaderName(loader_name);
637 for ( int t = 0; t < 2; ++t ) {
638 CScope scope(*om);
639 scope.AddDefaults();
640
641 CRef<CSeq_id> seqid(new CSeq_id(id));
642 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
643 CBioseq_Handle bh = scope.GetBioseqHandle(idh);
644 BOOST_REQUIRE(bh);
645 BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
646 CGraph_CI git(bh);
647 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
648 }
649 }
650
651
BOOST_AUTO_TEST_CASE(FetchSeq5)652 BOOST_AUTO_TEST_CASE(FetchSeq5)
653 {
654 CRef<CObjectManager> om = sx_GetOM();
655
656 CCSRADataLoader::SLoaderParams params;
657 string csra_name, id;
658 TSeqPos from, to, align_count;
659
660 #if 1
661 {
662 csra_name = "SRR389414";
663 params.m_DirPath = csra_name;
664 id = "NC_000023.10";
665 from = 1000000;
666 to = 2000000;
667 align_count = 3016; // 3005 if mapq>=1
668 }
669 string loader_name =
670 CCSRADataLoader::RegisterInObjectManager(*om, params,
671 CObjectManager::eDefault)
672 .GetLoader()->GetName();
673 #else
674 {
675 csra_name = "SRR389414";
676 id = "NC_000023.10";
677 from = 1000000;
678 to = 2000000;
679 align_count = 3016; // 3005 if mapq>=1
680 }
681 string loader_name =
682 CCSRADataLoader::RegisterInObjectManager(*om, csra_name,
683 CObjectManager::eDefault)
684 .GetLoader()->GetName();
685 #endif
686 sx_ReportCSraLoaderName(loader_name);
687 CScope scope(*om);
688 scope.AddDefaults();
689
690 string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
691 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
692
693 CRef<CSeq_id> seqid(new CSeq_id(id));
694 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
695 CRef<CSeq_loc> loc(new CSeq_loc);
696 loc->SetInt().SetId(*seqid);
697 loc->SetInt().SetFrom(from);
698 loc->SetInt().SetTo(to);
699 sx_CheckNames(scope, *loc, annot_name);
700 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
701 sel.SetSearchUnresolved();
702 sel.ExcludeNamedAnnots(pileup_name);
703
704 if ( 1 ) {
705 CGraph_CI git(scope, *loc, sel);
706 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
707 }
708
709 if ( 1 ) {
710 CAlign_CI it(scope, *loc, sel);
711 if ( it ) {
712 cout << "Align count: "<<it.GetSize()<<endl;
713 if ( it.GetAnnot().IsNamed() ) {
714 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
715 }
716 }
717 BOOST_CHECK_EQUAL(align_count, it.GetSize());
718
719 for ( ; it; ++it ) {
720 const CSeq_align& align = *it;
721 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
722 sx_CheckSeq(scope, idh, **j);
723 }
724 }
725 }
726
727 if ( 1 ) {
728 sel.ResetAnnotsNames();
729 sel.AddNamedAnnots(pileup_name);
730 CGraph_CI git(scope, *loc, sel);
731 BOOST_CHECK(git.GetSize() % 6 == 0);
732 if ( 0 ) {
733 for ( ; git; ++git ) {
734 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
735 }
736 }
737 }
738
739 scope.RemoveDataLoader(loader_name); // no use-lock as there is no main sequence
740 om->RevokeDataLoader(loader_name);
741 }
742
BOOST_AUTO_TEST_CASE(FetchSeq5u)743 BOOST_AUTO_TEST_CASE(FetchSeq5u)
744 {
745 CRef<CObjectManager> om = sx_GetOM();
746
747 CCSRADataLoader::SLoaderParams params;
748 string csra_name, id;
749 TSeqPos from, to, align_count;
750
751 #if 1
752 {
753 csra_name = "SRR389414";
754 params.m_DirPath = csra_name;
755 id = "NC_000023.10";
756 from = 1000000;
757 to = 2000000;
758 align_count = 3016; // 3005 if mapq>=1
759 }
760 string loader_name =
761 CCSRADataLoader::RegisterInObjectManager(*om, params,
762 CObjectManager::eDefault)
763 .GetLoader()->GetName();
764 #else
765 {
766 csra_name = "SRR389414";
767 id = "NC_000023.10";
768 from = 1000000;
769 to = 2000000;
770 align_count = 3016; // 3005 if mapq>=1
771 }
772 string loader_name =
773 CCSRADataLoader::RegisterInObjectManager(*om, csra_name,
774 CObjectManager::eDefault)
775 .GetLoader()->GetName();
776 #endif
777 sx_ReportCSraLoaderName(loader_name);
778 CGBDataLoader::RegisterInObjectManager(*om);
779 CScope scope(*om);
780 scope.AddDefaults();
781
782 string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
783 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
784
785 CRef<CSeq_id> seqid(new CSeq_id(id));
786 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
787 CRef<CSeq_loc> loc(new CSeq_loc);
788 loc->SetInt().SetId(*seqid);
789 loc->SetInt().SetFrom(from);
790 loc->SetInt().SetTo(to);
791 sx_CheckNames(scope, *loc, annot_name);
792 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
793 sel.ExcludeNamedAnnots(pileup_name);
794
795 if ( 1 ) {
796 CGraph_CI git(scope, *loc, sel);
797 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
798 }
799
800 if ( 1 ) {
801 CAlign_CI it(scope, *loc, sel);
802 if ( it ) {
803 cout << "Align count: "<<it.GetSize()<<endl;
804 if ( it.GetAnnot().IsNamed() ) {
805 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
806 }
807 }
808 BOOST_CHECK_EQUAL(align_count, it.GetSize());
809
810 for ( ; it; ++it ) {
811 const CSeq_align& align = *it;
812 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
813 sx_CheckSeq(scope, idh, **j);
814 }
815 }
816 }
817
818 if ( 1 ) {
819 sel.ResetAnnotsNames();
820 sel.AddNamedAnnots(pileup_name);
821 CGraph_CI git(scope, *loc, sel);
822 BOOST_CHECK(git.GetSize() % 6 == 0);
823 if ( 0 ) {
824 for ( ; git; ++git ) {
825 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
826 }
827 }
828 }
829
830 scope.RemoveDataLoader(loader_name, scope.eRemoveIfLocked); // there is a use-lock from main sequence
831 om->RevokeDataLoader(loader_name); // but it shouldn't prevend revoking data loader
832 }
833
BOOST_AUTO_TEST_CASE(FetchSeq6)834 BOOST_AUTO_TEST_CASE(FetchSeq6)
835 {
836 CRef<CObjectManager> om = sx_GetOM();
837
838 CCSRADataLoader::SLoaderParams params;
839 string csra_name, id;
840 TSeqPos from, to, align_count;
841
842 {
843 csra_name = "SRR389414";
844 id = "gnl|SRA|SRR389414/GL000215.1";
845 from = 100000;
846 to = 200000;
847 align_count = 83; // 8 if mapq>=1
848 }
849 string loader_name =
850 CCSRADataLoader::RegisterInObjectManager(*om, params,
851 CObjectManager::eDefault)
852 .GetLoader()->GetName();
853 sx_ReportCSraLoaderName(loader_name);
854 CScope scope(*om);
855 scope.AddDefaults();
856
857 string annot_name = csra_name;
858 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
859
860 CRef<CSeq_id> seqid(new CSeq_id(id));
861 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
862 CRef<CSeq_loc> loc(new CSeq_loc);
863 loc->SetInt().SetId(*seqid);
864 loc->SetInt().SetFrom(from);
865 loc->SetInt().SetTo(to);
866 sx_CheckNames(scope, *loc, annot_name);
867 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
868 sel.SetSearchUnresolved();
869 sel.ExcludeNamedAnnots(pileup_name);
870
871 BOOST_CHECK(scope.GetBioseqHandle(idh));
872 //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
873 //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetTopLevelEntry().GetCompleteObject();
874 if ( 1 ) {
875 CGraph_CI git(scope, *loc, sel);
876 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
877 }
878
879 if ( 1 ) {
880 CAlign_CI it(scope, *loc, sel);
881 if ( it ) {
882 cout << "Align count: "<<it.GetSize()<<endl;
883 if ( it.GetAnnot().IsNamed() ) {
884 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
885 }
886 }
887 BOOST_CHECK_EQUAL(align_count, it.GetSize());
888
889 for ( ; it; ++it ) {
890 const CSeq_align& align = *it;
891 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
892 sx_CheckSeq(scope, idh, **j);
893 }
894 }
895 }
896
897 if ( 1 ) {
898 sel.ResetAnnotsNames();
899 sel.AddNamedAnnots(pileup_name);
900 CGraph_CI git(scope, *loc, sel);
901 BOOST_CHECK(git.GetSize() % 6 == 0);
902 if ( 0 ) {
903 for ( ; git; ++git ) {
904 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
905 }
906 }
907 }
908 }
909
910
BOOST_AUTO_TEST_CASE(FetchSeq7)911 BOOST_AUTO_TEST_CASE(FetchSeq7)
912 {
913 CRef<CObjectManager> om = sx_GetOM();
914
915 CCSRADataLoader::SLoaderParams params;
916 string csra_name, id;
917 TSeqPos from, to, align_count;
918
919 {
920 csra_name = "SRR389414";
921 id = "gnl|SRA|SRR389414/1";
922 from = 1000000;
923 to = 2000000;
924 align_count = 6104; // 5400 if mapq>=1
925 }
926 string loader_name =
927 CCSRADataLoader::RegisterInObjectManager(*om, params,
928 CObjectManager::eDefault)
929 .GetLoader()->GetName();
930 sx_ReportCSraLoaderName(loader_name);
931 CScope scope(*om);
932 scope.AddDefaults();
933
934 string annot_name = csra_name;
935 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
936
937 CRef<CSeq_id> seqid(new CSeq_id(id));
938 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
939 CRef<CSeq_loc> loc(new CSeq_loc);
940 loc->SetInt().SetId(*seqid);
941 loc->SetInt().SetFrom(from);
942 loc->SetInt().SetTo(to);
943 sx_CheckNames(scope, *loc, annot_name);
944 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
945 sel.SetSearchUnresolved();
946 sel.ExcludeNamedAnnots(pileup_name);
947
948 BOOST_CHECK(scope.GetBioseqHandle(idh));
949 //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
950 if ( 1 ) {
951 CGraph_CI git(scope, *loc, sel);
952 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
953 }
954
955 if ( 1 ) {
956 CAlign_CI it(scope, *loc, sel);
957 if ( it ) {
958 cout << "Align count: "<<it.GetSize()<<endl;
959 if ( it.GetAnnot().IsNamed() ) {
960 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
961 }
962 }
963 BOOST_CHECK_EQUAL(align_count, it.GetSize());
964
965 for ( ; it; ++it ) {
966 const CSeq_align& align = *it;
967 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
968 sx_CheckSeq(scope, idh, **j);
969 }
970 }
971 }
972
973 if ( 1 ) {
974 sel.ResetAnnotsNames();
975 sel.AddNamedAnnots(pileup_name);
976 CGraph_CI git(scope, *loc, sel);
977 BOOST_CHECK(git.GetSize() % 6 == 0);
978 if ( 0 ) {
979 for ( ; git; ++git ) {
980 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
981 }
982 }
983 }
984 }
985
986
BOOST_AUTO_TEST_CASE(FetchSeq8)987 BOOST_AUTO_TEST_CASE(FetchSeq8)
988 {
989 // pileup graph test
990 CRef<CObjectManager> om = sx_GetOM();
991
992 CCSRADataLoader::SLoaderParams params;
993 string csra_name, id;
994 TSeqPos from, to, align_count;
995
996 {
997 csra_name = "ERR669165";
998 id = "GK000001.2";
999 from = 922690;
1000 to = 922760;
1001 align_count = 87;
1002 }
1003 params.m_CSRAFiles.push_back(csra_name);
1004 CGBDataLoader::RegisterInObjectManager(*om);
1005 string loader_name =
1006 CCSRADataLoader::RegisterInObjectManager(*om, params,
1007 CObjectManager::eDefault)
1008 .GetLoader()->GetName();
1009 sx_ReportCSraLoaderName(loader_name);
1010 CScope scope(*om);
1011 scope.AddDefaults();
1012
1013 string annot_name = csra_name;
1014 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1015
1016 CRef<CSeq_id> seqid(new CSeq_id(id));
1017 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1018 CRef<CSeq_loc> loc(new CSeq_loc);
1019 loc->SetInt().SetId(*seqid);
1020 loc->SetInt().SetFrom(from);
1021 loc->SetInt().SetTo(to);
1022 sx_CheckNames(scope, *loc, annot_name);
1023 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1024 sel.SetSearchUnresolved();
1025 sel.ExcludeNamedAnnots(pileup_name);
1026
1027 BOOST_CHECK(scope.GetBioseqHandle(idh));
1028 if ( 1 ) {
1029 CGraph_CI git(scope, *loc, sel);
1030 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1031 }
1032
1033 if ( 1 ) {
1034 CAlign_CI it(scope, *loc, sel);
1035 if ( it ) {
1036 cout << "Align count: "<<it.GetSize()<<endl;
1037 if ( it.GetAnnot().IsNamed() ) {
1038 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1039 }
1040 }
1041 BOOST_CHECK_EQUAL(align_count, it.GetSize());
1042
1043 for ( ; it; ++it ) {
1044 const CSeq_align& align = *it;
1045 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1046 sx_CheckSeq(scope, idh, **j);
1047 }
1048 }
1049 }
1050
1051 if ( 1 ) {
1052 sel.ResetAnnotsNames();
1053 sel.AddNamedAnnots(pileup_name);
1054 CGraph_CI git(scope, *loc, sel);
1055 BOOST_CHECK_EQUAL(git.GetSize(), 6u);
1056 for ( size_t k = 0; git && k < 6; ++k, ++git ) {
1057 const CSeq_graph& graph = git->GetOriginalGraph();
1058 string title = graph.GetTitle();
1059 NcbiCout << "Pileup graph: " << title << NcbiEndl;
1060 typedef unsigned TExpectedPair[2];
1061 const TExpectedPair* expected_pairs = 0;
1062 size_t expected_count = 0;
1063 if ( title == "Number of inserts" ) {
1064 static const TExpectedPair expected_I[] = {
1065 { 0, 0 },
1066 };
1067 expected_pairs = expected_I;
1068 expected_count = ArraySize(expected_I);
1069 }
1070 else if ( title == "Number of A bases" ) {
1071 static const TExpectedPair expected_A[] = {
1072 { 0, 0 },
1073 };
1074 expected_pairs = expected_A;
1075 expected_count = ArraySize(expected_A);
1076 }
1077 else if ( title == "Number of C bases" ) {
1078 static const TExpectedPair expected_C[] = {
1079 { 922695, 1 },
1080 };
1081 expected_pairs = expected_C;
1082 expected_count = ArraySize(expected_C);
1083 }
1084 else if ( title == "Number of G bases" ) {
1085 static const TExpectedPair expected_G[] = {
1086 { 922693, 2 },
1087 };
1088 expected_pairs = expected_G;
1089 expected_count = ArraySize(expected_G);
1090 }
1091 else if ( title == "Number of T bases" ) {
1092 static const TExpectedPair expected_T[] = {
1093 { 922692, 1 },
1094 { 922693, 2 },
1095 { 922695, 1 },
1096 { 922750, 1 },
1097 };
1098 expected_pairs = expected_T;
1099 expected_count = ArraySize(expected_T);
1100 }
1101 else {
1102 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1103 static const TExpectedPair expected_M[] = {
1104 { 922690, 70 },
1105 { 922691, 74 },
1106 { 922692, 74 },
1107 { 922693, 75 },
1108 { 922694, 80 },
1109 { 922695, 81 },
1110 { 922696, 83 },
1111 { 922697, 83 },
1112 { 922698, 84 },
1113 { 922699, 85 },
1114 { 922745, 1 },
1115 { 922746, 1 },
1116 { 922747, 1 },
1117 { 922748, 1 },
1118 { 922749, 1 },
1119 { 922750, 1 },
1120 { 922751, 2 },
1121 { 922752, 2 },
1122 { 922753, 2 },
1123 { 922754, 2 },
1124 { 922755, 2 },
1125 { 922756, 2 },
1126 { 922757, 2 },
1127 { 922758, 2 },
1128 { 922759, 2 },
1129 { 922760, 2 },
1130 };
1131 expected_pairs = expected_M;
1132 expected_count = ArraySize(expected_M);
1133 }
1134 map<TSeqPos, unsigned> expected;
1135 for ( size_t i = 0; i < expected_count; ++i ) {
1136 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1137 }
1138 CRange<TSeqPos> range = graph.GetLoc().GetTotalRange();
1139 for ( TSeqPos pos = from; pos <= to; ++pos ) {
1140 TSeqPos i = pos - range.GetFrom();
1141 unsigned pileup_value =
1142 graph.GetGraph().IsByte()?
1143 graph.GetGraph().GetByte().GetValues()[i]:
1144 graph.GetGraph().GetInt().GetValues()[i];
1145 unsigned expected_value =
1146 expected.count(pos)? expected[pos]: 0;
1147 if ( false && pileup_value ) {
1148 NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1149 }
1150 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1151 }
1152 }
1153 }
1154 }
1155
BOOST_AUTO_TEST_CASE(FetchSeq9)1156 BOOST_AUTO_TEST_CASE(FetchSeq9)
1157 {
1158 // pileup graph test on chunks border in gaps
1159 CRef<CObjectManager> om = sx_GetOM();
1160
1161 CCSRADataLoader::SLoaderParams params;
1162 string csra_name, id;
1163 TSeqPos from, to, align_count, align_count_over;
1164
1165 {
1166 csra_name = "ERR669165";
1167 id = "GK000001.2";
1168 from = 9701400;
1169 to = 9701420;
1170 align_count = 0;
1171 align_count_over = 58;
1172 }
1173 params.m_CSRAFiles.push_back(csra_name);
1174 CGBDataLoader::RegisterInObjectManager(*om);
1175 string loader_name =
1176 CCSRADataLoader::RegisterInObjectManager(*om, params,
1177 CObjectManager::eDefault)
1178 .GetLoader()->GetName();
1179 sx_ReportCSraLoaderName(loader_name);
1180 CScope scope(*om);
1181 scope.AddDefaults();
1182
1183 string annot_name = csra_name;
1184 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1185
1186 CRef<CSeq_id> seqid(new CSeq_id(id));
1187 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1188 CRef<CSeq_loc> loc(new CSeq_loc);
1189 loc->SetInt().SetId(*seqid);
1190 loc->SetInt().SetFrom(from);
1191 loc->SetInt().SetTo(to);
1192 sx_CheckNames(scope, *loc, annot_name);
1193 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1194 sel.SetSearchUnresolved();
1195 sel.ExcludeNamedAnnots(pileup_name);
1196
1197 BOOST_CHECK(scope.GetBioseqHandle(idh));
1198 if ( 1 ) {
1199 CGraph_CI git(scope, *loc, sel);
1200 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1201 }
1202
1203 if ( 1 ) {
1204 CAlign_CI it(scope, *loc, sel);
1205 if ( it ) {
1206 cout << "Align count: "<<it.GetSize()<<endl;
1207 if ( it.GetAnnot().IsNamed() ) {
1208 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1209 }
1210 }
1211 BOOST_CHECK_EQUAL(align_count, it.GetSize());
1212
1213 for ( ; it; ++it ) {
1214 const CSeq_align& align = *it;
1215 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1216 sx_CheckSeq(scope, idh, **j);
1217 }
1218 }
1219 }
1220
1221 if ( 1 ) {
1222 SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1223 CAlign_CI it(scope, *loc, sel2);
1224 if ( it ) {
1225 cout << "Align count: "<<it.GetSize()<<endl;
1226 if ( it.GetAnnot().IsNamed() ) {
1227 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1228 }
1229 }
1230 BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1231
1232 for ( ; it; ++it ) {
1233 const CSeq_align& align = *it;
1234 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1235 sx_CheckSeq(scope, idh, **j);
1236 }
1237 }
1238 }
1239
1240 if ( 1 ) {
1241 sel.ResetAnnotsNames();
1242 sel.AddNamedAnnots(pileup_name);
1243 CGraph_CI git(scope, *loc, sel);
1244 //BOOST_CHECK_EQUAL(git.GetSize(), 12u);
1245 for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1246 const CSeq_graph& graph = git->GetOriginalGraph();
1247 string title = graph.GetTitle();
1248 NcbiCout << "Pileup graph: " << title << NcbiEndl;
1249 typedef unsigned TExpectedPair[2];
1250 const TExpectedPair* expected_pairs = 0;
1251 size_t expected_count = 0;
1252 if ( title == "Number of inserts" ) {
1253 static const TExpectedPair expected_I[] = {
1254 { 0, 0 },
1255 };
1256 expected_pairs = expected_I;
1257 expected_count = ArraySize(expected_I);
1258 }
1259 else if ( title == "Number of A bases" ) {
1260 static const TExpectedPair expected_A[] = {
1261 { 0, 0 },
1262 };
1263 expected_pairs = expected_A;
1264 expected_count = ArraySize(expected_A);
1265 }
1266 else if ( title == "Number of C bases" ) {
1267 static const TExpectedPair expected_C[] = {
1268 { 0, 0 },
1269 };
1270 expected_pairs = expected_C;
1271 expected_count = ArraySize(expected_C);
1272 }
1273 else if ( title == "Number of G bases" ) {
1274 static const TExpectedPair expected_G[] = {
1275 { 0, 0 },
1276 };
1277 expected_pairs = expected_G;
1278 expected_count = ArraySize(expected_G);
1279 }
1280 else if ( title == "Number of T bases" ) {
1281 static const TExpectedPair expected_T[] = {
1282 { 0, 0 },
1283 };
1284 expected_pairs = expected_T;
1285 expected_count = ArraySize(expected_T);
1286 }
1287 else {
1288 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1289 static const TExpectedPair expected_M[] = {
1290 { 0, 0 },
1291 };
1292 expected_pairs = expected_M;
1293 expected_count = ArraySize(expected_M);
1294 }
1295 map<TSeqPos, unsigned> expected;
1296 for ( size_t i = 0; i < expected_count; ++i ) {
1297 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1298 }
1299 CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1300 CRange<TSeqPos> range =
1301 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1302 BOOST_CHECK(!range.Empty());
1303 for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1304 TSeqPos i = pos - graph_range.GetFrom();
1305 unsigned pileup_value =
1306 graph.GetGraph().IsByte()?
1307 graph.GetGraph().GetByte().GetValues()[i]:
1308 graph.GetGraph().GetInt().GetValues()[i];
1309 unsigned expected_value =
1310 expected.count(pos)? expected[pos]: 0;
1311 if ( false && pileup_value ) {
1312 NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1313 }
1314 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1315 }
1316 }
1317 }
1318 }
1319
BOOST_AUTO_TEST_CASE(FetchSeq10)1320 BOOST_AUTO_TEST_CASE(FetchSeq10)
1321 {
1322 // pileup graph test on chunks border
1323 CRef<CObjectManager> om = sx_GetOM();
1324
1325 CCSRADataLoader::SLoaderParams params;
1326 string csra_name, id;
1327 TSeqPos from, to, align_count, align_count_over;
1328
1329 {
1330 csra_name = "ERR669165";
1331 id = "GK000001.2";
1332 from = 147027820;
1333 to = 147027840;
1334 align_count = 683;
1335 align_count_over = 683;
1336 }
1337 params.m_CSRAFiles.push_back(csra_name);
1338 CGBDataLoader::RegisterInObjectManager(*om);
1339 string loader_name =
1340 CCSRADataLoader::RegisterInObjectManager(*om, params,
1341 CObjectManager::eDefault)
1342 .GetLoader()->GetName();
1343 sx_ReportCSraLoaderName(loader_name);
1344 CScope scope(*om);
1345 scope.AddDefaults();
1346
1347 string annot_name = csra_name;
1348 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1349
1350 CRef<CSeq_id> seqid(new CSeq_id(id));
1351 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1352 CRef<CSeq_loc> loc(new CSeq_loc);
1353 loc->SetInt().SetId(*seqid);
1354 loc->SetInt().SetFrom(from);
1355 loc->SetInt().SetTo(to);
1356 sx_CheckNames(scope, *loc, annot_name);
1357 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1358 sel.SetSearchUnresolved();
1359 sel.ExcludeNamedAnnots(pileup_name);
1360
1361 BOOST_CHECK(scope.GetBioseqHandle(idh));
1362 if ( 1 ) {
1363 CGraph_CI git(scope, *loc, sel);
1364 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1365 }
1366
1367 if ( 1 ) {
1368 CAlign_CI it(scope, *loc, sel);
1369 if ( it ) {
1370 cout << "Align count: "<<it.GetSize()<<endl;
1371 if ( it.GetAnnot().IsNamed() ) {
1372 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1373 }
1374 }
1375 BOOST_CHECK_EQUAL(align_count, it.GetSize());
1376
1377 for ( ; it; ++it ) {
1378 const CSeq_align& align = *it;
1379 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1380 sx_CheckSeq(scope, idh, **j);
1381 }
1382 }
1383 }
1384
1385 if ( 1 ) {
1386 SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1387 CAlign_CI it(scope, *loc, sel2);
1388 if ( it ) {
1389 cout << "Align count: "<<it.GetSize()<<endl;
1390 if ( it.GetAnnot().IsNamed() ) {
1391 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1392 }
1393 }
1394 BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1395
1396 for ( ; it; ++it ) {
1397 const CSeq_align& align = *it;
1398 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1399 sx_CheckSeq(scope, idh, **j);
1400 }
1401 }
1402 }
1403
1404 if ( 1 ) {
1405 sel.ResetAnnotsNames();
1406 sel.AddNamedAnnots(pileup_name);
1407 CGraph_CI git(scope, *loc, sel);
1408 //BOOST_CHECK_EQUAL(git.GetSize(), 12u);
1409 for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1410 const CSeq_graph& graph = git->GetOriginalGraph();
1411 string title = graph.GetTitle();
1412 NcbiCout << "Pileup graph: " << title << NcbiEndl;
1413 typedef unsigned TExpectedPair[2];
1414 const TExpectedPair* expected_pairs = 0;
1415 size_t expected_count = 0;
1416 if ( title == "Number of inserts" ) {
1417 static const TExpectedPair expected_I[] = {
1418 { 0, 0 },
1419 };
1420 expected_pairs = expected_I;
1421 expected_count = ArraySize(expected_I);
1422 }
1423 else if ( title == "Number of A bases" ) {
1424 static const TExpectedPair expected_A[] = {
1425 { 147027821, 4 },
1426 { 147027822, 6 },
1427 { 147027825, 1 },
1428 { 147027828, 1 },
1429 { 147027829, 4 },
1430 { 147027830, 10 },
1431 { 147027832, 2 },
1432 };
1433 expected_pairs = expected_A;
1434 expected_count = ArraySize(expected_A);
1435 }
1436 else if ( title == "Number of C bases" ) {
1437 static const TExpectedPair expected_C[] = {
1438 { 147027820, 1 },
1439 { 147027821, 1 },
1440 { 147027827, 2 },
1441 { 147027829, 1 },
1442 { 147027832, 1 },
1443 { 147027837, 1 },
1444 };
1445 expected_pairs = expected_C;
1446 expected_count = ArraySize(expected_C);
1447 }
1448 else if ( title == "Number of G bases" ) {
1449 static const TExpectedPair expected_G[] = {
1450 { 147027822, 1 },
1451 { 147027826, 1 },
1452 { 147027827, 1 },
1453 };
1454 expected_pairs = expected_G;
1455 expected_count = ArraySize(expected_G);
1456 }
1457 else if ( title == "Number of T bases" ) {
1458 static const TExpectedPair expected_T[] = {
1459 { 147027821, 1 },
1460 { 147027822, 1 },
1461 { 147027823, 1 },
1462 { 147027826, 2 },
1463 { 147027827, 2 },
1464 { 147027831, 1 },
1465 { 147027832, 1 },
1466 { 147027833, 1 },
1467 { 147027836, 4 },
1468 { 147027837, 1 },
1469 };
1470 expected_pairs = expected_T;
1471 expected_count = ArraySize(expected_T);
1472 }
1473 else {
1474 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1475 static const TExpectedPair expected_M[] = {
1476 { 147027820, 602 },
1477 { 147027821, 597 },
1478 { 147027822, 600 },
1479 { 147027823, 597 },
1480 { 147027824, 590 },
1481 { 147027825, 598 },
1482 { 147027826, 597 },
1483 { 147027827, 599 },
1484 { 147027828, 603 },
1485 { 147027829, 581 },
1486 { 147027830, 561 },
1487 { 147027831, 561 },
1488 { 147027832, 552 },
1489 { 147027833, 559 },
1490 { 147027834, 547 },
1491 { 147027835, 546 },
1492 { 147027836, 541 },
1493 { 147027837, 545 },
1494 { 147027838, 8 },
1495 { 147027839, 8 },
1496 { 147027840, 8 },
1497 };
1498 expected_pairs = expected_M;
1499 expected_count = ArraySize(expected_M);
1500 }
1501 map<TSeqPos, unsigned> expected;
1502 for ( size_t i = 0; i < expected_count; ++i ) {
1503 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1504 }
1505 CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1506 CRange<TSeqPos> range =
1507 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1508 BOOST_CHECK(!range.Empty());
1509 for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1510 TSeqPos i = pos - graph_range.GetFrom();
1511 unsigned pileup_value =
1512 graph.GetGraph().IsByte()?
1513 graph.GetGraph().GetByte().GetValues()[i]:
1514 graph.GetGraph().GetInt().GetValues()[i];
1515 unsigned expected_value =
1516 expected.count(pos)? expected[pos]: 0;
1517 if ( false && pileup_value ) {
1518 NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1519 }
1520 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1521 }
1522 }
1523 }
1524 }
1525
1526
BOOST_AUTO_TEST_CASE(FetchSeq11)1527 BOOST_AUTO_TEST_CASE(FetchSeq11)
1528 {
1529 // pileup graph test on chunks and page border
1530 CRef<CObjectManager> om = sx_GetOM();
1531
1532 CCSRADataLoader::SLoaderParams params;
1533 string csra_name, id;
1534 TSeqPos from, to, align_count, align_count_over;
1535
1536 {
1537 csra_name = "ERR669165";
1538 id = "GK000001.2";
1539 from = 59805960;
1540 to = 59805980;
1541 align_count = 6;
1542 align_count_over = 6;
1543 }
1544 params.m_CSRAFiles.push_back(csra_name);
1545 CGBDataLoader::RegisterInObjectManager(*om);
1546 string loader_name =
1547 CCSRADataLoader::RegisterInObjectManager(*om, params,
1548 CObjectManager::eDefault)
1549 .GetLoader()->GetName();
1550 sx_ReportCSraLoaderName(loader_name);
1551 CScope scope(*om);
1552 scope.AddDefaults();
1553
1554 string annot_name = csra_name;
1555 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1556
1557 CRef<CSeq_id> seqid(new CSeq_id(id));
1558 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1559 CRef<CSeq_loc> loc(new CSeq_loc);
1560 loc->SetInt().SetId(*seqid);
1561 loc->SetInt().SetFrom(from);
1562 loc->SetInt().SetTo(to);
1563 sx_CheckNames(scope, *loc, annot_name);
1564 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1565 sel.SetSearchUnresolved();
1566 sel.ExcludeNamedAnnots(pileup_name);
1567
1568 BOOST_CHECK(scope.GetBioseqHandle(idh));
1569 if ( 1 ) {
1570 CGraph_CI git(scope, *loc, sel);
1571 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1572 }
1573
1574 if ( 1 ) {
1575 CAlign_CI it(scope, *loc, sel);
1576 if ( it ) {
1577 cout << "Align count: "<<it.GetSize()<<endl;
1578 if ( it.GetAnnot().IsNamed() ) {
1579 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1580 }
1581 }
1582 BOOST_CHECK_EQUAL(align_count, it.GetSize());
1583
1584 for ( ; it; ++it ) {
1585 const CSeq_align& align = *it;
1586 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1587 sx_CheckSeq(scope, idh, **j);
1588 }
1589 }
1590 }
1591
1592 if ( 1 ) {
1593 SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1594 CAlign_CI it(scope, *loc, sel2);
1595 if ( it ) {
1596 cout << "Align count: "<<it.GetSize()<<endl;
1597 if ( it.GetAnnot().IsNamed() ) {
1598 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1599 }
1600 }
1601 BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1602
1603 for ( ; it; ++it ) {
1604 const CSeq_align& align = *it;
1605 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1606 sx_CheckSeq(scope, idh, **j);
1607 }
1608 }
1609 }
1610
1611 if ( 1 ) {
1612 sel.ResetAnnotsNames();
1613 sel.AddNamedAnnots(pileup_name);
1614 CGraph_CI git(scope, *loc, sel);
1615 BOOST_CHECK_EQUAL(git.GetSize(), 6u);
1616 for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1617 const CSeq_graph& graph = git->GetOriginalGraph();
1618 string title = graph.GetTitle();
1619 NcbiCout << "Pileup graph: " << title << NcbiEndl;
1620 typedef unsigned TExpectedPair[2];
1621 const TExpectedPair* expected_pairs = 0;
1622 size_t expected_count = 0;
1623 if ( title == "Number of inserts" ) {
1624 static const TExpectedPair expected_I[] = {
1625 { 0, 0 },
1626 };
1627 expected_pairs = expected_I;
1628 expected_count = ArraySize(expected_I);
1629 }
1630 else if ( title == "Number of A bases" ) {
1631 static const TExpectedPair expected_A[] = {
1632 { 0, 0 },
1633 };
1634 expected_pairs = expected_A;
1635 expected_count = ArraySize(expected_A);
1636 }
1637 else if ( title == "Number of C bases" ) {
1638 static const TExpectedPair expected_C[] = {
1639 { 0, 0 },
1640 };
1641 expected_pairs = expected_C;
1642 expected_count = ArraySize(expected_C);
1643 }
1644 else if ( title == "Number of G bases" ) {
1645 static const TExpectedPair expected_G[] = {
1646 { 0, 0 },
1647 };
1648 expected_pairs = expected_G;
1649 expected_count = ArraySize(expected_G);
1650 }
1651 else if ( title == "Number of T bases" ) {
1652 static const TExpectedPair expected_T[] = {
1653 { 59805974, 1 },
1654 };
1655 expected_pairs = expected_T;
1656 expected_count = ArraySize(expected_T);
1657 }
1658 else {
1659 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1660 static const TExpectedPair expected_M[] = {
1661 { 59805960, 4 },
1662 { 59805961, 4 },
1663 { 59805962, 4 },
1664 { 59805963, 4 },
1665 { 59805964, 4 },
1666 { 59805965, 3 },
1667 { 59805966, 3 },
1668 { 59805967, 3 },
1669 { 59805968, 3 },
1670 { 59805969, 3 },
1671 { 59805970, 3 },
1672 { 59805971, 3 },
1673 { 59805972, 3 },
1674 { 59805973, 3 },
1675 { 59805974, 3 },
1676 { 59805975, 5 },
1677 { 59805976, 5 },
1678 { 59805977, 5 },
1679 { 59805978, 5 },
1680 { 59805979, 5 },
1681 { 59805980, 5 },
1682 };
1683 expected_pairs = expected_M;
1684 expected_count = ArraySize(expected_M);
1685 }
1686 map<TSeqPos, unsigned> expected;
1687 for ( size_t i = 0; i < expected_count; ++i ) {
1688 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1689 }
1690 CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1691 CRange<TSeqPos> range =
1692 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1693 BOOST_CHECK(!range.Empty());
1694 for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1695 TSeqPos i = pos - graph_range.GetFrom();
1696 unsigned pileup_value =
1697 graph.GetGraph().IsByte()?
1698 graph.GetGraph().GetByte().GetValues()[i]:
1699 graph.GetGraph().GetInt().GetValues()[i];
1700 unsigned expected_value =
1701 expected.count(pos)? expected[pos]: 0;
1702 if ( false && pileup_value ) {
1703 NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1704 }
1705 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1706 }
1707 }
1708 }
1709 }
1710
1711
BOOST_AUTO_TEST_CASE(FetchSeq12)1712 BOOST_AUTO_TEST_CASE(FetchSeq12)
1713 {
1714 // pileup graph test on chunks and page border
1715 CRef<CObjectManager> om = sx_GetOM();
1716
1717 CCSRADataLoader::SLoaderParams params;
1718 string csra_name, id;
1719 TSeqPos from, to, align_count, align_count_over;
1720
1721 {
1722 csra_name = "SRR1686476";
1723 id = "NC_011294.1";
1724 from = 805960;
1725 to = 805980;
1726 align_count = 81;
1727 align_count_over = 81;
1728 }
1729 params.m_CSRAFiles.push_back(csra_name);
1730 CGBDataLoader::RegisterInObjectManager(*om);
1731 string loader_name =
1732 CCSRADataLoader::RegisterInObjectManager(*om, params,
1733 CObjectManager::eDefault)
1734 .GetLoader()->GetName();
1735 sx_ReportCSraLoaderName(loader_name);
1736 CScope scope(*om);
1737 scope.AddDefaults();
1738
1739 string annot_name = csra_name;
1740 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1741
1742 CRef<CSeq_id> seqid(new CSeq_id(id));
1743 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1744 CRef<CSeq_loc> loc(new CSeq_loc);
1745 loc->SetInt().SetId(*seqid);
1746 loc->SetInt().SetFrom(from);
1747 loc->SetInt().SetTo(to);
1748 sx_CheckNames(scope, *loc, annot_name);
1749 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1750 sel.SetSearchUnresolved();
1751 sel.ExcludeNamedAnnots(pileup_name);
1752
1753 BOOST_CHECK(scope.GetBioseqHandle(idh));
1754 if ( 1 ) {
1755 CGraph_CI git(scope, *loc, sel);
1756 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1757 }
1758
1759 if ( 1 ) {
1760 CAlign_CI it(scope, *loc, sel);
1761 if ( it ) {
1762 cout << "Align count: "<<it.GetSize()<<endl;
1763 if ( it.GetAnnot().IsNamed() ) {
1764 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1765 }
1766 }
1767 BOOST_CHECK_EQUAL(align_count, it.GetSize());
1768
1769 for ( ; it; ++it ) {
1770 const CSeq_align& align = *it;
1771 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1772 sx_CheckSeq(scope, idh, **j);
1773 }
1774 }
1775 }
1776
1777 if ( 1 ) {
1778 SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1779 CAlign_CI it(scope, *loc, sel2);
1780 if ( it ) {
1781 cout << "Align count: "<<it.GetSize()<<endl;
1782 if ( it.GetAnnot().IsNamed() ) {
1783 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1784 }
1785 }
1786 BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1787
1788 for ( ; it; ++it ) {
1789 const CSeq_align& align = *it;
1790 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1791 sx_CheckSeq(scope, idh, **j);
1792 }
1793 }
1794 }
1795
1796 if ( 1 ) {
1797 sel.ResetAnnotsNames();
1798 sel.AddNamedAnnots(pileup_name);
1799 CGraph_CI git(scope, *loc, sel);
1800 BOOST_CHECK_EQUAL(git.GetSize(), 6u);
1801 for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1802 const CSeq_graph& graph = git->GetOriginalGraph();
1803 string title = graph.GetTitle();
1804 NcbiCout << "Pileup graph: " << title << NcbiEndl;
1805 typedef unsigned TExpectedPair[2];
1806 const TExpectedPair* expected_pairs = 0;
1807 size_t expected_count = 0;
1808 if ( title == "Number of inserts" ) {
1809 static const TExpectedPair expected_I[] = {
1810 { 0, 0 },
1811 };
1812 expected_pairs = expected_I;
1813 expected_count = ArraySize(expected_I);
1814 }
1815 else if ( title == "Number of A bases" ) {
1816 static const TExpectedPair expected_A[] = {
1817 { 805967, 1 },
1818 { 805977, 1 },
1819 };
1820 expected_pairs = expected_A;
1821 expected_count = ArraySize(expected_A);
1822 }
1823 else if ( title == "Number of C bases" ) {
1824 static const TExpectedPair expected_C[] = {
1825 { 805960, 1 },
1826 };
1827 expected_pairs = expected_C;
1828 expected_count = ArraySize(expected_C);
1829 }
1830 else if ( title == "Number of G bases" ) {
1831 static const TExpectedPair expected_G[] = {
1832 { 805968, 1 },
1833 };
1834 expected_pairs = expected_G;
1835 expected_count = ArraySize(expected_G);
1836 }
1837 else if ( title == "Number of T bases" ) {
1838 static const TExpectedPair expected_T[] = {
1839 { 805963, 1 },
1840 { 805965, 1 },
1841 { 805966, 1 },
1842 };
1843 expected_pairs = expected_T;
1844 expected_count = ArraySize(expected_T);
1845 }
1846 else {
1847 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1848 static const TExpectedPair expected_M[] = {
1849 { 805960, 69 },
1850 { 805961, 70 },
1851 { 805962, 72 },
1852 { 805963, 72 },
1853 { 805964, 74 },
1854 { 805965, 70 },
1855 { 805966, 72 },
1856 { 805967, 72 },
1857 { 805968, 70 },
1858 { 805969, 71 },
1859 { 805970, 70 },
1860 { 805971, 70 },
1861 { 805972, 71 },
1862 { 805973, 69 },
1863 { 805974, 70 },
1864 { 805975, 70 },
1865 { 805976, 70 },
1866 { 805977, 70 },
1867 { 805978, 69 },
1868 { 805979, 69 },
1869 { 805980, 70 },
1870 };
1871 expected_pairs = expected_M;
1872 expected_count = ArraySize(expected_M);
1873 }
1874 map<TSeqPos, unsigned> expected;
1875 for ( size_t i = 0; i < expected_count; ++i ) {
1876 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1877 }
1878 CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1879 CRange<TSeqPos> range =
1880 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1881 BOOST_CHECK(!range.Empty());
1882 for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1883 TSeqPos i = pos - graph_range.GetFrom();
1884 unsigned pileup_value =
1885 graph.GetGraph().IsByte()?
1886 graph.GetGraph().GetByte().GetValues()[i]:
1887 graph.GetGraph().GetInt().GetValues()[i];
1888 unsigned expected_value =
1889 expected.count(pos)? expected[pos]: 0;
1890 if ( false && pileup_value ) {
1891 NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1892 }
1893 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1894 }
1895 }
1896 }
1897 }
1898
1899 #if 0
1900 // huge cSRA file loading
1901 BOOST_AUTO_TEST_CASE(FetchSeq8)
1902 {
1903 CCSRADataLoader::SetPileupGraphsParamDefault(true);
1904
1905 CStopWatch sw(CStopWatch::eStart);
1906 CRef<CObjectManager> om = sx_GetOM();
1907 NcbiCout << "Loaded OM in " << sw.Restart() << NcbiEndl;
1908
1909 CCSRADataLoader::SLoaderParams params;
1910 string csra_name, id;
1911 TSeqPos from, to, align_count;
1912
1913 {
1914 csra_name = "/panfs/traces01/compress/qa/yaschenk/RICHA/NA12878.richa.hiseq.primary_full.csra";
1915 id = "NC_000005.9";
1916 from = 20000;
1917 to = 25000;
1918 align_count = 2636; // 5400 if mapq>=1
1919 }
1920 params.m_CSRAFiles.push_back(csra_name);
1921 CGBDataLoader::RegisterInObjectManager(*om);
1922 string loader_name =
1923 CCSRADataLoader::RegisterInObjectManager(*om, params,
1924 CObjectManager::eDefault)
1925 .GetLoader()->GetName();
1926 NcbiCout << "Registered cSRA loader in " << sw.Restart() << NcbiEndl;
1927 sx_ReportCSraLoaderName(loader_name);
1928 CScope scope(*om);
1929 scope.AddDefaults();
1930
1931 string annot_name = csra_name;
1932 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1933
1934 CRef<CSeq_id> seqid(new CSeq_id(id));
1935 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1936 CRef<CSeq_loc> loc(new CSeq_loc);
1937 loc->SetInt().SetId(*seqid);
1938 loc->SetInt().SetFrom(from);
1939 loc->SetInt().SetTo(to);
1940
1941 if ( 1 ) {
1942 BOOST_CHECK(scope.GetBioseqHandle(idh));
1943 //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
1944 NcbiCout << "Got reference handle in " << sw.Restart() << NcbiEndl;
1945 }
1946
1947 if ( 1 ) {
1948 sx_CheckNames(scope, *loc, annot_name);
1949 NcbiCout << "Got names in " << sw.Restart() << NcbiEndl;
1950 }
1951
1952 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1953 sel.SetSearchUnresolved();
1954
1955 if ( 1 ) {
1956 sel.ResetAnnotsNames();
1957 sel.AddNamedAnnots(annot_name);
1958 CGraph_CI git(scope, *loc, sel);
1959 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1960 NcbiCout << "Got coverage graph in " << sw.Restart() << NcbiEndl;
1961 if ( 0 ) {
1962 for ( ; git; ++git ) {
1963 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
1964 }
1965 }
1966 }
1967
1968 if ( 1 ) {
1969 sel.ResetAnnotsNames();
1970 sel.AddNamedAnnots(pileup_name);
1971 CGraph_CI git(scope, *loc, sel);
1972 BOOST_CHECK(git.GetSize() % 6 == 0);
1973 BOOST_CHECK(git.GetSize());
1974 NcbiCout << "Got pileup graphs in " << sw.Restart() << NcbiEndl;
1975 if ( 0 ) {
1976 for ( ; git; ++git ) {
1977 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
1978 }
1979 }
1980 }
1981
1982 if ( 1 ) {
1983 sel.ResetAnnotsNames();
1984 sel.AddNamedAnnots(annot_name);
1985 CAlign_CI it(scope, *loc, sel);
1986 if ( it ) {
1987 cout << "Align count: "<<it.GetSize()<<endl;
1988 if ( it.GetAnnot().IsNamed() ) {
1989 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1990 }
1991 }
1992 BOOST_CHECK_EQUAL(align_count, it.GetSize());
1993 NcbiCout << "Got alignments in " << sw.Restart() << NcbiEndl;
1994
1995 for ( ; it; ++it ) {
1996 const CSeq_align& align = *it;
1997 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1998 sx_CheckSeq(scope, idh, **j);
1999 }
2000 }
2001 NcbiCout << "Got short reads in " << sw.Restart() << NcbiEndl;
2002 }
2003 }
2004 #endif
2005 #if 0 // the SRR494718 is not released yet
2006 BOOST_AUTO_TEST_CASE(ShortSeq1x)
2007 {
2008 CRef<CObjectManager> om = sx_GetOM();
2009
2010 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2011
2012 string loader_name =
2013 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2014 .GetLoader()->GetName();
2015 sx_ReportCSraLoaderName(loader_name);
2016
2017 CScope scope(*om);
2018 scope.AddDefaults();
2019
2020 CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR494718/chr1"));
2021 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR494718.7001.1"));
2022 TSeqPos prim_pos = 4263026;
2023 TSeqPos sec_pos = 3996256;
2024
2025 typedef CRange<TSeqPos> TRange;
2026 CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2027 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2028
2029 CBioseq_Handle ref;
2030 if ( 1 ) {
2031 ref = scope.GetBioseqHandle(ref_idh);
2032 BOOST_REQUIRE(ref);
2033 if ( 1 ) {
2034 // scan first 100 alignments
2035 CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2036 BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2037 }
2038 if ( 0 ) {
2039 // scan alignments around primary position
2040 int found = 0;
2041 for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2042 const CSeq_align& align = *ait;
2043 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2044 if ( read_id->Equals(*it) ) {
2045 ++found;
2046 }
2047 }
2048 }
2049 BOOST_CHECK_EQUAL(found, 1);
2050 }
2051 if ( 1 ) {
2052 // scan alignments around secondary position
2053 int found = 0;
2054 for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2055 const CSeq_align& align = *ait;
2056 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2057 if ( read_id->Equals(*it) ) {
2058 ++found;
2059 }
2060 }
2061 }
2062 BOOST_CHECK_EQUAL(found, 1);
2063 }
2064 }
2065
2066 BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2067 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2068 BOOST_REQUIRE(read);
2069 if ( 1 ) {
2070 // quality graph
2071 CGraph_CI git(read);
2072 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2073 }
2074 if ( 1 ) {
2075 // alignment (primary and secondary)
2076 CAlign_CI ait(read);
2077 for ( ; ait; ++ait ) {
2078 NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2079 }
2080 BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2081 }
2082 }
2083 #endif
2084
BOOST_AUTO_TEST_CASE(ShortSeq1)2085 BOOST_AUTO_TEST_CASE(ShortSeq1)
2086 {
2087 CRef<CObjectManager> om = sx_GetOM();
2088
2089 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2090
2091 bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2092 CCSRADataLoader::SetSpotReadAlignParamDefault(false);
2093
2094 string loader_name =
2095 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2096 .GetLoader()->GetName();
2097 sx_ReportCSraLoaderName(loader_name);
2098
2099 CScope scope(*om);
2100 scope.AddDefaults();
2101
2102 CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2103 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2104 TSeqPos prim_pos = 965627;
2105 TSeqPos sec_pos = 966293;
2106
2107 typedef CRange<TSeqPos> TRange;
2108 CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2109 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2110
2111 CBioseq_Handle ref;
2112 if ( 1 ) {
2113 ref = scope.GetBioseqHandle(ref_idh);
2114 BOOST_REQUIRE(ref);
2115 if ( 1 ) {
2116 // scan first 100 alignments
2117 CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2118 BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2119 }
2120 if ( 0 ) {
2121 // scan alignments around primary position
2122 int found = 0;
2123 for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2124 const CSeq_align& align = *ait;
2125 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2126 if ( read_id->Equals(*it) ) {
2127 ++found;
2128 }
2129 }
2130 }
2131 BOOST_CHECK_EQUAL(found, 1);
2132 }
2133 if ( 1 ) {
2134 // scan alignments around secondary position
2135 int found = 0;
2136 for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2137 const CSeq_align& align = *ait;
2138 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2139 if ( read_id->Equals(*it) ) {
2140 ++found;
2141 }
2142 }
2143 }
2144 BOOST_CHECK_EQUAL(found, 1);
2145 }
2146 }
2147
2148 BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2149 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2150 BOOST_REQUIRE(read);
2151 if ( 1 ) {
2152 // quality graph
2153 CGraph_CI git(read);
2154 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2155 }
2156 if ( 1 ) {
2157 // alignment (primary and secondary)
2158 CAlign_CI ait(read);
2159 for ( ; ait; ++ait ) {
2160 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2161 }
2162 BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2163 }
2164 if ( 1 ) {
2165 scope.ResetHistory();
2166 dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(true);
2167 // only primary alignment is available with 'spot_read_align' parameter set
2168 CAlign_CI ait(read);
2169 for ( ; ait; ++ait ) {
2170 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2171 }
2172 BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2173 }
2174
2175 CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2176 }
2177
2178
BOOST_AUTO_TEST_CASE(ShortSeq1_set)2179 BOOST_AUTO_TEST_CASE(ShortSeq1_set)
2180 {
2181 CRef<CObjectManager> om = sx_GetOM();
2182
2183 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2184
2185 bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2186 CCSRADataLoader::SetSpotReadAlignParamDefault(true);
2187
2188 string loader_name =
2189 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2190 .GetLoader()->GetName();
2191 sx_ReportCSraLoaderName(loader_name);
2192 dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(false);
2193
2194 CScope scope(*om);
2195 scope.AddDefaults();
2196
2197 CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2198 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2199 TSeqPos prim_pos = 965627;
2200 TSeqPos sec_pos = 966293;
2201
2202 typedef CRange<TSeqPos> TRange;
2203 CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2204 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2205
2206 CBioseq_Handle ref;
2207 if ( 1 ) {
2208 ref = scope.GetBioseqHandle(ref_idh);
2209 BOOST_REQUIRE(ref);
2210 if ( 1 ) {
2211 // scan first 100 alignments
2212 CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2213 BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2214 }
2215 if ( 0 ) {
2216 // scan alignments around primary position
2217 int found = 0;
2218 for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2219 const CSeq_align& align = *ait;
2220 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2221 if ( read_id->Equals(*it) ) {
2222 ++found;
2223 }
2224 }
2225 }
2226 BOOST_CHECK_EQUAL(found, 1);
2227 }
2228 if ( 1 ) {
2229 // scan alignments around secondary position
2230 int found = 0;
2231 for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2232 const CSeq_align& align = *ait;
2233 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2234 if ( read_id->Equals(*it) ) {
2235 ++found;
2236 }
2237 }
2238 }
2239 BOOST_CHECK_EQUAL(found, 1);
2240 }
2241 }
2242
2243 BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2244 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2245 BOOST_REQUIRE(read);
2246 if ( 1 ) {
2247 // quality graph
2248 CGraph_CI git(read);
2249 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2250 }
2251 if ( 1 ) {
2252 // alignment (primary and secondary)
2253 CAlign_CI ait(read);
2254 for ( ; ait; ++ait ) {
2255 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2256 }
2257 BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2258 }
2259 if ( 1 ) {
2260 scope.ResetHistory();
2261 dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(true);
2262 // only primary alignment is available with 'spot_read_align' parameter set
2263 CAlign_CI ait(read);
2264 for ( ; ait; ++ait ) {
2265 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2266 }
2267 BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2268 }
2269
2270 CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2271 }
2272
2273
BOOST_AUTO_TEST_CASE(ShortSeq1spot)2274 BOOST_AUTO_TEST_CASE(ShortSeq1spot)
2275 {
2276 CRef<CObjectManager> om = sx_GetOM();
2277
2278 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2279
2280 bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2281 CCSRADataLoader::SetSpotReadAlignParamDefault(true);
2282
2283 string loader_name =
2284 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2285 .GetLoader()->GetName();
2286 sx_ReportCSraLoaderName(loader_name);
2287
2288 CScope scope(*om);
2289 scope.AddDefaults();
2290
2291 CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2292 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2293 TSeqPos prim_pos = 965627;
2294 TSeqPos sec_pos = 966293;
2295
2296 typedef CRange<TSeqPos> TRange;
2297 CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2298 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2299
2300 CBioseq_Handle ref;
2301 if ( 1 ) {
2302 ref = scope.GetBioseqHandle(ref_idh);
2303 BOOST_REQUIRE(ref);
2304 if ( 1 ) {
2305 // scan first 100 alignments
2306 CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2307 BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2308 }
2309 if ( 0 ) {
2310 // scan alignments around primary position
2311 int found = 0;
2312 for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2313 const CSeq_align& align = *ait;
2314 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2315 if ( read_id->Equals(*it) ) {
2316 ++found;
2317 }
2318 }
2319 }
2320 BOOST_CHECK_EQUAL(found, 1);
2321 }
2322 if ( 1 ) {
2323 // scan alignments around secondary position
2324 int found = 0;
2325 for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2326 const CSeq_align& align = *ait;
2327 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2328 if ( read_id->Equals(*it) ) {
2329 ++found;
2330 }
2331 }
2332 }
2333 BOOST_CHECK_EQUAL(found, 1);
2334 }
2335 }
2336
2337 BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2338 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2339 BOOST_REQUIRE(read);
2340 if ( 1 ) {
2341 // quality graph
2342 CGraph_CI git(read);
2343 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2344 }
2345 if ( 1 ) {
2346 // only primary alignment is available with 'spot_read_align' parameter set
2347 CAlign_CI ait(read);
2348 for ( ; ait; ++ait ) {
2349 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2350 }
2351 BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2352 }
2353 if ( 1 ) {
2354 scope.ResetHistory();
2355 dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(false);
2356 // both primary and secondary alignments should be visible
2357 // after 'spot_read_align' parameter is reset
2358 CAlign_CI ait(read);
2359 for ( ; ait; ++ait ) {
2360 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2361 }
2362 BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2363 }
2364
2365 CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2366 }
2367
2368
BOOST_AUTO_TEST_CASE(ShortSeq1spot_set)2369 BOOST_AUTO_TEST_CASE(ShortSeq1spot_set)
2370 {
2371 CRef<CObjectManager> om = sx_GetOM();
2372
2373 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2374
2375 bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2376 CCSRADataLoader::SetSpotReadAlignParamDefault(false);
2377
2378 string loader_name =
2379 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2380 .GetLoader()->GetName();
2381 sx_ReportCSraLoaderName(loader_name);
2382 dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(true);
2383
2384 CScope scope(*om);
2385 scope.AddDefaults();
2386
2387 CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2388 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2389 TSeqPos prim_pos = 965627;
2390 TSeqPos sec_pos = 966293;
2391
2392 typedef CRange<TSeqPos> TRange;
2393 CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2394 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2395
2396 CBioseq_Handle ref;
2397 if ( 1 ) {
2398 ref = scope.GetBioseqHandle(ref_idh);
2399 BOOST_REQUIRE(ref);
2400 if ( 1 ) {
2401 // scan first 100 alignments
2402 CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2403 BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2404 }
2405 if ( 0 ) {
2406 // scan alignments around primary position
2407 int found = 0;
2408 for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2409 const CSeq_align& align = *ait;
2410 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2411 if ( read_id->Equals(*it) ) {
2412 ++found;
2413 }
2414 }
2415 }
2416 BOOST_CHECK_EQUAL(found, 1);
2417 }
2418 if ( 1 ) {
2419 // scan alignments around secondary position
2420 int found = 0;
2421 for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2422 const CSeq_align& align = *ait;
2423 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2424 if ( read_id->Equals(*it) ) {
2425 ++found;
2426 }
2427 }
2428 }
2429 BOOST_CHECK_EQUAL(found, 1);
2430 }
2431 }
2432
2433 BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2434 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2435 BOOST_REQUIRE(read);
2436 if ( 1 ) {
2437 // quality graph
2438 CGraph_CI git(read);
2439 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2440 }
2441 if ( 1 ) {
2442 // only primary alignment is available with 'spot_read_align' parameter set
2443 CAlign_CI ait(read);
2444 for ( ; ait; ++ait ) {
2445 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2446 }
2447 BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2448 }
2449 if ( 1 ) {
2450 scope.ResetHistory();
2451 dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(false);
2452 // both primary and secondary alignments should be visible
2453 // after 'spot_read_align' parameter is reset
2454 CAlign_CI ait(read);
2455 for ( ; ait; ++ait ) {
2456 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2457 }
2458 BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2459 }
2460
2461 CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2462 }
2463
2464
BOOST_AUTO_TEST_CASE(ShortSeq2)2465 BOOST_AUTO_TEST_CASE(ShortSeq2)
2466 {
2467 CRef<CObjectManager> om = sx_GetOM();
2468
2469 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2470
2471 string loader_name =
2472 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2473 .GetLoader()->GetName();
2474 sx_ReportCSraLoaderName(loader_name);
2475
2476 CScope scope(*om);
2477 scope.AddDefaults();
2478
2479 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR035417.1.1"));
2480
2481 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2482
2483 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2484 BOOST_REQUIRE(read);
2485 if ( 1 ) {
2486 // quality graph
2487 CGraph_CI git(read);
2488 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2489 }
2490 if ( 1 ) {
2491 // alignment (primary and secondary)
2492 CAlign_CI ait(read);
2493 for ( ; ait; ++ait ) {
2494 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2495 }
2496 BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2497 }
2498 }
2499
2500
BOOST_AUTO_TEST_CASE(ShortSeq3)2501 BOOST_AUTO_TEST_CASE(ShortSeq3)
2502 {
2503 CRef<CObjectManager> om = sx_GetOM();
2504
2505 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2506
2507 string loader_name =
2508 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2509 .GetLoader()->GetName();
2510 sx_ReportCSraLoaderName(loader_name);
2511
2512 CScope scope(*om);
2513 scope.AddDefaults();
2514
2515 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|ERR003990.1.1"));
2516
2517 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2518
2519 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2520 BOOST_REQUIRE(read);
2521 if ( 1 ) {
2522 // quality graph
2523 CGraph_CI git(read);
2524 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2525 }
2526 if ( 1 ) {
2527 // alignment (primary and secondary)
2528 CAlign_CI ait(read);
2529 for ( ; ait; ++ait ) {
2530 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2531 }
2532 BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2533 }
2534 }
2535
2536
BOOST_AUTO_TEST_CASE(ShortSeq4)2537 BOOST_AUTO_TEST_CASE(ShortSeq4)
2538 {
2539 CRef<CObjectManager> om = sx_GetOM();
2540
2541 //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2542
2543 string loader_name =
2544 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2545 .GetLoader()->GetName();
2546 sx_ReportCSraLoaderName(loader_name);
2547
2548 CScope scope(*om);
2549 scope.AddDefaults();
2550
2551 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR749060.1.1"));
2552
2553 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2554
2555 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2556 BOOST_REQUIRE(read);
2557 if ( 1 ) {
2558 // quality graph
2559 CGraph_CI git(read);
2560 BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2561 }
2562 if ( 1 ) {
2563 // alignment (primary and secondary)
2564 CAlign_CI ait(read);
2565 for ( ; ait; ++ait ) {
2566 //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2567 }
2568 BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2569 }
2570 }
2571
2572 /*
2573 BOOST_AUTO_TEST_CASE(ShortSeq5)
2574 {
2575 CRef<CObjectManager> om = sx_GetOM();
2576
2577 bool verbose = 1;
2578 bool graph = 1;
2579
2580 if ( graph ) {
2581 putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2582 }
2583
2584 string loader_name =
2585 CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2586 .GetLoader()->GetName();
2587 sx_ReportCSraLoaderName(loader_name);
2588
2589 CScope scope(*om);
2590 scope.AddDefaults();
2591
2592 CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR500739.422766.2"));
2593
2594 CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2595
2596 CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2597 BOOST_REQUIRE(read);
2598 if ( 1 ) {
2599 // quality graph
2600 CGraph_CI git(read);
2601 BOOST_REQUIRE_EQUAL(git.GetSize(), graph? 1u: 0u);
2602 if ( graph && verbose ) {
2603 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
2604 }
2605 }
2606 if ( 1 ) {
2607 // alignment (primary and secondary)
2608 CAlign_CI ait(read);
2609 for ( ; ait; ++ait ) {
2610 if ( verbose ) {
2611 NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2612 }
2613 }
2614 BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2615 }
2616 if ( verbose ) {
2617 NcbiCout << MSerial_AsnText << *read.GetCompleteObject();
2618 }
2619 }
2620 */
2621
BOOST_AUTO_TEST_CASE(MultipleIds1)2622 BOOST_AUTO_TEST_CASE(MultipleIds1)
2623 {
2624 CRef<CObjectManager> om = sx_GetOM();
2625
2626 CCSRADataLoader::SLoaderParams params;
2627 string csra_name, id;
2628 TSeqPos from, to, align_count;
2629
2630 {
2631 csra_name = "SRR413273";
2632 id = "121114303";
2633 from = 100;
2634 to = 2000;
2635 align_count = 1525; // 5400 if mapq>=1
2636 }
2637 params.m_DirPath.erase();
2638 params.m_CSRAFiles.push_back(csra_name);
2639 string loader_name =
2640 CCSRADataLoader::RegisterInObjectManager(*om, params,
2641 CObjectManager::eDefault)
2642 .GetLoader()->GetName();
2643 sx_ReportCSraLoaderName(loader_name);
2644 CScope scope(*om);
2645 scope.AddDefaults();
2646
2647 string annot_name = csra_name;
2648 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
2649
2650 CRef<CSeq_id> seqid(new CSeq_id(id));
2651 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
2652 CRef<CSeq_loc> loc(new CSeq_loc);
2653 loc->SetInt().SetId(*seqid);
2654 loc->SetInt().SetFrom(from);
2655 loc->SetInt().SetTo(to);
2656 sx_CheckNames(scope, *loc, annot_name);
2657 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
2658 sel.SetSearchUnresolved();
2659 sel.ExcludeNamedAnnots(pileup_name);
2660
2661 //BOOST_REQUIRE(scope.GetBioseqHandle(idh));
2662 //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
2663 if ( 1 ) {
2664 CGraph_CI git(scope, *loc, sel);
2665 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
2666 }
2667
2668 if ( 1 ) {
2669 CAlign_CI it(scope, *loc, sel);
2670 if ( it ) {
2671 cout << "Align count: "<<it.GetSize()<<endl;
2672 if ( it.GetAnnot().IsNamed() ) {
2673 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
2674 }
2675 }
2676 BOOST_CHECK_EQUAL(align_count, it.GetSize());
2677
2678 for ( ; it; ++it ) {
2679 const CSeq_align& align = *it;
2680 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
2681 sx_CheckSeq(scope, idh, **j);
2682 }
2683 }
2684 }
2685
2686 if ( 1 ) {
2687 sel.ResetAnnotsNames();
2688 sel.AddNamedAnnots(pileup_name);
2689 CGraph_CI git(scope, *loc, sel);
2690 BOOST_CHECK(git.GetSize() % 6 == 0);
2691 if ( 0 ) {
2692 for ( ; git; ++git ) {
2693 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
2694 }
2695 }
2696 }
2697 }
2698
2699
BOOST_AUTO_TEST_CASE(MultipleIds2)2700 BOOST_AUTO_TEST_CASE(MultipleIds2)
2701 {
2702 CRef<CObjectManager> om = sx_GetOM();
2703
2704 CCSRADataLoader::SLoaderParams params;
2705 string csra_name, id;
2706 TSeqPos from, to, align_count;
2707
2708 {
2709 csra_name = "SRR413273";
2710 id = "NM_004119.2";
2711 from = 100;
2712 to = 2000;
2713 align_count = 1525; // 5400 if mapq>=1
2714 }
2715 params.m_DirPath.erase();
2716 params.m_CSRAFiles.push_back(csra_name);
2717 string loader_name =
2718 CCSRADataLoader::RegisterInObjectManager(*om, params,
2719 CObjectManager::eDefault)
2720 .GetLoader()->GetName();
2721 sx_ReportCSraLoaderName(loader_name);
2722 CGBDataLoader::RegisterInObjectManager(*om);
2723 CScope scope(*om);
2724 scope.AddDefaults();
2725
2726 string annot_name = csra_name;
2727 string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
2728
2729 CRef<CSeq_id> seqid(new CSeq_id(id));
2730 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
2731 CRef<CSeq_loc> loc(new CSeq_loc);
2732 loc->SetInt().SetId(*seqid);
2733 loc->SetInt().SetFrom(from);
2734 loc->SetInt().SetTo(to);
2735 sx_CheckNames(scope, *loc, annot_name);
2736 SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
2737 sel.SetSearchUnresolved();
2738 sel.ExcludeNamedAnnots(pileup_name);
2739 sel.ExcludeNamedAnnots("SNP");
2740
2741 //BOOST_REQUIRE(scope.GetBioseqHandle(idh));
2742 //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
2743 if ( 1 ) {
2744 CGraph_CI git(scope, *loc, sel);
2745 BOOST_CHECK_EQUAL(git.GetSize(), 1u);
2746 }
2747
2748 if ( 1 ) {
2749 CAlign_CI it(scope, *loc, sel);
2750 if ( it ) {
2751 cout << "Align count: "<<it.GetSize()<<endl;
2752 if ( it.GetAnnot().IsNamed() ) {
2753 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
2754 }
2755 }
2756 BOOST_CHECK_EQUAL(align_count, it.GetSize());
2757
2758 for ( ; it; ++it ) {
2759 const CSeq_align& align = *it;
2760 ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
2761 sx_CheckSeq(scope, idh, **j);
2762 }
2763 }
2764 }
2765
2766 if ( 1 ) {
2767 sel.ResetAnnotsNames();
2768 sel.AddNamedAnnots(pileup_name);
2769 CGraph_CI git(scope, *loc, sel);
2770 BOOST_CHECK(git.GetSize() % 6 == 0);
2771 if ( 0 ) {
2772 for ( ; git; ++git ) {
2773 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
2774 }
2775 }
2776 }
2777 }
2778
BOOST_AUTO_TEST_CASE(CheckPrivate1)2779 BOOST_AUTO_TEST_CASE(CheckPrivate1)
2780 {
2781 CRef<CObjectManager> om = sx_GetOM();
2782
2783 CCSRADataLoader::SLoaderParams params;
2784
2785 string path("http://gapsview11.be-md.ncbi.nlm.nih.gov/49DCAC8956E9BD404735BC18EA2E7986C1674A6F/SRR1219902.sra");
2786 params.m_CSRAFiles.push_back(path);
2787
2788 string loader_name;
2789 bool isError = false;
2790 try {
2791 NcbiCout << "Trying to create a loader for: " << path << NcbiEndl;
2792 loader_name = CCSRADataLoader::RegisterInObjectManager(*om, params)
2793 .GetLoader()->GetName();
2794 NcbiCout << "Loader created successfully" << NcbiEndl;
2795 sx_ReportCSraLoaderName(loader_name);
2796 }
2797 catch (CSraException& e) {
2798 isError = true;
2799 ERR_POST("Exception: "<<e);
2800 BOOST_CHECK(e.GetErrCode() == e.eProtectedDb);
2801 }
2802 BOOST_CHECK(isError);
2803 }
2804
2805
NCBITEST_INIT_TREE()2806 NCBITEST_INIT_TREE()
2807 {
2808 NCBITEST_DISABLE(FetchSeq1);
2809 NCBITEST_DISABLE(FetchSeq2);
2810 NCBITEST_DISABLE(FetchSeq3);
2811 NCBITEST_DISABLE(FetchSeq4);
2812 NCBITEST_DISABLE(FetchSeq4l);
2813 NCBITEST_DISABLE(FetchSeq4sx);
2814 NCBITEST_DISABLE(FetchSeq4sd);
2815 NCBITEST_DISABLE(FetchSeq4s);
2816 }
2817