1 /*  $Id: test_csra_loader.cpp 562583 2018-04-24 16:07:23Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Eugene Vasilchenko
27 *
28 * File Description:
29 *   Unit tests for CSRA data loader
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <sra/readers/sra/exception.hpp>
36 #include <sra/data_loaders/csra/csraloader.hpp>
37 #include <sra/readers/ncbi_traces_path.hpp>
38 #include <objmgr/scope.hpp>
39 #include <objmgr/bioseq_handle.hpp>
40 #include <objmgr/align_ci.hpp>
41 #include <objmgr/graph_ci.hpp>
42 #include <objtools/data_loaders/genbank/gbloader.hpp>
43 #include <objects/seqalign/seqalign__.hpp>
44 #include <objects/seqres/seqres__.hpp>
45 #include <corelib/ncbi_system.hpp>
46 #include <objtools/readers/idmapper.hpp>
47 #include <serial/iterator.hpp>
48 
49 #include <corelib/test_boost.hpp>
50 
51 #include <common/test_data_path.h>
52 #include <common/test_assert.h>  /* This header must go last */
53 
54 #include <objmgr/impl/scope_info.hpp>
55 #include <objmgr/impl/data_source.hpp>
56 
57 USING_NCBI_SCOPE;
58 USING_SCOPE(objects);
59 
60 #define PILEUP_NAME_SUFFIX " pileup graphs"
61 
sx_GetOM(void)62 CRef<CObjectManager> sx_GetOM(void)
63 {
64     SetDiagPostLevel(eDiag_Info);
65     CRef<CObjectManager> om = CObjectManager::GetInstance();
66     CObjectManager::TRegisteredNames names;
67     om->GetRegisteredNames(names);
68     ITERATE ( CObjectManager::TRegisteredNames, it, names ) {
69         om->RevokeDataLoader(*it);
70     }
71     return om;
72 }
73 
sx_CheckNames(CScope & scope,const CSeq_loc & loc,const string & name)74 void sx_CheckNames(CScope& scope, const CSeq_loc& loc,
75                    const string& name)
76 {
77     SAnnotSelector sel;
78     sel.SetSearchUnresolved();
79     sel.SetCollectNames();
80     CAnnotTypes_CI it(CSeq_annot::C_Data::e_not_set, scope, loc, &sel);
81     CAnnotTypes_CI::TAnnotNames names = it.GetAnnotNames();
82     ITERATE ( CAnnotTypes_CI::TAnnotNames, i, names ) {
83         if ( i->IsNamed() ) {
84             NcbiCout << "Named annot: " << i->GetName()
85                      << NcbiEndl;
86         }
87         else {
88             NcbiCout << "Unnamed annot"
89                      << NcbiEndl;
90         }
91     }
92     //NcbiCout << "Checking for name: " << name << NcbiEndl;
93     BOOST_CHECK_EQUAL(names.count(name), 1u);
94     if ( names.size() > 1 ) {
95         BOOST_CHECK_EQUAL(names.count(name+PILEUP_NAME_SUFFIX), 1u);
96     }
97     else {
98         BOOST_CHECK_EQUAL(names.size(), 1u);
99     }
100 }
101 
sx_GetPath(const string & dir)102 string sx_GetPath(const string& dir)
103 {
104     vector<string> reps;
105     NStr::Split("traces02:traces04", ":", reps);
106     ITERATE ( vector<string>, it, reps ) {
107         string path = CFile::MakePath(CFile::MakePath(NCBI_GetTestDataPath(), *it), dir);
108         if ( CDirEntry(path).Exists() ) {
109             return path;
110         }
111     }
112     return dir;
113 }
114 
sx_ReportCSraLoaderName(const string & name)115 void sx_ReportCSraLoaderName(const string& name)
116 {
117     NcbiCout << "CSRA loader: " << name << NcbiEndl;
118     CDataLoader* loader = CObjectManager::GetInstance()->FindDataLoader(name);
119     CCSRADataLoader* csra_loader = dynamic_cast<CCSRADataLoader*>(loader);
120     BOOST_REQUIRE(csra_loader);
121     CCSRADataLoader::TAnnotNames names = csra_loader->GetPossibleAnnotNames();
122     ITERATE ( CCSRADataLoader::TAnnotNames, it, names ) {
123         NcbiCout << "  annot name: " << it->GetName() << NcbiEndl;
124     }
125 }
126 
sx_CheckSeq(CScope & scope,const CSeq_id_Handle & main_idh,const CSeq_id & id)127 void sx_CheckSeq(CScope& scope,
128                  const CSeq_id_Handle& main_idh,
129                  const CSeq_id& id)
130 {
131     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(id);
132     if ( idh == main_idh ) {
133         return;
134     }
135     const CBioseq_Handle::TId& ids = scope.GetIds(idh);
136     BOOST_REQUIRE(!ids.empty());
137     //BOOST_CHECK_EQUAL(ids[0], idh);
138     CBioseq_Handle bh = scope.GetBioseqHandle(idh);
139     BOOST_REQUIRE(bh);
140 }
141 
BOOST_AUTO_TEST_CASE(FetchSeq1)142 BOOST_AUTO_TEST_CASE(FetchSeq1)
143 {
144     CRef<CObjectManager> om = sx_GetOM();
145 
146     CCSRADataLoader::SLoaderParams params;
147     string csra_name, id;
148     TSeqPos from, to, align_count;
149 
150     {
151         //string path = NCBI_TRACES01_PATH "/compress/DATA/ASW/NA19909.mapped.illumina.mosaik.ASW.exome.20110411";
152         string path = NCBI_TRACES01_PATH "/compress/1KG/ASW/NA19909";
153         params.m_DirPath = sx_GetPath(path);
154         csra_name = "exome.ILLUMINA.MOSAIK.csra";
155         id = "NC_000001.10";
156         from = 0;
157         to = 100000;
158         align_count = 37; // 36 if mapq>=1
159     }
160     csra_name = CFile::MakePath(params.m_DirPath, csra_name);
161     params.m_DirPath.erase();
162     params.m_CSRAFiles.push_back(csra_name);
163     string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
164     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
165 
166     string loader_name =
167         CCSRADataLoader::RegisterInObjectManager(*om, params,
168                                                  CObjectManager::eDefault)
169         .GetLoader()->GetName();
170     sx_ReportCSraLoaderName(loader_name);
171     CScope scope(*om);
172     scope.AddDefaults();
173 
174     CRef<CSeq_id> seqid(new CSeq_id(id));
175     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
176     CRef<CSeq_loc> loc(new CSeq_loc);
177     loc->SetInt().SetId(*seqid);
178     loc->SetInt().SetFrom(from);
179     loc->SetInt().SetTo(to);
180     sx_CheckNames(scope, *loc, annot_name);
181     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
182     sel.SetSearchUnresolved();
183     sel.ExcludeNamedAnnots(pileup_name);
184 
185     if ( 1 ) {
186         CGraph_CI git(scope, *loc, sel);
187         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
188     }
189 
190     if ( 1 ) {
191         CAlign_CI it(scope, *loc, sel);
192         if ( it ) {
193             cout << "Align count: "<<it.GetSize()<<endl;
194             if ( it.GetAnnot().IsNamed() ) {
195                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
196             }
197         }
198         BOOST_CHECK_EQUAL(align_count, it.GetSize());
199 
200         for ( ; it; ++it ) {
201             const CSeq_align& align = *it;
202             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
203                 sx_CheckSeq(scope, idh, **j);
204             }
205         }
206     }
207 
208     if ( 1 ) {
209         sel.ResetAnnotsNames();
210         sel.AddNamedAnnots(pileup_name);
211         CGraph_CI git(scope, *loc, sel);
212         BOOST_CHECK(git.GetSize() % 6 == 0);
213         if ( 0 ) {
214             for ( ; git; ++git ) {
215                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
216             }
217         }
218     }
219 }
220 
BOOST_AUTO_TEST_CASE(FetchSeq2)221 BOOST_AUTO_TEST_CASE(FetchSeq2)
222 {
223     CRef<CObjectManager> om = sx_GetOM();
224 
225     CCSRADataLoader::SLoaderParams params;
226     string csra_name, id;
227     TSeqPos from, to, align_count;
228 
229     {
230         //string path = NCBI_TRACES01_PATH "/compress/DATA/ASW/NA19909.mapped.illumina.mosaik.ASW.exome.20110411";
231         string path = NCBI_TRACES01_PATH "/compress/1KG/ASW/NA19909";
232         params.m_DirPath = sx_GetPath(path);
233         csra_name = "exome.ILLUMINA.MOSAIK.csra";
234         id = "NC_000001.10";
235         if ( 1 ) {
236             CNcbiIfstream mapfile("mapfile");
237             BOOST_CHECK(mapfile);
238             params.m_IdMapper.reset(new CIdMapperConfig(mapfile, "", false));
239             id = "89161185";
240             //id = "NC_000001.9";
241         }
242         from = 0;
243         to = 100000;
244         align_count = 37; // 36 if mapq>=1
245     }
246     params.m_CSRAFiles.push_back(csra_name);
247     string annot_name = "csra_name"; //CDirEntry(csra_name).GetBase();
248     params.m_AnnotName = "csra_name";
249     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
250 
251     string loader_name =
252         CCSRADataLoader::RegisterInObjectManager(*om, params,
253                                                  CObjectManager::eDefault)
254         .GetLoader()->GetName();
255     sx_ReportCSraLoaderName(loader_name);
256     CScope scope(*om);
257     scope.AddDefaults();
258 
259     CRef<CSeq_id> seqid(new CSeq_id(id));
260     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
261     CRef<CSeq_loc> loc(new CSeq_loc);
262     loc->SetInt().SetId(*seqid);
263     loc->SetInt().SetFrom(from);
264     loc->SetInt().SetTo(to);
265     sx_CheckNames(scope, *loc, annot_name);
266     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
267     sel.SetSearchUnresolved();
268     sel.ExcludeNamedAnnots(pileup_name);
269 
270     if ( 1 ) {
271         CGraph_CI git(scope, *loc, sel);
272         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
273     }
274 
275     if ( 1 ) {
276         CAlign_CI it(scope, *loc, sel);
277         if ( it ) {
278             cout << "Align count: "<<it.GetSize()<<endl;
279             if ( it.GetAnnot().IsNamed() ) {
280                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
281             }
282         }
283         BOOST_CHECK_EQUAL(align_count, it.GetSize());
284 
285         for ( ; it; ++it ) {
286             const CSeq_align& align = *it;
287             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
288                 sx_CheckSeq(scope, idh, **j);
289             }
290         }
291     }
292 
293     if ( 1 ) {
294         sel.ResetAnnotsNames();
295         sel.AddNamedAnnots(pileup_name);
296         CGraph_CI git(scope, *loc, sel);
297         BOOST_CHECK(git.GetSize() % 6 == 0);
298         if ( 0 ) {
299             for ( ; git; ++git ) {
300                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
301             }
302         }
303     }
304 }
305 
BOOST_AUTO_TEST_CASE(FetchSeq3)306 BOOST_AUTO_TEST_CASE(FetchSeq3)
307 {
308     CRef<CObjectManager> om = sx_GetOM();
309 
310     CCSRADataLoader::SLoaderParams params;
311     string csra_name, id;
312     TSeqPos from, to, align_count;
313 
314     {
315         string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12043";
316         params.m_DirPath = sx_GetPath(path);
317         csra_name = "genome.LS454.SSAHA2.csra";
318         id = "NC_000001.10";
319         if ( 0 ) {
320             CNcbiIfstream mapfile("mapfile");
321             BOOST_CHECK(mapfile);
322             params.m_IdMapper.reset(new CIdMapperConfig(mapfile, "", false));
323             id = "89161185";
324             //id = "NC_000001.9";
325         }
326         from = 0;
327         to = 100000;
328         align_count = 617; // 95 if mapq>=1
329     }
330     params.m_CSRAFiles.push_back(csra_name);
331     string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
332     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
333 
334     string loader_name =
335         CCSRADataLoader::RegisterInObjectManager(*om, params,
336                                                  CObjectManager::eDefault)
337         .GetLoader()->GetName();
338     sx_ReportCSraLoaderName(loader_name);
339     CScope scope(*om);
340     scope.AddDefaults();
341 
342     CRef<CSeq_id> seqid(new CSeq_id(id));
343     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
344     CRef<CSeq_loc> loc(new CSeq_loc);
345     loc->SetInt().SetId(*seqid);
346     loc->SetInt().SetFrom(from);
347     loc->SetInt().SetTo(to);
348     sx_CheckNames(scope, *loc, annot_name);
349     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
350     sel.SetSearchUnresolved();
351     sel.ExcludeNamedAnnots(pileup_name);
352 
353     if ( 1 ) {
354         CGraph_CI git(scope, *loc, sel);
355         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
356     }
357 
358     if ( 1 ) {
359         CAlign_CI it(scope, *loc, sel);
360         if ( it ) {
361             cout << "Align count: "<<it.GetSize()<<endl;
362             if ( it.GetAnnot().IsNamed() ) {
363                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
364             }
365         }
366         BOOST_CHECK_EQUAL(align_count, it.GetSize());
367 
368         for ( ; it; ++it ) {
369             const CSeq_align& align = *it;
370             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
371                 sx_CheckSeq(scope, idh, **j);
372             }
373         }
374     }
375 
376     if ( 1 ) {
377         sel.ResetAnnotsNames();
378         sel.AddNamedAnnots(pileup_name);
379         CGraph_CI git(scope, *loc, sel);
380         BOOST_CHECK(git.GetSize() % 6 == 0);
381         if ( 0 ) {
382             for ( ; git; ++git ) {
383                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
384             }
385         }
386     }
387 }
388 
BOOST_AUTO_TEST_CASE(FetchSeq4)389 BOOST_AUTO_TEST_CASE(FetchSeq4)
390 {
391     CRef<CObjectManager> om = sx_GetOM();
392 
393     CCSRADataLoader::SLoaderParams params;
394     string csra_name, id;
395     TSeqPos from, to, align_count;
396 
397     {
398         string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
399         params.m_DirPath = sx_GetPath(path);
400         //csra_name = "genome.ILLUMINA.BWA.csra";
401         csra_name = "exome.ILLUMINA.MOSAIK.csra";
402         id = "NC_000023.10";
403         if ( 0 ) {
404             CNcbiIfstream mapfile("mapfile");
405             BOOST_CHECK(mapfile);
406             params.m_IdMapper.reset(new CIdMapperConfig(mapfile, "", false));
407             id = "89161185";
408             //id = "NC_000001.9";
409         }
410         from = 31137345;
411         to   = 31157726;
412         align_count = 730;
413         //to = 33357726;
414         //align_count = 129595;
415     }
416     params.m_CSRAFiles.push_back(csra_name);
417     string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
418     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
419 
420     CStopWatch sw(CStopWatch::eStart);
421     string loader_name =
422         CCSRADataLoader::RegisterInObjectManager(*om, params,
423                                                  CObjectManager::eDefault)
424         .GetLoader()->GetName();
425     sx_ReportCSraLoaderName(loader_name);
426     CScope scope(*om);
427     scope.AddDefaults();
428 
429     CRef<CSeq_id> seqid(new CSeq_id(id));
430     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
431     CRef<CSeq_loc> loc(new CSeq_loc);
432     loc->SetInt().SetId(*seqid);
433     loc->SetInt().SetFrom(from);
434     loc->SetInt().SetTo(to);
435     sx_CheckNames(scope, *loc, annot_name);
436     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
437     sel.SetSearchUnresolved();
438     sel.ExcludeNamedAnnots(pileup_name);
439 
440     if ( 1 ) {
441         CGraph_CI git(scope, *loc, sel);
442         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
443     }
444 
445     if ( 1 ) {
446         CAlign_CI it(scope, *loc, sel);
447         if ( it ) {
448             cout << "Align count: "<<it.GetSize()<<endl;
449             if ( it.GetAnnot().IsNamed() ) {
450                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
451             }
452         }
453         BOOST_CHECK_EQUAL(align_count, it.GetSize());
454         NcbiCout << "Elapsed: " << sw.Elapsed() << NcbiEndl;
455 
456         for ( ; it; ++it ) {
457             const CSeq_align& align = it.GetOriginalSeq_align();
458             //NcbiCout << MSerial_AsnText << align;
459             if ( 1 ) {
460                 ITERATE( CDense_seg::TIds, j,
461                          align.GetSegs().GetDenseg().GetIds() ) {
462                     sx_CheckSeq(scope, idh, **j);
463                     CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle(**j);
464                     if ( id2 != idh ) {
465                         CBioseq_Handle bh = scope.GetBioseqHandle(id2);
466                         BOOST_CHECK(bh);
467                         //NcbiCout << MSerial_AsnText << *bh.GetCompleteBioseq();
468                     }
469                 }
470             }
471         }
472         NcbiCout << "Elapsed: " << sw.Elapsed() << NcbiEndl;
473     }
474 
475     if ( 1 ) {
476         sel.ResetAnnotsNames();
477         sel.AddNamedAnnots(pileup_name);
478         CGraph_CI git(scope, *loc, sel);
479         BOOST_CHECK(git.GetSize() % 6 == 0);
480         if ( 0 ) {
481             for ( ; git; ++git ) {
482                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
483             }
484         }
485         NcbiCout << "Elapsed: " << sw.Elapsed() << NcbiEndl;
486     }
487 }
488 
489 
BOOST_AUTO_TEST_CASE(FetchSeq4l)490 BOOST_AUTO_TEST_CASE(FetchSeq4l)
491 {
492     CRef<CObjectManager> om = sx_GetOM();
493 
494     CCSRADataLoader::SLoaderParams params;
495     string csra_name, id;
496 
497     {
498         string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
499         path = sx_GetPath(path);
500         csra_name = CDirEntry::MakePath(path, "exome.ILLUMINA.MOSAIK.csra");
501         id = csra_name;
502         replace(id.begin(), id.end(), '/', '\\');
503         id = "gnl|SRA|"+id+".6.1";
504     }
505     params.m_CSRAFiles.push_back(csra_name);
506     params.m_PathInId = true;
507     params.m_QualityGraphs = true;
508 
509     CStopWatch sw(CStopWatch::eStart);
510     string loader_name =
511         CCSRADataLoader::RegisterInObjectManager(*om, params,
512                                                  CObjectManager::eDefault)
513         .GetLoader()->GetName();
514     sx_ReportCSraLoaderName(loader_name);
515 
516     for ( int t = 0; t < 2; ++t ) {
517         CScope scope(*om);
518         scope.AddDefaults();
519 
520         CRef<CSeq_id> seqid(new CSeq_id(id));
521         CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
522         CBioseq_Handle bh = scope.GetBioseqHandle(idh);
523         BOOST_REQUIRE(bh);
524         BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
525         CGraph_CI git(bh);
526         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
527     }
528 }
529 
530 
BOOST_AUTO_TEST_CASE(FetchSeq4sx)531 BOOST_AUTO_TEST_CASE(FetchSeq4sx)
532 {
533     CRef<CObjectManager> om = sx_GetOM();
534 
535     CCSRADataLoader::SLoaderParams params;
536     string csra_name, id;
537 
538     {
539         string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
540         path = sx_GetPath(path);
541         csra_name = CDirEntry::MakePath(path, "exome.ILLUMINA.MOSAIK.csra");
542         id = csra_name;
543         replace(id.begin(), id.end(), '/', '\\');
544         id = "gnl|SRA|"+id+".6.1";
545     }
546     params.m_CSRAFiles.push_back(csra_name);
547     params.m_PathInId = true;
548     params.m_QualityGraphs = true;
549 
550     CStopWatch sw(CStopWatch::eStart);
551     string loader_name =
552         CCSRADataLoader::RegisterInObjectManager(*om, params,
553                                                  CObjectManager::eDefault)
554         .GetLoader()->GetName();
555     sx_ReportCSraLoaderName(loader_name);
556 
557     for ( int t = 0; t < 2; ++t ) {
558         CScope scope(*om);
559         scope.AddDefaults();
560 
561         CRef<CSeq_id> seqid(new CSeq_id(id));
562         CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
563         CBioseq_Handle bh = scope.GetBioseqHandle(idh);
564         BOOST_REQUIRE(bh);
565         BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
566         CGraph_CI git(bh);
567         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
568     }
569 }
570 
571 
BOOST_AUTO_TEST_CASE(FetchSeq4sd)572 BOOST_AUTO_TEST_CASE(FetchSeq4sd)
573 {
574     CRef<CObjectManager> om = sx_GetOM();
575 
576     CCSRADataLoader::SLoaderParams params;
577     string csra_name, id;
578 
579     {
580         string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
581         string file;
582         CDirEntry::SplitPath(sx_GetPath(path), &params.m_DirPath, &file);
583         csra_name = CDirEntry::MakePath(file, "exome.ILLUMINA.MOSAIK.csra");
584         id = CDirEntry::MakePath(file, "exome.ILLUMINA.MOSAIK");
585         replace(id.begin(), id.end(), '/', '\\');
586         id = "gnl|SRA|"+id+".6.1";
587     }
588     params.m_CSRAFiles.push_back(csra_name);
589     params.m_PathInId = false;
590     params.m_QualityGraphs = true;
591 
592     CStopWatch sw(CStopWatch::eStart);
593     string loader_name =
594         CCSRADataLoader::RegisterInObjectManager(*om, params,
595                                                  CObjectManager::eDefault)
596         .GetLoader()->GetName();
597     sx_ReportCSraLoaderName(loader_name);
598 
599     for ( int t = 0; t < 2; ++t ) {
600         CScope scope(*om);
601         scope.AddDefaults();
602 
603         CRef<CSeq_id> seqid(new CSeq_id(id));
604         CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
605         CBioseq_Handle bh = scope.GetBioseqHandle(idh);
606         BOOST_REQUIRE(bh);
607         BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
608         CGraph_CI git(bh);
609         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
610     }
611 }
612 
613 
BOOST_AUTO_TEST_CASE(FetchSeq4s)614 BOOST_AUTO_TEST_CASE(FetchSeq4s)
615 {
616     CRef<CObjectManager> om = sx_GetOM();
617 
618     CCSRADataLoader::SLoaderParams params;
619     string csra_name, id;
620 
621     {
622         string path = NCBI_TRACES01_PATH "/compress/1KG/CEU/NA12249";
623         params.m_DirPath = sx_GetPath(path);
624         csra_name = "exome.ILLUMINA.MOSAIK.csra";
625         id = "gnl|SRA|exome.ILLUMINA.MOSAIK.6.1";
626     }
627     params.m_CSRAFiles.push_back(csra_name);
628     params.m_PathInId = false;
629     params.m_QualityGraphs = true;
630 
631     CStopWatch sw(CStopWatch::eStart);
632     string loader_name =
633         CCSRADataLoader::RegisterInObjectManager(*om, params,
634                                                  CObjectManager::eDefault)
635         .GetLoader()->GetName();
636     sx_ReportCSraLoaderName(loader_name);
637     for ( int t = 0; t < 2; ++t ) {
638         CScope scope(*om);
639         scope.AddDefaults();
640 
641         CRef<CSeq_id> seqid(new CSeq_id(id));
642         CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
643         CBioseq_Handle bh = scope.GetBioseqHandle(idh);
644         BOOST_REQUIRE(bh);
645         BOOST_CHECK_EQUAL(bh.GetBioseqLength(), 97u);
646         CGraph_CI git(bh);
647         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
648     }
649 }
650 
651 
BOOST_AUTO_TEST_CASE(FetchSeq5)652 BOOST_AUTO_TEST_CASE(FetchSeq5)
653 {
654     CRef<CObjectManager> om = sx_GetOM();
655 
656     CCSRADataLoader::SLoaderParams params;
657     string csra_name, id;
658     TSeqPos from, to, align_count;
659 
660 #if 1
661     {
662         csra_name = "SRR389414";
663         params.m_DirPath = csra_name;
664         id = "NC_000023.10";
665         from = 1000000;
666         to = 2000000;
667         align_count = 3016; // 3005 if mapq>=1
668     }
669     string loader_name =
670         CCSRADataLoader::RegisterInObjectManager(*om, params,
671                                                  CObjectManager::eDefault)
672         .GetLoader()->GetName();
673 #else
674     {
675         csra_name = "SRR389414";
676         id = "NC_000023.10";
677         from = 1000000;
678         to = 2000000;
679         align_count = 3016; // 3005 if mapq>=1
680     }
681     string loader_name =
682         CCSRADataLoader::RegisterInObjectManager(*om, csra_name,
683                                                  CObjectManager::eDefault)
684         .GetLoader()->GetName();
685 #endif
686     sx_ReportCSraLoaderName(loader_name);
687     CScope scope(*om);
688     scope.AddDefaults();
689 
690     string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
691     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
692 
693     CRef<CSeq_id> seqid(new CSeq_id(id));
694     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
695     CRef<CSeq_loc> loc(new CSeq_loc);
696     loc->SetInt().SetId(*seqid);
697     loc->SetInt().SetFrom(from);
698     loc->SetInt().SetTo(to);
699     sx_CheckNames(scope, *loc, annot_name);
700     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
701     sel.SetSearchUnresolved();
702     sel.ExcludeNamedAnnots(pileup_name);
703 
704     if ( 1 ) {
705         CGraph_CI git(scope, *loc, sel);
706         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
707     }
708 
709     if ( 1 ) {
710         CAlign_CI it(scope, *loc, sel);
711         if ( it ) {
712             cout << "Align count: "<<it.GetSize()<<endl;
713             if ( it.GetAnnot().IsNamed() ) {
714                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
715             }
716         }
717         BOOST_CHECK_EQUAL(align_count, it.GetSize());
718 
719         for ( ; it; ++it ) {
720             const CSeq_align& align = *it;
721             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
722                 sx_CheckSeq(scope, idh, **j);
723             }
724         }
725     }
726 
727     if ( 1 ) {
728         sel.ResetAnnotsNames();
729         sel.AddNamedAnnots(pileup_name);
730         CGraph_CI git(scope, *loc, sel);
731         BOOST_CHECK(git.GetSize() % 6 == 0);
732         if ( 0 ) {
733             for ( ; git; ++git ) {
734                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
735             }
736         }
737     }
738 
739     scope.RemoveDataLoader(loader_name); // no use-lock as there is no main sequence
740     om->RevokeDataLoader(loader_name);
741 }
742 
BOOST_AUTO_TEST_CASE(FetchSeq5u)743 BOOST_AUTO_TEST_CASE(FetchSeq5u)
744 {
745     CRef<CObjectManager> om = sx_GetOM();
746 
747     CCSRADataLoader::SLoaderParams params;
748     string csra_name, id;
749     TSeqPos from, to, align_count;
750 
751 #if 1
752     {
753         csra_name = "SRR389414";
754         params.m_DirPath = csra_name;
755         id = "NC_000023.10";
756         from = 1000000;
757         to = 2000000;
758         align_count = 3016; // 3005 if mapq>=1
759     }
760     string loader_name =
761         CCSRADataLoader::RegisterInObjectManager(*om, params,
762                                                  CObjectManager::eDefault)
763         .GetLoader()->GetName();
764 #else
765     {
766         csra_name = "SRR389414";
767         id = "NC_000023.10";
768         from = 1000000;
769         to = 2000000;
770         align_count = 3016; // 3005 if mapq>=1
771     }
772     string loader_name =
773         CCSRADataLoader::RegisterInObjectManager(*om, csra_name,
774                                                  CObjectManager::eDefault)
775         .GetLoader()->GetName();
776 #endif
777     sx_ReportCSraLoaderName(loader_name);
778     CGBDataLoader::RegisterInObjectManager(*om);
779     CScope scope(*om);
780     scope.AddDefaults();
781 
782     string annot_name = csra_name; //CDirEntry(csra_name).GetBase();
783     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
784 
785     CRef<CSeq_id> seqid(new CSeq_id(id));
786     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
787     CRef<CSeq_loc> loc(new CSeq_loc);
788     loc->SetInt().SetId(*seqid);
789     loc->SetInt().SetFrom(from);
790     loc->SetInt().SetTo(to);
791     sx_CheckNames(scope, *loc, annot_name);
792     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
793     sel.ExcludeNamedAnnots(pileup_name);
794 
795     if ( 1 ) {
796         CGraph_CI git(scope, *loc, sel);
797         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
798     }
799 
800     if ( 1 ) {
801         CAlign_CI it(scope, *loc, sel);
802         if ( it ) {
803             cout << "Align count: "<<it.GetSize()<<endl;
804             if ( it.GetAnnot().IsNamed() ) {
805                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
806             }
807         }
808         BOOST_CHECK_EQUAL(align_count, it.GetSize());
809 
810         for ( ; it; ++it ) {
811             const CSeq_align& align = *it;
812             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
813                 sx_CheckSeq(scope, idh, **j);
814             }
815         }
816     }
817 
818     if ( 1 ) {
819         sel.ResetAnnotsNames();
820         sel.AddNamedAnnots(pileup_name);
821         CGraph_CI git(scope, *loc, sel);
822         BOOST_CHECK(git.GetSize() % 6 == 0);
823         if ( 0 ) {
824             for ( ; git; ++git ) {
825                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
826             }
827         }
828     }
829 
830     scope.RemoveDataLoader(loader_name, scope.eRemoveIfLocked); // there is a use-lock from main sequence
831     om->RevokeDataLoader(loader_name); // but it shouldn't prevend revoking data loader
832 }
833 
BOOST_AUTO_TEST_CASE(FetchSeq6)834 BOOST_AUTO_TEST_CASE(FetchSeq6)
835 {
836     CRef<CObjectManager> om = sx_GetOM();
837 
838     CCSRADataLoader::SLoaderParams params;
839     string csra_name, id;
840     TSeqPos from, to, align_count;
841 
842     {
843         csra_name = "SRR389414";
844         id = "gnl|SRA|SRR389414/GL000215.1";
845         from = 100000;
846         to   = 200000;
847         align_count = 83; // 8 if mapq>=1
848     }
849     string loader_name =
850         CCSRADataLoader::RegisterInObjectManager(*om, params,
851                                                  CObjectManager::eDefault)
852         .GetLoader()->GetName();
853     sx_ReportCSraLoaderName(loader_name);
854     CScope scope(*om);
855     scope.AddDefaults();
856 
857     string annot_name = csra_name;
858     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
859 
860     CRef<CSeq_id> seqid(new CSeq_id(id));
861     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
862     CRef<CSeq_loc> loc(new CSeq_loc);
863     loc->SetInt().SetId(*seqid);
864     loc->SetInt().SetFrom(from);
865     loc->SetInt().SetTo(to);
866     sx_CheckNames(scope, *loc, annot_name);
867     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
868     sel.SetSearchUnresolved();
869     sel.ExcludeNamedAnnots(pileup_name);
870 
871     BOOST_CHECK(scope.GetBioseqHandle(idh));
872     //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
873     //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetTopLevelEntry().GetCompleteObject();
874     if ( 1 ) {
875         CGraph_CI git(scope, *loc, sel);
876         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
877     }
878 
879     if ( 1 ) {
880         CAlign_CI it(scope, *loc, sel);
881         if ( it ) {
882             cout << "Align count: "<<it.GetSize()<<endl;
883             if ( it.GetAnnot().IsNamed() ) {
884                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
885             }
886         }
887         BOOST_CHECK_EQUAL(align_count, it.GetSize());
888 
889         for ( ; it; ++it ) {
890             const CSeq_align& align = *it;
891             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
892                 sx_CheckSeq(scope, idh, **j);
893             }
894         }
895     }
896 
897     if ( 1 ) {
898         sel.ResetAnnotsNames();
899         sel.AddNamedAnnots(pileup_name);
900         CGraph_CI git(scope, *loc, sel);
901         BOOST_CHECK(git.GetSize() % 6 == 0);
902         if ( 0 ) {
903             for ( ; git; ++git ) {
904                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
905             }
906         }
907     }
908 }
909 
910 
BOOST_AUTO_TEST_CASE(FetchSeq7)911 BOOST_AUTO_TEST_CASE(FetchSeq7)
912 {
913     CRef<CObjectManager> om = sx_GetOM();
914 
915     CCSRADataLoader::SLoaderParams params;
916     string csra_name, id;
917     TSeqPos from, to, align_count;
918 
919     {
920         csra_name = "SRR389414";
921         id = "gnl|SRA|SRR389414/1";
922         from = 1000000;
923         to   = 2000000;
924         align_count = 6104; // 5400 if mapq>=1
925     }
926     string loader_name =
927         CCSRADataLoader::RegisterInObjectManager(*om, params,
928                                                  CObjectManager::eDefault)
929         .GetLoader()->GetName();
930     sx_ReportCSraLoaderName(loader_name);
931     CScope scope(*om);
932     scope.AddDefaults();
933 
934     string annot_name = csra_name;
935     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
936 
937     CRef<CSeq_id> seqid(new CSeq_id(id));
938     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
939     CRef<CSeq_loc> loc(new CSeq_loc);
940     loc->SetInt().SetId(*seqid);
941     loc->SetInt().SetFrom(from);
942     loc->SetInt().SetTo(to);
943     sx_CheckNames(scope, *loc, annot_name);
944     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
945     sel.SetSearchUnresolved();
946     sel.ExcludeNamedAnnots(pileup_name);
947 
948     BOOST_CHECK(scope.GetBioseqHandle(idh));
949     //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
950     if ( 1 ) {
951         CGraph_CI git(scope, *loc, sel);
952         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
953     }
954 
955     if ( 1 ) {
956         CAlign_CI it(scope, *loc, sel);
957         if ( it ) {
958             cout << "Align count: "<<it.GetSize()<<endl;
959             if ( it.GetAnnot().IsNamed() ) {
960                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
961             }
962         }
963         BOOST_CHECK_EQUAL(align_count, it.GetSize());
964 
965         for ( ; it; ++it ) {
966             const CSeq_align& align = *it;
967             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
968                 sx_CheckSeq(scope, idh, **j);
969             }
970         }
971     }
972 
973     if ( 1 ) {
974         sel.ResetAnnotsNames();
975         sel.AddNamedAnnots(pileup_name);
976         CGraph_CI git(scope, *loc, sel);
977         BOOST_CHECK(git.GetSize() % 6 == 0);
978         if ( 0 ) {
979             for ( ; git; ++git ) {
980                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
981             }
982         }
983     }
984 }
985 
986 
BOOST_AUTO_TEST_CASE(FetchSeq8)987 BOOST_AUTO_TEST_CASE(FetchSeq8)
988 {
989     // pileup graph test
990     CRef<CObjectManager> om = sx_GetOM();
991 
992     CCSRADataLoader::SLoaderParams params;
993     string csra_name, id;
994     TSeqPos from, to, align_count;
995 
996     {
997         csra_name = "ERR669165";
998         id = "GK000001.2";
999         from = 922690;
1000         to   = 922760;
1001         align_count = 87;
1002     }
1003     params.m_CSRAFiles.push_back(csra_name);
1004     CGBDataLoader::RegisterInObjectManager(*om);
1005     string loader_name =
1006         CCSRADataLoader::RegisterInObjectManager(*om, params,
1007                                                  CObjectManager::eDefault)
1008         .GetLoader()->GetName();
1009     sx_ReportCSraLoaderName(loader_name);
1010     CScope scope(*om);
1011     scope.AddDefaults();
1012 
1013     string annot_name = csra_name;
1014     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1015 
1016     CRef<CSeq_id> seqid(new CSeq_id(id));
1017     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1018     CRef<CSeq_loc> loc(new CSeq_loc);
1019     loc->SetInt().SetId(*seqid);
1020     loc->SetInt().SetFrom(from);
1021     loc->SetInt().SetTo(to);
1022     sx_CheckNames(scope, *loc, annot_name);
1023     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1024     sel.SetSearchUnresolved();
1025     sel.ExcludeNamedAnnots(pileup_name);
1026 
1027     BOOST_CHECK(scope.GetBioseqHandle(idh));
1028     if ( 1 ) {
1029         CGraph_CI git(scope, *loc, sel);
1030         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1031     }
1032 
1033     if ( 1 ) {
1034         CAlign_CI it(scope, *loc, sel);
1035         if ( it ) {
1036             cout << "Align count: "<<it.GetSize()<<endl;
1037             if ( it.GetAnnot().IsNamed() ) {
1038                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1039             }
1040         }
1041         BOOST_CHECK_EQUAL(align_count, it.GetSize());
1042 
1043         for ( ; it; ++it ) {
1044             const CSeq_align& align = *it;
1045             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1046                 sx_CheckSeq(scope, idh, **j);
1047             }
1048         }
1049     }
1050 
1051     if ( 1 ) {
1052         sel.ResetAnnotsNames();
1053         sel.AddNamedAnnots(pileup_name);
1054         CGraph_CI git(scope, *loc, sel);
1055         BOOST_CHECK_EQUAL(git.GetSize(), 6u);
1056         for ( size_t k = 0; git && k < 6; ++k, ++git ) {
1057             const CSeq_graph& graph = git->GetOriginalGraph();
1058             string title = graph.GetTitle();
1059             NcbiCout << "Pileup graph: " << title << NcbiEndl;
1060             typedef unsigned TExpectedPair[2];
1061             const TExpectedPair* expected_pairs = 0;
1062             size_t expected_count = 0;
1063             if ( title == "Number of inserts" ) {
1064                 static const TExpectedPair expected_I[] = {
1065                     { 0, 0 },
1066                 };
1067                 expected_pairs = expected_I;
1068                 expected_count = ArraySize(expected_I);
1069             }
1070             else if ( title == "Number of A bases" ) {
1071                 static const TExpectedPair expected_A[] = {
1072                     { 0, 0 },
1073                 };
1074                 expected_pairs = expected_A;
1075                 expected_count = ArraySize(expected_A);
1076             }
1077             else if ( title == "Number of C bases" ) {
1078                 static const TExpectedPair expected_C[] = {
1079                     { 922695, 1 },
1080                 };
1081                 expected_pairs = expected_C;
1082                 expected_count = ArraySize(expected_C);
1083             }
1084             else if ( title == "Number of G bases" ) {
1085                 static const TExpectedPair expected_G[] = {
1086                     { 922693, 2 },
1087                 };
1088                 expected_pairs = expected_G;
1089                 expected_count = ArraySize(expected_G);
1090             }
1091             else if ( title == "Number of T bases" ) {
1092                 static const TExpectedPair expected_T[] = {
1093                     { 922692, 1 },
1094                     { 922693, 2 },
1095                     { 922695, 1 },
1096                     { 922750, 1 },
1097                 };
1098                 expected_pairs = expected_T;
1099                 expected_count = ArraySize(expected_T);
1100             }
1101             else {
1102                 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1103                 static const TExpectedPair expected_M[] = {
1104                     { 922690, 70 },
1105                     { 922691, 74 },
1106                     { 922692, 74 },
1107                     { 922693, 75 },
1108                     { 922694, 80 },
1109                     { 922695, 81 },
1110                     { 922696, 83 },
1111                     { 922697, 83 },
1112                     { 922698, 84 },
1113                     { 922699, 85 },
1114                     { 922745, 1 },
1115                     { 922746, 1 },
1116                     { 922747, 1 },
1117                     { 922748, 1 },
1118                     { 922749, 1 },
1119                     { 922750, 1 },
1120                     { 922751, 2 },
1121                     { 922752, 2 },
1122                     { 922753, 2 },
1123                     { 922754, 2 },
1124                     { 922755, 2 },
1125                     { 922756, 2 },
1126                     { 922757, 2 },
1127                     { 922758, 2 },
1128                     { 922759, 2 },
1129                     { 922760, 2 },
1130                 };
1131                 expected_pairs = expected_M;
1132                 expected_count = ArraySize(expected_M);
1133             }
1134             map<TSeqPos, unsigned> expected;
1135             for ( size_t i = 0; i < expected_count; ++i ) {
1136                 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1137             }
1138             CRange<TSeqPos> range = graph.GetLoc().GetTotalRange();
1139             for ( TSeqPos pos = from; pos <= to; ++pos ) {
1140                 TSeqPos i = pos - range.GetFrom();
1141                 unsigned pileup_value =
1142                     graph.GetGraph().IsByte()?
1143                     graph.GetGraph().GetByte().GetValues()[i]:
1144                     graph.GetGraph().GetInt().GetValues()[i];
1145                 unsigned expected_value =
1146                     expected.count(pos)? expected[pos]: 0;
1147                 if ( false && pileup_value ) {
1148                     NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1149                 }
1150                 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1151             }
1152         }
1153     }
1154 }
1155 
BOOST_AUTO_TEST_CASE(FetchSeq9)1156 BOOST_AUTO_TEST_CASE(FetchSeq9)
1157 {
1158     // pileup graph test on chunks border in gaps
1159     CRef<CObjectManager> om = sx_GetOM();
1160 
1161     CCSRADataLoader::SLoaderParams params;
1162     string csra_name, id;
1163     TSeqPos from, to, align_count, align_count_over;
1164 
1165     {
1166         csra_name = "ERR669165";
1167         id = "GK000001.2";
1168         from = 9701400;
1169         to   = 9701420;
1170         align_count = 0;
1171         align_count_over = 58;
1172     }
1173     params.m_CSRAFiles.push_back(csra_name);
1174     CGBDataLoader::RegisterInObjectManager(*om);
1175     string loader_name =
1176         CCSRADataLoader::RegisterInObjectManager(*om, params,
1177                                                  CObjectManager::eDefault)
1178         .GetLoader()->GetName();
1179     sx_ReportCSraLoaderName(loader_name);
1180     CScope scope(*om);
1181     scope.AddDefaults();
1182 
1183     string annot_name = csra_name;
1184     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1185 
1186     CRef<CSeq_id> seqid(new CSeq_id(id));
1187     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1188     CRef<CSeq_loc> loc(new CSeq_loc);
1189     loc->SetInt().SetId(*seqid);
1190     loc->SetInt().SetFrom(from);
1191     loc->SetInt().SetTo(to);
1192     sx_CheckNames(scope, *loc, annot_name);
1193     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1194     sel.SetSearchUnresolved();
1195     sel.ExcludeNamedAnnots(pileup_name);
1196 
1197     BOOST_CHECK(scope.GetBioseqHandle(idh));
1198     if ( 1 ) {
1199         CGraph_CI git(scope, *loc, sel);
1200         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1201     }
1202 
1203     if ( 1 ) {
1204         CAlign_CI it(scope, *loc, sel);
1205         if ( it ) {
1206             cout << "Align count: "<<it.GetSize()<<endl;
1207             if ( it.GetAnnot().IsNamed() ) {
1208                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1209             }
1210         }
1211         BOOST_CHECK_EQUAL(align_count, it.GetSize());
1212 
1213         for ( ; it; ++it ) {
1214             const CSeq_align& align = *it;
1215             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1216                 sx_CheckSeq(scope, idh, **j);
1217             }
1218         }
1219     }
1220 
1221     if ( 1 ) {
1222         SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1223         CAlign_CI it(scope, *loc, sel2);
1224         if ( it ) {
1225             cout << "Align count: "<<it.GetSize()<<endl;
1226             if ( it.GetAnnot().IsNamed() ) {
1227                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1228             }
1229         }
1230         BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1231 
1232         for ( ; it; ++it ) {
1233             const CSeq_align& align = *it;
1234             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1235                 sx_CheckSeq(scope, idh, **j);
1236             }
1237         }
1238     }
1239 
1240     if ( 1 ) {
1241         sel.ResetAnnotsNames();
1242         sel.AddNamedAnnots(pileup_name);
1243         CGraph_CI git(scope, *loc, sel);
1244         //BOOST_CHECK_EQUAL(git.GetSize(), 12u);
1245         for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1246             const CSeq_graph& graph = git->GetOriginalGraph();
1247             string title = graph.GetTitle();
1248             NcbiCout << "Pileup graph: " << title << NcbiEndl;
1249             typedef unsigned TExpectedPair[2];
1250             const TExpectedPair* expected_pairs = 0;
1251             size_t expected_count = 0;
1252             if ( title == "Number of inserts" ) {
1253                 static const TExpectedPair expected_I[] = {
1254                     { 0, 0 },
1255                 };
1256                 expected_pairs = expected_I;
1257                 expected_count = ArraySize(expected_I);
1258             }
1259             else if ( title == "Number of A bases" ) {
1260                 static const TExpectedPair expected_A[] = {
1261                     { 0, 0 },
1262                 };
1263                 expected_pairs = expected_A;
1264                 expected_count = ArraySize(expected_A);
1265             }
1266             else if ( title == "Number of C bases" ) {
1267                 static const TExpectedPair expected_C[] = {
1268                     { 0, 0 },
1269                 };
1270                 expected_pairs = expected_C;
1271                 expected_count = ArraySize(expected_C);
1272             }
1273             else if ( title == "Number of G bases" ) {
1274                 static const TExpectedPair expected_G[] = {
1275                     { 0, 0 },
1276                 };
1277                 expected_pairs = expected_G;
1278                 expected_count = ArraySize(expected_G);
1279             }
1280             else if ( title == "Number of T bases" ) {
1281                 static const TExpectedPair expected_T[] = {
1282                     { 0, 0 },
1283                 };
1284                 expected_pairs = expected_T;
1285                 expected_count = ArraySize(expected_T);
1286             }
1287             else {
1288                 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1289                 static const TExpectedPair expected_M[] = {
1290                     { 0, 0 },
1291                 };
1292                 expected_pairs = expected_M;
1293                 expected_count = ArraySize(expected_M);
1294             }
1295             map<TSeqPos, unsigned> expected;
1296             for ( size_t i = 0; i < expected_count; ++i ) {
1297                 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1298             }
1299             CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1300             CRange<TSeqPos> range =
1301                 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1302             BOOST_CHECK(!range.Empty());
1303             for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1304                 TSeqPos i = pos - graph_range.GetFrom();
1305                 unsigned pileup_value =
1306                     graph.GetGraph().IsByte()?
1307                     graph.GetGraph().GetByte().GetValues()[i]:
1308                     graph.GetGraph().GetInt().GetValues()[i];
1309                 unsigned expected_value =
1310                     expected.count(pos)? expected[pos]: 0;
1311                 if ( false && pileup_value ) {
1312                     NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1313                 }
1314                 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1315             }
1316         }
1317     }
1318 }
1319 
BOOST_AUTO_TEST_CASE(FetchSeq10)1320 BOOST_AUTO_TEST_CASE(FetchSeq10)
1321 {
1322     // pileup graph test on chunks border
1323     CRef<CObjectManager> om = sx_GetOM();
1324 
1325     CCSRADataLoader::SLoaderParams params;
1326     string csra_name, id;
1327     TSeqPos from, to, align_count, align_count_over;
1328 
1329     {
1330         csra_name = "ERR669165";
1331         id = "GK000001.2";
1332         from = 147027820;
1333         to   = 147027840;
1334         align_count = 683;
1335         align_count_over = 683;
1336     }
1337     params.m_CSRAFiles.push_back(csra_name);
1338     CGBDataLoader::RegisterInObjectManager(*om);
1339     string loader_name =
1340         CCSRADataLoader::RegisterInObjectManager(*om, params,
1341                                                  CObjectManager::eDefault)
1342         .GetLoader()->GetName();
1343     sx_ReportCSraLoaderName(loader_name);
1344     CScope scope(*om);
1345     scope.AddDefaults();
1346 
1347     string annot_name = csra_name;
1348     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1349 
1350     CRef<CSeq_id> seqid(new CSeq_id(id));
1351     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1352     CRef<CSeq_loc> loc(new CSeq_loc);
1353     loc->SetInt().SetId(*seqid);
1354     loc->SetInt().SetFrom(from);
1355     loc->SetInt().SetTo(to);
1356     sx_CheckNames(scope, *loc, annot_name);
1357     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1358     sel.SetSearchUnresolved();
1359     sel.ExcludeNamedAnnots(pileup_name);
1360 
1361     BOOST_CHECK(scope.GetBioseqHandle(idh));
1362     if ( 1 ) {
1363         CGraph_CI git(scope, *loc, sel);
1364         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1365     }
1366 
1367     if ( 1 ) {
1368         CAlign_CI it(scope, *loc, sel);
1369         if ( it ) {
1370             cout << "Align count: "<<it.GetSize()<<endl;
1371             if ( it.GetAnnot().IsNamed() ) {
1372                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1373             }
1374         }
1375         BOOST_CHECK_EQUAL(align_count, it.GetSize());
1376 
1377         for ( ; it; ++it ) {
1378             const CSeq_align& align = *it;
1379             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1380                 sx_CheckSeq(scope, idh, **j);
1381             }
1382         }
1383     }
1384 
1385     if ( 1 ) {
1386         SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1387         CAlign_CI it(scope, *loc, sel2);
1388         if ( it ) {
1389             cout << "Align count: "<<it.GetSize()<<endl;
1390             if ( it.GetAnnot().IsNamed() ) {
1391                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1392             }
1393         }
1394         BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1395 
1396         for ( ; it; ++it ) {
1397             const CSeq_align& align = *it;
1398             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1399                 sx_CheckSeq(scope, idh, **j);
1400             }
1401         }
1402     }
1403 
1404     if ( 1 ) {
1405         sel.ResetAnnotsNames();
1406         sel.AddNamedAnnots(pileup_name);
1407         CGraph_CI git(scope, *loc, sel);
1408         //BOOST_CHECK_EQUAL(git.GetSize(), 12u);
1409         for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1410             const CSeq_graph& graph = git->GetOriginalGraph();
1411             string title = graph.GetTitle();
1412             NcbiCout << "Pileup graph: " << title << NcbiEndl;
1413             typedef unsigned TExpectedPair[2];
1414             const TExpectedPair* expected_pairs = 0;
1415             size_t expected_count = 0;
1416             if ( title == "Number of inserts" ) {
1417                 static const TExpectedPair expected_I[] = {
1418                     { 0, 0 },
1419                 };
1420                 expected_pairs = expected_I;
1421                 expected_count = ArraySize(expected_I);
1422             }
1423             else if ( title == "Number of A bases" ) {
1424                 static const TExpectedPair expected_A[] = {
1425                     { 147027821, 4 },
1426                     { 147027822, 6 },
1427                     { 147027825, 1 },
1428                     { 147027828, 1 },
1429                     { 147027829, 4 },
1430                     { 147027830, 10 },
1431                     { 147027832, 2 },
1432                 };
1433                 expected_pairs = expected_A;
1434                 expected_count = ArraySize(expected_A);
1435             }
1436             else if ( title == "Number of C bases" ) {
1437                 static const TExpectedPair expected_C[] = {
1438                     { 147027820, 1 },
1439                     { 147027821, 1 },
1440                     { 147027827, 2 },
1441                     { 147027829, 1 },
1442                     { 147027832, 1 },
1443                     { 147027837, 1 },
1444                 };
1445                 expected_pairs = expected_C;
1446                 expected_count = ArraySize(expected_C);
1447             }
1448             else if ( title == "Number of G bases" ) {
1449                 static const TExpectedPair expected_G[] = {
1450                     { 147027822, 1 },
1451                     { 147027826, 1 },
1452                     { 147027827, 1 },
1453                 };
1454                 expected_pairs = expected_G;
1455                 expected_count = ArraySize(expected_G);
1456             }
1457             else if ( title == "Number of T bases" ) {
1458                 static const TExpectedPair expected_T[] = {
1459                     { 147027821, 1 },
1460                     { 147027822, 1 },
1461                     { 147027823, 1 },
1462                     { 147027826, 2 },
1463                     { 147027827, 2 },
1464                     { 147027831, 1 },
1465                     { 147027832, 1 },
1466                     { 147027833, 1 },
1467                     { 147027836, 4 },
1468                     { 147027837, 1 },
1469                 };
1470                 expected_pairs = expected_T;
1471                 expected_count = ArraySize(expected_T);
1472             }
1473             else {
1474                 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1475                 static const TExpectedPair expected_M[] = {
1476                     { 147027820, 602 },
1477                     { 147027821, 597 },
1478                     { 147027822, 600 },
1479                     { 147027823, 597 },
1480                     { 147027824, 590 },
1481                     { 147027825, 598 },
1482                     { 147027826, 597 },
1483                     { 147027827, 599 },
1484                     { 147027828, 603 },
1485                     { 147027829, 581 },
1486                     { 147027830, 561 },
1487                     { 147027831, 561 },
1488                     { 147027832, 552 },
1489                     { 147027833, 559 },
1490                     { 147027834, 547 },
1491                     { 147027835, 546 },
1492                     { 147027836, 541 },
1493                     { 147027837, 545 },
1494                     { 147027838, 8 },
1495                     { 147027839, 8 },
1496                     { 147027840, 8 },
1497                 };
1498                 expected_pairs = expected_M;
1499                 expected_count = ArraySize(expected_M);
1500             }
1501             map<TSeqPos, unsigned> expected;
1502             for ( size_t i = 0; i < expected_count; ++i ) {
1503                 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1504             }
1505             CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1506             CRange<TSeqPos> range =
1507                 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1508             BOOST_CHECK(!range.Empty());
1509             for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1510                 TSeqPos i = pos - graph_range.GetFrom();
1511                 unsigned pileup_value =
1512                     graph.GetGraph().IsByte()?
1513                     graph.GetGraph().GetByte().GetValues()[i]:
1514                     graph.GetGraph().GetInt().GetValues()[i];
1515                 unsigned expected_value =
1516                     expected.count(pos)? expected[pos]: 0;
1517                 if ( false && pileup_value ) {
1518                     NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1519                 }
1520                 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1521             }
1522         }
1523     }
1524 }
1525 
1526 
BOOST_AUTO_TEST_CASE(FetchSeq11)1527 BOOST_AUTO_TEST_CASE(FetchSeq11)
1528 {
1529     // pileup graph test on chunks and page border
1530     CRef<CObjectManager> om = sx_GetOM();
1531 
1532     CCSRADataLoader::SLoaderParams params;
1533     string csra_name, id;
1534     TSeqPos from, to, align_count, align_count_over;
1535 
1536     {
1537         csra_name = "ERR669165";
1538         id = "GK000001.2";
1539         from = 59805960;
1540         to   = 59805980;
1541         align_count = 6;
1542         align_count_over = 6;
1543     }
1544     params.m_CSRAFiles.push_back(csra_name);
1545     CGBDataLoader::RegisterInObjectManager(*om);
1546     string loader_name =
1547         CCSRADataLoader::RegisterInObjectManager(*om, params,
1548                                                  CObjectManager::eDefault)
1549         .GetLoader()->GetName();
1550     sx_ReportCSraLoaderName(loader_name);
1551     CScope scope(*om);
1552     scope.AddDefaults();
1553 
1554     string annot_name = csra_name;
1555     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1556 
1557     CRef<CSeq_id> seqid(new CSeq_id(id));
1558     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1559     CRef<CSeq_loc> loc(new CSeq_loc);
1560     loc->SetInt().SetId(*seqid);
1561     loc->SetInt().SetFrom(from);
1562     loc->SetInt().SetTo(to);
1563     sx_CheckNames(scope, *loc, annot_name);
1564     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1565     sel.SetSearchUnresolved();
1566     sel.ExcludeNamedAnnots(pileup_name);
1567 
1568     BOOST_CHECK(scope.GetBioseqHandle(idh));
1569     if ( 1 ) {
1570         CGraph_CI git(scope, *loc, sel);
1571         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1572     }
1573 
1574     if ( 1 ) {
1575         CAlign_CI it(scope, *loc, sel);
1576         if ( it ) {
1577             cout << "Align count: "<<it.GetSize()<<endl;
1578             if ( it.GetAnnot().IsNamed() ) {
1579                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1580             }
1581         }
1582         BOOST_CHECK_EQUAL(align_count, it.GetSize());
1583 
1584         for ( ; it; ++it ) {
1585             const CSeq_align& align = *it;
1586             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1587                 sx_CheckSeq(scope, idh, **j);
1588             }
1589         }
1590     }
1591 
1592     if ( 1 ) {
1593         SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1594         CAlign_CI it(scope, *loc, sel2);
1595         if ( it ) {
1596             cout << "Align count: "<<it.GetSize()<<endl;
1597             if ( it.GetAnnot().IsNamed() ) {
1598                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1599             }
1600         }
1601         BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1602 
1603         for ( ; it; ++it ) {
1604             const CSeq_align& align = *it;
1605             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1606                 sx_CheckSeq(scope, idh, **j);
1607             }
1608         }
1609     }
1610 
1611     if ( 1 ) {
1612         sel.ResetAnnotsNames();
1613         sel.AddNamedAnnots(pileup_name);
1614         CGraph_CI git(scope, *loc, sel);
1615         BOOST_CHECK_EQUAL(git.GetSize(), 6u);
1616         for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1617             const CSeq_graph& graph = git->GetOriginalGraph();
1618             string title = graph.GetTitle();
1619             NcbiCout << "Pileup graph: " << title << NcbiEndl;
1620             typedef unsigned TExpectedPair[2];
1621             const TExpectedPair* expected_pairs = 0;
1622             size_t expected_count = 0;
1623             if ( title == "Number of inserts" ) {
1624                 static const TExpectedPair expected_I[] = {
1625                     { 0, 0 },
1626                 };
1627                 expected_pairs = expected_I;
1628                 expected_count = ArraySize(expected_I);
1629             }
1630             else if ( title == "Number of A bases" ) {
1631                 static const TExpectedPair expected_A[] = {
1632                     { 0, 0 },
1633                 };
1634                 expected_pairs = expected_A;
1635                 expected_count = ArraySize(expected_A);
1636             }
1637             else if ( title == "Number of C bases" ) {
1638                 static const TExpectedPair expected_C[] = {
1639                     { 0, 0 },
1640                 };
1641                 expected_pairs = expected_C;
1642                 expected_count = ArraySize(expected_C);
1643             }
1644             else if ( title == "Number of G bases" ) {
1645                 static const TExpectedPair expected_G[] = {
1646                     { 0, 0 },
1647                 };
1648                 expected_pairs = expected_G;
1649                 expected_count = ArraySize(expected_G);
1650             }
1651             else if ( title == "Number of T bases" ) {
1652                 static const TExpectedPair expected_T[] = {
1653                     { 59805974, 1 },
1654                 };
1655                 expected_pairs = expected_T;
1656                 expected_count = ArraySize(expected_T);
1657             }
1658             else {
1659                 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1660                 static const TExpectedPair expected_M[] = {
1661                     { 59805960, 4 },
1662                     { 59805961, 4 },
1663                     { 59805962, 4 },
1664                     { 59805963, 4 },
1665                     { 59805964, 4 },
1666                     { 59805965, 3 },
1667                     { 59805966, 3 },
1668                     { 59805967, 3 },
1669                     { 59805968, 3 },
1670                     { 59805969, 3 },
1671                     { 59805970, 3 },
1672                     { 59805971, 3 },
1673                     { 59805972, 3 },
1674                     { 59805973, 3 },
1675                     { 59805974, 3 },
1676                     { 59805975, 5 },
1677                     { 59805976, 5 },
1678                     { 59805977, 5 },
1679                     { 59805978, 5 },
1680                     { 59805979, 5 },
1681                     { 59805980, 5 },
1682                 };
1683                 expected_pairs = expected_M;
1684                 expected_count = ArraySize(expected_M);
1685             }
1686             map<TSeqPos, unsigned> expected;
1687             for ( size_t i = 0; i < expected_count; ++i ) {
1688                 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1689             }
1690             CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1691             CRange<TSeqPos> range =
1692                 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1693             BOOST_CHECK(!range.Empty());
1694             for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1695                 TSeqPos i = pos - graph_range.GetFrom();
1696                 unsigned pileup_value =
1697                     graph.GetGraph().IsByte()?
1698                     graph.GetGraph().GetByte().GetValues()[i]:
1699                     graph.GetGraph().GetInt().GetValues()[i];
1700                 unsigned expected_value =
1701                     expected.count(pos)? expected[pos]: 0;
1702                 if ( false && pileup_value ) {
1703                     NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1704                 }
1705                 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1706             }
1707         }
1708     }
1709 }
1710 
1711 
BOOST_AUTO_TEST_CASE(FetchSeq12)1712 BOOST_AUTO_TEST_CASE(FetchSeq12)
1713 {
1714     // pileup graph test on chunks and page border
1715     CRef<CObjectManager> om = sx_GetOM();
1716 
1717     CCSRADataLoader::SLoaderParams params;
1718     string csra_name, id;
1719     TSeqPos from, to, align_count, align_count_over;
1720 
1721     {
1722         csra_name = "SRR1686476";
1723         id = "NC_011294.1";
1724         from = 805960;
1725         to   = 805980;
1726         align_count = 81;
1727         align_count_over = 81;
1728     }
1729     params.m_CSRAFiles.push_back(csra_name);
1730     CGBDataLoader::RegisterInObjectManager(*om);
1731     string loader_name =
1732         CCSRADataLoader::RegisterInObjectManager(*om, params,
1733                                                  CObjectManager::eDefault)
1734         .GetLoader()->GetName();
1735     sx_ReportCSraLoaderName(loader_name);
1736     CScope scope(*om);
1737     scope.AddDefaults();
1738 
1739     string annot_name = csra_name;
1740     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1741 
1742     CRef<CSeq_id> seqid(new CSeq_id(id));
1743     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1744     CRef<CSeq_loc> loc(new CSeq_loc);
1745     loc->SetInt().SetId(*seqid);
1746     loc->SetInt().SetFrom(from);
1747     loc->SetInt().SetTo(to);
1748     sx_CheckNames(scope, *loc, annot_name);
1749     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1750     sel.SetSearchUnresolved();
1751     sel.ExcludeNamedAnnots(pileup_name);
1752 
1753     BOOST_CHECK(scope.GetBioseqHandle(idh));
1754     if ( 1 ) {
1755         CGraph_CI git(scope, *loc, sel);
1756         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1757     }
1758 
1759     if ( 1 ) {
1760         CAlign_CI it(scope, *loc, sel);
1761         if ( it ) {
1762             cout << "Align count: "<<it.GetSize()<<endl;
1763             if ( it.GetAnnot().IsNamed() ) {
1764                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1765             }
1766         }
1767         BOOST_CHECK_EQUAL(align_count, it.GetSize());
1768 
1769         for ( ; it; ++it ) {
1770             const CSeq_align& align = *it;
1771             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1772                 sx_CheckSeq(scope, idh, **j);
1773             }
1774         }
1775     }
1776 
1777     if ( 1 ) {
1778         SAnnotSelector sel2 = sel; sel2.SetOverlapTotalRange();
1779         CAlign_CI it(scope, *loc, sel2);
1780         if ( it ) {
1781             cout << "Align count: "<<it.GetSize()<<endl;
1782             if ( it.GetAnnot().IsNamed() ) {
1783                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1784             }
1785         }
1786         BOOST_CHECK_EQUAL(align_count_over, it.GetSize());
1787 
1788         for ( ; it; ++it ) {
1789             const CSeq_align& align = *it;
1790             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1791                 sx_CheckSeq(scope, idh, **j);
1792             }
1793         }
1794     }
1795 
1796     if ( 1 ) {
1797         sel.ResetAnnotsNames();
1798         sel.AddNamedAnnots(pileup_name);
1799         CGraph_CI git(scope, *loc, sel);
1800         BOOST_CHECK_EQUAL(git.GetSize(), 6u);
1801         for ( size_t k = 0; git && k < 12; ++k, ++git ) {
1802             const CSeq_graph& graph = git->GetOriginalGraph();
1803             string title = graph.GetTitle();
1804             NcbiCout << "Pileup graph: " << title << NcbiEndl;
1805             typedef unsigned TExpectedPair[2];
1806             const TExpectedPair* expected_pairs = 0;
1807             size_t expected_count = 0;
1808             if ( title == "Number of inserts" ) {
1809                 static const TExpectedPair expected_I[] = {
1810                     { 0, 0 },
1811                 };
1812                 expected_pairs = expected_I;
1813                 expected_count = ArraySize(expected_I);
1814             }
1815             else if ( title == "Number of A bases" ) {
1816                 static const TExpectedPair expected_A[] = {
1817                     { 805967, 1 },
1818                     { 805977, 1 },
1819                 };
1820                 expected_pairs = expected_A;
1821                 expected_count = ArraySize(expected_A);
1822             }
1823             else if ( title == "Number of C bases" ) {
1824                 static const TExpectedPair expected_C[] = {
1825                     { 805960, 1 },
1826                 };
1827                 expected_pairs = expected_C;
1828                 expected_count = ArraySize(expected_C);
1829             }
1830             else if ( title == "Number of G bases" ) {
1831                 static const TExpectedPair expected_G[] = {
1832                     { 805968, 1 },
1833                 };
1834                 expected_pairs = expected_G;
1835                 expected_count = ArraySize(expected_G);
1836             }
1837             else if ( title == "Number of T bases" ) {
1838                 static const TExpectedPair expected_T[] = {
1839                     { 805963, 1 },
1840                     { 805965, 1 },
1841                     { 805966, 1 },
1842                 };
1843                 expected_pairs = expected_T;
1844                 expected_count = ArraySize(expected_T);
1845             }
1846             else {
1847                 BOOST_REQUIRE_EQUAL(title, "Number of matches");
1848                 static const TExpectedPair expected_M[] = {
1849                     { 805960, 69 },
1850                     { 805961, 70 },
1851                     { 805962, 72 },
1852                     { 805963, 72 },
1853                     { 805964, 74 },
1854                     { 805965, 70 },
1855                     { 805966, 72 },
1856                     { 805967, 72 },
1857                     { 805968, 70 },
1858                     { 805969, 71 },
1859                     { 805970, 70 },
1860                     { 805971, 70 },
1861                     { 805972, 71 },
1862                     { 805973, 69 },
1863                     { 805974, 70 },
1864                     { 805975, 70 },
1865                     { 805976, 70 },
1866                     { 805977, 70 },
1867                     { 805978, 69 },
1868                     { 805979, 69 },
1869                     { 805980, 70 },
1870                 };
1871                 expected_pairs = expected_M;
1872                 expected_count = ArraySize(expected_M);
1873             }
1874             map<TSeqPos, unsigned> expected;
1875             for ( size_t i = 0; i < expected_count; ++i ) {
1876                 expected[expected_pairs[i][0]] = expected_pairs[i][1];
1877             }
1878             CRange<TSeqPos> graph_range = graph.GetLoc().GetTotalRange();
1879             CRange<TSeqPos> range =
1880                 graph_range.IntersectionWith(CRange<TSeqPos>(from, to));
1881             BOOST_CHECK(!range.Empty());
1882             for ( TSeqPos pos = range.GetFrom(); pos <= range.GetTo(); ++pos ) {
1883                 TSeqPos i = pos - graph_range.GetFrom();
1884                 unsigned pileup_value =
1885                     graph.GetGraph().IsByte()?
1886                     graph.GetGraph().GetByte().GetValues()[i]:
1887                     graph.GetGraph().GetInt().GetValues()[i];
1888                 unsigned expected_value =
1889                     expected.count(pos)? expected[pos]: 0;
1890                 if ( false && pileup_value ) {
1891                     NcbiCout << pos << ": " << pileup_value << NcbiEndl;
1892                 }
1893                 BOOST_CHECK_EQUAL(pileup_value, expected_value);
1894             }
1895         }
1896     }
1897 }
1898 
1899 #if 0
1900 // huge cSRA file loading
1901 BOOST_AUTO_TEST_CASE(FetchSeq8)
1902 {
1903     CCSRADataLoader::SetPileupGraphsParamDefault(true);
1904 
1905     CStopWatch sw(CStopWatch::eStart);
1906     CRef<CObjectManager> om = sx_GetOM();
1907     NcbiCout << "Loaded OM in " << sw.Restart() << NcbiEndl;
1908 
1909     CCSRADataLoader::SLoaderParams params;
1910     string csra_name, id;
1911     TSeqPos from, to, align_count;
1912 
1913     {
1914         csra_name = "/panfs/traces01/compress/qa/yaschenk/RICHA/NA12878.richa.hiseq.primary_full.csra";
1915         id = "NC_000005.9";
1916         from = 20000;
1917         to   = 25000;
1918         align_count = 2636; // 5400 if mapq>=1
1919     }
1920     params.m_CSRAFiles.push_back(csra_name);
1921     CGBDataLoader::RegisterInObjectManager(*om);
1922     string loader_name =
1923         CCSRADataLoader::RegisterInObjectManager(*om, params,
1924                                                  CObjectManager::eDefault)
1925         .GetLoader()->GetName();
1926     NcbiCout << "Registered cSRA loader in " << sw.Restart() << NcbiEndl;
1927     sx_ReportCSraLoaderName(loader_name);
1928     CScope scope(*om);
1929     scope.AddDefaults();
1930 
1931     string annot_name = csra_name;
1932     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
1933 
1934     CRef<CSeq_id> seqid(new CSeq_id(id));
1935     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
1936     CRef<CSeq_loc> loc(new CSeq_loc);
1937     loc->SetInt().SetId(*seqid);
1938     loc->SetInt().SetFrom(from);
1939     loc->SetInt().SetTo(to);
1940 
1941     if ( 1 ) {
1942         BOOST_CHECK(scope.GetBioseqHandle(idh));
1943         //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
1944         NcbiCout << "Got reference handle in " << sw.Restart() << NcbiEndl;
1945     }
1946 
1947     if ( 1 ) {
1948         sx_CheckNames(scope, *loc, annot_name);
1949         NcbiCout << "Got names in " << sw.Restart() << NcbiEndl;
1950     }
1951 
1952     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
1953     sel.SetSearchUnresolved();
1954 
1955     if ( 1 ) {
1956         sel.ResetAnnotsNames();
1957         sel.AddNamedAnnots(annot_name);
1958         CGraph_CI git(scope, *loc, sel);
1959         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
1960         NcbiCout << "Got coverage graph in " << sw.Restart() << NcbiEndl;
1961         if ( 0 ) {
1962             for ( ; git; ++git ) {
1963                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
1964             }
1965         }
1966     }
1967 
1968     if ( 1 ) {
1969         sel.ResetAnnotsNames();
1970         sel.AddNamedAnnots(pileup_name);
1971         CGraph_CI git(scope, *loc, sel);
1972         BOOST_CHECK(git.GetSize() % 6 == 0);
1973         BOOST_CHECK(git.GetSize());
1974         NcbiCout << "Got pileup graphs in " << sw.Restart() << NcbiEndl;
1975         if ( 0 ) {
1976             for ( ; git; ++git ) {
1977                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
1978             }
1979         }
1980     }
1981 
1982     if ( 1 ) {
1983         sel.ResetAnnotsNames();
1984         sel.AddNamedAnnots(annot_name);
1985         CAlign_CI it(scope, *loc, sel);
1986         if ( it ) {
1987             cout << "Align count: "<<it.GetSize()<<endl;
1988             if ( it.GetAnnot().IsNamed() ) {
1989                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
1990             }
1991         }
1992         BOOST_CHECK_EQUAL(align_count, it.GetSize());
1993         NcbiCout << "Got alignments in " << sw.Restart() << NcbiEndl;
1994 
1995         for ( ; it; ++it ) {
1996             const CSeq_align& align = *it;
1997             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
1998                 sx_CheckSeq(scope, idh, **j);
1999             }
2000         }
2001         NcbiCout << "Got short reads in " << sw.Restart() << NcbiEndl;
2002     }
2003 }
2004 #endif
2005 #if 0 // the SRR494718 is not released yet
2006 BOOST_AUTO_TEST_CASE(ShortSeq1x)
2007 {
2008     CRef<CObjectManager> om = sx_GetOM();
2009 
2010     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2011 
2012     string loader_name =
2013         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2014         .GetLoader()->GetName();
2015     sx_ReportCSraLoaderName(loader_name);
2016 
2017     CScope scope(*om);
2018     scope.AddDefaults();
2019 
2020     CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR494718/chr1"));
2021     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR494718.7001.1"));
2022     TSeqPos prim_pos = 4263026;
2023     TSeqPos sec_pos = 3996256;
2024 
2025     typedef CRange<TSeqPos> TRange;
2026     CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2027     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2028 
2029     CBioseq_Handle ref;
2030     if ( 1 ) {
2031         ref = scope.GetBioseqHandle(ref_idh);
2032         BOOST_REQUIRE(ref);
2033         if ( 1 ) {
2034             // scan first 100 alignments
2035             CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2036             BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2037         }
2038         if ( 0 ) {
2039             // scan alignments around primary position
2040             int found = 0;
2041             for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2042                 const CSeq_align& align = *ait;
2043                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2044                     if ( read_id->Equals(*it) ) {
2045                         ++found;
2046                     }
2047                 }
2048             }
2049             BOOST_CHECK_EQUAL(found, 1);
2050         }
2051         if ( 1 ) {
2052             // scan alignments around secondary position
2053             int found = 0;
2054             for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2055                 const CSeq_align& align = *ait;
2056                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2057                     if ( read_id->Equals(*it) ) {
2058                         ++found;
2059                     }
2060                 }
2061             }
2062             BOOST_CHECK_EQUAL(found, 1);
2063         }
2064     }
2065 
2066     BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2067     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2068     BOOST_REQUIRE(read);
2069     if ( 1 ) {
2070         // quality graph
2071         CGraph_CI git(read);
2072         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2073     }
2074     if ( 1 ) {
2075         // alignment (primary and secondary)
2076         CAlign_CI ait(read);
2077         for ( ; ait; ++ait ) {
2078             NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2079         }
2080         BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2081     }
2082 }
2083 #endif
2084 
BOOST_AUTO_TEST_CASE(ShortSeq1)2085 BOOST_AUTO_TEST_CASE(ShortSeq1)
2086 {
2087     CRef<CObjectManager> om = sx_GetOM();
2088 
2089     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2090 
2091     bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2092     CCSRADataLoader::SetSpotReadAlignParamDefault(false);
2093 
2094     string loader_name =
2095         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2096         .GetLoader()->GetName();
2097     sx_ReportCSraLoaderName(loader_name);
2098 
2099     CScope scope(*om);
2100     scope.AddDefaults();
2101 
2102     CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2103     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2104     TSeqPos prim_pos = 965627;
2105     TSeqPos sec_pos = 966293;
2106 
2107     typedef CRange<TSeqPos> TRange;
2108     CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2109     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2110 
2111     CBioseq_Handle ref;
2112     if ( 1 ) {
2113         ref = scope.GetBioseqHandle(ref_idh);
2114         BOOST_REQUIRE(ref);
2115         if ( 1 ) {
2116             // scan first 100 alignments
2117             CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2118             BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2119         }
2120         if ( 0 ) {
2121             // scan alignments around primary position
2122             int found = 0;
2123             for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2124                 const CSeq_align& align = *ait;
2125                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2126                     if ( read_id->Equals(*it) ) {
2127                         ++found;
2128                     }
2129                 }
2130             }
2131             BOOST_CHECK_EQUAL(found, 1);
2132         }
2133         if ( 1 ) {
2134             // scan alignments around secondary position
2135             int found = 0;
2136             for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2137                 const CSeq_align& align = *ait;
2138                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2139                     if ( read_id->Equals(*it) ) {
2140                         ++found;
2141                     }
2142                 }
2143             }
2144             BOOST_CHECK_EQUAL(found, 1);
2145         }
2146     }
2147 
2148     BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2149     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2150     BOOST_REQUIRE(read);
2151     if ( 1 ) {
2152         // quality graph
2153         CGraph_CI git(read);
2154         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2155     }
2156     if ( 1 ) {
2157         // alignment (primary and secondary)
2158         CAlign_CI ait(read);
2159         for ( ; ait; ++ait ) {
2160             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2161         }
2162         BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2163     }
2164     if ( 1 ) {
2165         scope.ResetHistory();
2166         dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(true);
2167         // only primary alignment is available with 'spot_read_align' parameter set
2168         CAlign_CI ait(read);
2169         for ( ; ait; ++ait ) {
2170             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2171         }
2172         BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2173     }
2174 
2175     CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2176 }
2177 
2178 
BOOST_AUTO_TEST_CASE(ShortSeq1_set)2179 BOOST_AUTO_TEST_CASE(ShortSeq1_set)
2180 {
2181     CRef<CObjectManager> om = sx_GetOM();
2182 
2183     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2184 
2185     bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2186     CCSRADataLoader::SetSpotReadAlignParamDefault(true);
2187 
2188     string loader_name =
2189         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2190         .GetLoader()->GetName();
2191     sx_ReportCSraLoaderName(loader_name);
2192     dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(false);
2193 
2194     CScope scope(*om);
2195     scope.AddDefaults();
2196 
2197     CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2198     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2199     TSeqPos prim_pos = 965627;
2200     TSeqPos sec_pos = 966293;
2201 
2202     typedef CRange<TSeqPos> TRange;
2203     CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2204     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2205 
2206     CBioseq_Handle ref;
2207     if ( 1 ) {
2208         ref = scope.GetBioseqHandle(ref_idh);
2209         BOOST_REQUIRE(ref);
2210         if ( 1 ) {
2211             // scan first 100 alignments
2212             CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2213             BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2214         }
2215         if ( 0 ) {
2216             // scan alignments around primary position
2217             int found = 0;
2218             for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2219                 const CSeq_align& align = *ait;
2220                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2221                     if ( read_id->Equals(*it) ) {
2222                         ++found;
2223                     }
2224                 }
2225             }
2226             BOOST_CHECK_EQUAL(found, 1);
2227         }
2228         if ( 1 ) {
2229             // scan alignments around secondary position
2230             int found = 0;
2231             for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2232                 const CSeq_align& align = *ait;
2233                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2234                     if ( read_id->Equals(*it) ) {
2235                         ++found;
2236                     }
2237                 }
2238             }
2239             BOOST_CHECK_EQUAL(found, 1);
2240         }
2241     }
2242 
2243     BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2244     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2245     BOOST_REQUIRE(read);
2246     if ( 1 ) {
2247         // quality graph
2248         CGraph_CI git(read);
2249         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2250     }
2251     if ( 1 ) {
2252         // alignment (primary and secondary)
2253         CAlign_CI ait(read);
2254         for ( ; ait; ++ait ) {
2255             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2256         }
2257         BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2258     }
2259     if ( 1 ) {
2260         scope.ResetHistory();
2261         dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(true);
2262         // only primary alignment is available with 'spot_read_align' parameter set
2263         CAlign_CI ait(read);
2264         for ( ; ait; ++ait ) {
2265             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2266         }
2267         BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2268     }
2269 
2270     CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2271 }
2272 
2273 
BOOST_AUTO_TEST_CASE(ShortSeq1spot)2274 BOOST_AUTO_TEST_CASE(ShortSeq1spot)
2275 {
2276     CRef<CObjectManager> om = sx_GetOM();
2277 
2278     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2279 
2280     bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2281     CCSRADataLoader::SetSpotReadAlignParamDefault(true);
2282 
2283     string loader_name =
2284         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2285         .GetLoader()->GetName();
2286     sx_ReportCSraLoaderName(loader_name);
2287 
2288     CScope scope(*om);
2289     scope.AddDefaults();
2290 
2291     CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2292     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2293     TSeqPos prim_pos = 965627;
2294     TSeqPos sec_pos = 966293;
2295 
2296     typedef CRange<TSeqPos> TRange;
2297     CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2298     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2299 
2300     CBioseq_Handle ref;
2301     if ( 1 ) {
2302         ref = scope.GetBioseqHandle(ref_idh);
2303         BOOST_REQUIRE(ref);
2304         if ( 1 ) {
2305             // scan first 100 alignments
2306             CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2307             BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2308         }
2309         if ( 0 ) {
2310             // scan alignments around primary position
2311             int found = 0;
2312             for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2313                 const CSeq_align& align = *ait;
2314                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2315                     if ( read_id->Equals(*it) ) {
2316                         ++found;
2317                     }
2318                 }
2319             }
2320             BOOST_CHECK_EQUAL(found, 1);
2321         }
2322         if ( 1 ) {
2323             // scan alignments around secondary position
2324             int found = 0;
2325             for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2326                 const CSeq_align& align = *ait;
2327                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2328                     if ( read_id->Equals(*it) ) {
2329                         ++found;
2330                     }
2331                 }
2332             }
2333             BOOST_CHECK_EQUAL(found, 1);
2334         }
2335     }
2336 
2337     BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2338     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2339     BOOST_REQUIRE(read);
2340     if ( 1 ) {
2341         // quality graph
2342         CGraph_CI git(read);
2343         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2344     }
2345     if ( 1 ) {
2346         // only primary alignment is available with 'spot_read_align' parameter set
2347         CAlign_CI ait(read);
2348         for ( ; ait; ++ait ) {
2349             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2350         }
2351         BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2352     }
2353     if ( 1 ) {
2354         scope.ResetHistory();
2355         dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(false);
2356         // both primary and secondary alignments should be visible
2357         // after 'spot_read_align' parameter is reset
2358         CAlign_CI ait(read);
2359         for ( ; ait; ++ait ) {
2360             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2361         }
2362         BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2363     }
2364 
2365     CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2366 }
2367 
2368 
BOOST_AUTO_TEST_CASE(ShortSeq1spot_set)2369 BOOST_AUTO_TEST_CASE(ShortSeq1spot_set)
2370 {
2371     CRef<CObjectManager> om = sx_GetOM();
2372 
2373     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2374 
2375     bool save_param = CCSRADataLoader::GetSpotReadAlignParamDefault();
2376     CCSRADataLoader::SetSpotReadAlignParamDefault(false);
2377 
2378     string loader_name =
2379         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2380         .GetLoader()->GetName();
2381     sx_ReportCSraLoaderName(loader_name);
2382     dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(true);
2383 
2384     CScope scope(*om);
2385     scope.AddDefaults();
2386 
2387     CRef<CSeq_id> ref_id(new CSeq_id("gnl|SRA|SRR505887/chr1"));
2388     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR505887.144261.2"));
2389     TSeqPos prim_pos = 965627;
2390     TSeqPos sec_pos = 966293;
2391 
2392     typedef CRange<TSeqPos> TRange;
2393     CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(*ref_id);
2394     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2395 
2396     CBioseq_Handle ref;
2397     if ( 1 ) {
2398         ref = scope.GetBioseqHandle(ref_idh);
2399         BOOST_REQUIRE(ref);
2400         if ( 1 ) {
2401             // scan first 100 alignments
2402             CAlign_CI ait(ref, SAnnotSelector().SetMaxSize(100));
2403             BOOST_CHECK_EQUAL(ait.GetSize(), 100u);
2404         }
2405         if ( 0 ) {
2406             // scan alignments around primary position
2407             int found = 0;
2408             for ( CAlign_CI ait(ref, TRange(prim_pos, prim_pos)); ait; ++ait ) {
2409                 const CSeq_align& align = *ait;
2410                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2411                     if ( read_id->Equals(*it) ) {
2412                         ++found;
2413                     }
2414                 }
2415             }
2416             BOOST_CHECK_EQUAL(found, 1);
2417         }
2418         if ( 1 ) {
2419             // scan alignments around secondary position
2420             int found = 0;
2421             for ( CAlign_CI ait(ref, TRange(sec_pos, sec_pos)); ait; ++ait ) {
2422                 const CSeq_align& align = *ait;
2423                 for ( CTypeConstIterator<CSeq_id> it(Begin(align)); it; ++it ) {
2424                     if ( read_id->Equals(*it) ) {
2425                         ++found;
2426                     }
2427                 }
2428             }
2429             BOOST_CHECK_EQUAL(found, 1);
2430         }
2431     }
2432 
2433     BOOST_CHECK(scope.GetIds(ref_idh).size() > 0);
2434     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2435     BOOST_REQUIRE(read);
2436     if ( 1 ) {
2437         // quality graph
2438         CGraph_CI git(read);
2439         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2440     }
2441     if ( 1 ) {
2442         // only primary alignment is available with 'spot_read_align' parameter set
2443         CAlign_CI ait(read);
2444         for ( ; ait; ++ait ) {
2445             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2446         }
2447         BOOST_CHECK_EQUAL(ait.GetSize(), 1u);
2448     }
2449     if ( 1 ) {
2450         scope.ResetHistory();
2451         dynamic_cast<CCSRADataLoader*>(om->FindDataLoader(loader_name))->SetSpotReadAlign(false);
2452         // both primary and secondary alignments should be visible
2453         // after 'spot_read_align' parameter is reset
2454         CAlign_CI ait(read);
2455         for ( ; ait; ++ait ) {
2456             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2457         }
2458         BOOST_CHECK_EQUAL(ait.GetSize(), 2u);
2459     }
2460 
2461     CCSRADataLoader::SetSpotReadAlignParamDefault(save_param);
2462 }
2463 
2464 
BOOST_AUTO_TEST_CASE(ShortSeq2)2465 BOOST_AUTO_TEST_CASE(ShortSeq2)
2466 {
2467     CRef<CObjectManager> om = sx_GetOM();
2468 
2469     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2470 
2471     string loader_name =
2472         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2473         .GetLoader()->GetName();
2474     sx_ReportCSraLoaderName(loader_name);
2475 
2476     CScope scope(*om);
2477     scope.AddDefaults();
2478 
2479     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR035417.1.1"));
2480 
2481     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2482 
2483     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2484     BOOST_REQUIRE(read);
2485     if ( 1 ) {
2486         // quality graph
2487         CGraph_CI git(read);
2488         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2489     }
2490     if ( 1 ) {
2491         // alignment (primary and secondary)
2492         CAlign_CI ait(read);
2493         for ( ; ait; ++ait ) {
2494             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2495         }
2496         BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2497     }
2498 }
2499 
2500 
BOOST_AUTO_TEST_CASE(ShortSeq3)2501 BOOST_AUTO_TEST_CASE(ShortSeq3)
2502 {
2503     CRef<CObjectManager> om = sx_GetOM();
2504 
2505     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2506 
2507     string loader_name =
2508         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2509         .GetLoader()->GetName();
2510     sx_ReportCSraLoaderName(loader_name);
2511 
2512     CScope scope(*om);
2513     scope.AddDefaults();
2514 
2515     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|ERR003990.1.1"));
2516 
2517     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2518 
2519     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2520     BOOST_REQUIRE(read);
2521     if ( 1 ) {
2522         // quality graph
2523         CGraph_CI git(read);
2524         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2525     }
2526     if ( 1 ) {
2527         // alignment (primary and secondary)
2528         CAlign_CI ait(read);
2529         for ( ; ait; ++ait ) {
2530             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2531         }
2532         BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2533     }
2534 }
2535 
2536 
BOOST_AUTO_TEST_CASE(ShortSeq4)2537 BOOST_AUTO_TEST_CASE(ShortSeq4)
2538 {
2539     CRef<CObjectManager> om = sx_GetOM();
2540 
2541     //putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2542 
2543     string loader_name =
2544         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2545         .GetLoader()->GetName();
2546     sx_ReportCSraLoaderName(loader_name);
2547 
2548     CScope scope(*om);
2549     scope.AddDefaults();
2550 
2551     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR749060.1.1"));
2552 
2553     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2554 
2555     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2556     BOOST_REQUIRE(read);
2557     if ( 1 ) {
2558         // quality graph
2559         CGraph_CI git(read);
2560         BOOST_CHECK_EQUAL(git.GetSize(), 0u);
2561     }
2562     if ( 1 ) {
2563         // alignment (primary and secondary)
2564         CAlign_CI ait(read);
2565         for ( ; ait; ++ait ) {
2566             //NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2567         }
2568         BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2569     }
2570 }
2571 
2572 /*
2573 BOOST_AUTO_TEST_CASE(ShortSeq5)
2574 {
2575     CRef<CObjectManager> om = sx_GetOM();
2576 
2577     bool verbose = 1;
2578     bool graph = 1;
2579 
2580     if ( graph ) {
2581         putenv("CSRA_LOADER_QUALITY_GRAPHS=true");
2582     }
2583 
2584     string loader_name =
2585         CCSRADataLoader::RegisterInObjectManager(*om, CObjectManager::eDefault)
2586         .GetLoader()->GetName();
2587     sx_ReportCSraLoaderName(loader_name);
2588 
2589     CScope scope(*om);
2590     scope.AddDefaults();
2591 
2592     CRef<CSeq_id> read_id(new CSeq_id("gnl|SRA|SRR500739.422766.2"));
2593 
2594     CSeq_id_Handle read_idh = CSeq_id_Handle::GetHandle(*read_id);
2595 
2596     CBioseq_Handle read = scope.GetBioseqHandle(read_idh);
2597     BOOST_REQUIRE(read);
2598     if ( 1 ) {
2599         // quality graph
2600         CGraph_CI git(read);
2601         BOOST_REQUIRE_EQUAL(git.GetSize(), graph? 1u: 0u);
2602         if ( graph && verbose ) {
2603             NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
2604         }
2605     }
2606     if ( 1 ) {
2607         // alignment (primary and secondary)
2608         CAlign_CI ait(read);
2609         for ( ; ait; ++ait ) {
2610             if ( verbose ) {
2611                 NcbiCout << MSerial_AsnText << *ait << NcbiEndl;
2612             }
2613         }
2614         BOOST_CHECK_EQUAL(ait.GetSize(), 0u);
2615     }
2616     if ( verbose ) {
2617         NcbiCout << MSerial_AsnText << *read.GetCompleteObject();
2618     }
2619 }
2620 */
2621 
BOOST_AUTO_TEST_CASE(MultipleIds1)2622 BOOST_AUTO_TEST_CASE(MultipleIds1)
2623 {
2624     CRef<CObjectManager> om = sx_GetOM();
2625 
2626     CCSRADataLoader::SLoaderParams params;
2627     string csra_name, id;
2628     TSeqPos from, to, align_count;
2629 
2630     {
2631         csra_name = "SRR413273";
2632         id = "121114303";
2633         from = 100;
2634         to   = 2000;
2635         align_count = 1525; // 5400 if mapq>=1
2636     }
2637     params.m_DirPath.erase();
2638     params.m_CSRAFiles.push_back(csra_name);
2639     string loader_name =
2640         CCSRADataLoader::RegisterInObjectManager(*om, params,
2641                                                  CObjectManager::eDefault)
2642         .GetLoader()->GetName();
2643     sx_ReportCSraLoaderName(loader_name);
2644     CScope scope(*om);
2645     scope.AddDefaults();
2646 
2647     string annot_name = csra_name;
2648     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
2649 
2650     CRef<CSeq_id> seqid(new CSeq_id(id));
2651     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
2652     CRef<CSeq_loc> loc(new CSeq_loc);
2653     loc->SetInt().SetId(*seqid);
2654     loc->SetInt().SetFrom(from);
2655     loc->SetInt().SetTo(to);
2656     sx_CheckNames(scope, *loc, annot_name);
2657     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
2658     sel.SetSearchUnresolved();
2659     sel.ExcludeNamedAnnots(pileup_name);
2660 
2661     //BOOST_REQUIRE(scope.GetBioseqHandle(idh));
2662     //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
2663     if ( 1 ) {
2664         CGraph_CI git(scope, *loc, sel);
2665         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
2666     }
2667 
2668     if ( 1 ) {
2669         CAlign_CI it(scope, *loc, sel);
2670         if ( it ) {
2671             cout << "Align count: "<<it.GetSize()<<endl;
2672             if ( it.GetAnnot().IsNamed() ) {
2673                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
2674             }
2675         }
2676         BOOST_CHECK_EQUAL(align_count, it.GetSize());
2677 
2678         for ( ; it; ++it ) {
2679             const CSeq_align& align = *it;
2680             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
2681                 sx_CheckSeq(scope, idh, **j);
2682             }
2683         }
2684     }
2685 
2686     if ( 1 ) {
2687         sel.ResetAnnotsNames();
2688         sel.AddNamedAnnots(pileup_name);
2689         CGraph_CI git(scope, *loc, sel);
2690         BOOST_CHECK(git.GetSize() % 6 == 0);
2691         if ( 0 ) {
2692             for ( ; git; ++git ) {
2693                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
2694             }
2695         }
2696     }
2697 }
2698 
2699 
BOOST_AUTO_TEST_CASE(MultipleIds2)2700 BOOST_AUTO_TEST_CASE(MultipleIds2)
2701 {
2702     CRef<CObjectManager> om = sx_GetOM();
2703 
2704     CCSRADataLoader::SLoaderParams params;
2705     string csra_name, id;
2706     TSeqPos from, to, align_count;
2707 
2708     {
2709         csra_name = "SRR413273";
2710         id = "NM_004119.2";
2711         from = 100;
2712         to   = 2000;
2713         align_count = 1525; // 5400 if mapq>=1
2714     }
2715     params.m_DirPath.erase();
2716     params.m_CSRAFiles.push_back(csra_name);
2717     string loader_name =
2718         CCSRADataLoader::RegisterInObjectManager(*om, params,
2719                                                  CObjectManager::eDefault)
2720         .GetLoader()->GetName();
2721     sx_ReportCSraLoaderName(loader_name);
2722     CGBDataLoader::RegisterInObjectManager(*om);
2723     CScope scope(*om);
2724     scope.AddDefaults();
2725 
2726     string annot_name = csra_name;
2727     string pileup_name = annot_name+PILEUP_NAME_SUFFIX;
2728 
2729     CRef<CSeq_id> seqid(new CSeq_id(id));
2730     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*seqid);
2731     CRef<CSeq_loc> loc(new CSeq_loc);
2732     loc->SetInt().SetId(*seqid);
2733     loc->SetInt().SetFrom(from);
2734     loc->SetInt().SetTo(to);
2735     sx_CheckNames(scope, *loc, annot_name);
2736     SAnnotSelector sel(CSeq_annot::C_Data::e_Align);
2737     sel.SetSearchUnresolved();
2738     sel.ExcludeNamedAnnots(pileup_name);
2739     sel.ExcludeNamedAnnots("SNP");
2740 
2741     //BOOST_REQUIRE(scope.GetBioseqHandle(idh));
2742     //NcbiCout<<MSerial_AsnText<<*scope.GetBioseqHandle(idh).GetCompleteObject();
2743     if ( 1 ) {
2744         CGraph_CI git(scope, *loc, sel);
2745         BOOST_CHECK_EQUAL(git.GetSize(), 1u);
2746     }
2747 
2748     if ( 1 ) {
2749         CAlign_CI it(scope, *loc, sel);
2750         if ( it ) {
2751             cout << "Align count: "<<it.GetSize()<<endl;
2752             if ( it.GetAnnot().IsNamed() ) {
2753                 cout << "Annot name: " << it.GetAnnot().GetName()<<endl;
2754             }
2755         }
2756         BOOST_CHECK_EQUAL(align_count, it.GetSize());
2757 
2758         for ( ; it; ++it ) {
2759             const CSeq_align& align = *it;
2760             ITERATE(CDense_seg::TIds, j, align.GetSegs().GetDenseg().GetIds()) {
2761                 sx_CheckSeq(scope, idh, **j);
2762             }
2763         }
2764     }
2765 
2766     if ( 1 ) {
2767         sel.ResetAnnotsNames();
2768         sel.AddNamedAnnots(pileup_name);
2769         CGraph_CI git(scope, *loc, sel);
2770         BOOST_CHECK(git.GetSize() % 6 == 0);
2771         if ( 0 ) {
2772             for ( ; git; ++git ) {
2773                 NcbiCout << MSerial_AsnText << git->GetOriginalGraph();
2774             }
2775         }
2776     }
2777 }
2778 
BOOST_AUTO_TEST_CASE(CheckPrivate1)2779 BOOST_AUTO_TEST_CASE(CheckPrivate1)
2780 {
2781     CRef<CObjectManager> om = sx_GetOM();
2782 
2783     CCSRADataLoader::SLoaderParams params;
2784 
2785     string path("http://gapsview11.be-md.ncbi.nlm.nih.gov/49DCAC8956E9BD404735BC18EA2E7986C1674A6F/SRR1219902.sra");
2786     params.m_CSRAFiles.push_back(path);
2787 
2788     string loader_name;
2789     bool isError = false;
2790     try {
2791         NcbiCout << "Trying to create a loader for: " << path << NcbiEndl;
2792         loader_name = CCSRADataLoader::RegisterInObjectManager(*om, params)
2793             .GetLoader()->GetName();
2794         NcbiCout << "Loader created successfully" << NcbiEndl;
2795         sx_ReportCSraLoaderName(loader_name);
2796     }
2797     catch (CSraException& e) {
2798         isError = true;
2799         ERR_POST("Exception: "<<e);
2800         BOOST_CHECK(e.GetErrCode() == e.eProtectedDb);
2801     }
2802     BOOST_CHECK(isError);
2803 }
2804 
2805 
NCBITEST_INIT_TREE()2806 NCBITEST_INIT_TREE()
2807 {
2808     NCBITEST_DISABLE(FetchSeq1);
2809     NCBITEST_DISABLE(FetchSeq2);
2810     NCBITEST_DISABLE(FetchSeq3);
2811     NCBITEST_DISABLE(FetchSeq4);
2812     NCBITEST_DISABLE(FetchSeq4l);
2813     NCBITEST_DISABLE(FetchSeq4sx);
2814     NCBITEST_DISABLE(FetchSeq4sd);
2815     NCBITEST_DISABLE(FetchSeq4s);
2816 }
2817