1 /* $Id: csraread.cpp 598358 2019-12-06 23:45:25Z vasilche $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Eugene Vasilchenko
27  *
28  * File Description:
29  *   Access to SRA files
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <sra/readers/sra/csraread.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/ncbifile.hpp>
37 #include <corelib/ncbi_param.hpp>
38 #include <objects/general/general__.hpp>
39 #include <objects/seq/seq__.hpp>
40 #include <objects/seqset/seqset__.hpp>
41 #include <objects/seqloc/seqloc__.hpp>
42 #include <objects/seqalign/seqalign__.hpp>
43 #include <objects/seqres/seqres__.hpp>
44 #include <objtools/readers/iidmapper.hpp>
45 #include <sra/error_codes.hpp>
46 
47 #include <sra/readers/sra/kdbread.hpp>
48 #include <klib/rc.h>
49 #include <insdc/sra.h>
50 
51 #include <sstream>
52 #include <algorithm>
53 
54 BEGIN_NCBI_NAMESPACE;
55 
56 #define NCBI_USE_ERRCODE_X   cSRAReader
57 NCBI_DEFINE_ERR_SUBCODE_X(1);
58 
59 BEGIN_NAMESPACE(objects);
60 
61 
62 NCBI_PARAM_DECL(bool, CSRA, EXPLICIT_MATE_INFO);
63 NCBI_PARAM_DEF_EX(bool, CSRA, EXPLICIT_MATE_INFO, false,
64                   eParam_NoThread, CSRA_EXPLICIT_MATE_INFO);
65 
66 
67 NCBI_PARAM_DECL(bool, CSRA, CIGAR_IN_ALIGN_EXT);
68 NCBI_PARAM_DEF_EX(bool, CSRA, CIGAR_IN_ALIGN_EXT, true,
69                   eParam_NoThread, CSRA_CIGAR_IN_ALIGN_EXT);
70 
71 
72 NCBI_PARAM_DECL(bool, CSRA, INCLUDE_TECHNICAL_READS);
73 NCBI_PARAM_DEF_EX(bool, CSRA, INCLUDE_TECHNICAL_READS, false,
74                   eParam_NoThread, CSRA_INCLUDE_TECHNICAL_READS);
75 
76 
77 NCBI_PARAM_DECL(bool, CSRA, CLIP_BY_QUALITY);
78 NCBI_PARAM_DEF_EX(bool, CSRA, CLIP_BY_QUALITY, true,
79                   eParam_NoThread, CSRA_CLIP_BY_QUALITY);
80 
81 
82 NCBI_PARAM_DECL(bool, CSRA, PATH_IN_ID);
83 NCBI_PARAM_DEF_EX(bool, CSRA, PATH_IN_ID, true,
84                   eParam_NoThread, CSRA_PATH_IN_ID);
85 
86 
87 NCBI_PARAM_DECL(bool, CSRA, READ_FILTER_IN_ALIGN_EXT);
88 NCBI_PARAM_DEF(bool, CSRA, READ_FILTER_IN_ALIGN_EXT, true);
89 
90 
s_GetExplicitMateInfoParam(void)91 static bool s_GetExplicitMateInfoParam(void)
92 {
93     static CSafeStatic<NCBI_PARAM_TYPE(CSRA, EXPLICIT_MATE_INFO)> s_Value;
94     return s_Value->Get();
95 }
96 
97 
s_GetCigarInAlignExt(void)98 static bool s_GetCigarInAlignExt(void)
99 {
100     static CSafeStatic<NCBI_PARAM_TYPE(CSRA, CIGAR_IN_ALIGN_EXT)> s_Value;
101     return s_Value->Get();
102 }
103 
104 
s_GetIncludeTechnicalReads(void)105 static bool s_GetIncludeTechnicalReads(void)
106 {
107     static CSafeStatic<NCBI_PARAM_TYPE(CSRA, INCLUDE_TECHNICAL_READS)> s_Value;
108     return s_Value->Get();
109 }
110 
111 
s_GetClipByQuality(void)112 static bool s_GetClipByQuality(void)
113 {
114     static CSafeStatic<NCBI_PARAM_TYPE(CSRA, CLIP_BY_QUALITY)> s_Value;
115     return s_Value->Get();
116 }
117 
118 
s_GetPathInId(void)119 static bool s_GetPathInId(void)
120 {
121     static CSafeStatic<NCBI_PARAM_TYPE(CSRA, PATH_IN_ID)> s_Value;
122     return s_Value->Get();
123 }
124 
125 
s_GetReadFilterInAlignExt(void)126 static bool s_GetReadFilterInAlignExt(void)
127 {
128     static CSafeStatic<NCBI_PARAM_TYPE(CSRA, READ_FILTER_IN_ALIGN_EXT)> s_Value;
129     return s_Value->Get();
130 }
131 
132 
133 #define RC_NO_MORE_ALIGNMENTS RC(rcApp, rcQuery, rcSearching, rcRow, rcNotFound)
134 
135 
136 // SRefTableCursor is helper accessor structure for refseq table
137 struct CCSraDb_Impl::SRefTableCursor : public CObject {
138     SRefTableCursor(const CVDBTable& table);
139 
140     CVDBCursor m_Cursor;
141 
142     DECLARE_VDB_COLUMN_AS(Uint1, CGRAPH_HIGH);
143     DECLARE_VDB_COLUMN_AS(TVDBRowId, PRIMARY_ALIGNMENT_IDS);
144     DECLARE_VDB_COLUMN_AS(TVDBRowId, SECONDARY_ALIGNMENT_IDS);
145     DECLARE_VDB_COLUMN_AS_STRING(NAME);
146     typedef pair<TVDBRowId, TVDBRowId> row_range_t;
147     DECLARE_VDB_COLUMN_AS(row_range_t, NAME_RANGE);
148     DECLARE_VDB_COLUMN_AS_STRING(SEQ_ID);
149     DECLARE_VDB_COLUMN_AS(INSDC_coord_len, SEQ_LEN);
150     DECLARE_VDB_COLUMN_AS(INSDC_coord_len, MAX_SEQ_LEN);
151     DECLARE_VDB_COLUMN_AS_STRING(READ);
152     DECLARE_VDB_COLUMN_AS(bool, CIRCULAR);
153     DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, OVERLAP_REF_POS);
154 };
155 
156 
157 // SAlnTableCursor is helper accessor structure for align table
158 struct CCSraDb_Impl::SAlnTableCursor : public CObject {
159     SAlnTableCursor(const CVDBTable& table, bool is_secondary);
160 
161     CVDBCursor m_Cursor;
162     bool m_IsSecondary;
163 
164     DECLARE_VDB_COLUMN_AS_STRING(REF_NAME);
165     DECLARE_VDB_COLUMN_AS_STRING(REF_SEQ_ID);
166     DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, REF_POS);
167     DECLARE_VDB_COLUMN_AS(INSDC_coord_len, REF_LEN);
168     DECLARE_VDB_COLUMN_AS(bool, REF_ORIENTATION);
169     DECLARE_VDB_COLUMN_AS(char, HAS_REF_OFFSET);
170     DECLARE_VDB_COLUMN_AS(char, HAS_MISMATCH);
171     DECLARE_VDB_COLUMN_AS(int32_t, REF_OFFSET);
172     DECLARE_VDB_COLUMN_AS(Uint1, REF_OFFSET_TYPE);
173     DECLARE_VDB_COLUMN_AS_STRING(CIGAR_SHORT);
174     DECLARE_VDB_COLUMN_AS_STRING(CIGAR_LONG);
175     DECLARE_VDB_COLUMN_AS_STRING(RAW_READ);
176     DECLARE_VDB_COLUMN_AS_STRING(MISMATCH_READ);
177     DECLARE_VDB_COLUMN_AS_STRING(MISMATCH);
178     DECLARE_VDB_COLUMN_AS(INSDC_coord_len, SPOT_LEN);
179     DECLARE_VDB_COLUMN_AS(TVDBRowId, SEQ_SPOT_ID);
180     DECLARE_VDB_COLUMN_AS(INSDC_coord_one, SEQ_READ_ID);
181     DECLARE_VDB_COLUMN_AS(int32_t, MAPQ);
182     DECLARE_VDB_COLUMN_AS(TVDBRowId, MATE_ALIGN_ID);
183     DECLARE_VDB_COLUMN_AS(INSDC_quality_phred, QUALITY);
184     DECLARE_VDB_COLUMN_AS_STRING(SPOT_GROUP);
185     DECLARE_VDB_COLUMN_AS_STRING(NAME);
186     DECLARE_VDB_COLUMN_AS(INSDC_read_filter, READ_FILTER);
187 };
188 
189 
190 // SSeqTableCursor is helper accessor structure for sequence table
191 struct CCSraDb_Impl::SSeqTableCursor : public CObject {
192     SSeqTableCursor(const CVDBTable& table);
193 
194     CVDBCursor m_Cursor;
195 
196     DECLARE_VDB_COLUMN_AS_STRING(SPOT_GROUP);
197     DECLARE_VDB_COLUMN_AS(INSDC_read_type, READ_TYPE);
198     DECLARE_VDB_COLUMN_AS(INSDC_coord_len, READ_LEN);
199     DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, READ_START);
200     DECLARE_VDB_COLUMN_AS_STRING(READ);
201     DECLARE_VDB_COLUMN_AS(INSDC_quality_phred, QUALITY);
202     DECLARE_VDB_COLUMN_AS(TVDBRowId, PRIMARY_ALIGNMENT_ID);
203     DECLARE_VDB_COLUMN_AS(INSDC_coord_len, TRIM_LEN);
204     DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, TRIM_START);
205     DECLARE_VDB_COLUMN_AS_STRING(NAME);
206     DECLARE_VDB_COLUMN_AS(INSDC_read_filter, READ_FILTER);
207 };
208 
209 
SRefTableCursor(const CVDBTable & table)210 CCSraDb_Impl::SRefTableCursor::SRefTableCursor(const CVDBTable& table)
211     : m_Cursor(table),
212       INIT_VDB_COLUMN(CGRAPH_HIGH),
213       INIT_VDB_COLUMN(PRIMARY_ALIGNMENT_IDS),
214       INIT_OPTIONAL_VDB_COLUMN(SECONDARY_ALIGNMENT_IDS),
215       INIT_VDB_COLUMN(NAME),
216       INIT_VDB_COLUMN(NAME_RANGE),
217       INIT_VDB_COLUMN(SEQ_ID),
218       INIT_VDB_COLUMN(SEQ_LEN),
219       INIT_VDB_COLUMN(MAX_SEQ_LEN),
220       INIT_VDB_COLUMN(READ),
221       INIT_VDB_COLUMN(CIRCULAR),
222       INIT_OPTIONAL_VDB_COLUMN(OVERLAP_REF_POS)
223 {
224 }
225 
226 
SAlnTableCursor(const CVDBTable & table,bool is_secondary)227 CCSraDb_Impl::SAlnTableCursor::SAlnTableCursor(const CVDBTable& table,
228                                                bool is_secondary)
229     : m_Cursor(table),
230       m_IsSecondary(is_secondary),
231       INIT_VDB_COLUMN(REF_NAME),
232       INIT_VDB_COLUMN(REF_SEQ_ID),
233       INIT_VDB_COLUMN(REF_POS),
234       INIT_VDB_COLUMN(REF_LEN),
235       INIT_VDB_COLUMN(REF_ORIENTATION),
236       INIT_VDB_COLUMN(HAS_REF_OFFSET),
237       INIT_VDB_COLUMN(HAS_MISMATCH),
238       INIT_VDB_COLUMN(REF_OFFSET),
239       INIT_OPTIONAL_VDB_COLUMN(REF_OFFSET_TYPE),
240       INIT_VDB_COLUMN(CIGAR_SHORT),
241       INIT_VDB_COLUMN(CIGAR_LONG),
242       INIT_VDB_COLUMN(RAW_READ),
243       INIT_VDB_COLUMN_BACKUP(MISMATCH_READ, READ),
244       INIT_VDB_COLUMN(MISMATCH),
245       INIT_VDB_COLUMN(SPOT_LEN),
246       INIT_VDB_COLUMN(SEQ_SPOT_ID),
247       INIT_VDB_COLUMN(SEQ_READ_ID),
248       INIT_VDB_COLUMN(MAPQ),
249       INIT_VDB_COLUMN(MATE_ALIGN_ID),
250       INIT_VDB_COLUMN(QUALITY),
251       INIT_VDB_COLUMN(SPOT_GROUP),
252       INIT_OPTIONAL_VDB_COLUMN(NAME),
253       INIT_OPTIONAL_VDB_COLUMN_BACKUP(READ_FILTER, RD_FILTER)
254 {
255 }
256 
257 
SSeqTableCursor(const CVDBTable & table)258 CCSraDb_Impl::SSeqTableCursor::SSeqTableCursor(const CVDBTable& table)
259     : m_Cursor(table),
260       INIT_VDB_COLUMN(SPOT_GROUP),
261       INIT_VDB_COLUMN(READ_TYPE),
262       INIT_VDB_COLUMN(READ_LEN),
263       INIT_VDB_COLUMN(READ_START),
264       INIT_VDB_COLUMN(READ),
265       INIT_VDB_COLUMN(QUALITY),
266       INIT_OPTIONAL_VDB_COLUMN(PRIMARY_ALIGNMENT_ID),
267       INIT_VDB_COLUMN(TRIM_LEN),
268       INIT_VDB_COLUMN(TRIM_START),
269       INIT_OPTIONAL_VDB_COLUMN(NAME),
270       INIT_OPTIONAL_VDB_COLUMN_BACKUP(READ_FILTER, RD_FILTER)
271 {
272 }
273 
274 
CCSraDb_Impl(CVDBMgr & mgr,const string & csra_path,IIdMapper * ref_id_mapper,ERefIdType ref_id_type,const string & sra_id_part)275 CCSraDb_Impl::CCSraDb_Impl(CVDBMgr& mgr, const string& csra_path,
276                            IIdMapper* ref_id_mapper,
277                            ERefIdType ref_id_type,
278                            const string& sra_id_part)
279     : m_Mgr(mgr),
280       m_CSraPath(csra_path),
281       m_SraIdPart(sra_id_part),
282       m_RowSize(0)
283 {
284     // to avoid conflict with ref seq ids like gnl|SRA|SRR452437/scaffold_1
285     // we replace all slashes ('/') with backslashes ('\\')
286     replace(m_SraIdPart.begin(), m_SraIdPart.end(), '/', '\\');
287     try {
288         m_Db = CVDB(mgr, csra_path);
289     }
290     catch ( CSraException& exc ) {
291         // We want to open old SRA schema files too.
292         // In this case the DB object is not found, and the error is eDataError
293         // any other error cannot allow to work with this accession
294         if ( exc.GetErrCode() != CSraException::eDataError ) {
295             // report all other exceptions as is
296             throw;
297         }
298     }
299     CRef<SRefTableCursor> ref;
300     if ( m_Db ) {
301         try {
302             ref = Ref();
303         }
304         catch ( CSraException& exc ) {
305             // allow missing REFERENCE table
306             // any error except eNotFoundTable is not good
307             if ( exc.GetErrCode() != CSraException::eNotFoundTable ) {
308                 throw;
309             }
310         }
311     }
312     if ( ref ) {
313         SRefInfo info;
314         m_RowSize = *ref->MAX_SEQ_LEN(1);
315         auto name_row_range = ref->m_NAME.GetRowIdRange(ref->m_Cursor);
316         TVDBRowId end_row = name_row_range.first + name_row_range.second;
317         for ( TVDBRowId row = 1; row < end_row; ) {
318             // read range and names
319             CTempString name = ref->NAME(row);
320             ref->m_Cursor.SetParam("QUERY_SEQ_NAME", name);
321             info.m_Name = name;
322             info.m_SeqId = *ref->SEQ_ID(row);
323             x_MakeRefSeq_ids(info, ref_id_mapper, ref_id_type);
324             _ASSERT(!info.m_Seq_ids.empty());
325             CBioseq::TId::iterator best_id = info.m_Seq_ids.begin();
326             // put the best id at front as the main id
327             swap(info.GetMainSeq_id(), *best_id);
328             info.m_Seq_id_Handle =
329                 CSeq_id_Handle::GetHandle(*info.GetMainSeq_id());
330 
331             const SRefTableCursor::row_range_t& range = *ref->NAME_RANGE(row);
332             info.m_RowFirst = range.first;
333             info.m_RowLast = range.second;
334             info.m_SeqLength = kInvalidSeqPos;
335             info.m_Circular = *ref->CIRCULAR(row);
336             m_RefList.push_back(info);
337             row = info.m_RowLast+1;
338         }
339         Put(ref);
340         NON_CONST_ITERATE ( TRefInfoList, it, m_RefList ) {
341             m_RefMapByName.insert
342                 (TRefInfoMapByName::value_type(it->m_Name, it));
343             ITERATE ( CBioseq::TId, id_it, it->m_Seq_ids ) {
344                 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(**id_it);
345                 m_RefMapBySeq_id.insert
346                     (TRefInfoMapBySeq_id::value_type(idh, it));
347             }
348         }
349     }
350     else {
351         // we'll work with SEQUENCE only in this case
352         // check if the SEQUENCE table exists
353         // They are opened with direct table access,
354         // but the caller expects to see 'no data' error as eNotFoundDb.
355         try {
356             SeqTable();
357         }
358         catch ( CSraException& exc ) {
359             // missing table is reported as eNotFoundTable
360             if ( exc.GetErrCode() == CSraException::eNotFoundTable ) {
361                 // replace with eNotFoundDb, expected by caller
362                 NCBI_THROW3(CSraException, eNotFoundDb,
363                             exc.GetMsg(), exc.GetRC(), exc.GetParam());
364             }
365             // report all other excetions as is
366             throw;
367         }
368     }
369 }
370 
371 
~CCSraDb_Impl(void)372 CCSraDb_Impl::~CCSraDb_Impl(void)
373 {
374 }
375 
376 
MakeSraIdPart(EPathInIdType path_in_id_type,const string & dir_path,const string & csra_file)377 string CCSraDb::MakeSraIdPart(EPathInIdType path_in_id_type,
378                               const string& dir_path,
379                               const string& csra_file)
380 {
381     string sra_id_part;
382     if ( path_in_id_type == ePathInId_config ) {
383         path_in_id_type = s_GetPathInId()? ePathInId_yes: ePathInId_no;
384     }
385     if ( path_in_id_type == ePathInId_yes ) {
386         sra_id_part = CDirEntry::MakePath(dir_path, csra_file);
387     }
388     else {
389         string dir, name;
390         CDirEntry::SplitPath(csra_file, &dir, &name);
391         sra_id_part = CDirEntry::MakePath(dir, name);
392     }
393     return sra_id_part;
394 }
395 
396 
CCSraDb(CVDBMgr & mgr,const string & csra_path,IIdMapper * ref_id_mapper,ERefIdType ref_id_type)397 CCSraDb::CCSraDb(CVDBMgr& mgr,
398                  const string& csra_path,
399                  IIdMapper* ref_id_mapper,
400                  ERefIdType ref_id_type)
401     : CRef<CCSraDb_Impl>(new CCSraDb_Impl(mgr, csra_path,
402                                           ref_id_mapper,
403                                           ref_id_type,
404                                           csra_path))
405 {
406 }
407 
408 
CCSraDb(CVDBMgr & mgr,const string & csra_path,const string & sra_id_part,IIdMapper * ref_id_mapper,ERefIdType ref_id_type)409 CCSraDb::CCSraDb(CVDBMgr& mgr,
410                  const string& csra_path,
411                  const string& sra_id_part,
412                  IIdMapper* ref_id_mapper,
413                  ERefIdType ref_id_type)
414     : CRef<CCSraDb_Impl>(new CCSraDb_Impl(mgr, csra_path,
415                                           ref_id_mapper,
416                                           ref_id_type,
417                                           sra_id_part))
418 {
419 }
420 
421 
GetSpotGroups(TSpotGroups & spot_groups)422 void CCSraDb_Impl::GetSpotGroups(TSpotGroups& spot_groups)
423 {
424     CKMDataNode parent(CKMetadata(m_Db, "SEQUENCE"), "STATS/SPOT_GROUP");
425     CKNameList names(parent);
426     for ( uint32_t i = 0; i < names.size(); ++i ) {
427         const char* name = names[i];
428         if ( CKMDataNode(CKMDataNode(parent, name), "SPOT_COUNT").GetUint8() ) {
429             spot_groups.push_back(name);
430         }
431     }
432 }
433 
434 
x_MakeRefSeq_ids(SRefInfo & info,IIdMapper * ref_id_mapper,int ref_id_type)435 void CCSraDb_Impl::x_MakeRefSeq_ids(SRefInfo& info,
436                                     IIdMapper* ref_id_mapper,
437                                     int ref_id_type)
438 {
439     info.m_Seq_ids.clear();
440     CRef<CSeq_id> ret;
441     if ( ref_id_mapper ) {
442         // first try NAME -> Seq-id mapping
443         ret = new CSeq_id(CSeq_id::e_Local, info.m_Name);
444         try {
445             ref_id_mapper->MapObject(*ret);
446             if ( !ret->IsLocal() ) {
447                 // mapped
448                 info.m_Seq_ids.push_back(ret);
449                 return;
450             }
451             CRef<CSeq_id> old(new CSeq_id(CSeq_id::e_Local, info.m_Name));
452             if ( !ret->Equals(*old) ) {
453                 // mapped
454                 info.m_Seq_ids.push_back(ret);
455                 return;
456             }
457         }
458         catch ( CException& /*ignored*/ ) {
459         }
460     }
461     // second try SEQ_ID -> Seq-id mapping
462     if ( ref_id_type == CCSraDb::eRefId_SEQ_ID ) {
463         if ( info.m_SeqId.find('|') != NPOS ) {
464             // try multiple FASTA ids
465             try {
466                 CSeq_id::ParseIDs(info.m_Seq_ids, info.m_SeqId);
467             }
468             catch ( CSeqIdException& /*ignored*/ ) {
469                 info.m_Seq_ids.clear();
470             }
471         }
472         if ( info.m_Seq_ids.empty() ) {
473             try {
474                 ret = new CSeq_id(info.m_SeqId);
475             }
476             catch ( CSeqIdException& /*ignored*/ ) {
477                 ret = new CSeq_id(CSeq_id::e_Local, info.m_SeqId);
478             }
479         }
480     }
481     else {
482         ret = new CSeq_id(CSeq_id::e_General,
483                           "SRA",
484                           m_SraIdPart+'/'+info.m_Name);
485     }
486     if ( ret ) {
487         info.m_Seq_ids.push_back(ret);
488     }
489     if ( ref_id_mapper ) {
490         NON_CONST_ITERATE( CBioseq::TId, it, info.m_Seq_ids ) {
491             try {
492                 ref_id_mapper->MapObject(**it);
493             }
494             catch ( CException& /*ignored*/ ) {
495             }
496         }
497     }
498 }
499 
500 
OpenRefTable(void)501 void CCSraDb_Impl::OpenRefTable(void)
502 {
503     CFastMutexGuard guard(m_TableMutex);
504     if ( !m_RefTable ) {
505         m_RefTable = CVDBTable(m_Db, "REFERENCE");
506     }
507 }
508 
509 
OpenAlnTable(bool is_secondary)510 void CCSraDb_Impl::OpenAlnTable(bool is_secondary)
511 {
512     CFastMutexGuard guard(m_TableMutex);
513     if ( !m_AlnTable[is_secondary] ) {
514         m_AlnTable[is_secondary] = CVDBTable(m_Db, (is_secondary?
515                                                     "SECONDARY_ALIGNMENT":
516                                                     "PRIMARY_ALIGNMENT"));
517     }
518 }
519 
520 
OpenSeqTable(void)521 void CCSraDb_Impl::OpenSeqTable(void)
522 {
523     CFastMutexGuard guard(m_TableMutex);
524     if ( !m_SeqTable ) {
525         if ( m_Db ) {
526             m_SeqTable = CVDBTable(m_Db, "SEQUENCE");
527         }
528         else {
529             m_SeqTable = CVDBTable(m_Mgr, GetCSraPath());
530         }
531     }
532 }
533 
534 
Ref(void)535 CRef<CCSraDb_Impl::SRefTableCursor> CCSraDb_Impl::Ref(void)
536 {
537     CRef<SRefTableCursor> curs = m_Ref.Get();
538     if ( !curs ) {
539         curs = new SRefTableCursor(RefTable());
540     }
541     return curs;
542 }
543 
544 
Aln(bool is_secondary)545 CRef<CCSraDb_Impl::SAlnTableCursor> CCSraDb_Impl::Aln(bool is_secondary)
546 {
547     CRef<SAlnTableCursor> curs = m_Aln[is_secondary].Get();
548     if ( !curs ) {
549         curs = new SAlnTableCursor(AlnTable(is_secondary), is_secondary);
550     }
551     return curs;
552 }
553 
554 
Seq(void)555 CRef<CCSraDb_Impl::SSeqTableCursor> CCSraDb_Impl::Seq(void)
556 {
557     CRef<SSeqTableCursor> curs = m_Seq.Get();
558     if ( !curs ) {
559         curs = new SSeqTableCursor(SeqTable());
560     }
561     return curs;
562 }
563 
564 
Put(CRef<SRefTableCursor> & curs)565 void CCSraDb_Impl::Put(CRef<SRefTableCursor>& curs)
566 {
567     m_Ref.Put(curs);
568 }
569 
570 
Put(CRef<SAlnTableCursor> & curs)571 void CCSraDb_Impl::Put(CRef<SAlnTableCursor>& curs)
572 {
573     if ( curs ) {
574         m_Aln[curs->m_IsSecondary].Put(curs);
575     }
576 }
577 
578 
Put(CRef<SSeqTableCursor> & curs)579 void CCSraDb_Impl::Put(CRef<SSeqTableCursor>& curs)
580 {
581     m_Seq.Put(curs);
582 }
583 
584 
x_CalcSeqLength(const SRefInfo & info)585 void CCSraDb_Impl::x_CalcSeqLength(const SRefInfo& info)
586 {
587     CRef<SRefTableCursor> ref(Ref());
588     TSeqPos last_len = *ref->SEQ_LEN(info.m_RowLast);
589     info.m_SeqLength =
590         GetRowSize()*TSeqPos(info.m_RowLast-info.m_RowFirst)+last_len;
591     Put(ref);
592 }
593 
594 
CCSraRefSeqIterator(const CCSraDb & csra_db)595 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db)
596     : m_Db(csra_db),
597       m_Iter(csra_db->GetRefInfoList().begin())
598 {
599 }
600 
601 
CCSraRefSeqIterator(const CCSraDb & csra_db,const string & seq_id)602 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db,
603                                          const string& seq_id)
604 {
605     CCSraDb_Impl::TRefInfoMapBySeq_id::const_iterator iter =
606         csra_db->m_RefMapBySeq_id.find(CSeq_id_Handle::GetHandle(seq_id));
607     if ( iter != csra_db->m_RefMapBySeq_id.end() ) {
608         m_Db = csra_db;
609         m_Iter = iter->second;
610     }
611     else {
612         ERR_POST_X(1, "RefSeq \""<<seq_id<<"\" not found.");
613     }
614 }
615 
616 
CCSraRefSeqIterator(const CCSraDb & csra_db,const string & name,EByName)617 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db,
618                                          const string& name,
619                                          EByName)
620 {
621     CCSraDb_Impl::TRefInfoMapByName::const_iterator iter =
622         csra_db->m_RefMapByName.find(name);
623     if ( iter != csra_db->m_RefMapByName.end() ) {
624         m_Db = csra_db;
625         m_Iter = iter->second;
626     }
627 }
628 
629 
CCSraRefSeqIterator(const CCSraDb & csra_db,const CSeq_id_Handle & seq_id)630 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db,
631                                          const CSeq_id_Handle& seq_id)
632 {
633     CCSraDb_Impl::TRefInfoMapBySeq_id::const_iterator iter =
634         csra_db->m_RefMapBySeq_id.find(seq_id);
635     if ( iter != csra_db->m_RefMapBySeq_id.end() ) {
636         m_Db = csra_db;
637         m_Iter = iter->second;
638     }
639 }
640 
641 
GetInfo(void) const642 const CCSraDb_Impl::SRefInfo& CCSraRefSeqIterator::GetInfo(void) const
643 {
644     if ( !*this ) {
645         NCBI_THROW(CSraException, eInvalidState,
646                    "CCSraRefSeqIterator is invalid");
647     }
648     return *m_Iter;
649 }
650 
651 
IsCircular(void) const652 bool CCSraRefSeqIterator::IsCircular(void) const
653 {
654     return GetInfo().m_Circular;
655 }
656 
657 
GetSeqLength(void) const658 TSeqPos CCSraRefSeqIterator::GetSeqLength(void) const
659 {
660     const CCSraDb_Impl::SRefInfo& info = **this;
661     if ( info.m_SeqLength == kInvalidSeqPos ) {
662         GetDb().x_CalcSeqLength(info);
663     }
664     return info.m_SeqLength;
665 }
666 
667 
GetRowAlignCount(TVDBRowId row) const668 size_t CCSraRefSeqIterator::GetRowAlignCount(TVDBRowId row) const
669 {
670     return GetAlignCountAtPos(TSeqPos((row-GetInfo().m_RowFirst)*GetDb().GetRowSize()),
671                               fPrimaryAlign);
672 }
673 
674 
GetAlignCountAtPos(TSeqPos pos,TAlignType type) const675 size_t CCSraRefSeqIterator::GetAlignCountAtPos(TSeqPos pos,
676                                                TAlignType type) const
677 {
678     if ( pos >= GetSeqLength() ) {
679         NCBI_THROW(CSraException, eInvalidArg,
680                    "pos is beyond reference sequence");
681     }
682     TVDBRowId row = GetInfo().m_RowFirst + pos/GetDb().GetRowSize();
683     CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
684     size_t ret = 0;
685     if ( type & fPrimaryAlign ) {
686         ret += ref->PRIMARY_ALIGNMENT_IDS(row).size();
687     }
688     if ( (type & fSecondaryAlign) && ref->m_SECONDARY_ALIGNMENT_IDS ) {
689         ret += ref->SECONDARY_ALIGNMENT_IDS(row).size();
690     }
691     GetDb().Put(ref);
692     return ret;
693 }
694 
695 
GetCoverageGraph(void) const696 CRef<CSeq_graph> CCSraRefSeqIterator::GetCoverageGraph(void) const
697 {
698     CRef<CSeq_graph> graph(new CSeq_graph);
699 
700     const CCSraDb_Impl::SRefInfo& info = **this;
701     TSeqPos size = TSeqPos(info.m_RowLast-info.m_RowFirst+1);
702     CSeq_interval& loc_int = graph->SetLoc().SetInt();
703     loc_int.SetId(info.GetMainSeq_id().GetNCObject());
704     loc_int.SetFrom(0);
705     loc_int.SetTo(GetSeqLength()-1);
706     graph->SetNumval(size);
707     TSeqPos row_size = m_Db->GetRowSize();
708     graph->SetComp(row_size);
709     CByte_graph& b_graph = graph->SetGraph().SetByte();
710     b_graph.SetAxis(0);
711     CByte_graph::TValues& values = b_graph.SetValues();
712     values.resize(size);
713     CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
714     Uint1 max_q = 0;
715     for ( size_t i = 0; i < size; ++i ) {
716         TVDBRowId row = info.m_RowFirst+i;
717         Uint1 q = *ref->CGRAPH_HIGH(row);
718         values[i] = q;
719         if ( q > max_q ) {
720             max_q = q;
721         }
722         if ( row == info.m_RowLast ) {
723             TSeqPos len = *ref->SEQ_LEN(row);
724             loc_int.SetTo((size-1)*row_size+len);
725         }
726     }
727     b_graph.SetMin(0);
728     b_graph.SetMax(max_q);
729     GetDb().Put(ref);
730     return graph;
731 }
732 
733 
GetCoverageAnnot(void) const734 CRef<CSeq_annot> CCSraRefSeqIterator::GetCoverageAnnot(void) const
735 {
736     CRef<CSeq_annot> annot = GetSeq_annot();
737     annot->SetData().SetGraph().push_back(GetCoverageGraph());
738     return annot;
739 }
740 
741 
GetCoverageAnnot(const string & annot_name) const742 CRef<CSeq_annot> CCSraRefSeqIterator::GetCoverageAnnot(const string& annot_name) const
743 {
744     CRef<CSeq_annot> annot = GetSeq_annot(annot_name);
745     annot->SetData().SetGraph().push_back(GetCoverageGraph());
746     return annot;
747 }
748 
749 
750 CRef<CSeq_annot>
MakeSeq_annot(const string & annot_name)751 CCSraRefSeqIterator::MakeSeq_annot(const string& annot_name)
752 {
753     CRef<CSeq_annot> annot(new CSeq_annot);
754     annot->SetData().SetAlign();
755     CRef<CAnnotdesc> desc(new CAnnotdesc);
756     desc->SetName(annot_name);
757     annot->SetDesc().Set().push_back(desc);
758     return annot;
759 }
760 
761 
762 CRef<CSeq_annot>
x_GetSeq_annot(const string * annot_name) const763 CCSraRefSeqIterator::x_GetSeq_annot(const string* annot_name) const
764 {
765     CRef<CSeq_annot> annot(new CSeq_annot);
766     annot->SetData().SetAlign();
767     if ( annot_name ) {
768         CRef<CAnnotdesc> desc(new CAnnotdesc);
769         desc->SetName(*annot_name);
770         annot->SetDesc().Set().push_back(desc);
771     }
772     return annot;
773 }
774 
775 
GetRefBioseq(ELoadData load) const776 CRef<CBioseq> CCSraRefSeqIterator::GetRefBioseq(ELoadData load) const
777 {
778     CRef<CBioseq> seq(new CBioseq);
779     const CCSraDb_Impl::SRefInfo& info = GetInfo();
780     seq->SetId() = GetRefSeq_ids();
781     const string& csra_path = m_Db->GetCSraPath();
782     if ( CVPath::IsPlainAccession(csra_path) ) {
783         // SRR accession
784         CRef<CSeqdesc> desc(new CSeqdesc);
785         string title =
786             info.m_Name +
787             " - reference sequence from ShortRead RUN " +
788             csra_path;
789         desc->SetTitle(title);
790         seq->SetDescr().Set().push_back(desc);
791     }
792     CSeq_inst& inst = seq->SetInst();
793     inst.SetRepr(inst.eRepr_delta);
794     inst.SetMol(inst.eMol_na);
795     inst.SetLength(GetSeqLength());
796     inst.SetTopology
797         (info.m_Circular? inst.eTopology_circular: inst.eTopology_linear);
798     TLiterals literals;
799     GetRefLiterals(literals, TRange::GetWhole(), load);
800     NON_CONST_ITERATE ( TLiterals, it, literals ) {
801         CRef<CDelta_seq> seq(new CDelta_seq);
802         seq->SetLiteral(**it);
803         inst.SetExt().SetDelta().Set().push_back(seq);
804     }
805     return seq;
806 }
807 
808 
GetRefLiterals(TLiterals & literals,TRange range,ELoadData load) const809 void CCSraRefSeqIterator::GetRefLiterals(TLiterals& literals,
810                                          TRange range,
811                                          ELoadData load) const
812 {
813     const CCSraDb_Impl::SRefInfo& info = GetInfo();
814     TSeqPos segment_len = GetDb().GetRowSize();
815     CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
816     for ( TVDBRowId row = info.m_RowFirst+range.GetFrom()/segment_len;
817           row <= info.m_RowLast; ++row ) {
818         TSeqPos pos = TSeqPos(row-info.m_RowFirst)*segment_len;
819         if ( pos >= range.GetToOpen() ) {
820             break;
821         }
822         CRef<CSeq_literal> literal(new CSeq_literal);
823         if ( row == info.m_RowLast ) {
824             segment_len = *ref->SEQ_LEN(row);
825         }
826         literal->SetLength(segment_len);
827         if ( load == eLoadData ) {
828             literal->SetSeq_data().SetIupacna().Set() = *ref->READ(row);
829         }
830         literals.push_back(literal);
831     }
832     GetDb().Put(ref);
833 }
834 
835 
GetAlnOverStarts(void) const836 const vector<TSeqPos>& CCSraRefSeqIterator::GetAlnOverStarts(void) const
837 {
838     const CCSraDb_Impl::SRefInfo& info = GetInfo();
839     if ( info.m_AlnOverStarts.empty() ) {
840         CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
841         if ( ref->m_OVERLAP_REF_POS ) {
842             CFastMutexGuard guard(GetDb().m_OverlapMutex);
843             if ( info.m_AlnOverStarts.empty() ) {
844                 TSeqPos segment_len = GetDb().GetRowSize();
845                 vector<TSeqPos> pp;
846                 // collect overlaps
847                 for ( TVDBRowId row = info.m_RowFirst; row <= info.m_RowLast; ++row ) {
848                     CVDBValueFor<INSDC_coord_zero> vv = ref->OVERLAP_REF_POS(row);
849                     TSeqPos pos = TSeqPos((row-info.m_RowFirst)*segment_len);
850                     for ( size_t i = 0; i < 2 && i < vv.size(); ++i ) {
851                         TSeqPos p = vv[i];
852                         if ( p && p < pos ) {
853                             pos = p;
854                         }
855                     }
856                     pp.push_back(pos);
857                 }
858                 if ( !pp.empty() ) {
859                     // fix overlaps for alignments starting at pos 0
860                     size_t max_aln_end = 1;
861                     for ( int secondary = 0; secondary < 2; ++secondary ) {
862                         CVDBValueFor<TVDBRowId> aln_ids;
863                         if ( secondary ) {
864                             if ( !ref->m_SECONDARY_ALIGNMENT_IDS ) {
865                                 break;
866                             }
867                             aln_ids = ref->SECONDARY_ALIGNMENT_IDS(info.m_RowFirst);
868                         }
869                         else {
870                             aln_ids = ref->PRIMARY_ALIGNMENT_IDS(info.m_RowFirst);
871                         }
872                         if ( aln_ids.empty() ) {
873                             continue;
874                         }
875                         CRef<CCSraDb_Impl::SAlnTableCursor> aln(GetDb().Aln(secondary));
876                         for ( auto aln_id : aln_ids ) {
877                             INSDC_coord_zero pos = *aln->REF_POS(aln_id);
878                             if ( pos != 0 ) {
879                                 // check only alignments that start at the very beginning
880                                 break;
881                             }
882                             size_t end = pos + *aln->REF_LEN(aln_id);
883                             max_aln_end = max(max_aln_end, end);
884                         }
885                         GetDb().Put(aln);
886                     }
887                     pp[min(pp.size()-1, (max_aln_end-1)/segment_len)] = 0;
888                 }
889                 // propagate overlaps
890                 for ( size_t i = pp.size(); i-- > 1; ) {
891                     pp[i-1] = min(pp[i-1], pp[i]);
892                 }
893                 swap(const_cast<vector<TSeqPos>&>(info.m_AlnOverStarts), pp);
894             }
895         }
896         GetDb().Put(ref);
897     }
898     return info.m_AlnOverStarts;
899 }
900 
901 
GetAlnOverToOpen(TRange range) const902 TSeqPos CCSraRefSeqIterator::GetAlnOverToOpen(TRange range) const
903 {
904     const vector<TSeqPos>& pp = GetAlnOverStarts();
905     TSeqPos segment_len = GetDb().GetRowSize();
906     TSeqPos seg = range.GetToOpen()/segment_len;
907     if ( pp.empty() ) {
908         ++seg;
909     }
910     else {
911         while ( seg+1 < pp.size() && pp[seg+1] < range.GetToOpen() ) {
912             ++seg;
913         }
914     }
915     return min(GetSeqLength(), (seg+1)*segment_len);
916 }
917 
918 
GetEstimatedNumberOfAlignments(void) const919 Uint8 CCSraRefSeqIterator::GetEstimatedNumberOfAlignments(void) const
920 {
921     TVDBRowId first_align_id = 0, last_align_id = 0;
922     const CCSraDb_Impl::SRefInfo& info = GetInfo();
923     CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
924     for ( TVDBRowId row = info.m_RowFirst; row <= info.m_RowLast; ++row ) {
925         CVDBValueFor<TVDBRowId> ids = ref->PRIMARY_ALIGNMENT_IDS(row);
926         size_t count = ids.size();
927         if ( count ) {
928             first_align_id = *min_element(ids.data(), ids.data()+count);
929             if ( first_align_id ) {
930                 break;
931             }
932         }
933     }
934     if ( !first_align_id ) {
935         GetDb().Put(ref);
936         return 0;
937     }
938     for ( TVDBRowId row = info.m_RowLast; row >= info.m_RowFirst; --row ) {
939         CVDBValueFor<TVDBRowId> ids = ref->PRIMARY_ALIGNMENT_IDS(row);
940         size_t count = ids.size();
941         if ( count ) {
942             last_align_id = *max_element(ids.data(), ids.data()+count);
943             if ( last_align_id ) {
944                 break;
945             }
946         }
947     }
948     GetDb().Put(ref);
949     return last_align_id-first_align_id+1;
950 }
951 
952 
953 /////////////////////////////////////////////////////////////////////////////
954 // CCSraAlignIterator
955 /////////////////////////////////////////////////////////////////////////////
956 
Reset(void)957 void CCSraAlignIterator::Reset(void)
958 {
959     if ( m_Ref ) {
960         GetDb().Put(m_Ref);
961     }
962     if ( m_Aln ) {
963         GetDb().Put(m_Aln);
964     }
965     m_RefIter = CCSraRefSeqIterator();
966     m_Error = RC_NO_MORE_ALIGNMENTS;
967     m_ArgRefPos = m_ArgRefLast = 0;
968     m_CurRefPos = m_CurRefLen = 0;
969     m_RefRowNext = m_RefRowLast = 0;
970     m_AlnRowIsSecondary = false;
971     m_SearchMode = eSearchByOverlap;
972     m_AlignType = fAnyAlign;
973     m_AlnRowCur = m_AlnRowEnd = 0;
974 }
975 
976 
CCSraAlignIterator(const CCSraAlignIterator & iter)977 CCSraAlignIterator::CCSraAlignIterator(const CCSraAlignIterator& iter)
978 {
979     *this = iter;
980 }
981 
982 
operator =(const CCSraAlignIterator & iter)983 CCSraAlignIterator& CCSraAlignIterator::operator=(const CCSraAlignIterator& iter)
984 {
985     if ( this != &iter ) {
986         Reset();
987         m_Ref = iter.m_Ref;
988         m_Aln = iter.m_Aln;
989         m_RefIter = iter.m_RefIter;
990         m_Error = iter.m_Error;
991         m_ArgRefPos = iter.m_ArgRefPos;
992         m_ArgRefLast = iter.m_ArgRefLast;
993         m_CurRefPos = iter.m_CurRefPos;
994         m_CurRefLen = iter.m_CurRefLen;
995         m_RefRowNext = iter.m_RefRowNext;
996         m_RefRowLast = iter.m_RefRowLast;
997         m_AlnRowIsSecondary = iter.m_AlnRowIsSecondary;
998         m_SearchMode = iter.m_SearchMode;
999         m_AlignType = iter.m_AlignType;
1000         if ( iter.m_AlnRowCur == &iter.m_RefRowNext ) {
1001             m_AlnRowCur = &m_RefRowNext;
1002             m_AlnRowEnd = m_AlnRowCur+1;
1003         }
1004         else {
1005             m_AlnRowCur = iter.m_AlnRowCur;
1006             m_AlnRowEnd = iter.m_AlnRowEnd;
1007         }
1008     }
1009     return *this;
1010 }
1011 
1012 
CCSraAlignIterator(void)1013 CCSraAlignIterator::CCSraAlignIterator(void)
1014     : m_Error(RC_NO_MORE_ALIGNMENTS)
1015 {
1016 }
1017 
1018 
CCSraAlignIterator(const CCSraDb & csra_db,const string & ref_id,TSeqPos ref_pos,TSeqPos window,ESearchMode search_mode,TAlignType align_type)1019 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1020                                        const string& ref_id,
1021                                        TSeqPos ref_pos,
1022                                        TSeqPos window,
1023                                        ESearchMode search_mode,
1024                                        TAlignType align_type)
1025     : m_RefIter(csra_db, CSeq_id_Handle::GetHandle(ref_id)),
1026       m_Ref(m_RefIter.GetDb().Ref()),
1027       m_Error(RC_NO_MORE_ALIGNMENTS),
1028       m_ArgRefPos(0),
1029       m_ArgRefLast(0)
1030 {
1031     Select(ref_pos, window, search_mode, align_type);
1032 }
1033 
1034 
CCSraAlignIterator(const CCSraDb & csra_db,const CSeq_id_Handle & ref_id,TSeqPos ref_pos,TSeqPos window,ESearchMode search_mode,TAlignType align_type)1035 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1036                                        const CSeq_id_Handle& ref_id,
1037                                        TSeqPos ref_pos,
1038                                        TSeqPos window,
1039                                        ESearchMode search_mode,
1040                                        TAlignType align_type)
1041     : m_RefIter(csra_db, ref_id),
1042       m_Ref(m_RefIter.GetDb().Ref()),
1043       m_Error(RC_NO_MORE_ALIGNMENTS),
1044       m_ArgRefPos(0),
1045       m_ArgRefLast(0)
1046 {
1047     Select(ref_pos, window, search_mode, align_type);
1048 }
1049 
1050 
CCSraAlignIterator(const CCSraDb & csra_db,const CSeq_id_Handle & ref_id,TSeqPos ref_pos,TSeqPos window,TAlignType align_type)1051 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1052                                        const CSeq_id_Handle& ref_id,
1053                                        TSeqPos ref_pos,
1054                                        TSeqPos window,
1055                                        TAlignType align_type)
1056     : m_RefIter(csra_db, ref_id),
1057       m_Ref(m_RefIter.GetDb().Ref()),
1058       m_Error(RC_NO_MORE_ALIGNMENTS),
1059       m_ArgRefPos(0),
1060       m_ArgRefLast(0)
1061 {
1062     Select(ref_pos, window, eSearchByOverlap, align_type);
1063 }
1064 
1065 
CCSraAlignIterator(const CCSraDb & csra_db,TAlignType align_type,TVDBRowId align_row)1066 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1067                                        TAlignType align_type,
1068                                        TVDBRowId align_row)
1069 {
1070     switch ( align_type ) {
1071     case fPrimaryAlign:
1072         m_AlnRowIsSecondary = false;
1073         break;
1074     case fSecondaryAlign:
1075         m_AlnRowIsSecondary = true;
1076         break;
1077     default:
1078         NCBI_THROW(CSraException, eInvalidArg,
1079                    "unspecified alignment type");
1080     }
1081     m_SearchMode = eSearchByStart;
1082 
1083     m_Aln = csra_db.GetNCObject().Aln(m_AlnRowIsSecondary);
1084     m_RefIter = CCSraRefSeqIterator(csra_db, *m_Aln->REF_NAME(align_row), CCSraRefSeqIterator::eByName);
1085 
1086     m_ArgRefPos = m_ArgRefLast = 0;
1087     m_CurRefPos = *m_Aln->REF_POS(align_row);
1088     m_CurRefLen = *m_Aln->REF_LEN(align_row);
1089 
1090     m_RefRowNext = m_RefRowLast = align_row;
1091     m_AlnRowCur = &m_RefRowNext;
1092     m_AlnRowEnd = m_AlnRowCur+1;
1093 
1094     m_Error = 0;
1095 }
1096 
1097 
~CCSraAlignIterator(void)1098 CCSraAlignIterator::~CCSraAlignIterator(void)
1099 {
1100     Reset();
1101 }
1102 
1103 
Select(TSeqPos ref_pos,TSeqPos window,ESearchMode search_mode,TAlignType align_type)1104 void CCSraAlignIterator::Select(TSeqPos ref_pos,
1105                                 TSeqPos window,
1106                                 ESearchMode search_mode,
1107                                 TAlignType align_type)
1108 {
1109     m_Error = RC_NO_MORE_ALIGNMENTS;
1110     m_ArgRefPos = m_ArgRefLast = 0;
1111     m_SearchMode = search_mode;
1112     m_AlignType = align_type;
1113 
1114     m_RefRowNext = m_RefRowLast = 0;
1115     m_AlnRowIsSecondary = true;
1116     m_AlnRowCur = m_AlnRowEnd = 0;
1117 
1118     if ( !m_Ref ) {
1119         return;
1120     }
1121 
1122     m_ArgRefPos = ref_pos;
1123     m_ArgRefLast = ref_pos+window-1 < ref_pos? kInvalidSeqPos: ref_pos+window-1;
1124 
1125     TSeqPos row_size = GetDb().GetRowSize();
1126     const CCSraDb_Impl::SRefInfo& info = *m_RefIter;
1127     TSeqPos start_pos;
1128     if ( search_mode == eSearchByOverlap ) {
1129         const vector<TSeqPos>& pp = m_RefIter.GetAlnOverStarts();
1130         if ( pp.empty() ) {
1131             // max overlap is 1 row
1132             start_pos = max(ref_pos, row_size)-row_size;
1133         }
1134         else {
1135             start_pos = pp[ref_pos/row_size];
1136         }
1137     }
1138     else {
1139         start_pos = ref_pos;
1140     }
1141 
1142     m_RefRowNext = info.m_RowFirst + start_pos/row_size;
1143     m_RefRowLast = min(info.m_RowLast, info.m_RowFirst + m_ArgRefLast/row_size);
1144     m_AlnRowCur = m_AlnRowEnd = 0;
1145     m_AlnRowIsSecondary = true;
1146     x_Settle();
1147 }
1148 
1149 
x_Settle(void)1150 void CCSraAlignIterator::x_Settle(void)
1151 {
1152     for ( ;; ) {
1153         if ( m_AlnRowCur == m_AlnRowEnd ) {
1154             if ( m_RefRowNext > m_RefRowLast ) {
1155                 m_Error = RC_NO_MORE_ALIGNMENTS;
1156                 return;
1157             }
1158 
1159             if ( m_AlnRowIsSecondary ) {
1160                 m_AlnRowIsSecondary = false;
1161                 if ( !(m_AlignType & fPrimaryAlign) ) {
1162                     m_AlnRowCur = m_AlnRowEnd = 0;
1163                 }
1164                 else {
1165                     CVDBValueFor<TVDBRowId> ids =
1166                         m_Ref->PRIMARY_ALIGNMENT_IDS(m_RefRowNext);
1167                     m_AlnRowCur = ids.data();
1168                     m_AlnRowEnd = m_AlnRowCur + ids.size();
1169                 }
1170             }
1171             else if ( m_Ref->m_SECONDARY_ALIGNMENT_IDS ) {
1172                 m_AlnRowIsSecondary = true;
1173                 if ( !(m_AlignType & fSecondaryAlign) ) {
1174                     m_AlnRowCur = m_AlnRowEnd = 0;
1175                 }
1176                 else {
1177                     CVDBValueFor<TVDBRowId> ids =
1178                         m_Ref->SECONDARY_ALIGNMENT_IDS(m_RefRowNext);
1179                     m_AlnRowCur = ids.data();
1180                     m_AlnRowEnd = m_AlnRowCur + ids.size();
1181                 }
1182                 ++m_RefRowNext;
1183             }
1184             else {
1185                 m_AlnRowIsSecondary = true;
1186                 m_AlnRowCur = m_AlnRowEnd = 0;
1187                 ++m_RefRowNext;
1188             }
1189             if ( m_AlnRowCur != m_AlnRowEnd ) {
1190                 if ( !m_Aln || m_Aln->m_IsSecondary != m_AlnRowIsSecondary ) {
1191                     m_RefIter.GetDb().Put(m_Aln);
1192                     m_Aln = m_RefIter.GetDb().Aln(m_AlnRowIsSecondary);
1193                 }
1194             }
1195         }
1196         else {
1197             TVDBRowId row = *m_AlnRowCur;
1198             TSeqPos pos = *m_Aln->REF_POS(row);
1199             if ( pos > m_ArgRefLast ) {
1200                 // completely after
1201                 ++m_AlnRowCur;
1202                 continue;
1203             }
1204             if ( m_SearchMode == eSearchByStart && pos < m_ArgRefPos ) {
1205                 // starts before
1206                 ++m_AlnRowCur;
1207                 continue;
1208             }
1209             TSeqPos len = *m_Aln->REF_LEN(row);
1210             TSeqPos end = pos + len;
1211             if ( end <= m_ArgRefPos ) {
1212                 // completely before
1213                 ++m_AlnRowCur;
1214                 continue;
1215             }
1216             m_CurRefPos = pos;
1217             m_CurRefLen = len;
1218             m_Error = 0;
1219             return;
1220         }
1221     }
1222 }
1223 
1224 
GetAlignmentId(void) const1225 TVDBRowId CCSraAlignIterator::GetAlignmentId(void) const
1226 {
1227     return *m_AlnRowCur;
1228 }
1229 
1230 
GetRefSeqId(void) const1231 CTempString CCSraAlignIterator::GetRefSeqId(void) const
1232 {
1233     return m_Aln->REF_SEQ_ID(*m_AlnRowCur);
1234 }
1235 
1236 
GetRefMinusStrand(void) const1237 bool CCSraAlignIterator::GetRefMinusStrand(void) const
1238 {
1239     return m_Aln->REF_ORIENTATION(*m_AlnRowCur);
1240 }
1241 
1242 
GetSpotGroup(void) const1243 CTempString CCSraAlignIterator::GetSpotGroup(void) const
1244 {
1245     return m_Aln->SPOT_GROUP(*m_AlnRowCur);
1246 }
1247 
1248 
IsSetName(void) const1249 bool CCSraAlignIterator::IsSetName(void) const
1250 {
1251     return m_Aln->m_NAME && !GetName().empty();
1252 }
1253 
1254 
GetName(void) const1255 CTempString CCSraAlignIterator::GetName(void) const
1256 {
1257     return m_Aln->NAME(*m_AlnRowCur);
1258 }
1259 
1260 
GetReadFilter(void) const1261 INSDC_read_filter CCSraAlignIterator::GetReadFilter(void) const
1262 {
1263     if ( !m_Aln->m_READ_FILTER ) {
1264         return SRA_READ_FILTER_PASS;
1265     }
1266     return m_Aln->READ_FILTER(*m_AlnRowCur);
1267 }
1268 
1269 
GetCIGAR(void) const1270 CTempString CCSraAlignIterator::GetCIGAR(void) const
1271 {
1272     return m_Aln->CIGAR_SHORT(*m_AlnRowCur);
1273 }
1274 
1275 
GetCIGARLong(void) const1276 CTempString CCSraAlignIterator::GetCIGARLong(void) const
1277 {
1278     return m_Aln->CIGAR_LONG(*m_AlnRowCur);
1279 }
1280 
1281 
GetMismatchRead(void) const1282 CTempString CCSraAlignIterator::GetMismatchRead(void) const
1283 {
1284     return m_Aln->MISMATCH_READ(*m_AlnRowCur);
1285 }
1286 
1287 
GetMismatchRaw(void) const1288 CTempString CCSraAlignIterator::GetMismatchRaw(void) const
1289 {
1290     return m_Aln->MISMATCH(*m_AlnRowCur);
1291 }
1292 
1293 
GetShortId1(void) const1294 TVDBRowId CCSraAlignIterator::GetShortId1(void) const
1295 {
1296     return m_Aln->SEQ_SPOT_ID(*m_AlnRowCur);
1297 }
1298 
1299 
GetShortId2(void) const1300 INSDC_coord_one CCSraAlignIterator::GetShortId2(void) const
1301 {
1302     return m_Aln->SEQ_READ_ID(*m_AlnRowCur);
1303 }
1304 
1305 
MakeShortReadId(TVDBRowId id1,INSDC_coord_one id2) const1306 CRef<CSeq_id> CCSraDb_Impl::MakeShortReadId(TVDBRowId id1,
1307                                             INSDC_coord_one id2) const
1308 {
1309     CRef<CSeq_id> ret(new CSeq_id);
1310     CDbtag& dbtag = ret->SetGeneral();
1311     dbtag.SetDb("SRA");
1312     SetShortReadId(dbtag.SetTag().SetStr(), id1, id2);
1313     return ret;
1314 }
1315 
1316 
SetShortReadId(string & str,TVDBRowId id1,INSDC_coord_one id2) const1317 void CCSraDb_Impl::SetShortReadId(string& str,
1318                                   TVDBRowId id1,
1319                                   INSDC_coord_one id2) const
1320 {
1321     ostringstream s;
1322     s << m_SraIdPart << '.' << id1 << '.' << id2;
1323     str = s.str();
1324 }
1325 
1326 
GetShortSeq_id(void) const1327 CRef<CSeq_id> CCSraAlignIterator::GetShortSeq_id(void) const
1328 {
1329     return GetDb().MakeShortReadId(GetShortId1(), GetShortId2());
1330 }
1331 
1332 
GetMateShortSeq_id(void) const1333 CRef<CSeq_id> CCSraAlignIterator::GetMateShortSeq_id(void) const
1334 {
1335     CVDBValueFor<TVDBRowId> value = m_Aln->MATE_ALIGN_ID(*m_AlnRowCur);
1336     if ( value.empty() ) {
1337         return null;
1338     }
1339     TVDBRowId mate_id = *value;
1340     return GetDb().MakeShortReadId(m_Aln->SEQ_SPOT_ID(mate_id),
1341                                    m_Aln->SEQ_READ_ID(mate_id));
1342 }
1343 
1344 
GetShortPos(void) const1345 TSeqPos CCSraAlignIterator::GetShortPos(void) const
1346 {
1347     bool has_off = m_Aln->HAS_REF_OFFSET(*m_AlnRowCur)[0] == '1';
1348     if ( !has_off ) {
1349         return 0;
1350     }
1351     if ( m_Aln->m_REF_OFFSET_TYPE &&
1352          m_Aln->REF_OFFSET_TYPE(*m_AlnRowCur)[0] != 1 ) {
1353         return 0;
1354     }
1355     return -m_Aln->REF_OFFSET(*m_AlnRowCur)[0];
1356 }
1357 
1358 
GetShortLen(void) const1359 TSeqPos CCSraAlignIterator::GetShortLen(void) const
1360 {
1361     return m_Aln->SPOT_LEN(*m_AlnRowCur);
1362 }
1363 
1364 
GetMapQuality(void) const1365 int CCSraAlignIterator::GetMapQuality(void) const
1366 {
1367     return m_Aln->MAPQ(*m_AlnRowCur);
1368 }
1369 
1370 
MakeFullMismatch(string & ret,CTempString cigar,CTempString mismatch) const1371 void CCSraAlignIterator::MakeFullMismatch(string& ret,
1372                                           CTempString cigar,
1373                                           CTempString mismatch) const
1374 {
1375     CVDBValueFor<char> has_mismatch = m_Aln->HAS_MISMATCH(*m_AlnRowCur);
1376     ret.reserve(mismatch.size()+2);
1377     const char* cptr = cigar.data();
1378     const char* cend = cigar.end();
1379     const char* mptr = mismatch.data();
1380     const char* mend = mismatch.end();
1381 
1382     TSeqPos seqpos = 0;
1383     while ( cptr != cend ) {
1384         char type = 0;
1385         TSeqPos seglen = 0;
1386         while ( cptr != cend ) {
1387             char c = *cptr++;
1388             if ( c >= '0' && c <= '9' ) {
1389                 seglen = seglen*10+(c-'0');
1390             }
1391             else {
1392                 type = c;
1393                 break;
1394             }
1395         }
1396         if ( !type ) {
1397             NCBI_THROW_FMT(CSraException, eDataError,
1398                            "Incomplete CIGAR: " << cigar);
1399         }
1400         if ( seglen == 0 ) {
1401             NCBI_THROW_FMT(CSraException, eDataError,
1402                            "Bad CIGAR length: " << type <<
1403                            "0 in " << cigar);
1404         }
1405         if ( type == '=' ) {
1406             // match
1407             seqpos += seglen;
1408         }
1409         else if ( type == 'X' ) {
1410             // mismatch
1411             if ( mptr + seglen > mend ) {
1412                 NCBI_THROW_FMT(CSraException, eDataError,
1413                                "CIGAR mismatch segment beyond MISMATCH: "
1414                                <<cigar<<" vs "<<mismatch);
1415             }
1416             ret.append(mptr, seglen);
1417             mptr += seglen;
1418             seqpos += seglen;
1419         }
1420         else if ( type == 'I' || type == 'S' ) {
1421             if ( seqpos + seglen > has_mismatch.size() ) {
1422                 NCBI_THROW_FMT(CSraException, eDataError,
1423                                "CIGAR insert segment beyond HAS_MISMATCH: "
1424                                <<cigar<<" vs "<<mismatch);
1425             }
1426             for ( TSeqPos i = 0; i < seglen; ++i, ++seqpos ) {
1427                 char c;
1428                 if ( has_mismatch[seqpos] == '1' ) {
1429                     if ( mptr == mend ) {
1430                         NCBI_THROW_FMT(CSraException, eDataError,
1431                                        "CIGAR insert/mismatch segment beyond MISMATCH: "
1432                                        <<cigar<<" vs "<<mismatch);
1433 
1434                     }
1435                     c = *mptr++;
1436                 }
1437                 else {
1438                     c = '=';
1439                 }
1440                 ret += c;
1441             }
1442         }
1443         else if ( type == 'D' || type == 'N' || type == 'P' ) {
1444             continue;
1445         }
1446         else {
1447             NCBI_THROW_FMT(CSraException, eDataError,
1448                            "Bad CIGAR char: " <<type<< " in " <<cigar);
1449         }
1450     }
1451     if ( mptr != mend ) {
1452         NCBI_THROW_FMT(CSraException, eDataError,
1453                        "Extra mismatch chars: " <<cigar<< " vs " <<mismatch);
1454     }
1455 }
1456 
1457 
1458 CCSraAlignIterator::SCreateCache&
x_GetCreateCache(void) const1459 CCSraAlignIterator::x_GetCreateCache(void) const
1460 {
1461     if ( !m_CreateCache ) {
1462         m_CreateCache = new SCreateCache;
1463     }
1464     return *m_CreateCache;
1465 }
1466 
1467 
GetMatchAlign(void) const1468 CRef<CSeq_align> CCSraAlignIterator::GetMatchAlign(void) const
1469 {
1470     SCreateCache& cache = x_GetCreateCache();
1471 
1472     CRef<CSeq_align> align(new CSeq_align);
1473     align->SetType(CSeq_align::eType_diags);
1474     CDense_seg& denseg = align->SetSegs().SetDenseg();
1475     CDense_seg::TIds& ids = denseg.SetIds();
1476     ids.reserve(2);
1477     ids.push_back(GetRefSeq_id());
1478     ids.push_back(GetShortSeq_id());
1479 
1480     TSeqPos refpos = GetRefSeqPos();
1481     TSeqPos seqpos = GetShortPos();
1482     CTempString cigar = GetCIGAR();
1483     const char* ptr = cigar.data();
1484     const char* end = cigar.end();
1485 
1486     TSeqPos segcount = 0;
1487     for ( const char* p = ptr; p != end; ++p ) {
1488         char c = *p;
1489         if ( c < '0' || c > '9' ) {
1490             if ( c == 'S' || c == 'N' ) {
1491                 // soft clipping already accounted in seqpos
1492                 // skip N segments
1493                 continue;
1494             }
1495             ++segcount;
1496         }
1497     }
1498 
1499     CDense_seg::TStarts& starts = denseg.SetStarts();
1500     CDense_seg::TLens& lens = denseg.SetLens();
1501     starts.reserve(2*segcount);
1502     lens.reserve(segcount);
1503 
1504     TSeqPos insert_size = 0;
1505     TSeqPos refstart = 0, seqstart = 0;
1506     while ( ptr != end ) {
1507         char type = 0;
1508         TSeqPos seglen = 0;
1509         while ( ptr != end ) {
1510             char c = *ptr++;
1511             if ( c >= '0' && c <= '9' ) {
1512                 seglen = seglen*10+(c-'0');
1513             }
1514             else {
1515                 type = c;
1516                 break;
1517             }
1518         }
1519         if ( !type ) {
1520             NCBI_THROW_FMT(CSraException, eDataError,
1521                            "Incomplete CIGAR: " << cigar);
1522         }
1523         if ( seglen == 0 ) {
1524             NCBI_THROW_FMT(CSraException, eDataError,
1525                            "Bad CIGAR length: " << type <<
1526                            "0 in " << cigar);
1527         }
1528         if ( type == 'M' || type == '=' || type == 'X' ) {
1529             // match
1530             refstart = refpos;
1531             refpos += seglen;
1532             seqstart = seqpos;
1533             seqpos += seglen;
1534         }
1535         else if ( type == 'I' || type == 'S' ) {
1536             insert_size += seglen;
1537             if ( type == 'S' ) {
1538                 // soft clipping already accounted in seqpos
1539                 continue;
1540             }
1541             refstart = kInvalidSeqPos;
1542             seqstart = seqpos;
1543             seqpos += seglen;
1544         }
1545         else if ( type == 'D' || type == 'N' ) {
1546             // delete
1547             refstart = refpos;
1548             refpos += seglen;
1549             if ( type == 'N' ) {
1550                 // skip N segments
1551                 continue;
1552             }
1553             seqstart = kInvalidSeqPos;
1554         }
1555         else if ( type != 'P' ) {
1556             NCBI_THROW_FMT(CSraException, eDataError,
1557                            "Bad CIGAR char: " <<type<< " in " <<cigar);
1558         }
1559         starts.push_back(refstart);
1560         starts.push_back(seqstart);
1561         lens.push_back(seglen);
1562     }
1563     if ( GetRefMinusStrand() ) {
1564         CDense_seg::TStrands& strands = denseg.SetStrands();
1565         strands.reserve(2*segcount);
1566         TSeqPos end = GetShortLen();
1567         for ( size_t i = 0; i < segcount; ++i ) {
1568             strands.push_back(eNa_strand_plus);
1569             strands.push_back(eNa_strand_minus);
1570             TSeqPos pos = starts[i*2+1];
1571             TSeqPos len = lens[i];
1572             if ( pos != kInvalidSeqPos ) {
1573                 starts[i*2+1] = end - (pos + len);
1574             }
1575         }
1576     }
1577 
1578     denseg.SetNumseg(segcount);
1579 
1580     if ( s_GetExplicitMateInfoParam() ) {
1581         CVDBValueFor<TVDBRowId> mate_id_v = m_Aln->MATE_ALIGN_ID(*m_AlnRowCur);
1582         if ( !mate_id_v.empty() ) {
1583             _ASSERT(mate_id_v.size() == 1);
1584             TVDBRowId mate_id = *mate_id_v;
1585             _ASSERT(mate_id);
1586             CRef<CUser_object> obj(new CUser_object());
1587             obj->SetType(x_GetObject_id("Mate read",
1588                                         cache.m_ObjectIdMateRead));
1589             CTempString mate_name = m_Aln->REF_NAME(mate_id);
1590             if ( mate_name != m_RefIter->m_Name ) {
1591                 CCSraRefSeqIterator mate_iter
1592                     (m_RefIter.m_Db, mate_name, CCSraRefSeqIterator::eByName);
1593                 if ( mate_iter ) {
1594                     x_AddField(*obj,
1595                                "RefId", mate_iter.GetRefSeq_id()->AsFastaString(),
1596                                cache.m_ObjectIdRefId);
1597                 }
1598                 else {
1599                     CTempString mate_ref_id = m_Aln->REF_SEQ_ID(mate_id);
1600                     x_AddField(*obj,
1601                                "RefId", mate_ref_id,
1602                                cache.m_ObjectIdRefId);
1603                 }
1604             }
1605             x_AddField(*obj,
1606                        "RefPos", *m_Aln->REF_POS(mate_id),
1607                        cache.m_ObjectIdRefPos);
1608 
1609             CRef<CUser_field> field(new CUser_field());
1610             field->SetLabel(x_GetObject_id("gnl|SRA|",
1611                                            cache.m_ObjectIdLcl));
1612             GetDb().SetShortReadId(field->SetData().SetStr(),
1613                                    m_Aln->SEQ_SPOT_ID(mate_id),
1614                                    m_Aln->SEQ_READ_ID(mate_id));
1615             obj->SetData().push_back(field);
1616 
1617             align->SetExt().push_back(obj);
1618         }
1619     }
1620 
1621     if ( s_GetCigarInAlignExt() ) {
1622         CRef<CUser_object> obj(new CUser_object);
1623         obj->SetType(x_GetObject_id("Tracebacks",
1624                                     cache.m_ObjectIdTracebacks));
1625         CTempString cigar = GetCIGARLong();
1626         CTempString mismatch = GetMismatchRaw();
1627         x_AddField(*obj, "CIGAR", cigar,
1628                    cache.m_ObjectIdCIGAR,
1629                    cache.m_UserFieldCacheCigar, 8, 8192);
1630         if ( insert_size == 0 ) {
1631             // all mismatches are explicit 'X', so there are no '=' to add
1632             // use the VDB privided MISMATCHE string without modifications
1633             x_AddField(*obj, "MISMATCH", mismatch,
1634                        cache.m_ObjectIdMISMATCH,
1635                        cache.m_UserFieldCacheMismatch, 8, 8192);
1636         }
1637         else {
1638             CUser_field& field = x_AddField(*obj, "MISMATCH",
1639                                             cache.m_ObjectIdMISMATCH);
1640             MakeFullMismatch(field.SetData().SetStr(), cigar, mismatch);
1641         }
1642         align->SetExt().push_back(obj);
1643     }
1644 
1645     if ( s_GetReadFilterInAlignExt() ) {
1646         INSDC_read_filter filter = GetReadFilter();
1647         if ( filter > SRA_READ_FILTER_PASS &&
1648              filter <= SRA_READ_FILTER_REDACTED ) {
1649             if ( !cache.m_ReadFilterIndicator[filter] ) {
1650                 static const char* const value[4] = {
1651                     "Good",
1652                     "Poor sequence quality",
1653                     "PCR duplicate",
1654                     "Hidden"
1655                 };
1656                 CRef<CUser_object> obj(new CUser_object);
1657                 obj->SetType().SetStr(value[filter]);
1658                 obj->SetData();
1659                 cache.m_ReadFilterIndicator[filter] = obj;
1660             }
1661             align->SetExt().push_back(cache.m_ReadFilterIndicator[filter]);
1662         }
1663     }
1664 
1665     if ( IsSecondary() ) {
1666         align->SetExt().push_back(x_GetSecondaryIndicator());
1667     }
1668 
1669     return align;
1670 }
1671 
1672 
GetQualityGraph(void) const1673 CRef<CSeq_graph> CCSraAlignIterator::GetQualityGraph(void) const
1674 {
1675     CRef<CSeq_graph> graph(new CSeq_graph);
1676     graph->SetTitle("Phred Quality");
1677     CSeq_interval& loc_int = graph->SetLoc().SetInt();
1678     loc_int.SetId(*GetShortSeq_id());
1679     TSeqPos size = GetShortLen();
1680     loc_int.SetFrom(0);
1681     loc_int.SetTo(size-1);
1682     if ( GetRefMinusStrand() ) {
1683         loc_int.SetStrand(eNa_strand_minus);
1684     }
1685     graph->SetNumval(size);
1686     CByte_graph& b_graph = graph->SetGraph().SetByte();
1687     b_graph.SetAxis(0);
1688     CByte_graph::TValues& values = b_graph.SetValues();
1689     values.resize(size);
1690     CVDBValueFor<INSDC_quality_phred> qual = m_Aln->QUALITY(*m_AlnRowCur);
1691     typedef CByte_graph::TValues::value_type TValue;
1692     TValue min_q = numeric_limits<TValue>::max();
1693     TValue max_q = 0;
1694     for ( size_t i = 0; i < size; ++i ) {
1695         TValue q = qual[i];
1696         values[i] = q;
1697         if ( q < min_q ) {
1698             min_q = q;
1699         }
1700         if ( q > max_q ) {
1701             max_q = q;
1702         }
1703     }
1704     b_graph.SetMin(min_q);
1705     b_graph.SetMax(max_q);
1706     return graph;
1707 }
1708 
1709 
GetShortBioseq(void) const1710 CRef<CBioseq> CCSraAlignIterator::GetShortBioseq(void) const
1711 {
1712     CRef<CBioseq> seq(new CBioseq);
1713     seq->SetId().push_back(GetShortSeq_id());
1714     if ( IsSetName() ) {
1715         CRef<CSeqdesc> desc(new CSeqdesc);
1716         desc->SetTitle(GetName());
1717         seq->SetDescr().Set().push_back(desc);
1718     }
1719     CSeq_inst& inst = seq->SetInst();
1720     inst.SetRepr(inst.eRepr_raw);
1721     inst.SetMol(inst.eMol_na);
1722     CTempString data = m_Aln->RAW_READ(*m_AlnRowCur);
1723     TSeqPos length = TSeqPos(data.size());
1724     inst.SetLength(length);
1725     string& iupac = inst.SetSeq_data().SetIupacna().Set();
1726     iupac.assign(data.data(), length);
1727     return seq;
1728 }
1729 
1730 
MakeMatchAnnotIndicator(void)1731 CRef<CAnnotdesc> CCSraAlignIterator::MakeMatchAnnotIndicator(void)
1732 {
1733     CRef<CAnnotdesc> desc(new CAnnotdesc);
1734     CUser_object& obj = desc->SetUser();
1735     obj.SetType().SetStr("Mate read");
1736     obj.AddField("Match by local Seq-id", true);
1737     return desc;
1738 }
1739 
1740 
1741 CRef<CSeq_annot>
x_GetEmptyMatchAnnot(const string * annot_name) const1742 CCSraAlignIterator::x_GetEmptyMatchAnnot(const string* annot_name) const
1743 {
1744     CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
1745     if ( !s_GetExplicitMateInfoParam() ) {
1746         SCreateCache& cache = x_GetCreateCache();
1747         if ( !cache.m_MatchAnnotIndicator ) {
1748             cache.m_MatchAnnotIndicator = MakeMatchAnnotIndicator();
1749         }
1750         annot->SetDesc().Set().push_back(cache.m_MatchAnnotIndicator);
1751     }
1752     return annot;
1753 }
1754 
1755 
1756 CRef<CSeq_annot>
MakeEmptyMatchAnnot(const string & annot_name)1757 CCSraAlignIterator::MakeEmptyMatchAnnot(const string& annot_name)
1758 {
1759     CRef<CSeq_annot> annot = MakeSeq_annot(annot_name);
1760     annot->SetDesc().Set().push_back(MakeMatchAnnotIndicator());
1761     return annot;
1762 }
1763 
1764 
1765 CRef<CSeq_annot>
x_GetMatchAnnot(const string * annot_name) const1766 CCSraAlignIterator::x_GetMatchAnnot(const string* annot_name) const
1767 {
1768     CRef<CSeq_annot> annot = x_GetEmptyMatchAnnot(annot_name);
1769     annot->SetData().SetAlign().push_back(GetMatchAlign());
1770     return annot;
1771 }
1772 
1773 
1774 CRef<CSeq_annot>
x_GetQualityGraphAnnot(const string * annot_name) const1775 CCSraAlignIterator::x_GetQualityGraphAnnot(const string* annot_name) const
1776 {
1777     CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
1778     annot->SetData().SetGraph().push_back(GetQualityGraph());
1779     return annot;
1780 }
1781 
1782 
1783 CRef<CSeq_entry>
x_GetMatchEntry(const string * annot_name) const1784 CCSraAlignIterator::x_GetMatchEntry(const string* annot_name) const
1785 {
1786     CRef<CSeq_entry> entry(new CSeq_entry);
1787     CRef<CBioseq> seq = GetShortBioseq();
1788     seq->SetAnnot().push_back(x_GetMatchAnnot(annot_name));
1789     entry->SetSeq(*seq);
1790     return entry;
1791 }
1792 
1793 
x_GetSecondaryIndicator(void) const1794 CRef<CUser_object> CCSraAlignIterator::x_GetSecondaryIndicator(void) const
1795 {
1796     SCreateCache& cache = x_GetCreateCache();
1797     if ( !cache.m_SecondaryIndicator ) {
1798         cache.m_SecondaryIndicator = new CUser_object();
1799         cache.m_SecondaryIndicator->SetType().SetStr("Secondary");
1800         cache.m_SecondaryIndicator->SetData();
1801     }
1802     return cache.m_SecondaryIndicator;
1803 }
1804 
1805 
x_GetObject_id(const char * name,TObjectIdCache & cache) const1806 CObject_id& CCSraAlignIterator::x_GetObject_id(const char* name,
1807                                                TObjectIdCache& cache) const
1808 {
1809     if ( !cache ) {
1810         cache = new CObject_id();
1811         cache->SetStr(name);
1812     }
1813     return *cache;
1814 }
1815 
1816 
x_AddField(CUser_object & obj,const char * name,TObjectIdCache & cache) const1817 CUser_field& CCSraAlignIterator::x_AddField(CUser_object& obj,
1818                                             const char* name,
1819                                             TObjectIdCache& cache) const
1820 {
1821     CRef<CUser_field> field(new CUser_field());
1822     field->SetLabel(x_GetObject_id(name, cache));
1823     obj.SetData().push_back(field);
1824     return *field;
1825 }
1826 
1827 
x_AddField(CUser_object & obj,const char * name,CTempString value,TObjectIdCache & cache) const1828 void CCSraAlignIterator::x_AddField(CUser_object& obj,
1829                                     const char* name,
1830                                     CTempString value,
1831                                     TObjectIdCache& cache) const
1832 {
1833     x_AddField(obj, name, cache).SetData().SetStr(value);
1834 }
1835 
1836 
x_AddField(CUser_object & obj,const char * name,int value,TObjectIdCache & cache) const1837 void CCSraAlignIterator::x_AddField(CUser_object& obj,
1838                                     const char* name,
1839                                     int value,
1840                                     TObjectIdCache& cache) const
1841 {
1842     x_AddField(obj, name, cache).SetData().SetInt(value);
1843 }
1844 
1845 
x_AddField(CUser_object & obj,const char * name,CTempString value,TObjectIdCache & id_cache,TUserFieldCache & cache,size_t max_value_length,size_t max_cache_size) const1846 void CCSraAlignIterator::x_AddField(CUser_object& obj,
1847                                     const char* name,
1848                                     CTempString value,
1849                                     TObjectIdCache& id_cache,
1850                                     TUserFieldCache& cache,
1851                                     size_t max_value_length,
1852                                     size_t max_cache_size) const
1853 {
1854     if ( value.size() > max_value_length || cache.size() > max_cache_size ) {
1855         x_AddField(obj, name, value, id_cache);
1856         return;
1857     }
1858     TUserFieldCache::iterator iter = cache.lower_bound(value);
1859     if ( iter == cache.end() || iter->first != value ) {
1860         CRef<CUser_field> field(new CUser_field());
1861         field->SetLabel(x_GetObject_id(name, id_cache));
1862         field->SetData().SetStr(value);
1863         CTempString key = field->GetData().GetStr();
1864         iter = cache.insert(iter, TUserFieldCache::value_type(key, field));
1865     }
1866     obj.SetData().push_back(iter->second);
1867 }
1868 
1869 
1870 /////////////////////////////////////////////////////////////////////////////
1871 // CCSraShortReadIterator
1872 
Reset(void)1873 void CCSraShortReadIterator::Reset(void)
1874 {
1875     if ( m_Seq ) {
1876         GetDb().Put(m_Seq);
1877     }
1878     m_Db.Reset();
1879     m_SpotId = m_MaxSpotId = 0;
1880     m_ReadId = 1;
1881     m_MaxReadId = 0;
1882     m_IncludeTechnicalReads = false;
1883     m_ClipByQuality = false;
1884     m_Error = RC_NO_MORE_ALIGNMENTS;
1885 }
1886 
1887 
CCSraShortReadIterator(const CCSraShortReadIterator & iter)1888 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraShortReadIterator& iter)
1889 {
1890     *this = iter;
1891 }
1892 
1893 
operator =(const CCSraShortReadIterator & iter)1894 CCSraShortReadIterator& CCSraShortReadIterator::operator=(const CCSraShortReadIterator& iter)
1895 {
1896     if ( this != &iter ) {
1897         Reset();
1898         m_Seq = iter.m_Seq;
1899         m_Db = iter.m_Db;
1900         m_Error = iter.m_Error;
1901         m_SpotId = iter.m_SpotId;
1902         m_MaxSpotId = iter.m_MaxSpotId;
1903         m_ReadId = iter.m_ReadId;
1904         m_MaxReadId = iter.m_MaxReadId;
1905         m_IncludeTechnicalReads = iter.m_IncludeTechnicalReads;
1906         m_ClipByQuality = iter.m_ClipByQuality;
1907         m_Error = iter.m_Error;
1908     }
1909     return *this;
1910 }
1911 
1912 
CCSraShortReadIterator(void)1913 CCSraShortReadIterator::CCSraShortReadIterator(void)
1914     : m_SpotId(0),
1915       m_MaxSpotId(0),
1916       m_ReadId(1),
1917       m_MaxReadId(0),
1918       m_IncludeTechnicalReads(false),
1919       m_ClipByQuality(false),
1920       m_Error(RC_NO_MORE_ALIGNMENTS)
1921 {
1922 }
1923 
1924 
CCSraShortReadIterator(const CCSraDb & csra_db,EClipType clip_type)1925 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraDb& csra_db,
1926                                                EClipType clip_type)
1927 {
1928     x_Init(csra_db, clip_type);
1929     if ( *this ) {
1930         x_GetMaxReadId();
1931         x_Settle();
1932     }
1933 }
1934 
1935 
CCSraShortReadIterator(const CCSraDb & csra_db,TVDBRowId spot_id,EClipType clip_type)1936 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraDb& csra_db,
1937                                                TVDBRowId spot_id,
1938                                                EClipType clip_type)
1939 {
1940     x_Init(csra_db, clip_type);
1941     Select(spot_id);
1942 }
1943 
1944 
CCSraShortReadIterator(const CCSraDb & csra_db,TVDBRowId spot_id,uint32_t read_id,EClipType clip_type)1945 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraDb& csra_db,
1946                                                TVDBRowId spot_id,
1947                                                uint32_t read_id,
1948                                                EClipType clip_type)
1949 {
1950     x_Init(csra_db, clip_type);
1951     Select(spot_id, read_id);
1952 }
1953 
1954 
x_Init(const CCSraDb & csra_db,EClipType clip_type)1955 void CCSraShortReadIterator::x_Init(const CCSraDb& csra_db,
1956                                     EClipType clip_type)
1957 {
1958     m_Db = csra_db;
1959     m_Seq = csra_db.GetNCObject().Seq();
1960     m_ReadId = 1;
1961     m_MaxReadId = 0;
1962     m_IncludeTechnicalReads = s_GetIncludeTechnicalReads();
1963     switch ( clip_type ) {
1964     case eNoClip:
1965         m_ClipByQuality = false;
1966         break;
1967     case eClipByQuality:
1968         m_ClipByQuality = true;
1969         break;
1970     default:
1971         m_ClipByQuality = s_GetClipByQuality();
1972         break;
1973     }
1974     TVDBRowIdRange range = m_Seq->m_READ_LEN.GetRowIdRange(m_Seq->m_Cursor);
1975     m_SpotId = range.first;
1976     m_MaxSpotId = range.first+range.second-1;
1977     m_Error = m_SpotId <= m_MaxSpotId? 0: RC_NO_MORE_ALIGNMENTS;
1978 }
1979 
1980 
Select(TVDBRowId spot_id)1981 bool CCSraShortReadIterator::Select(TVDBRowId spot_id)
1982 {
1983     m_SpotId = spot_id;
1984     m_ReadId = 1;
1985     if ( spot_id < 1 || spot_id > m_MaxSpotId ) {
1986         // bad spot id
1987         m_Error = RC_NO_MORE_ALIGNMENTS;
1988         return false;
1989     }
1990     x_GetMaxReadId();
1991     return x_Settle(true); // settle within single spot id
1992 }
1993 
1994 
Select(TVDBRowId spot_id,uint32_t read_id)1995 bool CCSraShortReadIterator::Select(TVDBRowId spot_id,
1996                                     uint32_t read_id)
1997 {
1998     m_SpotId = spot_id;
1999     m_ReadId = read_id;
2000     if ( spot_id < 1 || spot_id > m_MaxSpotId ) {
2001         // bad spot id
2002         m_Error = RC_NO_MORE_ALIGNMENTS;
2003         return false;
2004     }
2005     x_GetMaxReadId();
2006     if ( m_ReadId > m_MaxReadId ) {
2007         // bad read id
2008         m_Error = RC_NO_MORE_ALIGNMENTS;
2009         return false;
2010     }
2011     if ( !x_ValidRead() ) {
2012         // bad read type
2013         m_Error = RC_NO_MORE_ALIGNMENTS;
2014         return false;
2015     }
2016     m_Error = 0;
2017     return true;
2018 }
2019 
2020 
SetLastSpotId(TVDBRowId spot_id)2021 void CCSraShortReadIterator::SetLastSpotId(TVDBRowId spot_id)
2022 {
2023     m_MaxSpotId = min(m_MaxSpotId, spot_id);
2024 }
2025 
2026 
~CCSraShortReadIterator(void)2027 CCSraShortReadIterator::~CCSraShortReadIterator(void)
2028 {
2029     Reset();
2030 }
2031 
2032 
x_ValidRead(void) const2033 bool CCSraShortReadIterator::x_ValidRead(void) const
2034 {
2035     if ( !m_IncludeTechnicalReads && IsTechnicalRead() ) {
2036         return false;
2037     }
2038     if ( GetReadRange().Empty() ) {
2039         return false;
2040     }
2041     return true;
2042 }
2043 
2044 
x_Settle(bool single_spot)2045 bool CCSraShortReadIterator::x_Settle(bool single_spot)
2046 {
2047     for ( ;; ) {
2048         if ( m_ReadId > m_MaxReadId ) {
2049             // next spot, first read
2050             if ( single_spot ) {
2051                 // bad read id
2052                 m_Error = RC_NO_MORE_ALIGNMENTS;
2053                 return false;
2054             }
2055             m_ReadId = 1;
2056             if ( ++m_SpotId > m_MaxSpotId ) {
2057                 // bad spot id
2058                 m_MaxReadId = 0;
2059                 m_Error = RC_NO_MORE_ALIGNMENTS;
2060                 return false;
2061             }
2062             x_GetMaxReadId();
2063         }
2064         else {
2065             if ( x_ValidRead() ) {
2066                 // found
2067                 m_Error = 0;
2068                 return true;
2069             }
2070             // next read id
2071             ++m_ReadId;
2072         }
2073     }
2074 }
2075 
2076 
GetMateCount(void) const2077 uint32_t CCSraShortReadIterator::GetMateCount(void) const
2078 {
2079     uint32_t count = 0;
2080     CVDBValueFor<INSDC_read_type> read_types = m_Seq->READ_TYPE(m_SpotId);
2081     for ( size_t i = 0; i < read_types.size(); ++i ) {
2082         INSDC_read_type type = read_types[i];
2083         if ( (type & (SRA_READ_TYPE_TECHNICAL | SRA_READ_TYPE_BIOLOGICAL)) ==
2084              SRA_READ_TYPE_TECHNICAL ) {
2085             // skip technical reads
2086             continue;
2087         }
2088         ++count;
2089     }
2090     return count;
2091 }
2092 
2093 
x_GetMaxReadId(void)2094 void CCSraShortReadIterator::x_GetMaxReadId(void)
2095 {
2096     m_MaxReadId = uint32_t(m_Seq->READ_TYPE(m_SpotId).size());
2097 }
2098 
2099 
GetShortSeq_id(void) const2100 CRef<CSeq_id> CCSraShortReadIterator::GetShortSeq_id(void) const
2101 {
2102     return GetDb().MakeShortReadId(GetShortId1(), GetShortId2());
2103 }
2104 
2105 
GetSpotGroup(void) const2106 CTempString CCSraShortReadIterator::GetSpotGroup(void) const
2107 {
2108     return m_Seq->SPOT_GROUP(m_SpotId);
2109 }
2110 
2111 
IsSetName(void) const2112 bool CCSraShortReadIterator::IsSetName(void) const
2113 {
2114     return m_Seq->m_NAME && !GetName().empty();
2115 }
2116 
2117 
GetName(void) const2118 CTempString CCSraShortReadIterator::GetName(void) const
2119 {
2120     return m_Seq->NAME(m_SpotId);
2121 }
2122 
2123 
IsTechnicalRead(void) const2124 bool CCSraShortReadIterator::IsTechnicalRead(void) const
2125 {
2126     INSDC_read_type type = m_Seq->READ_TYPE(m_SpotId)[m_ReadId-1];
2127     return (type & (SRA_READ_TYPE_TECHNICAL | SRA_READ_TYPE_BIOLOGICAL)) ==
2128         SRA_READ_TYPE_TECHNICAL;
2129 }
2130 
2131 
GetReadFilter(void) const2132 INSDC_read_filter CCSraShortReadIterator::GetReadFilter(void) const
2133 {
2134     if ( !m_Seq->m_READ_FILTER ) {
2135         return SRA_READ_FILTER_PASS;
2136     }
2137     return m_Seq->READ_FILTER(m_SpotId)[m_ReadId-1];
2138 }
2139 
2140 
GetClipQualityLeft(void) const2141 TSeqPos CCSraShortReadIterator::GetClipQualityLeft(void) const
2142 {
2143     return *m_Seq->TRIM_START(m_SpotId);
2144 }
2145 
2146 
GetClipQualityLength(void) const2147 TSeqPos CCSraShortReadIterator::GetClipQualityLength(void) const
2148 {
2149     return *m_Seq->TRIM_LEN(m_SpotId);
2150 }
2151 
2152 
HasClippingInfo(void) const2153 bool CCSraShortReadIterator::HasClippingInfo(void) const
2154 {
2155     TSeqPos pos = m_Seq->READ_START(m_SpotId)[m_ReadId-1];
2156     TSeqPos trim_pos = GetClipQualityLeft();
2157     if ( trim_pos > pos ) {
2158         return true;
2159     }
2160     TSeqPos len = m_Seq->READ_LEN(m_SpotId)[m_ReadId-1];
2161     TSeqPos end = pos + len;
2162     TSeqPos trim_len = GetClipQualityLength();
2163     TSeqPos trim_end = trim_pos + trim_len;
2164     if ( trim_end < end ) {
2165         return true;
2166     }
2167     return false;
2168 }
2169 
2170 
2171 COpenRange<TSeqPos>
GetReadRange(EClipType clip_type) const2172 CCSraShortReadIterator::GetReadRange(EClipType clip_type) const
2173 {
2174     TSeqPos len = m_Seq->READ_LEN(m_SpotId)[m_ReadId-1];
2175     if ( !len ) {
2176         return TOpenRange::GetEmpty();
2177     }
2178     TSeqPos pos = m_Seq->READ_START(m_SpotId)[m_ReadId-1];
2179     TSeqPos end = pos + len;
2180     bool clip_by_quality = clip_type == eDefaultClip?
2181         m_ClipByQuality: clip_type == eClipByQuality;
2182     if ( clip_by_quality ) {
2183         TSeqPos trim_pos = GetClipQualityLeft();
2184         pos = max(pos, trim_pos);
2185         TSeqPos trim_len = GetClipQualityLength();
2186         TSeqPos trim_end = trim_pos + trim_len;
2187         end = min(end, trim_end);
2188     }
2189     return TOpenRange(pos, end);
2190 }
2191 
2192 
2193 CTempString
x_GetReadData(TOpenRange range) const2194 CCSraShortReadIterator::x_GetReadData(TOpenRange range) const
2195 {
2196     CTempString s = *m_Seq->READ(m_SpotId);
2197     return s.substr(range.GetFrom(), range.GetLength());
2198 }
2199 
2200 
GetReadData(EClipType clip_type) const2201 CTempString CCSraShortReadIterator::GetReadData(EClipType clip_type) const
2202 {
2203     return x_GetReadData(GetReadRange(clip_type));
2204 }
2205 
2206 
2207 CRef<CBioseq>
GetShortBioseq(TBioseqFlags flags) const2208 CCSraShortReadIterator::GetShortBioseq(TBioseqFlags flags) const
2209 {
2210     CRef<CBioseq> seq(new CBioseq);
2211     seq->SetId().push_back(GetShortSeq_id());
2212     if ( IsSetName() ) {
2213         CRef<CSeqdesc> desc(new CSeqdesc);
2214         desc->SetTitle(GetName());
2215         seq->SetDescr().Set().push_back(desc);
2216     }
2217     CSeq_inst& inst = seq->SetInst();
2218     inst.SetRepr(inst.eRepr_raw);
2219     inst.SetMol(inst.eMol_na);
2220     TOpenRange range = GetReadRange();
2221     CTempString data = x_GetReadData(range);
2222     inst.SetLength(TSeqPos(data.size()));
2223     string& iupac = inst.SetSeq_data().SetIupacna().Set();
2224     iupac.assign(data.data(), data.size());
2225     if ( flags & fQualityGraph ) {
2226         seq->SetAnnot().push_back(x_GetQualityGraphAnnot(range, 0));
2227     }
2228     return seq;
2229 }
2230 
2231 
2232 CRef<CSeq_graph>
x_GetQualityGraph(TOpenRange range) const2233 CCSraShortReadIterator::x_GetQualityGraph(TOpenRange range) const
2234 {
2235     CRef<CSeq_graph> graph(new CSeq_graph);
2236     graph->SetTitle("Phred Quality");
2237     CSeq_interval& loc_int = graph->SetLoc().SetInt();
2238     loc_int.SetId(*GetShortSeq_id());
2239     TSeqPos size = range.GetLength();
2240     loc_int.SetFrom(0);
2241     loc_int.SetTo(size-1);
2242     graph->SetNumval(size);
2243     CByte_graph& b_graph = graph->SetGraph().SetByte();
2244     b_graph.SetAxis(0);
2245     CByte_graph::TValues& values = b_graph.SetValues();
2246     values.resize(size);
2247     CVDBValueFor<INSDC_quality_phred> qual = m_Seq->QUALITY(m_SpotId);
2248     TSeqPos start = range.GetFrom();
2249     typedef CByte_graph::TValues::value_type TValue;
2250     TValue min_q = numeric_limits<TValue>::max();
2251     TValue max_q = 0;
2252     for ( size_t i = 0; i < size; ++i ) {
2253         TValue q = qual[start+i];
2254         values[i] = q;
2255         if ( q < min_q ) {
2256             min_q = q;
2257         }
2258         if ( q > max_q ) {
2259             max_q = q;
2260         }
2261     }
2262     b_graph.SetMin(min_q);
2263     b_graph.SetMax(max_q);
2264     return graph;
2265 }
2266 
2267 
2268 CRef<CSeq_graph>
GetQualityGraph(void) const2269 CCSraShortReadIterator::GetQualityGraph(void) const
2270 {
2271     return x_GetQualityGraph(GetReadRange());
2272 }
2273 
2274 
2275 CRef<CSeq_annot>
x_GetSeq_annot(const string * annot_name) const2276 CCSraShortReadIterator::x_GetSeq_annot(const string* annot_name) const
2277 {
2278     CRef<CSeq_annot> annot(new CSeq_annot);
2279     annot->SetData().SetAlign();
2280     if ( annot_name ) {
2281         CRef<CAnnotdesc> desc(new CAnnotdesc);
2282         desc->SetName(*annot_name);
2283         annot->SetDesc().Set().push_back(desc);
2284     }
2285     return annot;
2286 }
2287 
2288 
2289 CRef<CSeq_annot>
x_GetQualityGraphAnnot(TOpenRange range,const string * annot_name) const2290 CCSraShortReadIterator::x_GetQualityGraphAnnot(TOpenRange range,
2291                                                const string* annot_name) const
2292 {
2293     CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
2294     annot->SetData().SetGraph().push_back(x_GetQualityGraph(range));
2295     return annot;
2296 }
2297 
2298 
2299 CRef<CSeq_annot>
x_GetQualityGraphAnnot(const string * annot_name) const2300 CCSraShortReadIterator::x_GetQualityGraphAnnot(const string* annot_name) const
2301 {
2302     return x_GetQualityGraphAnnot(GetReadRange(), annot_name);
2303 }
2304 
2305 
GetRefSeqIter(TSeqPos * ref_pos_ptr) const2306 CCSraRefSeqIterator CCSraShortReadIterator::GetRefSeqIter(TSeqPos* ref_pos_ptr) const
2307 {
2308     CCSraRefSeqIterator ret;
2309     if ( m_Seq->m_PRIMARY_ALIGNMENT_ID ) {
2310         CVDBValueFor<TVDBRowId> align_ids = m_Seq->PRIMARY_ALIGNMENT_ID(m_SpotId);
2311         if ( TVDBRowId row_id = align_ids[m_ReadId-1] ) {
2312             CRef<CCSraDb_Impl::SAlnTableCursor> aln = GetDb().Aln(false);
2313             ret = CCSraRefSeqIterator(m_Db, *aln->REF_NAME(row_id),
2314                                       CCSraRefSeqIterator::eByName);
2315             if ( ref_pos_ptr ) {
2316                 *ref_pos_ptr = *aln->REF_POS(row_id);
2317             }
2318             GetDb().Put(aln);
2319         }
2320     }
2321     return ret;
2322 }
2323 
2324 
GetAlignIter() const2325 CCSraAlignIterator CCSraShortReadIterator::GetAlignIter() const
2326 {
2327     if ( m_Seq->m_PRIMARY_ALIGNMENT_ID ) {
2328         CVDBValueFor<TVDBRowId> align_ids = m_Seq->PRIMARY_ALIGNMENT_ID(m_SpotId);
2329         if ( TVDBRowId align_row_id = align_ids[m_ReadId-1] ) {
2330             return CCSraAlignIterator(m_Db, fPrimaryAlign, align_row_id);
2331         }
2332     }
2333     return CCSraAlignIterator();
2334 }
2335 
2336 
2337 END_NAMESPACE(objects);
2338 END_NCBI_NAMESPACE;
2339