1 /* $Id: csraread.cpp 598358 2019-12-06 23:45:25Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Eugene Vasilchenko
27 *
28 * File Description:
29 * Access to SRA files
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <sra/readers/sra/csraread.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/ncbifile.hpp>
37 #include <corelib/ncbi_param.hpp>
38 #include <objects/general/general__.hpp>
39 #include <objects/seq/seq__.hpp>
40 #include <objects/seqset/seqset__.hpp>
41 #include <objects/seqloc/seqloc__.hpp>
42 #include <objects/seqalign/seqalign__.hpp>
43 #include <objects/seqres/seqres__.hpp>
44 #include <objtools/readers/iidmapper.hpp>
45 #include <sra/error_codes.hpp>
46
47 #include <sra/readers/sra/kdbread.hpp>
48 #include <klib/rc.h>
49 #include <insdc/sra.h>
50
51 #include <sstream>
52 #include <algorithm>
53
54 BEGIN_NCBI_NAMESPACE;
55
56 #define NCBI_USE_ERRCODE_X cSRAReader
57 NCBI_DEFINE_ERR_SUBCODE_X(1);
58
59 BEGIN_NAMESPACE(objects);
60
61
62 NCBI_PARAM_DECL(bool, CSRA, EXPLICIT_MATE_INFO);
63 NCBI_PARAM_DEF_EX(bool, CSRA, EXPLICIT_MATE_INFO, false,
64 eParam_NoThread, CSRA_EXPLICIT_MATE_INFO);
65
66
67 NCBI_PARAM_DECL(bool, CSRA, CIGAR_IN_ALIGN_EXT);
68 NCBI_PARAM_DEF_EX(bool, CSRA, CIGAR_IN_ALIGN_EXT, true,
69 eParam_NoThread, CSRA_CIGAR_IN_ALIGN_EXT);
70
71
72 NCBI_PARAM_DECL(bool, CSRA, INCLUDE_TECHNICAL_READS);
73 NCBI_PARAM_DEF_EX(bool, CSRA, INCLUDE_TECHNICAL_READS, false,
74 eParam_NoThread, CSRA_INCLUDE_TECHNICAL_READS);
75
76
77 NCBI_PARAM_DECL(bool, CSRA, CLIP_BY_QUALITY);
78 NCBI_PARAM_DEF_EX(bool, CSRA, CLIP_BY_QUALITY, true,
79 eParam_NoThread, CSRA_CLIP_BY_QUALITY);
80
81
82 NCBI_PARAM_DECL(bool, CSRA, PATH_IN_ID);
83 NCBI_PARAM_DEF_EX(bool, CSRA, PATH_IN_ID, true,
84 eParam_NoThread, CSRA_PATH_IN_ID);
85
86
87 NCBI_PARAM_DECL(bool, CSRA, READ_FILTER_IN_ALIGN_EXT);
88 NCBI_PARAM_DEF(bool, CSRA, READ_FILTER_IN_ALIGN_EXT, true);
89
90
s_GetExplicitMateInfoParam(void)91 static bool s_GetExplicitMateInfoParam(void)
92 {
93 static CSafeStatic<NCBI_PARAM_TYPE(CSRA, EXPLICIT_MATE_INFO)> s_Value;
94 return s_Value->Get();
95 }
96
97
s_GetCigarInAlignExt(void)98 static bool s_GetCigarInAlignExt(void)
99 {
100 static CSafeStatic<NCBI_PARAM_TYPE(CSRA, CIGAR_IN_ALIGN_EXT)> s_Value;
101 return s_Value->Get();
102 }
103
104
s_GetIncludeTechnicalReads(void)105 static bool s_GetIncludeTechnicalReads(void)
106 {
107 static CSafeStatic<NCBI_PARAM_TYPE(CSRA, INCLUDE_TECHNICAL_READS)> s_Value;
108 return s_Value->Get();
109 }
110
111
s_GetClipByQuality(void)112 static bool s_GetClipByQuality(void)
113 {
114 static CSafeStatic<NCBI_PARAM_TYPE(CSRA, CLIP_BY_QUALITY)> s_Value;
115 return s_Value->Get();
116 }
117
118
s_GetPathInId(void)119 static bool s_GetPathInId(void)
120 {
121 static CSafeStatic<NCBI_PARAM_TYPE(CSRA, PATH_IN_ID)> s_Value;
122 return s_Value->Get();
123 }
124
125
s_GetReadFilterInAlignExt(void)126 static bool s_GetReadFilterInAlignExt(void)
127 {
128 static CSafeStatic<NCBI_PARAM_TYPE(CSRA, READ_FILTER_IN_ALIGN_EXT)> s_Value;
129 return s_Value->Get();
130 }
131
132
133 #define RC_NO_MORE_ALIGNMENTS RC(rcApp, rcQuery, rcSearching, rcRow, rcNotFound)
134
135
136 // SRefTableCursor is helper accessor structure for refseq table
137 struct CCSraDb_Impl::SRefTableCursor : public CObject {
138 SRefTableCursor(const CVDBTable& table);
139
140 CVDBCursor m_Cursor;
141
142 DECLARE_VDB_COLUMN_AS(Uint1, CGRAPH_HIGH);
143 DECLARE_VDB_COLUMN_AS(TVDBRowId, PRIMARY_ALIGNMENT_IDS);
144 DECLARE_VDB_COLUMN_AS(TVDBRowId, SECONDARY_ALIGNMENT_IDS);
145 DECLARE_VDB_COLUMN_AS_STRING(NAME);
146 typedef pair<TVDBRowId, TVDBRowId> row_range_t;
147 DECLARE_VDB_COLUMN_AS(row_range_t, NAME_RANGE);
148 DECLARE_VDB_COLUMN_AS_STRING(SEQ_ID);
149 DECLARE_VDB_COLUMN_AS(INSDC_coord_len, SEQ_LEN);
150 DECLARE_VDB_COLUMN_AS(INSDC_coord_len, MAX_SEQ_LEN);
151 DECLARE_VDB_COLUMN_AS_STRING(READ);
152 DECLARE_VDB_COLUMN_AS(bool, CIRCULAR);
153 DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, OVERLAP_REF_POS);
154 };
155
156
157 // SAlnTableCursor is helper accessor structure for align table
158 struct CCSraDb_Impl::SAlnTableCursor : public CObject {
159 SAlnTableCursor(const CVDBTable& table, bool is_secondary);
160
161 CVDBCursor m_Cursor;
162 bool m_IsSecondary;
163
164 DECLARE_VDB_COLUMN_AS_STRING(REF_NAME);
165 DECLARE_VDB_COLUMN_AS_STRING(REF_SEQ_ID);
166 DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, REF_POS);
167 DECLARE_VDB_COLUMN_AS(INSDC_coord_len, REF_LEN);
168 DECLARE_VDB_COLUMN_AS(bool, REF_ORIENTATION);
169 DECLARE_VDB_COLUMN_AS(char, HAS_REF_OFFSET);
170 DECLARE_VDB_COLUMN_AS(char, HAS_MISMATCH);
171 DECLARE_VDB_COLUMN_AS(int32_t, REF_OFFSET);
172 DECLARE_VDB_COLUMN_AS(Uint1, REF_OFFSET_TYPE);
173 DECLARE_VDB_COLUMN_AS_STRING(CIGAR_SHORT);
174 DECLARE_VDB_COLUMN_AS_STRING(CIGAR_LONG);
175 DECLARE_VDB_COLUMN_AS_STRING(RAW_READ);
176 DECLARE_VDB_COLUMN_AS_STRING(MISMATCH_READ);
177 DECLARE_VDB_COLUMN_AS_STRING(MISMATCH);
178 DECLARE_VDB_COLUMN_AS(INSDC_coord_len, SPOT_LEN);
179 DECLARE_VDB_COLUMN_AS(TVDBRowId, SEQ_SPOT_ID);
180 DECLARE_VDB_COLUMN_AS(INSDC_coord_one, SEQ_READ_ID);
181 DECLARE_VDB_COLUMN_AS(int32_t, MAPQ);
182 DECLARE_VDB_COLUMN_AS(TVDBRowId, MATE_ALIGN_ID);
183 DECLARE_VDB_COLUMN_AS(INSDC_quality_phred, QUALITY);
184 DECLARE_VDB_COLUMN_AS_STRING(SPOT_GROUP);
185 DECLARE_VDB_COLUMN_AS_STRING(NAME);
186 DECLARE_VDB_COLUMN_AS(INSDC_read_filter, READ_FILTER);
187 };
188
189
190 // SSeqTableCursor is helper accessor structure for sequence table
191 struct CCSraDb_Impl::SSeqTableCursor : public CObject {
192 SSeqTableCursor(const CVDBTable& table);
193
194 CVDBCursor m_Cursor;
195
196 DECLARE_VDB_COLUMN_AS_STRING(SPOT_GROUP);
197 DECLARE_VDB_COLUMN_AS(INSDC_read_type, READ_TYPE);
198 DECLARE_VDB_COLUMN_AS(INSDC_coord_len, READ_LEN);
199 DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, READ_START);
200 DECLARE_VDB_COLUMN_AS_STRING(READ);
201 DECLARE_VDB_COLUMN_AS(INSDC_quality_phred, QUALITY);
202 DECLARE_VDB_COLUMN_AS(TVDBRowId, PRIMARY_ALIGNMENT_ID);
203 DECLARE_VDB_COLUMN_AS(INSDC_coord_len, TRIM_LEN);
204 DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, TRIM_START);
205 DECLARE_VDB_COLUMN_AS_STRING(NAME);
206 DECLARE_VDB_COLUMN_AS(INSDC_read_filter, READ_FILTER);
207 };
208
209
SRefTableCursor(const CVDBTable & table)210 CCSraDb_Impl::SRefTableCursor::SRefTableCursor(const CVDBTable& table)
211 : m_Cursor(table),
212 INIT_VDB_COLUMN(CGRAPH_HIGH),
213 INIT_VDB_COLUMN(PRIMARY_ALIGNMENT_IDS),
214 INIT_OPTIONAL_VDB_COLUMN(SECONDARY_ALIGNMENT_IDS),
215 INIT_VDB_COLUMN(NAME),
216 INIT_VDB_COLUMN(NAME_RANGE),
217 INIT_VDB_COLUMN(SEQ_ID),
218 INIT_VDB_COLUMN(SEQ_LEN),
219 INIT_VDB_COLUMN(MAX_SEQ_LEN),
220 INIT_VDB_COLUMN(READ),
221 INIT_VDB_COLUMN(CIRCULAR),
222 INIT_OPTIONAL_VDB_COLUMN(OVERLAP_REF_POS)
223 {
224 }
225
226
SAlnTableCursor(const CVDBTable & table,bool is_secondary)227 CCSraDb_Impl::SAlnTableCursor::SAlnTableCursor(const CVDBTable& table,
228 bool is_secondary)
229 : m_Cursor(table),
230 m_IsSecondary(is_secondary),
231 INIT_VDB_COLUMN(REF_NAME),
232 INIT_VDB_COLUMN(REF_SEQ_ID),
233 INIT_VDB_COLUMN(REF_POS),
234 INIT_VDB_COLUMN(REF_LEN),
235 INIT_VDB_COLUMN(REF_ORIENTATION),
236 INIT_VDB_COLUMN(HAS_REF_OFFSET),
237 INIT_VDB_COLUMN(HAS_MISMATCH),
238 INIT_VDB_COLUMN(REF_OFFSET),
239 INIT_OPTIONAL_VDB_COLUMN(REF_OFFSET_TYPE),
240 INIT_VDB_COLUMN(CIGAR_SHORT),
241 INIT_VDB_COLUMN(CIGAR_LONG),
242 INIT_VDB_COLUMN(RAW_READ),
243 INIT_VDB_COLUMN_BACKUP(MISMATCH_READ, READ),
244 INIT_VDB_COLUMN(MISMATCH),
245 INIT_VDB_COLUMN(SPOT_LEN),
246 INIT_VDB_COLUMN(SEQ_SPOT_ID),
247 INIT_VDB_COLUMN(SEQ_READ_ID),
248 INIT_VDB_COLUMN(MAPQ),
249 INIT_VDB_COLUMN(MATE_ALIGN_ID),
250 INIT_VDB_COLUMN(QUALITY),
251 INIT_VDB_COLUMN(SPOT_GROUP),
252 INIT_OPTIONAL_VDB_COLUMN(NAME),
253 INIT_OPTIONAL_VDB_COLUMN_BACKUP(READ_FILTER, RD_FILTER)
254 {
255 }
256
257
SSeqTableCursor(const CVDBTable & table)258 CCSraDb_Impl::SSeqTableCursor::SSeqTableCursor(const CVDBTable& table)
259 : m_Cursor(table),
260 INIT_VDB_COLUMN(SPOT_GROUP),
261 INIT_VDB_COLUMN(READ_TYPE),
262 INIT_VDB_COLUMN(READ_LEN),
263 INIT_VDB_COLUMN(READ_START),
264 INIT_VDB_COLUMN(READ),
265 INIT_VDB_COLUMN(QUALITY),
266 INIT_OPTIONAL_VDB_COLUMN(PRIMARY_ALIGNMENT_ID),
267 INIT_VDB_COLUMN(TRIM_LEN),
268 INIT_VDB_COLUMN(TRIM_START),
269 INIT_OPTIONAL_VDB_COLUMN(NAME),
270 INIT_OPTIONAL_VDB_COLUMN_BACKUP(READ_FILTER, RD_FILTER)
271 {
272 }
273
274
CCSraDb_Impl(CVDBMgr & mgr,const string & csra_path,IIdMapper * ref_id_mapper,ERefIdType ref_id_type,const string & sra_id_part)275 CCSraDb_Impl::CCSraDb_Impl(CVDBMgr& mgr, const string& csra_path,
276 IIdMapper* ref_id_mapper,
277 ERefIdType ref_id_type,
278 const string& sra_id_part)
279 : m_Mgr(mgr),
280 m_CSraPath(csra_path),
281 m_SraIdPart(sra_id_part),
282 m_RowSize(0)
283 {
284 // to avoid conflict with ref seq ids like gnl|SRA|SRR452437/scaffold_1
285 // we replace all slashes ('/') with backslashes ('\\')
286 replace(m_SraIdPart.begin(), m_SraIdPart.end(), '/', '\\');
287 try {
288 m_Db = CVDB(mgr, csra_path);
289 }
290 catch ( CSraException& exc ) {
291 // We want to open old SRA schema files too.
292 // In this case the DB object is not found, and the error is eDataError
293 // any other error cannot allow to work with this accession
294 if ( exc.GetErrCode() != CSraException::eDataError ) {
295 // report all other exceptions as is
296 throw;
297 }
298 }
299 CRef<SRefTableCursor> ref;
300 if ( m_Db ) {
301 try {
302 ref = Ref();
303 }
304 catch ( CSraException& exc ) {
305 // allow missing REFERENCE table
306 // any error except eNotFoundTable is not good
307 if ( exc.GetErrCode() != CSraException::eNotFoundTable ) {
308 throw;
309 }
310 }
311 }
312 if ( ref ) {
313 SRefInfo info;
314 m_RowSize = *ref->MAX_SEQ_LEN(1);
315 auto name_row_range = ref->m_NAME.GetRowIdRange(ref->m_Cursor);
316 TVDBRowId end_row = name_row_range.first + name_row_range.second;
317 for ( TVDBRowId row = 1; row < end_row; ) {
318 // read range and names
319 CTempString name = ref->NAME(row);
320 ref->m_Cursor.SetParam("QUERY_SEQ_NAME", name);
321 info.m_Name = name;
322 info.m_SeqId = *ref->SEQ_ID(row);
323 x_MakeRefSeq_ids(info, ref_id_mapper, ref_id_type);
324 _ASSERT(!info.m_Seq_ids.empty());
325 CBioseq::TId::iterator best_id = info.m_Seq_ids.begin();
326 // put the best id at front as the main id
327 swap(info.GetMainSeq_id(), *best_id);
328 info.m_Seq_id_Handle =
329 CSeq_id_Handle::GetHandle(*info.GetMainSeq_id());
330
331 const SRefTableCursor::row_range_t& range = *ref->NAME_RANGE(row);
332 info.m_RowFirst = range.first;
333 info.m_RowLast = range.second;
334 info.m_SeqLength = kInvalidSeqPos;
335 info.m_Circular = *ref->CIRCULAR(row);
336 m_RefList.push_back(info);
337 row = info.m_RowLast+1;
338 }
339 Put(ref);
340 NON_CONST_ITERATE ( TRefInfoList, it, m_RefList ) {
341 m_RefMapByName.insert
342 (TRefInfoMapByName::value_type(it->m_Name, it));
343 ITERATE ( CBioseq::TId, id_it, it->m_Seq_ids ) {
344 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(**id_it);
345 m_RefMapBySeq_id.insert
346 (TRefInfoMapBySeq_id::value_type(idh, it));
347 }
348 }
349 }
350 else {
351 // we'll work with SEQUENCE only in this case
352 // check if the SEQUENCE table exists
353 // They are opened with direct table access,
354 // but the caller expects to see 'no data' error as eNotFoundDb.
355 try {
356 SeqTable();
357 }
358 catch ( CSraException& exc ) {
359 // missing table is reported as eNotFoundTable
360 if ( exc.GetErrCode() == CSraException::eNotFoundTable ) {
361 // replace with eNotFoundDb, expected by caller
362 NCBI_THROW3(CSraException, eNotFoundDb,
363 exc.GetMsg(), exc.GetRC(), exc.GetParam());
364 }
365 // report all other excetions as is
366 throw;
367 }
368 }
369 }
370
371
~CCSraDb_Impl(void)372 CCSraDb_Impl::~CCSraDb_Impl(void)
373 {
374 }
375
376
MakeSraIdPart(EPathInIdType path_in_id_type,const string & dir_path,const string & csra_file)377 string CCSraDb::MakeSraIdPart(EPathInIdType path_in_id_type,
378 const string& dir_path,
379 const string& csra_file)
380 {
381 string sra_id_part;
382 if ( path_in_id_type == ePathInId_config ) {
383 path_in_id_type = s_GetPathInId()? ePathInId_yes: ePathInId_no;
384 }
385 if ( path_in_id_type == ePathInId_yes ) {
386 sra_id_part = CDirEntry::MakePath(dir_path, csra_file);
387 }
388 else {
389 string dir, name;
390 CDirEntry::SplitPath(csra_file, &dir, &name);
391 sra_id_part = CDirEntry::MakePath(dir, name);
392 }
393 return sra_id_part;
394 }
395
396
CCSraDb(CVDBMgr & mgr,const string & csra_path,IIdMapper * ref_id_mapper,ERefIdType ref_id_type)397 CCSraDb::CCSraDb(CVDBMgr& mgr,
398 const string& csra_path,
399 IIdMapper* ref_id_mapper,
400 ERefIdType ref_id_type)
401 : CRef<CCSraDb_Impl>(new CCSraDb_Impl(mgr, csra_path,
402 ref_id_mapper,
403 ref_id_type,
404 csra_path))
405 {
406 }
407
408
CCSraDb(CVDBMgr & mgr,const string & csra_path,const string & sra_id_part,IIdMapper * ref_id_mapper,ERefIdType ref_id_type)409 CCSraDb::CCSraDb(CVDBMgr& mgr,
410 const string& csra_path,
411 const string& sra_id_part,
412 IIdMapper* ref_id_mapper,
413 ERefIdType ref_id_type)
414 : CRef<CCSraDb_Impl>(new CCSraDb_Impl(mgr, csra_path,
415 ref_id_mapper,
416 ref_id_type,
417 sra_id_part))
418 {
419 }
420
421
GetSpotGroups(TSpotGroups & spot_groups)422 void CCSraDb_Impl::GetSpotGroups(TSpotGroups& spot_groups)
423 {
424 CKMDataNode parent(CKMetadata(m_Db, "SEQUENCE"), "STATS/SPOT_GROUP");
425 CKNameList names(parent);
426 for ( uint32_t i = 0; i < names.size(); ++i ) {
427 const char* name = names[i];
428 if ( CKMDataNode(CKMDataNode(parent, name), "SPOT_COUNT").GetUint8() ) {
429 spot_groups.push_back(name);
430 }
431 }
432 }
433
434
x_MakeRefSeq_ids(SRefInfo & info,IIdMapper * ref_id_mapper,int ref_id_type)435 void CCSraDb_Impl::x_MakeRefSeq_ids(SRefInfo& info,
436 IIdMapper* ref_id_mapper,
437 int ref_id_type)
438 {
439 info.m_Seq_ids.clear();
440 CRef<CSeq_id> ret;
441 if ( ref_id_mapper ) {
442 // first try NAME -> Seq-id mapping
443 ret = new CSeq_id(CSeq_id::e_Local, info.m_Name);
444 try {
445 ref_id_mapper->MapObject(*ret);
446 if ( !ret->IsLocal() ) {
447 // mapped
448 info.m_Seq_ids.push_back(ret);
449 return;
450 }
451 CRef<CSeq_id> old(new CSeq_id(CSeq_id::e_Local, info.m_Name));
452 if ( !ret->Equals(*old) ) {
453 // mapped
454 info.m_Seq_ids.push_back(ret);
455 return;
456 }
457 }
458 catch ( CException& /*ignored*/ ) {
459 }
460 }
461 // second try SEQ_ID -> Seq-id mapping
462 if ( ref_id_type == CCSraDb::eRefId_SEQ_ID ) {
463 if ( info.m_SeqId.find('|') != NPOS ) {
464 // try multiple FASTA ids
465 try {
466 CSeq_id::ParseIDs(info.m_Seq_ids, info.m_SeqId);
467 }
468 catch ( CSeqIdException& /*ignored*/ ) {
469 info.m_Seq_ids.clear();
470 }
471 }
472 if ( info.m_Seq_ids.empty() ) {
473 try {
474 ret = new CSeq_id(info.m_SeqId);
475 }
476 catch ( CSeqIdException& /*ignored*/ ) {
477 ret = new CSeq_id(CSeq_id::e_Local, info.m_SeqId);
478 }
479 }
480 }
481 else {
482 ret = new CSeq_id(CSeq_id::e_General,
483 "SRA",
484 m_SraIdPart+'/'+info.m_Name);
485 }
486 if ( ret ) {
487 info.m_Seq_ids.push_back(ret);
488 }
489 if ( ref_id_mapper ) {
490 NON_CONST_ITERATE( CBioseq::TId, it, info.m_Seq_ids ) {
491 try {
492 ref_id_mapper->MapObject(**it);
493 }
494 catch ( CException& /*ignored*/ ) {
495 }
496 }
497 }
498 }
499
500
OpenRefTable(void)501 void CCSraDb_Impl::OpenRefTable(void)
502 {
503 CFastMutexGuard guard(m_TableMutex);
504 if ( !m_RefTable ) {
505 m_RefTable = CVDBTable(m_Db, "REFERENCE");
506 }
507 }
508
509
OpenAlnTable(bool is_secondary)510 void CCSraDb_Impl::OpenAlnTable(bool is_secondary)
511 {
512 CFastMutexGuard guard(m_TableMutex);
513 if ( !m_AlnTable[is_secondary] ) {
514 m_AlnTable[is_secondary] = CVDBTable(m_Db, (is_secondary?
515 "SECONDARY_ALIGNMENT":
516 "PRIMARY_ALIGNMENT"));
517 }
518 }
519
520
OpenSeqTable(void)521 void CCSraDb_Impl::OpenSeqTable(void)
522 {
523 CFastMutexGuard guard(m_TableMutex);
524 if ( !m_SeqTable ) {
525 if ( m_Db ) {
526 m_SeqTable = CVDBTable(m_Db, "SEQUENCE");
527 }
528 else {
529 m_SeqTable = CVDBTable(m_Mgr, GetCSraPath());
530 }
531 }
532 }
533
534
Ref(void)535 CRef<CCSraDb_Impl::SRefTableCursor> CCSraDb_Impl::Ref(void)
536 {
537 CRef<SRefTableCursor> curs = m_Ref.Get();
538 if ( !curs ) {
539 curs = new SRefTableCursor(RefTable());
540 }
541 return curs;
542 }
543
544
Aln(bool is_secondary)545 CRef<CCSraDb_Impl::SAlnTableCursor> CCSraDb_Impl::Aln(bool is_secondary)
546 {
547 CRef<SAlnTableCursor> curs = m_Aln[is_secondary].Get();
548 if ( !curs ) {
549 curs = new SAlnTableCursor(AlnTable(is_secondary), is_secondary);
550 }
551 return curs;
552 }
553
554
Seq(void)555 CRef<CCSraDb_Impl::SSeqTableCursor> CCSraDb_Impl::Seq(void)
556 {
557 CRef<SSeqTableCursor> curs = m_Seq.Get();
558 if ( !curs ) {
559 curs = new SSeqTableCursor(SeqTable());
560 }
561 return curs;
562 }
563
564
Put(CRef<SRefTableCursor> & curs)565 void CCSraDb_Impl::Put(CRef<SRefTableCursor>& curs)
566 {
567 m_Ref.Put(curs);
568 }
569
570
Put(CRef<SAlnTableCursor> & curs)571 void CCSraDb_Impl::Put(CRef<SAlnTableCursor>& curs)
572 {
573 if ( curs ) {
574 m_Aln[curs->m_IsSecondary].Put(curs);
575 }
576 }
577
578
Put(CRef<SSeqTableCursor> & curs)579 void CCSraDb_Impl::Put(CRef<SSeqTableCursor>& curs)
580 {
581 m_Seq.Put(curs);
582 }
583
584
x_CalcSeqLength(const SRefInfo & info)585 void CCSraDb_Impl::x_CalcSeqLength(const SRefInfo& info)
586 {
587 CRef<SRefTableCursor> ref(Ref());
588 TSeqPos last_len = *ref->SEQ_LEN(info.m_RowLast);
589 info.m_SeqLength =
590 GetRowSize()*TSeqPos(info.m_RowLast-info.m_RowFirst)+last_len;
591 Put(ref);
592 }
593
594
CCSraRefSeqIterator(const CCSraDb & csra_db)595 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db)
596 : m_Db(csra_db),
597 m_Iter(csra_db->GetRefInfoList().begin())
598 {
599 }
600
601
CCSraRefSeqIterator(const CCSraDb & csra_db,const string & seq_id)602 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db,
603 const string& seq_id)
604 {
605 CCSraDb_Impl::TRefInfoMapBySeq_id::const_iterator iter =
606 csra_db->m_RefMapBySeq_id.find(CSeq_id_Handle::GetHandle(seq_id));
607 if ( iter != csra_db->m_RefMapBySeq_id.end() ) {
608 m_Db = csra_db;
609 m_Iter = iter->second;
610 }
611 else {
612 ERR_POST_X(1, "RefSeq \""<<seq_id<<"\" not found.");
613 }
614 }
615
616
CCSraRefSeqIterator(const CCSraDb & csra_db,const string & name,EByName)617 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db,
618 const string& name,
619 EByName)
620 {
621 CCSraDb_Impl::TRefInfoMapByName::const_iterator iter =
622 csra_db->m_RefMapByName.find(name);
623 if ( iter != csra_db->m_RefMapByName.end() ) {
624 m_Db = csra_db;
625 m_Iter = iter->second;
626 }
627 }
628
629
CCSraRefSeqIterator(const CCSraDb & csra_db,const CSeq_id_Handle & seq_id)630 CCSraRefSeqIterator::CCSraRefSeqIterator(const CCSraDb& csra_db,
631 const CSeq_id_Handle& seq_id)
632 {
633 CCSraDb_Impl::TRefInfoMapBySeq_id::const_iterator iter =
634 csra_db->m_RefMapBySeq_id.find(seq_id);
635 if ( iter != csra_db->m_RefMapBySeq_id.end() ) {
636 m_Db = csra_db;
637 m_Iter = iter->second;
638 }
639 }
640
641
GetInfo(void) const642 const CCSraDb_Impl::SRefInfo& CCSraRefSeqIterator::GetInfo(void) const
643 {
644 if ( !*this ) {
645 NCBI_THROW(CSraException, eInvalidState,
646 "CCSraRefSeqIterator is invalid");
647 }
648 return *m_Iter;
649 }
650
651
IsCircular(void) const652 bool CCSraRefSeqIterator::IsCircular(void) const
653 {
654 return GetInfo().m_Circular;
655 }
656
657
GetSeqLength(void) const658 TSeqPos CCSraRefSeqIterator::GetSeqLength(void) const
659 {
660 const CCSraDb_Impl::SRefInfo& info = **this;
661 if ( info.m_SeqLength == kInvalidSeqPos ) {
662 GetDb().x_CalcSeqLength(info);
663 }
664 return info.m_SeqLength;
665 }
666
667
GetRowAlignCount(TVDBRowId row) const668 size_t CCSraRefSeqIterator::GetRowAlignCount(TVDBRowId row) const
669 {
670 return GetAlignCountAtPos(TSeqPos((row-GetInfo().m_RowFirst)*GetDb().GetRowSize()),
671 fPrimaryAlign);
672 }
673
674
GetAlignCountAtPos(TSeqPos pos,TAlignType type) const675 size_t CCSraRefSeqIterator::GetAlignCountAtPos(TSeqPos pos,
676 TAlignType type) const
677 {
678 if ( pos >= GetSeqLength() ) {
679 NCBI_THROW(CSraException, eInvalidArg,
680 "pos is beyond reference sequence");
681 }
682 TVDBRowId row = GetInfo().m_RowFirst + pos/GetDb().GetRowSize();
683 CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
684 size_t ret = 0;
685 if ( type & fPrimaryAlign ) {
686 ret += ref->PRIMARY_ALIGNMENT_IDS(row).size();
687 }
688 if ( (type & fSecondaryAlign) && ref->m_SECONDARY_ALIGNMENT_IDS ) {
689 ret += ref->SECONDARY_ALIGNMENT_IDS(row).size();
690 }
691 GetDb().Put(ref);
692 return ret;
693 }
694
695
GetCoverageGraph(void) const696 CRef<CSeq_graph> CCSraRefSeqIterator::GetCoverageGraph(void) const
697 {
698 CRef<CSeq_graph> graph(new CSeq_graph);
699
700 const CCSraDb_Impl::SRefInfo& info = **this;
701 TSeqPos size = TSeqPos(info.m_RowLast-info.m_RowFirst+1);
702 CSeq_interval& loc_int = graph->SetLoc().SetInt();
703 loc_int.SetId(info.GetMainSeq_id().GetNCObject());
704 loc_int.SetFrom(0);
705 loc_int.SetTo(GetSeqLength()-1);
706 graph->SetNumval(size);
707 TSeqPos row_size = m_Db->GetRowSize();
708 graph->SetComp(row_size);
709 CByte_graph& b_graph = graph->SetGraph().SetByte();
710 b_graph.SetAxis(0);
711 CByte_graph::TValues& values = b_graph.SetValues();
712 values.resize(size);
713 CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
714 Uint1 max_q = 0;
715 for ( size_t i = 0; i < size; ++i ) {
716 TVDBRowId row = info.m_RowFirst+i;
717 Uint1 q = *ref->CGRAPH_HIGH(row);
718 values[i] = q;
719 if ( q > max_q ) {
720 max_q = q;
721 }
722 if ( row == info.m_RowLast ) {
723 TSeqPos len = *ref->SEQ_LEN(row);
724 loc_int.SetTo((size-1)*row_size+len);
725 }
726 }
727 b_graph.SetMin(0);
728 b_graph.SetMax(max_q);
729 GetDb().Put(ref);
730 return graph;
731 }
732
733
GetCoverageAnnot(void) const734 CRef<CSeq_annot> CCSraRefSeqIterator::GetCoverageAnnot(void) const
735 {
736 CRef<CSeq_annot> annot = GetSeq_annot();
737 annot->SetData().SetGraph().push_back(GetCoverageGraph());
738 return annot;
739 }
740
741
GetCoverageAnnot(const string & annot_name) const742 CRef<CSeq_annot> CCSraRefSeqIterator::GetCoverageAnnot(const string& annot_name) const
743 {
744 CRef<CSeq_annot> annot = GetSeq_annot(annot_name);
745 annot->SetData().SetGraph().push_back(GetCoverageGraph());
746 return annot;
747 }
748
749
750 CRef<CSeq_annot>
MakeSeq_annot(const string & annot_name)751 CCSraRefSeqIterator::MakeSeq_annot(const string& annot_name)
752 {
753 CRef<CSeq_annot> annot(new CSeq_annot);
754 annot->SetData().SetAlign();
755 CRef<CAnnotdesc> desc(new CAnnotdesc);
756 desc->SetName(annot_name);
757 annot->SetDesc().Set().push_back(desc);
758 return annot;
759 }
760
761
762 CRef<CSeq_annot>
x_GetSeq_annot(const string * annot_name) const763 CCSraRefSeqIterator::x_GetSeq_annot(const string* annot_name) const
764 {
765 CRef<CSeq_annot> annot(new CSeq_annot);
766 annot->SetData().SetAlign();
767 if ( annot_name ) {
768 CRef<CAnnotdesc> desc(new CAnnotdesc);
769 desc->SetName(*annot_name);
770 annot->SetDesc().Set().push_back(desc);
771 }
772 return annot;
773 }
774
775
GetRefBioseq(ELoadData load) const776 CRef<CBioseq> CCSraRefSeqIterator::GetRefBioseq(ELoadData load) const
777 {
778 CRef<CBioseq> seq(new CBioseq);
779 const CCSraDb_Impl::SRefInfo& info = GetInfo();
780 seq->SetId() = GetRefSeq_ids();
781 const string& csra_path = m_Db->GetCSraPath();
782 if ( CVPath::IsPlainAccession(csra_path) ) {
783 // SRR accession
784 CRef<CSeqdesc> desc(new CSeqdesc);
785 string title =
786 info.m_Name +
787 " - reference sequence from ShortRead RUN " +
788 csra_path;
789 desc->SetTitle(title);
790 seq->SetDescr().Set().push_back(desc);
791 }
792 CSeq_inst& inst = seq->SetInst();
793 inst.SetRepr(inst.eRepr_delta);
794 inst.SetMol(inst.eMol_na);
795 inst.SetLength(GetSeqLength());
796 inst.SetTopology
797 (info.m_Circular? inst.eTopology_circular: inst.eTopology_linear);
798 TLiterals literals;
799 GetRefLiterals(literals, TRange::GetWhole(), load);
800 NON_CONST_ITERATE ( TLiterals, it, literals ) {
801 CRef<CDelta_seq> seq(new CDelta_seq);
802 seq->SetLiteral(**it);
803 inst.SetExt().SetDelta().Set().push_back(seq);
804 }
805 return seq;
806 }
807
808
GetRefLiterals(TLiterals & literals,TRange range,ELoadData load) const809 void CCSraRefSeqIterator::GetRefLiterals(TLiterals& literals,
810 TRange range,
811 ELoadData load) const
812 {
813 const CCSraDb_Impl::SRefInfo& info = GetInfo();
814 TSeqPos segment_len = GetDb().GetRowSize();
815 CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
816 for ( TVDBRowId row = info.m_RowFirst+range.GetFrom()/segment_len;
817 row <= info.m_RowLast; ++row ) {
818 TSeqPos pos = TSeqPos(row-info.m_RowFirst)*segment_len;
819 if ( pos >= range.GetToOpen() ) {
820 break;
821 }
822 CRef<CSeq_literal> literal(new CSeq_literal);
823 if ( row == info.m_RowLast ) {
824 segment_len = *ref->SEQ_LEN(row);
825 }
826 literal->SetLength(segment_len);
827 if ( load == eLoadData ) {
828 literal->SetSeq_data().SetIupacna().Set() = *ref->READ(row);
829 }
830 literals.push_back(literal);
831 }
832 GetDb().Put(ref);
833 }
834
835
GetAlnOverStarts(void) const836 const vector<TSeqPos>& CCSraRefSeqIterator::GetAlnOverStarts(void) const
837 {
838 const CCSraDb_Impl::SRefInfo& info = GetInfo();
839 if ( info.m_AlnOverStarts.empty() ) {
840 CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
841 if ( ref->m_OVERLAP_REF_POS ) {
842 CFastMutexGuard guard(GetDb().m_OverlapMutex);
843 if ( info.m_AlnOverStarts.empty() ) {
844 TSeqPos segment_len = GetDb().GetRowSize();
845 vector<TSeqPos> pp;
846 // collect overlaps
847 for ( TVDBRowId row = info.m_RowFirst; row <= info.m_RowLast; ++row ) {
848 CVDBValueFor<INSDC_coord_zero> vv = ref->OVERLAP_REF_POS(row);
849 TSeqPos pos = TSeqPos((row-info.m_RowFirst)*segment_len);
850 for ( size_t i = 0; i < 2 && i < vv.size(); ++i ) {
851 TSeqPos p = vv[i];
852 if ( p && p < pos ) {
853 pos = p;
854 }
855 }
856 pp.push_back(pos);
857 }
858 if ( !pp.empty() ) {
859 // fix overlaps for alignments starting at pos 0
860 size_t max_aln_end = 1;
861 for ( int secondary = 0; secondary < 2; ++secondary ) {
862 CVDBValueFor<TVDBRowId> aln_ids;
863 if ( secondary ) {
864 if ( !ref->m_SECONDARY_ALIGNMENT_IDS ) {
865 break;
866 }
867 aln_ids = ref->SECONDARY_ALIGNMENT_IDS(info.m_RowFirst);
868 }
869 else {
870 aln_ids = ref->PRIMARY_ALIGNMENT_IDS(info.m_RowFirst);
871 }
872 if ( aln_ids.empty() ) {
873 continue;
874 }
875 CRef<CCSraDb_Impl::SAlnTableCursor> aln(GetDb().Aln(secondary));
876 for ( auto aln_id : aln_ids ) {
877 INSDC_coord_zero pos = *aln->REF_POS(aln_id);
878 if ( pos != 0 ) {
879 // check only alignments that start at the very beginning
880 break;
881 }
882 size_t end = pos + *aln->REF_LEN(aln_id);
883 max_aln_end = max(max_aln_end, end);
884 }
885 GetDb().Put(aln);
886 }
887 pp[min(pp.size()-1, (max_aln_end-1)/segment_len)] = 0;
888 }
889 // propagate overlaps
890 for ( size_t i = pp.size(); i-- > 1; ) {
891 pp[i-1] = min(pp[i-1], pp[i]);
892 }
893 swap(const_cast<vector<TSeqPos>&>(info.m_AlnOverStarts), pp);
894 }
895 }
896 GetDb().Put(ref);
897 }
898 return info.m_AlnOverStarts;
899 }
900
901
GetAlnOverToOpen(TRange range) const902 TSeqPos CCSraRefSeqIterator::GetAlnOverToOpen(TRange range) const
903 {
904 const vector<TSeqPos>& pp = GetAlnOverStarts();
905 TSeqPos segment_len = GetDb().GetRowSize();
906 TSeqPos seg = range.GetToOpen()/segment_len;
907 if ( pp.empty() ) {
908 ++seg;
909 }
910 else {
911 while ( seg+1 < pp.size() && pp[seg+1] < range.GetToOpen() ) {
912 ++seg;
913 }
914 }
915 return min(GetSeqLength(), (seg+1)*segment_len);
916 }
917
918
GetEstimatedNumberOfAlignments(void) const919 Uint8 CCSraRefSeqIterator::GetEstimatedNumberOfAlignments(void) const
920 {
921 TVDBRowId first_align_id = 0, last_align_id = 0;
922 const CCSraDb_Impl::SRefInfo& info = GetInfo();
923 CRef<CCSraDb_Impl::SRefTableCursor> ref(GetDb().Ref());
924 for ( TVDBRowId row = info.m_RowFirst; row <= info.m_RowLast; ++row ) {
925 CVDBValueFor<TVDBRowId> ids = ref->PRIMARY_ALIGNMENT_IDS(row);
926 size_t count = ids.size();
927 if ( count ) {
928 first_align_id = *min_element(ids.data(), ids.data()+count);
929 if ( first_align_id ) {
930 break;
931 }
932 }
933 }
934 if ( !first_align_id ) {
935 GetDb().Put(ref);
936 return 0;
937 }
938 for ( TVDBRowId row = info.m_RowLast; row >= info.m_RowFirst; --row ) {
939 CVDBValueFor<TVDBRowId> ids = ref->PRIMARY_ALIGNMENT_IDS(row);
940 size_t count = ids.size();
941 if ( count ) {
942 last_align_id = *max_element(ids.data(), ids.data()+count);
943 if ( last_align_id ) {
944 break;
945 }
946 }
947 }
948 GetDb().Put(ref);
949 return last_align_id-first_align_id+1;
950 }
951
952
953 /////////////////////////////////////////////////////////////////////////////
954 // CCSraAlignIterator
955 /////////////////////////////////////////////////////////////////////////////
956
Reset(void)957 void CCSraAlignIterator::Reset(void)
958 {
959 if ( m_Ref ) {
960 GetDb().Put(m_Ref);
961 }
962 if ( m_Aln ) {
963 GetDb().Put(m_Aln);
964 }
965 m_RefIter = CCSraRefSeqIterator();
966 m_Error = RC_NO_MORE_ALIGNMENTS;
967 m_ArgRefPos = m_ArgRefLast = 0;
968 m_CurRefPos = m_CurRefLen = 0;
969 m_RefRowNext = m_RefRowLast = 0;
970 m_AlnRowIsSecondary = false;
971 m_SearchMode = eSearchByOverlap;
972 m_AlignType = fAnyAlign;
973 m_AlnRowCur = m_AlnRowEnd = 0;
974 }
975
976
CCSraAlignIterator(const CCSraAlignIterator & iter)977 CCSraAlignIterator::CCSraAlignIterator(const CCSraAlignIterator& iter)
978 {
979 *this = iter;
980 }
981
982
operator =(const CCSraAlignIterator & iter)983 CCSraAlignIterator& CCSraAlignIterator::operator=(const CCSraAlignIterator& iter)
984 {
985 if ( this != &iter ) {
986 Reset();
987 m_Ref = iter.m_Ref;
988 m_Aln = iter.m_Aln;
989 m_RefIter = iter.m_RefIter;
990 m_Error = iter.m_Error;
991 m_ArgRefPos = iter.m_ArgRefPos;
992 m_ArgRefLast = iter.m_ArgRefLast;
993 m_CurRefPos = iter.m_CurRefPos;
994 m_CurRefLen = iter.m_CurRefLen;
995 m_RefRowNext = iter.m_RefRowNext;
996 m_RefRowLast = iter.m_RefRowLast;
997 m_AlnRowIsSecondary = iter.m_AlnRowIsSecondary;
998 m_SearchMode = iter.m_SearchMode;
999 m_AlignType = iter.m_AlignType;
1000 if ( iter.m_AlnRowCur == &iter.m_RefRowNext ) {
1001 m_AlnRowCur = &m_RefRowNext;
1002 m_AlnRowEnd = m_AlnRowCur+1;
1003 }
1004 else {
1005 m_AlnRowCur = iter.m_AlnRowCur;
1006 m_AlnRowEnd = iter.m_AlnRowEnd;
1007 }
1008 }
1009 return *this;
1010 }
1011
1012
CCSraAlignIterator(void)1013 CCSraAlignIterator::CCSraAlignIterator(void)
1014 : m_Error(RC_NO_MORE_ALIGNMENTS)
1015 {
1016 }
1017
1018
CCSraAlignIterator(const CCSraDb & csra_db,const string & ref_id,TSeqPos ref_pos,TSeqPos window,ESearchMode search_mode,TAlignType align_type)1019 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1020 const string& ref_id,
1021 TSeqPos ref_pos,
1022 TSeqPos window,
1023 ESearchMode search_mode,
1024 TAlignType align_type)
1025 : m_RefIter(csra_db, CSeq_id_Handle::GetHandle(ref_id)),
1026 m_Ref(m_RefIter.GetDb().Ref()),
1027 m_Error(RC_NO_MORE_ALIGNMENTS),
1028 m_ArgRefPos(0),
1029 m_ArgRefLast(0)
1030 {
1031 Select(ref_pos, window, search_mode, align_type);
1032 }
1033
1034
CCSraAlignIterator(const CCSraDb & csra_db,const CSeq_id_Handle & ref_id,TSeqPos ref_pos,TSeqPos window,ESearchMode search_mode,TAlignType align_type)1035 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1036 const CSeq_id_Handle& ref_id,
1037 TSeqPos ref_pos,
1038 TSeqPos window,
1039 ESearchMode search_mode,
1040 TAlignType align_type)
1041 : m_RefIter(csra_db, ref_id),
1042 m_Ref(m_RefIter.GetDb().Ref()),
1043 m_Error(RC_NO_MORE_ALIGNMENTS),
1044 m_ArgRefPos(0),
1045 m_ArgRefLast(0)
1046 {
1047 Select(ref_pos, window, search_mode, align_type);
1048 }
1049
1050
CCSraAlignIterator(const CCSraDb & csra_db,const CSeq_id_Handle & ref_id,TSeqPos ref_pos,TSeqPos window,TAlignType align_type)1051 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1052 const CSeq_id_Handle& ref_id,
1053 TSeqPos ref_pos,
1054 TSeqPos window,
1055 TAlignType align_type)
1056 : m_RefIter(csra_db, ref_id),
1057 m_Ref(m_RefIter.GetDb().Ref()),
1058 m_Error(RC_NO_MORE_ALIGNMENTS),
1059 m_ArgRefPos(0),
1060 m_ArgRefLast(0)
1061 {
1062 Select(ref_pos, window, eSearchByOverlap, align_type);
1063 }
1064
1065
CCSraAlignIterator(const CCSraDb & csra_db,TAlignType align_type,TVDBRowId align_row)1066 CCSraAlignIterator::CCSraAlignIterator(const CCSraDb& csra_db,
1067 TAlignType align_type,
1068 TVDBRowId align_row)
1069 {
1070 switch ( align_type ) {
1071 case fPrimaryAlign:
1072 m_AlnRowIsSecondary = false;
1073 break;
1074 case fSecondaryAlign:
1075 m_AlnRowIsSecondary = true;
1076 break;
1077 default:
1078 NCBI_THROW(CSraException, eInvalidArg,
1079 "unspecified alignment type");
1080 }
1081 m_SearchMode = eSearchByStart;
1082
1083 m_Aln = csra_db.GetNCObject().Aln(m_AlnRowIsSecondary);
1084 m_RefIter = CCSraRefSeqIterator(csra_db, *m_Aln->REF_NAME(align_row), CCSraRefSeqIterator::eByName);
1085
1086 m_ArgRefPos = m_ArgRefLast = 0;
1087 m_CurRefPos = *m_Aln->REF_POS(align_row);
1088 m_CurRefLen = *m_Aln->REF_LEN(align_row);
1089
1090 m_RefRowNext = m_RefRowLast = align_row;
1091 m_AlnRowCur = &m_RefRowNext;
1092 m_AlnRowEnd = m_AlnRowCur+1;
1093
1094 m_Error = 0;
1095 }
1096
1097
~CCSraAlignIterator(void)1098 CCSraAlignIterator::~CCSraAlignIterator(void)
1099 {
1100 Reset();
1101 }
1102
1103
Select(TSeqPos ref_pos,TSeqPos window,ESearchMode search_mode,TAlignType align_type)1104 void CCSraAlignIterator::Select(TSeqPos ref_pos,
1105 TSeqPos window,
1106 ESearchMode search_mode,
1107 TAlignType align_type)
1108 {
1109 m_Error = RC_NO_MORE_ALIGNMENTS;
1110 m_ArgRefPos = m_ArgRefLast = 0;
1111 m_SearchMode = search_mode;
1112 m_AlignType = align_type;
1113
1114 m_RefRowNext = m_RefRowLast = 0;
1115 m_AlnRowIsSecondary = true;
1116 m_AlnRowCur = m_AlnRowEnd = 0;
1117
1118 if ( !m_Ref ) {
1119 return;
1120 }
1121
1122 m_ArgRefPos = ref_pos;
1123 m_ArgRefLast = ref_pos+window-1 < ref_pos? kInvalidSeqPos: ref_pos+window-1;
1124
1125 TSeqPos row_size = GetDb().GetRowSize();
1126 const CCSraDb_Impl::SRefInfo& info = *m_RefIter;
1127 TSeqPos start_pos;
1128 if ( search_mode == eSearchByOverlap ) {
1129 const vector<TSeqPos>& pp = m_RefIter.GetAlnOverStarts();
1130 if ( pp.empty() ) {
1131 // max overlap is 1 row
1132 start_pos = max(ref_pos, row_size)-row_size;
1133 }
1134 else {
1135 start_pos = pp[ref_pos/row_size];
1136 }
1137 }
1138 else {
1139 start_pos = ref_pos;
1140 }
1141
1142 m_RefRowNext = info.m_RowFirst + start_pos/row_size;
1143 m_RefRowLast = min(info.m_RowLast, info.m_RowFirst + m_ArgRefLast/row_size);
1144 m_AlnRowCur = m_AlnRowEnd = 0;
1145 m_AlnRowIsSecondary = true;
1146 x_Settle();
1147 }
1148
1149
x_Settle(void)1150 void CCSraAlignIterator::x_Settle(void)
1151 {
1152 for ( ;; ) {
1153 if ( m_AlnRowCur == m_AlnRowEnd ) {
1154 if ( m_RefRowNext > m_RefRowLast ) {
1155 m_Error = RC_NO_MORE_ALIGNMENTS;
1156 return;
1157 }
1158
1159 if ( m_AlnRowIsSecondary ) {
1160 m_AlnRowIsSecondary = false;
1161 if ( !(m_AlignType & fPrimaryAlign) ) {
1162 m_AlnRowCur = m_AlnRowEnd = 0;
1163 }
1164 else {
1165 CVDBValueFor<TVDBRowId> ids =
1166 m_Ref->PRIMARY_ALIGNMENT_IDS(m_RefRowNext);
1167 m_AlnRowCur = ids.data();
1168 m_AlnRowEnd = m_AlnRowCur + ids.size();
1169 }
1170 }
1171 else if ( m_Ref->m_SECONDARY_ALIGNMENT_IDS ) {
1172 m_AlnRowIsSecondary = true;
1173 if ( !(m_AlignType & fSecondaryAlign) ) {
1174 m_AlnRowCur = m_AlnRowEnd = 0;
1175 }
1176 else {
1177 CVDBValueFor<TVDBRowId> ids =
1178 m_Ref->SECONDARY_ALIGNMENT_IDS(m_RefRowNext);
1179 m_AlnRowCur = ids.data();
1180 m_AlnRowEnd = m_AlnRowCur + ids.size();
1181 }
1182 ++m_RefRowNext;
1183 }
1184 else {
1185 m_AlnRowIsSecondary = true;
1186 m_AlnRowCur = m_AlnRowEnd = 0;
1187 ++m_RefRowNext;
1188 }
1189 if ( m_AlnRowCur != m_AlnRowEnd ) {
1190 if ( !m_Aln || m_Aln->m_IsSecondary != m_AlnRowIsSecondary ) {
1191 m_RefIter.GetDb().Put(m_Aln);
1192 m_Aln = m_RefIter.GetDb().Aln(m_AlnRowIsSecondary);
1193 }
1194 }
1195 }
1196 else {
1197 TVDBRowId row = *m_AlnRowCur;
1198 TSeqPos pos = *m_Aln->REF_POS(row);
1199 if ( pos > m_ArgRefLast ) {
1200 // completely after
1201 ++m_AlnRowCur;
1202 continue;
1203 }
1204 if ( m_SearchMode == eSearchByStart && pos < m_ArgRefPos ) {
1205 // starts before
1206 ++m_AlnRowCur;
1207 continue;
1208 }
1209 TSeqPos len = *m_Aln->REF_LEN(row);
1210 TSeqPos end = pos + len;
1211 if ( end <= m_ArgRefPos ) {
1212 // completely before
1213 ++m_AlnRowCur;
1214 continue;
1215 }
1216 m_CurRefPos = pos;
1217 m_CurRefLen = len;
1218 m_Error = 0;
1219 return;
1220 }
1221 }
1222 }
1223
1224
GetAlignmentId(void) const1225 TVDBRowId CCSraAlignIterator::GetAlignmentId(void) const
1226 {
1227 return *m_AlnRowCur;
1228 }
1229
1230
GetRefSeqId(void) const1231 CTempString CCSraAlignIterator::GetRefSeqId(void) const
1232 {
1233 return m_Aln->REF_SEQ_ID(*m_AlnRowCur);
1234 }
1235
1236
GetRefMinusStrand(void) const1237 bool CCSraAlignIterator::GetRefMinusStrand(void) const
1238 {
1239 return m_Aln->REF_ORIENTATION(*m_AlnRowCur);
1240 }
1241
1242
GetSpotGroup(void) const1243 CTempString CCSraAlignIterator::GetSpotGroup(void) const
1244 {
1245 return m_Aln->SPOT_GROUP(*m_AlnRowCur);
1246 }
1247
1248
IsSetName(void) const1249 bool CCSraAlignIterator::IsSetName(void) const
1250 {
1251 return m_Aln->m_NAME && !GetName().empty();
1252 }
1253
1254
GetName(void) const1255 CTempString CCSraAlignIterator::GetName(void) const
1256 {
1257 return m_Aln->NAME(*m_AlnRowCur);
1258 }
1259
1260
GetReadFilter(void) const1261 INSDC_read_filter CCSraAlignIterator::GetReadFilter(void) const
1262 {
1263 if ( !m_Aln->m_READ_FILTER ) {
1264 return SRA_READ_FILTER_PASS;
1265 }
1266 return m_Aln->READ_FILTER(*m_AlnRowCur);
1267 }
1268
1269
GetCIGAR(void) const1270 CTempString CCSraAlignIterator::GetCIGAR(void) const
1271 {
1272 return m_Aln->CIGAR_SHORT(*m_AlnRowCur);
1273 }
1274
1275
GetCIGARLong(void) const1276 CTempString CCSraAlignIterator::GetCIGARLong(void) const
1277 {
1278 return m_Aln->CIGAR_LONG(*m_AlnRowCur);
1279 }
1280
1281
GetMismatchRead(void) const1282 CTempString CCSraAlignIterator::GetMismatchRead(void) const
1283 {
1284 return m_Aln->MISMATCH_READ(*m_AlnRowCur);
1285 }
1286
1287
GetMismatchRaw(void) const1288 CTempString CCSraAlignIterator::GetMismatchRaw(void) const
1289 {
1290 return m_Aln->MISMATCH(*m_AlnRowCur);
1291 }
1292
1293
GetShortId1(void) const1294 TVDBRowId CCSraAlignIterator::GetShortId1(void) const
1295 {
1296 return m_Aln->SEQ_SPOT_ID(*m_AlnRowCur);
1297 }
1298
1299
GetShortId2(void) const1300 INSDC_coord_one CCSraAlignIterator::GetShortId2(void) const
1301 {
1302 return m_Aln->SEQ_READ_ID(*m_AlnRowCur);
1303 }
1304
1305
MakeShortReadId(TVDBRowId id1,INSDC_coord_one id2) const1306 CRef<CSeq_id> CCSraDb_Impl::MakeShortReadId(TVDBRowId id1,
1307 INSDC_coord_one id2) const
1308 {
1309 CRef<CSeq_id> ret(new CSeq_id);
1310 CDbtag& dbtag = ret->SetGeneral();
1311 dbtag.SetDb("SRA");
1312 SetShortReadId(dbtag.SetTag().SetStr(), id1, id2);
1313 return ret;
1314 }
1315
1316
SetShortReadId(string & str,TVDBRowId id1,INSDC_coord_one id2) const1317 void CCSraDb_Impl::SetShortReadId(string& str,
1318 TVDBRowId id1,
1319 INSDC_coord_one id2) const
1320 {
1321 ostringstream s;
1322 s << m_SraIdPart << '.' << id1 << '.' << id2;
1323 str = s.str();
1324 }
1325
1326
GetShortSeq_id(void) const1327 CRef<CSeq_id> CCSraAlignIterator::GetShortSeq_id(void) const
1328 {
1329 return GetDb().MakeShortReadId(GetShortId1(), GetShortId2());
1330 }
1331
1332
GetMateShortSeq_id(void) const1333 CRef<CSeq_id> CCSraAlignIterator::GetMateShortSeq_id(void) const
1334 {
1335 CVDBValueFor<TVDBRowId> value = m_Aln->MATE_ALIGN_ID(*m_AlnRowCur);
1336 if ( value.empty() ) {
1337 return null;
1338 }
1339 TVDBRowId mate_id = *value;
1340 return GetDb().MakeShortReadId(m_Aln->SEQ_SPOT_ID(mate_id),
1341 m_Aln->SEQ_READ_ID(mate_id));
1342 }
1343
1344
GetShortPos(void) const1345 TSeqPos CCSraAlignIterator::GetShortPos(void) const
1346 {
1347 bool has_off = m_Aln->HAS_REF_OFFSET(*m_AlnRowCur)[0] == '1';
1348 if ( !has_off ) {
1349 return 0;
1350 }
1351 if ( m_Aln->m_REF_OFFSET_TYPE &&
1352 m_Aln->REF_OFFSET_TYPE(*m_AlnRowCur)[0] != 1 ) {
1353 return 0;
1354 }
1355 return -m_Aln->REF_OFFSET(*m_AlnRowCur)[0];
1356 }
1357
1358
GetShortLen(void) const1359 TSeqPos CCSraAlignIterator::GetShortLen(void) const
1360 {
1361 return m_Aln->SPOT_LEN(*m_AlnRowCur);
1362 }
1363
1364
GetMapQuality(void) const1365 int CCSraAlignIterator::GetMapQuality(void) const
1366 {
1367 return m_Aln->MAPQ(*m_AlnRowCur);
1368 }
1369
1370
MakeFullMismatch(string & ret,CTempString cigar,CTempString mismatch) const1371 void CCSraAlignIterator::MakeFullMismatch(string& ret,
1372 CTempString cigar,
1373 CTempString mismatch) const
1374 {
1375 CVDBValueFor<char> has_mismatch = m_Aln->HAS_MISMATCH(*m_AlnRowCur);
1376 ret.reserve(mismatch.size()+2);
1377 const char* cptr = cigar.data();
1378 const char* cend = cigar.end();
1379 const char* mptr = mismatch.data();
1380 const char* mend = mismatch.end();
1381
1382 TSeqPos seqpos = 0;
1383 while ( cptr != cend ) {
1384 char type = 0;
1385 TSeqPos seglen = 0;
1386 while ( cptr != cend ) {
1387 char c = *cptr++;
1388 if ( c >= '0' && c <= '9' ) {
1389 seglen = seglen*10+(c-'0');
1390 }
1391 else {
1392 type = c;
1393 break;
1394 }
1395 }
1396 if ( !type ) {
1397 NCBI_THROW_FMT(CSraException, eDataError,
1398 "Incomplete CIGAR: " << cigar);
1399 }
1400 if ( seglen == 0 ) {
1401 NCBI_THROW_FMT(CSraException, eDataError,
1402 "Bad CIGAR length: " << type <<
1403 "0 in " << cigar);
1404 }
1405 if ( type == '=' ) {
1406 // match
1407 seqpos += seglen;
1408 }
1409 else if ( type == 'X' ) {
1410 // mismatch
1411 if ( mptr + seglen > mend ) {
1412 NCBI_THROW_FMT(CSraException, eDataError,
1413 "CIGAR mismatch segment beyond MISMATCH: "
1414 <<cigar<<" vs "<<mismatch);
1415 }
1416 ret.append(mptr, seglen);
1417 mptr += seglen;
1418 seqpos += seglen;
1419 }
1420 else if ( type == 'I' || type == 'S' ) {
1421 if ( seqpos + seglen > has_mismatch.size() ) {
1422 NCBI_THROW_FMT(CSraException, eDataError,
1423 "CIGAR insert segment beyond HAS_MISMATCH: "
1424 <<cigar<<" vs "<<mismatch);
1425 }
1426 for ( TSeqPos i = 0; i < seglen; ++i, ++seqpos ) {
1427 char c;
1428 if ( has_mismatch[seqpos] == '1' ) {
1429 if ( mptr == mend ) {
1430 NCBI_THROW_FMT(CSraException, eDataError,
1431 "CIGAR insert/mismatch segment beyond MISMATCH: "
1432 <<cigar<<" vs "<<mismatch);
1433
1434 }
1435 c = *mptr++;
1436 }
1437 else {
1438 c = '=';
1439 }
1440 ret += c;
1441 }
1442 }
1443 else if ( type == 'D' || type == 'N' || type == 'P' ) {
1444 continue;
1445 }
1446 else {
1447 NCBI_THROW_FMT(CSraException, eDataError,
1448 "Bad CIGAR char: " <<type<< " in " <<cigar);
1449 }
1450 }
1451 if ( mptr != mend ) {
1452 NCBI_THROW_FMT(CSraException, eDataError,
1453 "Extra mismatch chars: " <<cigar<< " vs " <<mismatch);
1454 }
1455 }
1456
1457
1458 CCSraAlignIterator::SCreateCache&
x_GetCreateCache(void) const1459 CCSraAlignIterator::x_GetCreateCache(void) const
1460 {
1461 if ( !m_CreateCache ) {
1462 m_CreateCache = new SCreateCache;
1463 }
1464 return *m_CreateCache;
1465 }
1466
1467
GetMatchAlign(void) const1468 CRef<CSeq_align> CCSraAlignIterator::GetMatchAlign(void) const
1469 {
1470 SCreateCache& cache = x_GetCreateCache();
1471
1472 CRef<CSeq_align> align(new CSeq_align);
1473 align->SetType(CSeq_align::eType_diags);
1474 CDense_seg& denseg = align->SetSegs().SetDenseg();
1475 CDense_seg::TIds& ids = denseg.SetIds();
1476 ids.reserve(2);
1477 ids.push_back(GetRefSeq_id());
1478 ids.push_back(GetShortSeq_id());
1479
1480 TSeqPos refpos = GetRefSeqPos();
1481 TSeqPos seqpos = GetShortPos();
1482 CTempString cigar = GetCIGAR();
1483 const char* ptr = cigar.data();
1484 const char* end = cigar.end();
1485
1486 TSeqPos segcount = 0;
1487 for ( const char* p = ptr; p != end; ++p ) {
1488 char c = *p;
1489 if ( c < '0' || c > '9' ) {
1490 if ( c == 'S' || c == 'N' ) {
1491 // soft clipping already accounted in seqpos
1492 // skip N segments
1493 continue;
1494 }
1495 ++segcount;
1496 }
1497 }
1498
1499 CDense_seg::TStarts& starts = denseg.SetStarts();
1500 CDense_seg::TLens& lens = denseg.SetLens();
1501 starts.reserve(2*segcount);
1502 lens.reserve(segcount);
1503
1504 TSeqPos insert_size = 0;
1505 TSeqPos refstart = 0, seqstart = 0;
1506 while ( ptr != end ) {
1507 char type = 0;
1508 TSeqPos seglen = 0;
1509 while ( ptr != end ) {
1510 char c = *ptr++;
1511 if ( c >= '0' && c <= '9' ) {
1512 seglen = seglen*10+(c-'0');
1513 }
1514 else {
1515 type = c;
1516 break;
1517 }
1518 }
1519 if ( !type ) {
1520 NCBI_THROW_FMT(CSraException, eDataError,
1521 "Incomplete CIGAR: " << cigar);
1522 }
1523 if ( seglen == 0 ) {
1524 NCBI_THROW_FMT(CSraException, eDataError,
1525 "Bad CIGAR length: " << type <<
1526 "0 in " << cigar);
1527 }
1528 if ( type == 'M' || type == '=' || type == 'X' ) {
1529 // match
1530 refstart = refpos;
1531 refpos += seglen;
1532 seqstart = seqpos;
1533 seqpos += seglen;
1534 }
1535 else if ( type == 'I' || type == 'S' ) {
1536 insert_size += seglen;
1537 if ( type == 'S' ) {
1538 // soft clipping already accounted in seqpos
1539 continue;
1540 }
1541 refstart = kInvalidSeqPos;
1542 seqstart = seqpos;
1543 seqpos += seglen;
1544 }
1545 else if ( type == 'D' || type == 'N' ) {
1546 // delete
1547 refstart = refpos;
1548 refpos += seglen;
1549 if ( type == 'N' ) {
1550 // skip N segments
1551 continue;
1552 }
1553 seqstart = kInvalidSeqPos;
1554 }
1555 else if ( type != 'P' ) {
1556 NCBI_THROW_FMT(CSraException, eDataError,
1557 "Bad CIGAR char: " <<type<< " in " <<cigar);
1558 }
1559 starts.push_back(refstart);
1560 starts.push_back(seqstart);
1561 lens.push_back(seglen);
1562 }
1563 if ( GetRefMinusStrand() ) {
1564 CDense_seg::TStrands& strands = denseg.SetStrands();
1565 strands.reserve(2*segcount);
1566 TSeqPos end = GetShortLen();
1567 for ( size_t i = 0; i < segcount; ++i ) {
1568 strands.push_back(eNa_strand_plus);
1569 strands.push_back(eNa_strand_minus);
1570 TSeqPos pos = starts[i*2+1];
1571 TSeqPos len = lens[i];
1572 if ( pos != kInvalidSeqPos ) {
1573 starts[i*2+1] = end - (pos + len);
1574 }
1575 }
1576 }
1577
1578 denseg.SetNumseg(segcount);
1579
1580 if ( s_GetExplicitMateInfoParam() ) {
1581 CVDBValueFor<TVDBRowId> mate_id_v = m_Aln->MATE_ALIGN_ID(*m_AlnRowCur);
1582 if ( !mate_id_v.empty() ) {
1583 _ASSERT(mate_id_v.size() == 1);
1584 TVDBRowId mate_id = *mate_id_v;
1585 _ASSERT(mate_id);
1586 CRef<CUser_object> obj(new CUser_object());
1587 obj->SetType(x_GetObject_id("Mate read",
1588 cache.m_ObjectIdMateRead));
1589 CTempString mate_name = m_Aln->REF_NAME(mate_id);
1590 if ( mate_name != m_RefIter->m_Name ) {
1591 CCSraRefSeqIterator mate_iter
1592 (m_RefIter.m_Db, mate_name, CCSraRefSeqIterator::eByName);
1593 if ( mate_iter ) {
1594 x_AddField(*obj,
1595 "RefId", mate_iter.GetRefSeq_id()->AsFastaString(),
1596 cache.m_ObjectIdRefId);
1597 }
1598 else {
1599 CTempString mate_ref_id = m_Aln->REF_SEQ_ID(mate_id);
1600 x_AddField(*obj,
1601 "RefId", mate_ref_id,
1602 cache.m_ObjectIdRefId);
1603 }
1604 }
1605 x_AddField(*obj,
1606 "RefPos", *m_Aln->REF_POS(mate_id),
1607 cache.m_ObjectIdRefPos);
1608
1609 CRef<CUser_field> field(new CUser_field());
1610 field->SetLabel(x_GetObject_id("gnl|SRA|",
1611 cache.m_ObjectIdLcl));
1612 GetDb().SetShortReadId(field->SetData().SetStr(),
1613 m_Aln->SEQ_SPOT_ID(mate_id),
1614 m_Aln->SEQ_READ_ID(mate_id));
1615 obj->SetData().push_back(field);
1616
1617 align->SetExt().push_back(obj);
1618 }
1619 }
1620
1621 if ( s_GetCigarInAlignExt() ) {
1622 CRef<CUser_object> obj(new CUser_object);
1623 obj->SetType(x_GetObject_id("Tracebacks",
1624 cache.m_ObjectIdTracebacks));
1625 CTempString cigar = GetCIGARLong();
1626 CTempString mismatch = GetMismatchRaw();
1627 x_AddField(*obj, "CIGAR", cigar,
1628 cache.m_ObjectIdCIGAR,
1629 cache.m_UserFieldCacheCigar, 8, 8192);
1630 if ( insert_size == 0 ) {
1631 // all mismatches are explicit 'X', so there are no '=' to add
1632 // use the VDB privided MISMATCHE string without modifications
1633 x_AddField(*obj, "MISMATCH", mismatch,
1634 cache.m_ObjectIdMISMATCH,
1635 cache.m_UserFieldCacheMismatch, 8, 8192);
1636 }
1637 else {
1638 CUser_field& field = x_AddField(*obj, "MISMATCH",
1639 cache.m_ObjectIdMISMATCH);
1640 MakeFullMismatch(field.SetData().SetStr(), cigar, mismatch);
1641 }
1642 align->SetExt().push_back(obj);
1643 }
1644
1645 if ( s_GetReadFilterInAlignExt() ) {
1646 INSDC_read_filter filter = GetReadFilter();
1647 if ( filter > SRA_READ_FILTER_PASS &&
1648 filter <= SRA_READ_FILTER_REDACTED ) {
1649 if ( !cache.m_ReadFilterIndicator[filter] ) {
1650 static const char* const value[4] = {
1651 "Good",
1652 "Poor sequence quality",
1653 "PCR duplicate",
1654 "Hidden"
1655 };
1656 CRef<CUser_object> obj(new CUser_object);
1657 obj->SetType().SetStr(value[filter]);
1658 obj->SetData();
1659 cache.m_ReadFilterIndicator[filter] = obj;
1660 }
1661 align->SetExt().push_back(cache.m_ReadFilterIndicator[filter]);
1662 }
1663 }
1664
1665 if ( IsSecondary() ) {
1666 align->SetExt().push_back(x_GetSecondaryIndicator());
1667 }
1668
1669 return align;
1670 }
1671
1672
GetQualityGraph(void) const1673 CRef<CSeq_graph> CCSraAlignIterator::GetQualityGraph(void) const
1674 {
1675 CRef<CSeq_graph> graph(new CSeq_graph);
1676 graph->SetTitle("Phred Quality");
1677 CSeq_interval& loc_int = graph->SetLoc().SetInt();
1678 loc_int.SetId(*GetShortSeq_id());
1679 TSeqPos size = GetShortLen();
1680 loc_int.SetFrom(0);
1681 loc_int.SetTo(size-1);
1682 if ( GetRefMinusStrand() ) {
1683 loc_int.SetStrand(eNa_strand_minus);
1684 }
1685 graph->SetNumval(size);
1686 CByte_graph& b_graph = graph->SetGraph().SetByte();
1687 b_graph.SetAxis(0);
1688 CByte_graph::TValues& values = b_graph.SetValues();
1689 values.resize(size);
1690 CVDBValueFor<INSDC_quality_phred> qual = m_Aln->QUALITY(*m_AlnRowCur);
1691 typedef CByte_graph::TValues::value_type TValue;
1692 TValue min_q = numeric_limits<TValue>::max();
1693 TValue max_q = 0;
1694 for ( size_t i = 0; i < size; ++i ) {
1695 TValue q = qual[i];
1696 values[i] = q;
1697 if ( q < min_q ) {
1698 min_q = q;
1699 }
1700 if ( q > max_q ) {
1701 max_q = q;
1702 }
1703 }
1704 b_graph.SetMin(min_q);
1705 b_graph.SetMax(max_q);
1706 return graph;
1707 }
1708
1709
GetShortBioseq(void) const1710 CRef<CBioseq> CCSraAlignIterator::GetShortBioseq(void) const
1711 {
1712 CRef<CBioseq> seq(new CBioseq);
1713 seq->SetId().push_back(GetShortSeq_id());
1714 if ( IsSetName() ) {
1715 CRef<CSeqdesc> desc(new CSeqdesc);
1716 desc->SetTitle(GetName());
1717 seq->SetDescr().Set().push_back(desc);
1718 }
1719 CSeq_inst& inst = seq->SetInst();
1720 inst.SetRepr(inst.eRepr_raw);
1721 inst.SetMol(inst.eMol_na);
1722 CTempString data = m_Aln->RAW_READ(*m_AlnRowCur);
1723 TSeqPos length = TSeqPos(data.size());
1724 inst.SetLength(length);
1725 string& iupac = inst.SetSeq_data().SetIupacna().Set();
1726 iupac.assign(data.data(), length);
1727 return seq;
1728 }
1729
1730
MakeMatchAnnotIndicator(void)1731 CRef<CAnnotdesc> CCSraAlignIterator::MakeMatchAnnotIndicator(void)
1732 {
1733 CRef<CAnnotdesc> desc(new CAnnotdesc);
1734 CUser_object& obj = desc->SetUser();
1735 obj.SetType().SetStr("Mate read");
1736 obj.AddField("Match by local Seq-id", true);
1737 return desc;
1738 }
1739
1740
1741 CRef<CSeq_annot>
x_GetEmptyMatchAnnot(const string * annot_name) const1742 CCSraAlignIterator::x_GetEmptyMatchAnnot(const string* annot_name) const
1743 {
1744 CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
1745 if ( !s_GetExplicitMateInfoParam() ) {
1746 SCreateCache& cache = x_GetCreateCache();
1747 if ( !cache.m_MatchAnnotIndicator ) {
1748 cache.m_MatchAnnotIndicator = MakeMatchAnnotIndicator();
1749 }
1750 annot->SetDesc().Set().push_back(cache.m_MatchAnnotIndicator);
1751 }
1752 return annot;
1753 }
1754
1755
1756 CRef<CSeq_annot>
MakeEmptyMatchAnnot(const string & annot_name)1757 CCSraAlignIterator::MakeEmptyMatchAnnot(const string& annot_name)
1758 {
1759 CRef<CSeq_annot> annot = MakeSeq_annot(annot_name);
1760 annot->SetDesc().Set().push_back(MakeMatchAnnotIndicator());
1761 return annot;
1762 }
1763
1764
1765 CRef<CSeq_annot>
x_GetMatchAnnot(const string * annot_name) const1766 CCSraAlignIterator::x_GetMatchAnnot(const string* annot_name) const
1767 {
1768 CRef<CSeq_annot> annot = x_GetEmptyMatchAnnot(annot_name);
1769 annot->SetData().SetAlign().push_back(GetMatchAlign());
1770 return annot;
1771 }
1772
1773
1774 CRef<CSeq_annot>
x_GetQualityGraphAnnot(const string * annot_name) const1775 CCSraAlignIterator::x_GetQualityGraphAnnot(const string* annot_name) const
1776 {
1777 CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
1778 annot->SetData().SetGraph().push_back(GetQualityGraph());
1779 return annot;
1780 }
1781
1782
1783 CRef<CSeq_entry>
x_GetMatchEntry(const string * annot_name) const1784 CCSraAlignIterator::x_GetMatchEntry(const string* annot_name) const
1785 {
1786 CRef<CSeq_entry> entry(new CSeq_entry);
1787 CRef<CBioseq> seq = GetShortBioseq();
1788 seq->SetAnnot().push_back(x_GetMatchAnnot(annot_name));
1789 entry->SetSeq(*seq);
1790 return entry;
1791 }
1792
1793
x_GetSecondaryIndicator(void) const1794 CRef<CUser_object> CCSraAlignIterator::x_GetSecondaryIndicator(void) const
1795 {
1796 SCreateCache& cache = x_GetCreateCache();
1797 if ( !cache.m_SecondaryIndicator ) {
1798 cache.m_SecondaryIndicator = new CUser_object();
1799 cache.m_SecondaryIndicator->SetType().SetStr("Secondary");
1800 cache.m_SecondaryIndicator->SetData();
1801 }
1802 return cache.m_SecondaryIndicator;
1803 }
1804
1805
x_GetObject_id(const char * name,TObjectIdCache & cache) const1806 CObject_id& CCSraAlignIterator::x_GetObject_id(const char* name,
1807 TObjectIdCache& cache) const
1808 {
1809 if ( !cache ) {
1810 cache = new CObject_id();
1811 cache->SetStr(name);
1812 }
1813 return *cache;
1814 }
1815
1816
x_AddField(CUser_object & obj,const char * name,TObjectIdCache & cache) const1817 CUser_field& CCSraAlignIterator::x_AddField(CUser_object& obj,
1818 const char* name,
1819 TObjectIdCache& cache) const
1820 {
1821 CRef<CUser_field> field(new CUser_field());
1822 field->SetLabel(x_GetObject_id(name, cache));
1823 obj.SetData().push_back(field);
1824 return *field;
1825 }
1826
1827
x_AddField(CUser_object & obj,const char * name,CTempString value,TObjectIdCache & cache) const1828 void CCSraAlignIterator::x_AddField(CUser_object& obj,
1829 const char* name,
1830 CTempString value,
1831 TObjectIdCache& cache) const
1832 {
1833 x_AddField(obj, name, cache).SetData().SetStr(value);
1834 }
1835
1836
x_AddField(CUser_object & obj,const char * name,int value,TObjectIdCache & cache) const1837 void CCSraAlignIterator::x_AddField(CUser_object& obj,
1838 const char* name,
1839 int value,
1840 TObjectIdCache& cache) const
1841 {
1842 x_AddField(obj, name, cache).SetData().SetInt(value);
1843 }
1844
1845
x_AddField(CUser_object & obj,const char * name,CTempString value,TObjectIdCache & id_cache,TUserFieldCache & cache,size_t max_value_length,size_t max_cache_size) const1846 void CCSraAlignIterator::x_AddField(CUser_object& obj,
1847 const char* name,
1848 CTempString value,
1849 TObjectIdCache& id_cache,
1850 TUserFieldCache& cache,
1851 size_t max_value_length,
1852 size_t max_cache_size) const
1853 {
1854 if ( value.size() > max_value_length || cache.size() > max_cache_size ) {
1855 x_AddField(obj, name, value, id_cache);
1856 return;
1857 }
1858 TUserFieldCache::iterator iter = cache.lower_bound(value);
1859 if ( iter == cache.end() || iter->first != value ) {
1860 CRef<CUser_field> field(new CUser_field());
1861 field->SetLabel(x_GetObject_id(name, id_cache));
1862 field->SetData().SetStr(value);
1863 CTempString key = field->GetData().GetStr();
1864 iter = cache.insert(iter, TUserFieldCache::value_type(key, field));
1865 }
1866 obj.SetData().push_back(iter->second);
1867 }
1868
1869
1870 /////////////////////////////////////////////////////////////////////////////
1871 // CCSraShortReadIterator
1872
Reset(void)1873 void CCSraShortReadIterator::Reset(void)
1874 {
1875 if ( m_Seq ) {
1876 GetDb().Put(m_Seq);
1877 }
1878 m_Db.Reset();
1879 m_SpotId = m_MaxSpotId = 0;
1880 m_ReadId = 1;
1881 m_MaxReadId = 0;
1882 m_IncludeTechnicalReads = false;
1883 m_ClipByQuality = false;
1884 m_Error = RC_NO_MORE_ALIGNMENTS;
1885 }
1886
1887
CCSraShortReadIterator(const CCSraShortReadIterator & iter)1888 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraShortReadIterator& iter)
1889 {
1890 *this = iter;
1891 }
1892
1893
operator =(const CCSraShortReadIterator & iter)1894 CCSraShortReadIterator& CCSraShortReadIterator::operator=(const CCSraShortReadIterator& iter)
1895 {
1896 if ( this != &iter ) {
1897 Reset();
1898 m_Seq = iter.m_Seq;
1899 m_Db = iter.m_Db;
1900 m_Error = iter.m_Error;
1901 m_SpotId = iter.m_SpotId;
1902 m_MaxSpotId = iter.m_MaxSpotId;
1903 m_ReadId = iter.m_ReadId;
1904 m_MaxReadId = iter.m_MaxReadId;
1905 m_IncludeTechnicalReads = iter.m_IncludeTechnicalReads;
1906 m_ClipByQuality = iter.m_ClipByQuality;
1907 m_Error = iter.m_Error;
1908 }
1909 return *this;
1910 }
1911
1912
CCSraShortReadIterator(void)1913 CCSraShortReadIterator::CCSraShortReadIterator(void)
1914 : m_SpotId(0),
1915 m_MaxSpotId(0),
1916 m_ReadId(1),
1917 m_MaxReadId(0),
1918 m_IncludeTechnicalReads(false),
1919 m_ClipByQuality(false),
1920 m_Error(RC_NO_MORE_ALIGNMENTS)
1921 {
1922 }
1923
1924
CCSraShortReadIterator(const CCSraDb & csra_db,EClipType clip_type)1925 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraDb& csra_db,
1926 EClipType clip_type)
1927 {
1928 x_Init(csra_db, clip_type);
1929 if ( *this ) {
1930 x_GetMaxReadId();
1931 x_Settle();
1932 }
1933 }
1934
1935
CCSraShortReadIterator(const CCSraDb & csra_db,TVDBRowId spot_id,EClipType clip_type)1936 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraDb& csra_db,
1937 TVDBRowId spot_id,
1938 EClipType clip_type)
1939 {
1940 x_Init(csra_db, clip_type);
1941 Select(spot_id);
1942 }
1943
1944
CCSraShortReadIterator(const CCSraDb & csra_db,TVDBRowId spot_id,uint32_t read_id,EClipType clip_type)1945 CCSraShortReadIterator::CCSraShortReadIterator(const CCSraDb& csra_db,
1946 TVDBRowId spot_id,
1947 uint32_t read_id,
1948 EClipType clip_type)
1949 {
1950 x_Init(csra_db, clip_type);
1951 Select(spot_id, read_id);
1952 }
1953
1954
x_Init(const CCSraDb & csra_db,EClipType clip_type)1955 void CCSraShortReadIterator::x_Init(const CCSraDb& csra_db,
1956 EClipType clip_type)
1957 {
1958 m_Db = csra_db;
1959 m_Seq = csra_db.GetNCObject().Seq();
1960 m_ReadId = 1;
1961 m_MaxReadId = 0;
1962 m_IncludeTechnicalReads = s_GetIncludeTechnicalReads();
1963 switch ( clip_type ) {
1964 case eNoClip:
1965 m_ClipByQuality = false;
1966 break;
1967 case eClipByQuality:
1968 m_ClipByQuality = true;
1969 break;
1970 default:
1971 m_ClipByQuality = s_GetClipByQuality();
1972 break;
1973 }
1974 TVDBRowIdRange range = m_Seq->m_READ_LEN.GetRowIdRange(m_Seq->m_Cursor);
1975 m_SpotId = range.first;
1976 m_MaxSpotId = range.first+range.second-1;
1977 m_Error = m_SpotId <= m_MaxSpotId? 0: RC_NO_MORE_ALIGNMENTS;
1978 }
1979
1980
Select(TVDBRowId spot_id)1981 bool CCSraShortReadIterator::Select(TVDBRowId spot_id)
1982 {
1983 m_SpotId = spot_id;
1984 m_ReadId = 1;
1985 if ( spot_id < 1 || spot_id > m_MaxSpotId ) {
1986 // bad spot id
1987 m_Error = RC_NO_MORE_ALIGNMENTS;
1988 return false;
1989 }
1990 x_GetMaxReadId();
1991 return x_Settle(true); // settle within single spot id
1992 }
1993
1994
Select(TVDBRowId spot_id,uint32_t read_id)1995 bool CCSraShortReadIterator::Select(TVDBRowId spot_id,
1996 uint32_t read_id)
1997 {
1998 m_SpotId = spot_id;
1999 m_ReadId = read_id;
2000 if ( spot_id < 1 || spot_id > m_MaxSpotId ) {
2001 // bad spot id
2002 m_Error = RC_NO_MORE_ALIGNMENTS;
2003 return false;
2004 }
2005 x_GetMaxReadId();
2006 if ( m_ReadId > m_MaxReadId ) {
2007 // bad read id
2008 m_Error = RC_NO_MORE_ALIGNMENTS;
2009 return false;
2010 }
2011 if ( !x_ValidRead() ) {
2012 // bad read type
2013 m_Error = RC_NO_MORE_ALIGNMENTS;
2014 return false;
2015 }
2016 m_Error = 0;
2017 return true;
2018 }
2019
2020
SetLastSpotId(TVDBRowId spot_id)2021 void CCSraShortReadIterator::SetLastSpotId(TVDBRowId spot_id)
2022 {
2023 m_MaxSpotId = min(m_MaxSpotId, spot_id);
2024 }
2025
2026
~CCSraShortReadIterator(void)2027 CCSraShortReadIterator::~CCSraShortReadIterator(void)
2028 {
2029 Reset();
2030 }
2031
2032
x_ValidRead(void) const2033 bool CCSraShortReadIterator::x_ValidRead(void) const
2034 {
2035 if ( !m_IncludeTechnicalReads && IsTechnicalRead() ) {
2036 return false;
2037 }
2038 if ( GetReadRange().Empty() ) {
2039 return false;
2040 }
2041 return true;
2042 }
2043
2044
x_Settle(bool single_spot)2045 bool CCSraShortReadIterator::x_Settle(bool single_spot)
2046 {
2047 for ( ;; ) {
2048 if ( m_ReadId > m_MaxReadId ) {
2049 // next spot, first read
2050 if ( single_spot ) {
2051 // bad read id
2052 m_Error = RC_NO_MORE_ALIGNMENTS;
2053 return false;
2054 }
2055 m_ReadId = 1;
2056 if ( ++m_SpotId > m_MaxSpotId ) {
2057 // bad spot id
2058 m_MaxReadId = 0;
2059 m_Error = RC_NO_MORE_ALIGNMENTS;
2060 return false;
2061 }
2062 x_GetMaxReadId();
2063 }
2064 else {
2065 if ( x_ValidRead() ) {
2066 // found
2067 m_Error = 0;
2068 return true;
2069 }
2070 // next read id
2071 ++m_ReadId;
2072 }
2073 }
2074 }
2075
2076
GetMateCount(void) const2077 uint32_t CCSraShortReadIterator::GetMateCount(void) const
2078 {
2079 uint32_t count = 0;
2080 CVDBValueFor<INSDC_read_type> read_types = m_Seq->READ_TYPE(m_SpotId);
2081 for ( size_t i = 0; i < read_types.size(); ++i ) {
2082 INSDC_read_type type = read_types[i];
2083 if ( (type & (SRA_READ_TYPE_TECHNICAL | SRA_READ_TYPE_BIOLOGICAL)) ==
2084 SRA_READ_TYPE_TECHNICAL ) {
2085 // skip technical reads
2086 continue;
2087 }
2088 ++count;
2089 }
2090 return count;
2091 }
2092
2093
x_GetMaxReadId(void)2094 void CCSraShortReadIterator::x_GetMaxReadId(void)
2095 {
2096 m_MaxReadId = uint32_t(m_Seq->READ_TYPE(m_SpotId).size());
2097 }
2098
2099
GetShortSeq_id(void) const2100 CRef<CSeq_id> CCSraShortReadIterator::GetShortSeq_id(void) const
2101 {
2102 return GetDb().MakeShortReadId(GetShortId1(), GetShortId2());
2103 }
2104
2105
GetSpotGroup(void) const2106 CTempString CCSraShortReadIterator::GetSpotGroup(void) const
2107 {
2108 return m_Seq->SPOT_GROUP(m_SpotId);
2109 }
2110
2111
IsSetName(void) const2112 bool CCSraShortReadIterator::IsSetName(void) const
2113 {
2114 return m_Seq->m_NAME && !GetName().empty();
2115 }
2116
2117
GetName(void) const2118 CTempString CCSraShortReadIterator::GetName(void) const
2119 {
2120 return m_Seq->NAME(m_SpotId);
2121 }
2122
2123
IsTechnicalRead(void) const2124 bool CCSraShortReadIterator::IsTechnicalRead(void) const
2125 {
2126 INSDC_read_type type = m_Seq->READ_TYPE(m_SpotId)[m_ReadId-1];
2127 return (type & (SRA_READ_TYPE_TECHNICAL | SRA_READ_TYPE_BIOLOGICAL)) ==
2128 SRA_READ_TYPE_TECHNICAL;
2129 }
2130
2131
GetReadFilter(void) const2132 INSDC_read_filter CCSraShortReadIterator::GetReadFilter(void) const
2133 {
2134 if ( !m_Seq->m_READ_FILTER ) {
2135 return SRA_READ_FILTER_PASS;
2136 }
2137 return m_Seq->READ_FILTER(m_SpotId)[m_ReadId-1];
2138 }
2139
2140
GetClipQualityLeft(void) const2141 TSeqPos CCSraShortReadIterator::GetClipQualityLeft(void) const
2142 {
2143 return *m_Seq->TRIM_START(m_SpotId);
2144 }
2145
2146
GetClipQualityLength(void) const2147 TSeqPos CCSraShortReadIterator::GetClipQualityLength(void) const
2148 {
2149 return *m_Seq->TRIM_LEN(m_SpotId);
2150 }
2151
2152
HasClippingInfo(void) const2153 bool CCSraShortReadIterator::HasClippingInfo(void) const
2154 {
2155 TSeqPos pos = m_Seq->READ_START(m_SpotId)[m_ReadId-1];
2156 TSeqPos trim_pos = GetClipQualityLeft();
2157 if ( trim_pos > pos ) {
2158 return true;
2159 }
2160 TSeqPos len = m_Seq->READ_LEN(m_SpotId)[m_ReadId-1];
2161 TSeqPos end = pos + len;
2162 TSeqPos trim_len = GetClipQualityLength();
2163 TSeqPos trim_end = trim_pos + trim_len;
2164 if ( trim_end < end ) {
2165 return true;
2166 }
2167 return false;
2168 }
2169
2170
2171 COpenRange<TSeqPos>
GetReadRange(EClipType clip_type) const2172 CCSraShortReadIterator::GetReadRange(EClipType clip_type) const
2173 {
2174 TSeqPos len = m_Seq->READ_LEN(m_SpotId)[m_ReadId-1];
2175 if ( !len ) {
2176 return TOpenRange::GetEmpty();
2177 }
2178 TSeqPos pos = m_Seq->READ_START(m_SpotId)[m_ReadId-1];
2179 TSeqPos end = pos + len;
2180 bool clip_by_quality = clip_type == eDefaultClip?
2181 m_ClipByQuality: clip_type == eClipByQuality;
2182 if ( clip_by_quality ) {
2183 TSeqPos trim_pos = GetClipQualityLeft();
2184 pos = max(pos, trim_pos);
2185 TSeqPos trim_len = GetClipQualityLength();
2186 TSeqPos trim_end = trim_pos + trim_len;
2187 end = min(end, trim_end);
2188 }
2189 return TOpenRange(pos, end);
2190 }
2191
2192
2193 CTempString
x_GetReadData(TOpenRange range) const2194 CCSraShortReadIterator::x_GetReadData(TOpenRange range) const
2195 {
2196 CTempString s = *m_Seq->READ(m_SpotId);
2197 return s.substr(range.GetFrom(), range.GetLength());
2198 }
2199
2200
GetReadData(EClipType clip_type) const2201 CTempString CCSraShortReadIterator::GetReadData(EClipType clip_type) const
2202 {
2203 return x_GetReadData(GetReadRange(clip_type));
2204 }
2205
2206
2207 CRef<CBioseq>
GetShortBioseq(TBioseqFlags flags) const2208 CCSraShortReadIterator::GetShortBioseq(TBioseqFlags flags) const
2209 {
2210 CRef<CBioseq> seq(new CBioseq);
2211 seq->SetId().push_back(GetShortSeq_id());
2212 if ( IsSetName() ) {
2213 CRef<CSeqdesc> desc(new CSeqdesc);
2214 desc->SetTitle(GetName());
2215 seq->SetDescr().Set().push_back(desc);
2216 }
2217 CSeq_inst& inst = seq->SetInst();
2218 inst.SetRepr(inst.eRepr_raw);
2219 inst.SetMol(inst.eMol_na);
2220 TOpenRange range = GetReadRange();
2221 CTempString data = x_GetReadData(range);
2222 inst.SetLength(TSeqPos(data.size()));
2223 string& iupac = inst.SetSeq_data().SetIupacna().Set();
2224 iupac.assign(data.data(), data.size());
2225 if ( flags & fQualityGraph ) {
2226 seq->SetAnnot().push_back(x_GetQualityGraphAnnot(range, 0));
2227 }
2228 return seq;
2229 }
2230
2231
2232 CRef<CSeq_graph>
x_GetQualityGraph(TOpenRange range) const2233 CCSraShortReadIterator::x_GetQualityGraph(TOpenRange range) const
2234 {
2235 CRef<CSeq_graph> graph(new CSeq_graph);
2236 graph->SetTitle("Phred Quality");
2237 CSeq_interval& loc_int = graph->SetLoc().SetInt();
2238 loc_int.SetId(*GetShortSeq_id());
2239 TSeqPos size = range.GetLength();
2240 loc_int.SetFrom(0);
2241 loc_int.SetTo(size-1);
2242 graph->SetNumval(size);
2243 CByte_graph& b_graph = graph->SetGraph().SetByte();
2244 b_graph.SetAxis(0);
2245 CByte_graph::TValues& values = b_graph.SetValues();
2246 values.resize(size);
2247 CVDBValueFor<INSDC_quality_phred> qual = m_Seq->QUALITY(m_SpotId);
2248 TSeqPos start = range.GetFrom();
2249 typedef CByte_graph::TValues::value_type TValue;
2250 TValue min_q = numeric_limits<TValue>::max();
2251 TValue max_q = 0;
2252 for ( size_t i = 0; i < size; ++i ) {
2253 TValue q = qual[start+i];
2254 values[i] = q;
2255 if ( q < min_q ) {
2256 min_q = q;
2257 }
2258 if ( q > max_q ) {
2259 max_q = q;
2260 }
2261 }
2262 b_graph.SetMin(min_q);
2263 b_graph.SetMax(max_q);
2264 return graph;
2265 }
2266
2267
2268 CRef<CSeq_graph>
GetQualityGraph(void) const2269 CCSraShortReadIterator::GetQualityGraph(void) const
2270 {
2271 return x_GetQualityGraph(GetReadRange());
2272 }
2273
2274
2275 CRef<CSeq_annot>
x_GetSeq_annot(const string * annot_name) const2276 CCSraShortReadIterator::x_GetSeq_annot(const string* annot_name) const
2277 {
2278 CRef<CSeq_annot> annot(new CSeq_annot);
2279 annot->SetData().SetAlign();
2280 if ( annot_name ) {
2281 CRef<CAnnotdesc> desc(new CAnnotdesc);
2282 desc->SetName(*annot_name);
2283 annot->SetDesc().Set().push_back(desc);
2284 }
2285 return annot;
2286 }
2287
2288
2289 CRef<CSeq_annot>
x_GetQualityGraphAnnot(TOpenRange range,const string * annot_name) const2290 CCSraShortReadIterator::x_GetQualityGraphAnnot(TOpenRange range,
2291 const string* annot_name) const
2292 {
2293 CRef<CSeq_annot> annot = x_GetSeq_annot(annot_name);
2294 annot->SetData().SetGraph().push_back(x_GetQualityGraph(range));
2295 return annot;
2296 }
2297
2298
2299 CRef<CSeq_annot>
x_GetQualityGraphAnnot(const string * annot_name) const2300 CCSraShortReadIterator::x_GetQualityGraphAnnot(const string* annot_name) const
2301 {
2302 return x_GetQualityGraphAnnot(GetReadRange(), annot_name);
2303 }
2304
2305
GetRefSeqIter(TSeqPos * ref_pos_ptr) const2306 CCSraRefSeqIterator CCSraShortReadIterator::GetRefSeqIter(TSeqPos* ref_pos_ptr) const
2307 {
2308 CCSraRefSeqIterator ret;
2309 if ( m_Seq->m_PRIMARY_ALIGNMENT_ID ) {
2310 CVDBValueFor<TVDBRowId> align_ids = m_Seq->PRIMARY_ALIGNMENT_ID(m_SpotId);
2311 if ( TVDBRowId row_id = align_ids[m_ReadId-1] ) {
2312 CRef<CCSraDb_Impl::SAlnTableCursor> aln = GetDb().Aln(false);
2313 ret = CCSraRefSeqIterator(m_Db, *aln->REF_NAME(row_id),
2314 CCSraRefSeqIterator::eByName);
2315 if ( ref_pos_ptr ) {
2316 *ref_pos_ptr = *aln->REF_POS(row_id);
2317 }
2318 GetDb().Put(aln);
2319 }
2320 }
2321 return ret;
2322 }
2323
2324
GetAlignIter() const2325 CCSraAlignIterator CCSraShortReadIterator::GetAlignIter() const
2326 {
2327 if ( m_Seq->m_PRIMARY_ALIGNMENT_ID ) {
2328 CVDBValueFor<TVDBRowId> align_ids = m_Seq->PRIMARY_ALIGNMENT_ID(m_SpotId);
2329 if ( TVDBRowId align_row_id = align_ids[m_ReadId-1] ) {
2330 return CCSraAlignIterator(m_Db, fPrimaryAlign, align_row_id);
2331 }
2332 }
2333 return CCSraAlignIterator();
2334 }
2335
2336
2337 END_NAMESPACE(objects);
2338 END_NCBI_NAMESPACE;
2339