1 /* $Id: seq_vector_ci.cpp 406788 2013-07-16 14:29:35Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Seq-vector iterator
30 *
31 */
32
33
34 #include <ncbi_pch.hpp>
35 #include <objmgr/seq_vector.hpp>
36 #include <objmgr/seq_vector_ci.hpp>
37 #include <objects/seq/NCBI8aa.hpp>
38 #include <objects/seq/NCBIpaa.hpp>
39 #include <objects/seq/NCBIstdaa.hpp>
40 #include <objects/seq/NCBIeaa.hpp>
41 #include <objects/seq/NCBIpna.hpp>
42 #include <objects/seq/NCBI8na.hpp>
43 #include <objects/seq/NCBI4na.hpp>
44 #include <objects/seq/NCBI2na.hpp>
45 #include <objects/seq/IUPACaa.hpp>
46 #include <objects/seq/IUPACna.hpp>
47 #include <algorithm>
48 #include <objmgr/impl/seq_vector_cvt.hpp>
49 #include <objmgr/objmgr_exception.hpp>
50 #include <util/random_gen.hpp>
51
52 BEGIN_NCBI_SCOPE
53 BEGIN_SCOPE(objects)
54
55
56 static const TSeqPos kCacheSize = 1024;
57
ThrowOutOfRangeSeq_inst(size_t pos)58 void ThrowOutOfRangeSeq_inst(size_t pos)
59 {
60 NCBI_THROW_FMT(CSeqVectorException, eOutOfRange,
61 "reference out of range of Seq-inst data: "<<pos);
62 }
63
64 // CSeqVector_CI::
65
66
CSeqVector_CI(void)67 CSeqVector_CI::CSeqVector_CI(void)
68 : m_Strand(eNa_strand_unknown),
69 m_Coding(CSeq_data::e_not_set),
70 m_CaseConversion(eCaseConversion_none),
71 m_Cache(0),
72 m_CachePos(0),
73 m_CacheData(),
74 m_CacheEnd(0),
75 m_BackupPos(0),
76 m_BackupData(),
77 m_BackupEnd(0),
78 m_ScannedStart(0),
79 m_ScannedEnd(0)
80 {
81 }
82
83
~CSeqVector_CI(void)84 CSeqVector_CI::~CSeqVector_CI(void)
85 {
86 }
87
88
// Copy constructor: first puts the object into the same empty state as
// the default constructor (sharing the source's randomizer), then
// delegates the actual copying to operator=.
CSeqVector_CI::CSeqVector_CI(const CSeqVector_CI& sv_it)
    : m_Strand(eNa_strand_unknown),
      m_Coding(CSeq_data::e_not_set),
      m_CaseConversion(eCaseConversion_none),
      m_Cache(0),
      m_CachePos(0),
      m_CacheData(),
      m_CacheEnd(0),
      m_BackupPos(0),
      m_BackupData(),
      m_BackupEnd(0),
      m_Randomizer(sv_it.m_Randomizer),
      m_ScannedStart(0),
      m_ScannedEnd(0)
{
    // operator= copies the remaining state, including cache contents
    *this = sv_it;
}
106
107
CSeqVector_CI(const CSeqVector & seq_vector,TSeqPos pos)108 CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos)
109 : m_Scope(seq_vector.m_Scope),
110 m_SeqMap(seq_vector.m_SeqMap),
111 m_TSE(seq_vector.m_TSE),
112 m_Strand(seq_vector.m_Strand),
113 m_Coding(seq_vector.m_Coding),
114 m_CaseConversion(eCaseConversion_none),
115 m_Cache(0),
116 m_CachePos(0),
117 m_CacheData(),
118 m_CacheEnd(0),
119 m_BackupPos(0),
120 m_BackupData(),
121 m_BackupEnd(0),
122 m_Randomizer(seq_vector.m_Randomizer),
123 m_ScannedStart(0),
124 m_ScannedEnd(0)
125 {
126 x_SetPos(pos);
127 }
128
129
CSeqVector_CI(const CSeqVector & seq_vector,TSeqPos pos,ECaseConversion case_cvt)130 CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos,
131 ECaseConversion case_cvt)
132 : m_Scope(seq_vector.m_Scope),
133 m_SeqMap(seq_vector.m_SeqMap),
134 m_TSE(seq_vector.m_TSE),
135 m_Strand(seq_vector.m_Strand),
136 m_Coding(seq_vector.m_Coding),
137 m_CaseConversion(case_cvt),
138 m_Cache(0),
139 m_CachePos(0),
140 m_CacheData(),
141 m_CacheEnd(0),
142 m_BackupPos(0),
143 m_BackupData(),
144 m_BackupEnd(0),
145 m_Randomizer(seq_vector.m_Randomizer),
146 m_ScannedStart(0),
147 m_ScannedEnd(0)
148 {
149 x_SetPos(pos);
150 }
151
152
CSeqVector_CI(const CSeqVector & seq_vector,ENa_strand strand,TSeqPos pos,ECaseConversion case_cvt)153 CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, ENa_strand strand,
154 TSeqPos pos, ECaseConversion case_cvt)
155 : m_Scope(seq_vector.m_Scope),
156 m_SeqMap(seq_vector.m_SeqMap),
157 m_TSE(seq_vector.m_TSE),
158 m_Strand(strand),
159 m_Coding(seq_vector.m_Coding),
160 m_CaseConversion(case_cvt),
161 m_Cache(0),
162 m_CachePos(0),
163 m_CacheData(),
164 m_CacheEnd(0),
165 m_BackupPos(0),
166 m_BackupData(),
167 m_BackupEnd(0),
168 m_Randomizer(seq_vector.m_Randomizer),
169 m_ScannedStart(0),
170 m_ScannedEnd(0)
171 {
172 x_SetPos(pos);
173 }
174
175
x_SetVector(CSeqVector & seq_vector)176 void CSeqVector_CI::x_SetVector(CSeqVector& seq_vector)
177 {
178 if ( m_SeqMap ) {
179 // reset old values
180 m_Seg = CSeqMap_CI();
181 x_ResetCache();
182 x_ResetBackup();
183 }
184
185 m_Scope = seq_vector.m_Scope;
186 m_SeqMap = seq_vector.m_SeqMap;
187 m_TSE = seq_vector.m_TSE;
188 m_Strand = seq_vector.m_Strand;
189 m_Coding = seq_vector.m_Coding;
190 m_CachePos = seq_vector.size();
191 m_Randomizer = seq_vector.m_Randomizer;
192 m_ScannedStart = m_ScannedEnd = 0;
193 }
194
195
196 inline
x_GetSize(void) const197 TSeqPos CSeqVector_CI::x_GetSize(void) const
198 {
199 return m_SeqMap->GetLength(m_Scope.GetScopeOrNull());
200 }
201
202
203 static const TSeqPos kMaxPreloadBases = 10*1000*1000;
204
205
CanGetRange(TSeqPos start,TSeqPos stop)206 bool CSeqVector_CI::CanGetRange(TSeqPos start, TSeqPos stop)
207 {
208 try {
209 if ( stop < start ) {
210 return false;
211 }
212 SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
213 sel.SetStrand(m_Strand).SetRange(start, stop-start);
214 sel.SetLinkUsedTSE(m_TSE).SetLinkUsedTSE(m_UsedTSEs);
215 if ( !m_SeqMap->CanResolveRange(m_Scope.GetScopeOrNull(), sel) ) {
216 return false;
217 }
218 if ( start > m_ScannedEnd || stop < m_ScannedStart ) {
219 m_ScannedStart = start;
220 m_ScannedEnd = stop;
221 }
222 else {
223 m_ScannedStart = min(m_ScannedStart, start);
224 m_ScannedEnd = max(m_ScannedEnd, stop);
225 }
226 return true;
227 }
228 catch ( exception& /*ignored*/ ) {
229 return false;
230 }
231 }
232
233
x_CheckForward(void)234 void CSeqVector_CI::x_CheckForward(void)
235 {
236 TSeqPos scanned = m_ScannedEnd - m_ScannedStart;
237 TSeqPos more = x_GetSize() - m_ScannedEnd;
238 TSeqPos check = min(min(scanned, more), kMaxPreloadBases);
239 if ( check > 0 ) {
240 CanGetRange(m_ScannedEnd, m_ScannedEnd+check);
241 }
242 }
243
244
x_CheckBackward(void)245 void CSeqVector_CI::x_CheckBackward(void)
246 {
247 TSeqPos scanned = m_ScannedEnd - m_ScannedStart;
248 TSeqPos more = m_ScannedStart;
249 TSeqPos check = min(min(scanned, more), kMaxPreloadBases);
250 if ( check > 0 ) {
251 CanGetRange(m_ScannedStart-check, m_ScannedStart);
252 }
253 }
254
255
256 inline
x_InitSeg(TSeqPos pos)257 void CSeqVector_CI::x_InitSeg(TSeqPos pos)
258 {
259 SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
260 sel.SetStrand(m_Strand).SetLinkUsedTSE(m_TSE);
261 if ( pos == m_ScannedEnd ) {
262 x_CheckForward();
263 }
264 else if ( pos < m_ScannedStart || pos > m_ScannedEnd ) {
265 m_ScannedStart = m_ScannedEnd = pos;
266 }
267 m_Seg = CSeqMap_CI(m_SeqMap, m_Scope.GetScopeOrNull(), sel, pos);
268 m_ScannedStart = min(m_ScannedStart, m_Seg.GetPosition());
269 m_ScannedEnd = max(m_ScannedEnd, m_Seg.GetEndPosition());
270 }
271
272
273 inline
x_IncSeg(void)274 void CSeqVector_CI::x_IncSeg(void)
275 {
276 if ( m_Seg.GetEndPosition() == m_ScannedEnd ) {
277 x_CheckForward();
278 }
279 ++m_Seg;
280 m_ScannedEnd = max(m_ScannedEnd, m_Seg.GetEndPosition());
281 }
282
283
284 inline
x_DecSeg(void)285 void CSeqVector_CI::x_DecSeg(void)
286 {
287 if ( m_Seg.GetPosition() == m_ScannedStart ) {
288 x_CheckBackward();
289 }
290 --m_Seg;
291 m_ScannedStart = min(m_ScannedStart, m_Seg.GetPosition());
292 }
293
294
x_ThrowOutOfRange(void) const295 void CSeqVector_CI::x_ThrowOutOfRange(void) const
296 {
297 NCBI_THROW_FMT(CSeqVectorException, eOutOfRange,
298 "iterator out of range: "<<GetPos()<<">="<<x_GetSize());
299 }
300
301
SetCoding(TCoding coding)302 void CSeqVector_CI::SetCoding(TCoding coding)
303 {
304 if ( m_Coding != coding ) {
305 TSeqPos pos = GetPos();
306 m_Coding = coding;
307 x_ResetBackup();
308 if ( x_CacheSize() ) {
309 x_ResetCache();
310 if ( m_Seg ) {
311 x_SetPos(pos);
312 }
313 }
314 }
315 }
316
317
SetStrand(ENa_strand strand)318 void CSeqVector_CI::SetStrand(ENa_strand strand)
319 {
320 if ( IsReverse(m_Strand) == IsReverse(strand) ) {
321 m_Strand = strand;
322 return;
323 }
324
325 TSeqPos pos = GetPos();
326 m_Strand = strand;
327 x_ResetBackup();
328 if ( x_CacheSize() ) {
329 x_ResetCache();
330 if ( m_Seg ) {
331 m_Seg = CSeqMap_CI();
332 x_SetPos(pos);
333 }
334 }
335 }
336
337
338 // returns gap Seq-data object ref
339 // returns null if it's not a gap or an unspecified gap
GetGapSeq_literal(void) const340 CConstRef<CSeq_literal> CSeqVector_CI::GetGapSeq_literal(void) const
341 {
342 if ( !IsInGap() ) {
343 return null;
344 }
345 return m_Seg.GetRefGapLiteral();
346 }
347
348
349 // returns number of gap symbols ahead including current symbol
350 // returns 0 if current position is not in gap
GetGapSizeForward(void) const351 TSeqPos CSeqVector_CI::GetGapSizeForward(void) const
352 {
353 if ( !IsInGap() ) {
354 return 0;
355 }
356 return m_Seg.GetEndPosition() - GetPos();
357 }
358
359
360 // returns number of gap symbols before current symbol
361 // returns 0 if current position is not in gap
GetGapSizeBackward(void) const362 TSeqPos CSeqVector_CI::GetGapSizeBackward(void) const
363 {
364 if ( !IsInGap() ) {
365 return 0;
366 }
367 return GetPos() - m_Seg.GetPosition();
368 }
369
370
371 // skip current gap forward
372 // returns number of skipped gap symbols
373 // does nothing and returns 0 if current position is not in gap
SkipGap(void)374 TSeqPos CSeqVector_CI::SkipGap(void)
375 {
376 if ( !IsInGap() ) {
377 return 0;
378 }
379 TSeqPos skip = GetGapSizeForward();
380 SetPos(GetPos()+skip);
381 return skip;
382 }
383
384
385 // skip current gap backward
386 // returns number of skipped gap symbols
387 // does nothing and returns 0 if current position is not in gap
SkipGapBackward(void)388 TSeqPos CSeqVector_CI::SkipGapBackward(void)
389 {
390 if ( !IsInGap() ) {
391 return 0;
392 }
393 TSeqPos skip = GetGapSizeBackward()+1;
394 SetPos(GetPos()-skip);
395 return skip;
396 }
397
398
399 // return true if there is zero-length gap before current position
400 // it might happen only if current position is at the beginning of buffer
HasZeroGapBefore(void)401 bool CSeqVector_CI::HasZeroGapBefore(void)
402 {
403 if ( x_CacheOffset() != 0 ) {
404 return false;
405 }
406 TSeqPos pos = GetPos();
407 if ( IsReverse(m_Strand) ) {
408 pos = x_GetSize() - pos;
409 }
410 return m_SeqMap->HasZeroGapAt(pos, m_Scope.GetScopeOrNull());
411 }
412
413
// Copy assignment.
// Copies the source's scope, seq-map, TSE, strand, coding, case
// conversion, segment iterator, cache position, randomizer and scanned
// range.  If the source has a non-empty cache, its contents (and the
// contents of its backup cache, if any) are copied into this object's
// own buffers; otherwise both caches of this object are reset.
// Self-assignment is a no-op.
CSeqVector_CI& CSeqVector_CI::operator=(const CSeqVector_CI& sv_it)
{
    if ( this != &sv_it ) {
        m_Scope = sv_it.m_Scope;
        m_SeqMap = sv_it.m_SeqMap;
        m_TSE = sv_it.m_TSE;
        m_Strand = sv_it.m_Strand;
        m_Coding = sv_it.GetCoding();
        m_CaseConversion = sv_it.m_CaseConversion;
        m_Seg = sv_it.m_Seg;
        m_CachePos = sv_it.x_CachePos();
        m_Randomizer = sv_it.m_Randomizer;
        m_ScannedStart = sv_it.m_ScannedStart;
        m_ScannedEnd = sv_it.m_ScannedEnd;

        const size_t src_cache_size = sv_it.x_CacheSize();
        if ( !src_cache_size ) {
            // nothing cached in the source: drop our caches too
            x_ResetCache();
            x_ResetBackup();
        }
        else {
            // duplicate the main cache and the position inside it
            x_InitializeCache();
            m_CacheEnd = m_CacheData.get() + src_cache_size;
            m_Cache = m_CacheData.get() + sv_it.x_CacheOffset();
            memcpy(m_CacheData.get(), sv_it.m_CacheData.get(),
                   src_cache_size);

            // duplicate the backup cache, if the source has one
            const size_t src_backup_size = sv_it.x_BackupSize();
            if ( !src_backup_size ) {
                x_ResetBackup();
            }
            else {
                m_BackupPos = sv_it.x_BackupPos();
                m_BackupEnd = m_BackupData.get() + src_backup_size;
                memcpy(m_BackupData.get(), sv_it.m_BackupData.get(),
                       src_backup_size);
            }
        }
    }
    return *this;
}
456
457
x_InitializeCache(void)458 void CSeqVector_CI::x_InitializeCache(void)
459 {
460 if ( !m_Cache ) {
461 m_CacheData.reset(new char[kCacheSize]);
462 m_BackupData.reset(new char[kCacheSize]);
463 m_BackupEnd = m_BackupData.get();
464 m_Cache = m_CacheEnd = m_CacheData.get();
465 }
466 else {
467 x_ResetCache();
468 }
469 }
470
471
472 inline
x_ResizeCache(size_t size)473 void CSeqVector_CI::x_ResizeCache(size_t size)
474 {
475 _ASSERT(size <= kCacheSize);
476 if ( !m_CacheData.get() ) {
477 x_InitializeCache();
478 }
479 m_Cache = m_CacheData.get();
480 m_CacheEnd = m_CacheData.get() + size;
481 }
482
483
x_UpdateCacheUp(TSeqPos pos)484 void CSeqVector_CI::x_UpdateCacheUp(TSeqPos pos)
485 {
486 _ASSERT(pos < x_GetSize());
487
488 TSeqPos segEnd = m_Seg.GetEndPosition();
489 _ASSERT(pos >= m_Seg.GetPosition() && pos < segEnd);
490
491 TSeqPos cache_size = min(kCacheSize, segEnd - pos);
492 x_FillCache(pos, cache_size);
493 m_Cache = m_CacheData.get();
494 _ASSERT(GetPos() == pos);
495 }
496
497
x_UpdateCacheDown(TSeqPos pos)498 void CSeqVector_CI::x_UpdateCacheDown(TSeqPos pos)
499 {
500 _ASSERT(pos < x_GetSize());
501
502 TSeqPos segStart = m_Seg.GetPosition();
503 _ASSERT(pos >= segStart && pos < m_Seg.GetEndPosition());
504
505 TSeqPos cache_offset = min(kCacheSize - 1, pos - segStart);
506 x_FillCache(pos - cache_offset, cache_offset + 1);
507 m_Cache = m_CacheData.get() + cache_offset;
508 _ASSERT(GetPos() == pos);
509 }
510
511
x_FillCache(TSeqPos start,TSeqPos count)512 void CSeqVector_CI::x_FillCache(TSeqPos start, TSeqPos count)
513 {
514 _ASSERT(m_Seg.GetType() != CSeqMap::eSeqEnd);
515 _ASSERT(start >= m_Seg.GetPosition());
516 _ASSERT(start < m_Seg.GetEndPosition());
517
518 x_ResizeCache(count);
519
520 switch ( m_Seg.GetType() ) {
521 case CSeqMap::eSeqData:
522 {
523 const CSeq_data& data = m_Seg.GetRefData();
524 if ( data.IsGap() && m_Seg.GetType() == CSeqMap::eSeqGap ) {
525 // workaround for erroneously split gap Seq-data
526 x_FillCache(start, count);
527 return;
528 }
529
530 TCoding dataCoding = data.Which();
531 TCoding cacheCoding = x_GetCoding(m_Coding, dataCoding);
532 bool reverse = m_Seg.GetRefMinusStrand();
533
534 bool randomize = false;
535 if ( cacheCoding != dataCoding &&
536 cacheCoding == CSeq_data::e_Ncbi2na &&
537 m_Randomizer) {
538 cacheCoding = CSeq_data::e_Ncbi4na;
539 randomize = true;
540 }
541
542 const char* table = 0;
543 if ( cacheCoding != dataCoding || reverse ||
544 m_CaseConversion != eCaseConversion_none ) {
545 table = sx_GetConvertTable(dataCoding, cacheCoding,
546 reverse, m_CaseConversion);
547 if ( !table && cacheCoding != dataCoding ) {
548 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
549 "Incompatible sequence codings: "<<
550 dataCoding<<" -> "<<cacheCoding);
551 }
552 }
553
554 TSeqPos dataPos;
555 if ( reverse ) {
556 // Revert segment offset
557 dataPos = m_Seg.GetRefEndPosition() -
558 (start - m_Seg.GetPosition()) - count;
559 }
560 else {
561 dataPos = m_Seg.GetRefPosition() +
562 (start - m_Seg.GetPosition());
563 }
564
565 switch ( dataCoding ) {
566 case CSeq_data::e_Iupacna:
567 copy_8bit_any(m_Cache, count, data.GetIupacna().Get(), dataPos,
568 table, reverse);
569 break;
570 case CSeq_data::e_Iupacaa:
571 copy_8bit_any(m_Cache, count, data.GetIupacaa().Get(), dataPos,
572 table, reverse);
573 break;
574 case CSeq_data::e_Ncbi2na:
575 copy_2bit_any(m_Cache, count, data.GetNcbi2na().Get(), dataPos,
576 table, reverse);
577 break;
578 case CSeq_data::e_Ncbi4na:
579 copy_4bit_any(m_Cache, count, data.GetNcbi4na().Get(), dataPos,
580 table, reverse);
581 break;
582 case CSeq_data::e_Ncbi8na:
583 copy_8bit_any(m_Cache, count, data.GetNcbi8na().Get(), dataPos,
584 table, reverse);
585 break;
586 case CSeq_data::e_Ncbipna:
587 NCBI_THROW(CSeqVectorException, eCodingError,
588 "Ncbipna conversion not implemented");
589 case CSeq_data::e_Ncbi8aa:
590 copy_8bit_any(m_Cache, count, data.GetNcbi8aa().Get(), dataPos,
591 table, reverse);
592 break;
593 case CSeq_data::e_Ncbieaa:
594 copy_8bit_any(m_Cache, count, data.GetNcbieaa().Get(), dataPos,
595 table, reverse);
596 break;
597 case CSeq_data::e_Ncbipaa:
598 NCBI_THROW(CSeqVectorException, eCodingError,
599 "Ncbipaa conversion not implemented");
600 case CSeq_data::e_Ncbistdaa:
601 copy_8bit_any(m_Cache, count, data.GetNcbistdaa().Get(), dataPos,
602 table, reverse);
603 break;
604 default:
605 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
606 "Invalid data coding: "<<dataCoding);
607 }
608 if ( randomize ) {
609 m_Randomizer->RandomizeData(m_Cache, count, start);
610 }
611 break;
612 }
613 case CSeqMap::eSeqGap:
614 if (m_Coding == CSeq_data::e_Ncbi2na && m_Randomizer) {
615 fill_n(m_Cache, count,
616 sx_GetGapChar(CSeq_data::e_Ncbi4na, eCaseConversion_none));
617 m_Randomizer->RandomizeData(m_Cache, count, start);
618 }
619 else {
620 fill_n(m_Cache, count, GetGapChar());
621 }
622 break;
623 default:
624 NCBI_THROW_FMT(CSeqVectorException, eDataError,
625 "Invalid segment type: "<<m_Seg.GetType());
626 }
627 m_CachePos = start;
628 }
629
630
x_SetPos(TSeqPos pos)631 void CSeqVector_CI::x_SetPos(TSeqPos pos)
632 {
633 TSeqPos size = x_GetSize();
634 if ( pos >= size ) {
635 if ( x_CacheSize() ) {
636 // save current cache as backup
637 x_SwapCache();
638 x_ResetCache();
639 }
640 _ASSERT(x_CacheSize() == 0 && x_CacheOffset() == 0);
641 m_CachePos = size;
642 _ASSERT(GetPos() == size);
643 return;
644 }
645
646 _ASSERT(pos - x_CachePos() >= x_CacheSize());
647
648 // update current segment
649 x_UpdateSeg(pos);
650
651 // save current cache as backup and restore old backup
652 x_SwapCache();
653
654 // check if old backup is suitable
655 TSeqPos cache_offset = pos - x_CachePos();
656 TSeqPos cache_size = x_CacheSize();
657 if ( cache_offset < cache_size ) {
658 // can use backup
659 _ASSERT(x_CacheSize() &&
660 x_CachePos() >= m_Seg.GetPosition() &&
661 x_CacheEndPos() <= m_Seg.GetEndPosition());
662 m_Cache = m_CacheData.get() + cache_offset;
663 }
664 else {
665 // cannot use backup
666 x_InitializeCache();
667 TSeqPos old_pos = x_BackupPos();
668 if ( pos < old_pos && pos >= old_pos - kCacheSize &&
669 m_Seg.GetEndPosition() >= old_pos ) {
670 x_UpdateCacheDown(old_pos - 1);
671 cache_offset = pos - x_CachePos();
672 m_Cache = m_CacheData.get() + cache_offset;
673 }
674 else {
675 x_UpdateCacheUp(pos);
676 }
677 }
678 _ASSERT(x_CacheOffset() < x_CacheSize());
679 _ASSERT(GetPos() == pos);
680 }
681
682
x_UpdateSeg(TSeqPos pos)683 void CSeqVector_CI::x_UpdateSeg(TSeqPos pos)
684 {
685 if ( m_Seg.IsInvalid() ) {
686 x_InitSeg(pos);
687 }
688 else if ( m_Seg.GetPosition() > pos ) {
689 // segment is ahead
690 do {
691 x_DecSeg();
692 } while ( m_Seg && m_Seg.GetLength() == 0 ); // skip 0 length segments
693 if ( !m_Seg || m_Seg.GetPosition() > pos ) {
694 // too far
695 x_InitSeg(pos);
696 }
697 }
698 else if ( m_Seg.GetEndPosition() <= pos ) {
699 // segment is behind
700 do {
701 x_IncSeg();
702 } while ( m_Seg && m_Seg.GetLength() == 0 ); // skip 0 length segments
703 if ( !m_Seg || m_Seg.GetEndPosition() <= pos ) {
704 // too far
705 x_InitSeg(pos);
706 }
707 }
708 if ( !m_Seg && pos == x_GetSize() ) {
709 // it's ok to position to the very end
710 return;
711 }
712 if ( !m_Seg || pos<m_Seg.GetPosition() || pos>=m_Seg.GetEndPosition() ) {
713 NCBI_THROW_FMT(CSeqVectorException, eDataError,
714 "CSeqVector_CI: cannot locate segment at "<<pos);
715 }
716 _ASSERT(m_Seg && pos>=m_Seg.GetPosition() && pos<m_Seg.GetEndPosition());
717 }
718
719
GetSeqData(string & buffer,TSeqPos count)720 void CSeqVector_CI::GetSeqData(string& buffer, TSeqPos count)
721 {
722 buffer.erase();
723 TSeqPos pos = GetPos();
724 _ASSERT(pos <= x_GetSize());
725 count = min(count, x_GetSize() - pos);
726 if ( !count ) {
727 return;
728 }
729
730 if ( m_TSE && !CanGetRange(pos, pos+count) ) {
731 NCBI_THROW_FMT(CSeqVectorException, eDataError,
732 "CSeqVector_CI::GetSeqData: "
733 "cannot get seq-data in range: "
734 <<pos<<"-"<<pos+count);
735 }
736
737 buffer.reserve(count);
738 while ( count ) {
739 TCache_I cache = m_Cache;
740 TCache_I cache_end = m_CacheEnd;
741 TSeqPos chunk_count = min(count, TSeqPos(cache_end - cache));
742 _ASSERT(chunk_count > 0);
743 TCache_I chunk_end = cache + chunk_count;
744 buffer.append(cache, chunk_end);
745 count -= chunk_count;
746 //if ( count == 0 ) break;
747 if ( chunk_end == cache_end ) {
748 x_NextCacheSeg();
749 }
750 else {
751 m_Cache = chunk_end;
752 }
753 }
754 }
755
756
x_NextCacheSeg()757 void CSeqVector_CI::x_NextCacheSeg()
758 {
759 _ASSERT(m_SeqMap);
760 TSeqPos pos = x_CacheEndPos();
761 TSeqPos size = x_GetSize();
762 if ( pos >= size ) {
763 if ( x_CachePos() < pos ) {
764 x_SwapCache();
765 x_ResetCache();
766 m_CachePos = pos;
767 return;
768 }
769 else {
770 // Can not go further
771 NCBI_THROW(CSeqVectorException, eOutOfRange,
772 "Can not update cache: iterator beyond end");
773 }
774 }
775 // save current cache in backup
776 _ASSERT(x_CacheSize());
777 x_SwapCache();
778 // update segment if needed
779 x_UpdateSeg(pos);
780 if ( !m_Seg ) {
781 // end of sequence
782 NCBI_THROW_FMT(CSeqVectorException, eDataError,
783 "CSeqVector_CI: invalid sequence length: "
784 <<pos<<" <> "<<size);
785 }
786 // Try to re-use backup cache
787 if ( pos < x_CacheEndPos() && pos >= x_CachePos() ) {
788 m_Cache = m_CacheData.get() + pos - x_CachePos();
789 }
790 else {
791 // can not use backup cache
792 x_ResetCache();
793 x_UpdateCacheUp(pos);
794 _ASSERT(GetPos() == pos);
795 _ASSERT(x_CacheSize());
796 _ASSERT(x_CachePos() == pos);
797 }
798 }
799
800
x_PrevCacheSeg()801 void CSeqVector_CI::x_PrevCacheSeg()
802 {
803 _ASSERT(m_SeqMap);
804 TSeqPos pos = x_CachePos();
805 if ( pos-- == 0 ) {
806 // Can not go further
807 NCBI_THROW(CSeqVectorException, eOutOfRange,
808 "Can not update cache: iterator beyond start");
809 }
810 TSeqPos size = x_GetSize();
811 // save current cache in backup
812 x_SwapCache();
813 // update segment if needed
814 if ( m_Seg.IsInvalid() ) {
815 x_InitSeg(pos);
816 }
817 else {
818 while ( m_Seg && m_Seg.GetPosition() > pos ) {
819 x_DecSeg();
820 }
821 }
822 if ( !m_Seg ) {
823 NCBI_THROW_FMT(CSeqVectorException, eDataError,
824 "CSeqVector_CI: invalid sequence length: "
825 <<pos<<" <> "<<size);
826 }
827 // Try to re-use backup cache
828 if ( pos >= x_CachePos() && pos < x_CacheEndPos() ) {
829 m_Cache = m_CacheData.get() + pos - x_CachePos();
830 }
831 else {
832 // can not use backup cache
833 x_ResetCache();
834 x_UpdateCacheDown(pos);
835 _ASSERT(GetPos() == pos);
836 _ASSERT(x_CacheSize());
837 _ASSERT(x_CacheEndPos() == pos+1);
838 }
839 }
840
841
SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer)842 void CSeqVector_CI::SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer)
843 {
844 if ( randomizer != m_Randomizer ) {
845 TSeqPos pos = GetPos();
846 m_Randomizer = randomizer;
847 x_ResetBackup();
848 if ( x_CacheSize() ) {
849 x_ResetCache();
850 if ( m_Seg ) {
851 x_SetPos(pos);
852 }
853 }
854 }
855 }
856
857
x_InitRandomizer(CRandom & random_gen)858 void CSeqVector_CI::x_InitRandomizer(CRandom& random_gen)
859 {
860 CRef<INcbi2naRandomizer> randomizer(new CNcbi2naRandomizer(random_gen));
861 SetRandomizeAmbiguities(randomizer);
862 }
863
864
SetRandomizeAmbiguities(void)865 void CSeqVector_CI::SetRandomizeAmbiguities(void)
866 {
867 CRandom random_gen;
868 x_InitRandomizer(random_gen);
869 }
870
871
SetRandomizeAmbiguities(Uint4 seed)872 void CSeqVector_CI::SetRandomizeAmbiguities(Uint4 seed)
873 {
874 CRandom random_gen(seed);
875 x_InitRandomizer(random_gen);
876 }
877
878
SetRandomizeAmbiguities(CRandom & random_gen)879 void CSeqVector_CI::SetRandomizeAmbiguities(CRandom& random_gen)
880 {
881 x_InitRandomizer(random_gen);
882 }
883
884
SetNoAmbiguities(void)885 void CSeqVector_CI::SetNoAmbiguities(void)
886 {
887 SetRandomizeAmbiguities(null);
888 }
889
890
891 END_SCOPE(objects)
892 END_NCBI_SCOPE
893