1 /*  $Id: seq_vector_ci.cpp 406788 2013-07-16 14:29:35Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 *   Seq-vector iterator
30 *
31 */
32 
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/seq_vector.hpp>
36 #include <objmgr/seq_vector_ci.hpp>
37 #include <objects/seq/NCBI8aa.hpp>
38 #include <objects/seq/NCBIpaa.hpp>
39 #include <objects/seq/NCBIstdaa.hpp>
40 #include <objects/seq/NCBIeaa.hpp>
41 #include <objects/seq/NCBIpna.hpp>
42 #include <objects/seq/NCBI8na.hpp>
43 #include <objects/seq/NCBI4na.hpp>
44 #include <objects/seq/NCBI2na.hpp>
45 #include <objects/seq/IUPACaa.hpp>
46 #include <objects/seq/IUPACna.hpp>
47 #include <algorithm>
48 #include <objmgr/impl/seq_vector_cvt.hpp>
49 #include <objmgr/objmgr_exception.hpp>
50 #include <util/random_gen.hpp>
51 
52 BEGIN_NCBI_SCOPE
53 BEGIN_SCOPE(objects)
54 
55 
// Number of characters allocated for each of the two buffers (primary cache
// and backup cache) created by CSeqVector_CI::x_InitializeCache(); it is also
// the upper bound on how many positions a single cache fill may cover.
static const TSeqPos kCacheSize = 1024;
57 
ThrowOutOfRangeSeq_inst(size_t pos)58 void ThrowOutOfRangeSeq_inst(size_t pos)
59 {
60     NCBI_THROW_FMT(CSeqVectorException, eOutOfRange,
61                    "reference out of range of Seq-inst data: "<<pos);
62 }
63 
64 // CSeqVector_CI::
65 
66 
CSeqVector_CI(void)67 CSeqVector_CI::CSeqVector_CI(void)
68     : m_Strand(eNa_strand_unknown),
69       m_Coding(CSeq_data::e_not_set),
70       m_CaseConversion(eCaseConversion_none),
71       m_Cache(0),
72       m_CachePos(0),
73       m_CacheData(),
74       m_CacheEnd(0),
75       m_BackupPos(0),
76       m_BackupData(),
77       m_BackupEnd(0),
78       m_ScannedStart(0),
79       m_ScannedEnd(0)
80 {
81 }
82 
83 
~CSeqVector_CI(void)84 CSeqVector_CI::~CSeqVector_CI(void)
85 {
86 }
87 
88 
// Copy constructor.
// First puts the object into the same empty state the default constructor
// produces (sharing only the source's randomizer), then lets operator= do
// the real copy, including any cached sequence data.
//   sv_it - iterator to copy from.
CSeqVector_CI::CSeqVector_CI(const CSeqVector_CI& sv_it)
    : m_Strand(eNa_strand_unknown), m_Coding(CSeq_data::e_not_set),
      m_CaseConversion(eCaseConversion_none),
      m_Cache(0), m_CachePos(0), m_CacheData(), m_CacheEnd(0),
      m_BackupPos(0), m_BackupData(), m_BackupEnd(0),
      m_Randomizer(sv_it.m_Randomizer),
      m_ScannedStart(0), m_ScannedEnd(0)
{
    operator=(sv_it);
}
106 
107 
CSeqVector_CI(const CSeqVector & seq_vector,TSeqPos pos)108 CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos)
109     : m_Scope(seq_vector.m_Scope),
110       m_SeqMap(seq_vector.m_SeqMap),
111       m_TSE(seq_vector.m_TSE),
112       m_Strand(seq_vector.m_Strand),
113       m_Coding(seq_vector.m_Coding),
114       m_CaseConversion(eCaseConversion_none),
115       m_Cache(0),
116       m_CachePos(0),
117       m_CacheData(),
118       m_CacheEnd(0),
119       m_BackupPos(0),
120       m_BackupData(),
121       m_BackupEnd(0),
122       m_Randomizer(seq_vector.m_Randomizer),
123       m_ScannedStart(0),
124       m_ScannedEnd(0)
125 {
126     x_SetPos(pos);
127 }
128 
129 
CSeqVector_CI(const CSeqVector & seq_vector,TSeqPos pos,ECaseConversion case_cvt)130 CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos,
131                              ECaseConversion case_cvt)
132     : m_Scope(seq_vector.m_Scope),
133       m_SeqMap(seq_vector.m_SeqMap),
134       m_TSE(seq_vector.m_TSE),
135       m_Strand(seq_vector.m_Strand),
136       m_Coding(seq_vector.m_Coding),
137       m_CaseConversion(case_cvt),
138       m_Cache(0),
139       m_CachePos(0),
140       m_CacheData(),
141       m_CacheEnd(0),
142       m_BackupPos(0),
143       m_BackupData(),
144       m_BackupEnd(0),
145       m_Randomizer(seq_vector.m_Randomizer),
146       m_ScannedStart(0),
147       m_ScannedEnd(0)
148 {
149     x_SetPos(pos);
150 }
151 
152 
CSeqVector_CI(const CSeqVector & seq_vector,ENa_strand strand,TSeqPos pos,ECaseConversion case_cvt)153 CSeqVector_CI::CSeqVector_CI(const CSeqVector& seq_vector, ENa_strand strand,
154                              TSeqPos pos, ECaseConversion case_cvt)
155     : m_Scope(seq_vector.m_Scope),
156       m_SeqMap(seq_vector.m_SeqMap),
157       m_TSE(seq_vector.m_TSE),
158       m_Strand(strand),
159       m_Coding(seq_vector.m_Coding),
160       m_CaseConversion(case_cvt),
161       m_Cache(0),
162       m_CachePos(0),
163       m_CacheData(),
164       m_CacheEnd(0),
165       m_BackupPos(0),
166       m_BackupData(),
167       m_BackupEnd(0),
168       m_Randomizer(seq_vector.m_Randomizer),
169       m_ScannedStart(0),
170       m_ScannedEnd(0)
171 {
172     x_SetPos(pos);
173 }
174 
175 
x_SetVector(CSeqVector & seq_vector)176 void CSeqVector_CI::x_SetVector(CSeqVector& seq_vector)
177 {
178     if ( m_SeqMap ) {
179         // reset old values
180         m_Seg = CSeqMap_CI();
181         x_ResetCache();
182         x_ResetBackup();
183     }
184 
185     m_Scope  = seq_vector.m_Scope;
186     m_SeqMap = seq_vector.m_SeqMap;
187     m_TSE = seq_vector.m_TSE;
188     m_Strand = seq_vector.m_Strand;
189     m_Coding = seq_vector.m_Coding;
190     m_CachePos = seq_vector.size();
191     m_Randomizer = seq_vector.m_Randomizer;
192     m_ScannedStart = m_ScannedEnd = 0;
193 }
194 
195 
196 inline
x_GetSize(void) const197 TSeqPos CSeqVector_CI::x_GetSize(void) const
198 {
199     return m_SeqMap->GetLength(m_Scope.GetScopeOrNull());
200 }
201 
202 
// Upper bound on the length of the range that x_CheckForward() and
// x_CheckBackward() pass to CanGetRange() in a single look-ahead probe.
static const TSeqPos kMaxPreloadBases = 10*1000*1000;
204 
205 
CanGetRange(TSeqPos start,TSeqPos stop)206 bool CSeqVector_CI::CanGetRange(TSeqPos start, TSeqPos stop)
207 {
208     try {
209         if ( stop < start ) {
210             return false;
211         }
212         SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
213         sel.SetStrand(m_Strand).SetRange(start, stop-start);
214         sel.SetLinkUsedTSE(m_TSE).SetLinkUsedTSE(m_UsedTSEs);
215         if ( !m_SeqMap->CanResolveRange(m_Scope.GetScopeOrNull(), sel) ) {
216             return false;
217         }
218         if ( start > m_ScannedEnd || stop < m_ScannedStart ) {
219             m_ScannedStart = start;
220             m_ScannedEnd = stop;
221         }
222         else {
223             m_ScannedStart = min(m_ScannedStart, start);
224             m_ScannedEnd = max(m_ScannedEnd, stop);
225         }
226         return true;
227     }
228     catch ( exception& /*ignored*/ ) {
229         return false;
230     }
231 }
232 
233 
x_CheckForward(void)234 void CSeqVector_CI::x_CheckForward(void)
235 {
236     TSeqPos scanned = m_ScannedEnd - m_ScannedStart;
237     TSeqPos more = x_GetSize() - m_ScannedEnd;
238     TSeqPos check = min(min(scanned, more), kMaxPreloadBases);
239     if ( check > 0 ) {
240         CanGetRange(m_ScannedEnd, m_ScannedEnd+check);
241     }
242 }
243 
244 
x_CheckBackward(void)245 void CSeqVector_CI::x_CheckBackward(void)
246 {
247     TSeqPos scanned = m_ScannedEnd - m_ScannedStart;
248     TSeqPos more = m_ScannedStart;
249     TSeqPos check = min(min(scanned, more), kMaxPreloadBases);
250     if ( check > 0 ) {
251         CanGetRange(m_ScannedStart-check, m_ScannedStart);
252     }
253 }
254 
255 
256 inline
x_InitSeg(TSeqPos pos)257 void CSeqVector_CI::x_InitSeg(TSeqPos pos)
258 {
259     SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
260     sel.SetStrand(m_Strand).SetLinkUsedTSE(m_TSE);
261     if ( pos == m_ScannedEnd ) {
262         x_CheckForward();
263     }
264     else if ( pos < m_ScannedStart || pos > m_ScannedEnd ) {
265         m_ScannedStart = m_ScannedEnd = pos;
266     }
267     m_Seg = CSeqMap_CI(m_SeqMap, m_Scope.GetScopeOrNull(), sel, pos);
268     m_ScannedStart = min(m_ScannedStart, m_Seg.GetPosition());
269     m_ScannedEnd = max(m_ScannedEnd, m_Seg.GetEndPosition());
270 }
271 
272 
273 inline
x_IncSeg(void)274 void CSeqVector_CI::x_IncSeg(void)
275 {
276     if ( m_Seg.GetEndPosition() == m_ScannedEnd ) {
277         x_CheckForward();
278     }
279     ++m_Seg;
280     m_ScannedEnd = max(m_ScannedEnd, m_Seg.GetEndPosition());
281 }
282 
283 
284 inline
x_DecSeg(void)285 void CSeqVector_CI::x_DecSeg(void)
286 {
287     if ( m_Seg.GetPosition() == m_ScannedStart ) {
288         x_CheckBackward();
289     }
290     --m_Seg;
291     m_ScannedStart = min(m_ScannedStart, m_Seg.GetPosition());
292 }
293 
294 
x_ThrowOutOfRange(void) const295 void CSeqVector_CI::x_ThrowOutOfRange(void) const
296 {
297     NCBI_THROW_FMT(CSeqVectorException, eOutOfRange,
298                    "iterator out of range: "<<GetPos()<<">="<<x_GetSize());
299 }
300 
301 
SetCoding(TCoding coding)302 void CSeqVector_CI::SetCoding(TCoding coding)
303 {
304     if ( m_Coding != coding ) {
305         TSeqPos pos = GetPos();
306         m_Coding = coding;
307         x_ResetBackup();
308         if ( x_CacheSize() ) {
309             x_ResetCache();
310             if ( m_Seg ) {
311                 x_SetPos(pos);
312             }
313         }
314     }
315 }
316 
317 
SetStrand(ENa_strand strand)318 void CSeqVector_CI::SetStrand(ENa_strand strand)
319 {
320     if ( IsReverse(m_Strand) == IsReverse(strand) ) {
321         m_Strand = strand;
322         return;
323     }
324 
325     TSeqPos pos = GetPos();
326     m_Strand = strand;
327     x_ResetBackup();
328     if ( x_CacheSize() ) {
329         x_ResetCache();
330         if ( m_Seg ) {
331             m_Seg = CSeqMap_CI();
332             x_SetPos(pos);
333         }
334     }
335 }
336 
337 
338 // returns gap Seq-data object ref
339 // returns null if it's not a gap or an unspecified gap
GetGapSeq_literal(void) const340 CConstRef<CSeq_literal> CSeqVector_CI::GetGapSeq_literal(void) const
341 {
342     if ( !IsInGap() ) {
343         return null;
344     }
345     return m_Seg.GetRefGapLiteral();
346 }
347 
348 
349 // returns number of gap symbols ahead including current symbol
350 // returns 0 if current position is not in gap
GetGapSizeForward(void) const351 TSeqPos CSeqVector_CI::GetGapSizeForward(void) const
352 {
353     if ( !IsInGap() ) {
354         return 0;
355     }
356     return m_Seg.GetEndPosition() - GetPos();
357 }
358 
359 
360 // returns number of gap symbols before current symbol
361 // returns 0 if current position is not in gap
GetGapSizeBackward(void) const362 TSeqPos CSeqVector_CI::GetGapSizeBackward(void) const
363 {
364     if ( !IsInGap() ) {
365         return 0;
366     }
367     return GetPos() - m_Seg.GetPosition();
368 }
369 
370 
371 // skip current gap forward
372 // returns number of skipped gap symbols
373 // does nothing and returns 0 if current position is not in gap
SkipGap(void)374 TSeqPos CSeqVector_CI::SkipGap(void)
375 {
376     if ( !IsInGap() ) {
377         return 0;
378     }
379     TSeqPos skip = GetGapSizeForward();
380     SetPos(GetPos()+skip);
381     return skip;
382 }
383 
384 
385 // skip current gap backward
386 // returns number of skipped gap symbols
387 // does nothing and returns 0 if current position is not in gap
SkipGapBackward(void)388 TSeqPos CSeqVector_CI::SkipGapBackward(void)
389 {
390     if ( !IsInGap() ) {
391         return 0;
392     }
393     TSeqPos skip = GetGapSizeBackward()+1;
394     SetPos(GetPos()-skip);
395     return skip;
396 }
397 
398 
399 // return true if there is zero-length gap before current position
400 // it might happen only if current position is at the beginning of buffer
HasZeroGapBefore(void)401 bool CSeqVector_CI::HasZeroGapBefore(void)
402 {
403     if ( x_CacheOffset() != 0 ) {
404         return false;
405     }
406     TSeqPos pos = GetPos();
407     if ( IsReverse(m_Strand) ) {
408         pos = x_GetSize() - pos;
409     }
410     return m_SeqMap->HasZeroGapAt(pos, m_Scope.GetScopeOrNull());
411 }
412 
413 
// Copy assignment.
// Copies the source's scope, map, TSE, strand, coding, case conversion,
// segment iterator, cache position, randomizer and scanned range.  If the
// source holds cached data, the cache (and the backup cache, when non-empty)
// is duplicated byte by byte; otherwise both caches of this object are
// reset.  Self-assignment is a no-op.
//   sv_it - iterator to copy from.
// Returns *this.
CSeqVector_CI& CSeqVector_CI::operator=(const CSeqVector_CI& sv_it)
{
    if ( &sv_it == this ) {
        return *this;
    }

    m_Scope = sv_it.m_Scope;
    m_SeqMap = sv_it.m_SeqMap;
    m_TSE = sv_it.m_TSE;
    m_Strand = sv_it.m_Strand;
    m_Coding = sv_it.GetCoding();
    m_CaseConversion = sv_it.m_CaseConversion;
    m_Seg = sv_it.m_Seg;
    m_CachePos = sv_it.x_CachePos();
    m_Randomizer = sv_it.m_Randomizer;
    m_ScannedStart = sv_it.m_ScannedStart;
    m_ScannedEnd = sv_it.m_ScannedEnd;

    const size_t src_cache_len = sv_it.x_CacheSize();
    if ( !src_cache_len ) {
        // nothing cached in the source
        x_ResetCache();
        x_ResetBackup();
        return *this;
    }

    // duplicate the primary cache, keeping the same offset inside it
    x_InitializeCache();
    m_CacheEnd = m_CacheData.get() + src_cache_len;
    m_Cache = m_CacheData.get() + sv_it.x_CacheOffset();
    memcpy(m_CacheData.get(), sv_it.m_CacheData.get(), src_cache_len);

    // duplicate the backup cache when the source has one
    const size_t src_backup_len = sv_it.x_BackupSize();
    if ( !src_backup_len ) {
        x_ResetBackup();
    }
    else {
        m_BackupPos = sv_it.x_BackupPos();
        m_BackupEnd = m_BackupData.get() + src_backup_len;
        memcpy(m_BackupData.get(), sv_it.m_BackupData.get(), src_backup_len);
    }
    return *this;
}
456 
457 
x_InitializeCache(void)458 void CSeqVector_CI::x_InitializeCache(void)
459 {
460     if ( !m_Cache ) {
461         m_CacheData.reset(new char[kCacheSize]);
462         m_BackupData.reset(new char[kCacheSize]);
463         m_BackupEnd = m_BackupData.get();
464         m_Cache = m_CacheEnd = m_CacheData.get();
465     }
466     else {
467         x_ResetCache();
468     }
469 }
470 
471 
472 inline
x_ResizeCache(size_t size)473 void CSeqVector_CI::x_ResizeCache(size_t size)
474 {
475     _ASSERT(size <= kCacheSize);
476     if ( !m_CacheData.get() ) {
477         x_InitializeCache();
478     }
479     m_Cache = m_CacheData.get();
480     m_CacheEnd = m_CacheData.get() + size;
481 }
482 
483 
x_UpdateCacheUp(TSeqPos pos)484 void CSeqVector_CI::x_UpdateCacheUp(TSeqPos pos)
485 {
486     _ASSERT(pos < x_GetSize());
487 
488     TSeqPos segEnd = m_Seg.GetEndPosition();
489     _ASSERT(pos >= m_Seg.GetPosition() && pos < segEnd);
490 
491     TSeqPos cache_size = min(kCacheSize, segEnd - pos);
492     x_FillCache(pos, cache_size);
493     m_Cache = m_CacheData.get();
494     _ASSERT(GetPos() == pos);
495 }
496 
497 
x_UpdateCacheDown(TSeqPos pos)498 void CSeqVector_CI::x_UpdateCacheDown(TSeqPos pos)
499 {
500     _ASSERT(pos < x_GetSize());
501 
502     TSeqPos segStart = m_Seg.GetPosition();
503     _ASSERT(pos >= segStart && pos < m_Seg.GetEndPosition());
504 
505     TSeqPos cache_offset = min(kCacheSize - 1, pos - segStart);
506     x_FillCache(pos - cache_offset, cache_offset + 1);
507     m_Cache = m_CacheData.get() + cache_offset;
508     _ASSERT(GetPos() == pos);
509 }
510 
511 
x_FillCache(TSeqPos start,TSeqPos count)512 void CSeqVector_CI::x_FillCache(TSeqPos start, TSeqPos count)
513 {
514     _ASSERT(m_Seg.GetType() != CSeqMap::eSeqEnd);
515     _ASSERT(start >= m_Seg.GetPosition());
516     _ASSERT(start < m_Seg.GetEndPosition());
517 
518     x_ResizeCache(count);
519 
520     switch ( m_Seg.GetType() ) {
521     case CSeqMap::eSeqData:
522     {
523         const CSeq_data& data = m_Seg.GetRefData();
524         if ( data.IsGap() && m_Seg.GetType() == CSeqMap::eSeqGap ) {
525             // workaround for erroneously split gap Seq-data
526             x_FillCache(start, count);
527             return;
528         }
529 
530         TCoding dataCoding = data.Which();
531         TCoding cacheCoding = x_GetCoding(m_Coding, dataCoding);
532         bool reverse = m_Seg.GetRefMinusStrand();
533 
534         bool randomize = false;
535         if ( cacheCoding != dataCoding &&
536              cacheCoding == CSeq_data::e_Ncbi2na &&
537              m_Randomizer) {
538             cacheCoding = CSeq_data::e_Ncbi4na;
539             randomize = true;
540         }
541 
542         const char* table = 0;
543         if ( cacheCoding != dataCoding || reverse ||
544              m_CaseConversion != eCaseConversion_none ) {
545             table = sx_GetConvertTable(dataCoding, cacheCoding,
546                                        reverse, m_CaseConversion);
547             if ( !table && cacheCoding != dataCoding ) {
548                 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
549                                "Incompatible sequence codings: "<<
550                                dataCoding<<" -> "<<cacheCoding);
551             }
552         }
553 
554         TSeqPos dataPos;
555         if ( reverse ) {
556             // Revert segment offset
557             dataPos = m_Seg.GetRefEndPosition() -
558                 (start - m_Seg.GetPosition()) - count;
559         }
560         else {
561             dataPos = m_Seg.GetRefPosition() +
562                 (start - m_Seg.GetPosition());
563         }
564 
565         switch ( dataCoding ) {
566         case CSeq_data::e_Iupacna:
567             copy_8bit_any(m_Cache, count, data.GetIupacna().Get(), dataPos,
568                           table, reverse);
569             break;
570         case CSeq_data::e_Iupacaa:
571             copy_8bit_any(m_Cache, count, data.GetIupacaa().Get(), dataPos,
572                           table, reverse);
573             break;
574         case CSeq_data::e_Ncbi2na:
575             copy_2bit_any(m_Cache, count, data.GetNcbi2na().Get(), dataPos,
576                             table, reverse);
577             break;
578         case CSeq_data::e_Ncbi4na:
579             copy_4bit_any(m_Cache, count, data.GetNcbi4na().Get(), dataPos,
580                           table, reverse);
581             break;
582         case CSeq_data::e_Ncbi8na:
583             copy_8bit_any(m_Cache, count, data.GetNcbi8na().Get(), dataPos,
584                           table, reverse);
585             break;
586         case CSeq_data::e_Ncbipna:
587             NCBI_THROW(CSeqVectorException, eCodingError,
588                        "Ncbipna conversion not implemented");
589         case CSeq_data::e_Ncbi8aa:
590             copy_8bit_any(m_Cache, count, data.GetNcbi8aa().Get(), dataPos,
591                           table, reverse);
592             break;
593         case CSeq_data::e_Ncbieaa:
594             copy_8bit_any(m_Cache, count, data.GetNcbieaa().Get(), dataPos,
595                           table, reverse);
596             break;
597         case CSeq_data::e_Ncbipaa:
598             NCBI_THROW(CSeqVectorException, eCodingError,
599                        "Ncbipaa conversion not implemented");
600         case CSeq_data::e_Ncbistdaa:
601             copy_8bit_any(m_Cache, count, data.GetNcbistdaa().Get(), dataPos,
602                           table, reverse);
603             break;
604         default:
605             NCBI_THROW_FMT(CSeqVectorException, eCodingError,
606                            "Invalid data coding: "<<dataCoding);
607         }
608         if ( randomize ) {
609             m_Randomizer->RandomizeData(m_Cache, count, start);
610         }
611         break;
612     }
613     case CSeqMap::eSeqGap:
614         if (m_Coding == CSeq_data::e_Ncbi2na  &&  m_Randomizer) {
615             fill_n(m_Cache, count,
616                    sx_GetGapChar(CSeq_data::e_Ncbi4na, eCaseConversion_none));
617             m_Randomizer->RandomizeData(m_Cache, count, start);
618         }
619         else {
620             fill_n(m_Cache, count, GetGapChar());
621         }
622         break;
623     default:
624         NCBI_THROW_FMT(CSeqVectorException, eDataError,
625                        "Invalid segment type: "<<m_Seg.GetType());
626     }
627     m_CachePos = start;
628 }
629 
630 
x_SetPos(TSeqPos pos)631 void CSeqVector_CI::x_SetPos(TSeqPos pos)
632 {
633     TSeqPos size = x_GetSize();
634     if ( pos >= size ) {
635         if ( x_CacheSize() ) {
636             // save current cache as backup
637             x_SwapCache();
638             x_ResetCache();
639         }
640         _ASSERT(x_CacheSize() == 0 && x_CacheOffset() == 0);
641         m_CachePos = size;
642         _ASSERT(GetPos() == size);
643         return;
644     }
645 
646     _ASSERT(pos - x_CachePos() >= x_CacheSize());
647 
648     // update current segment
649     x_UpdateSeg(pos);
650 
651     // save current cache as backup and restore old backup
652     x_SwapCache();
653 
654     // check if old backup is suitable
655     TSeqPos cache_offset = pos - x_CachePos();
656     TSeqPos cache_size = x_CacheSize();
657     if ( cache_offset < cache_size ) {
658         // can use backup
659         _ASSERT(x_CacheSize() &&
660                 x_CachePos() >= m_Seg.GetPosition() &&
661                 x_CacheEndPos() <= m_Seg.GetEndPosition());
662         m_Cache = m_CacheData.get() + cache_offset;
663     }
664     else {
665         // cannot use backup
666         x_InitializeCache();
667         TSeqPos old_pos = x_BackupPos();
668         if ( pos < old_pos && pos >= old_pos - kCacheSize &&
669              m_Seg.GetEndPosition() >= old_pos ) {
670             x_UpdateCacheDown(old_pos - 1);
671             cache_offset = pos - x_CachePos();
672             m_Cache = m_CacheData.get() + cache_offset;
673         }
674         else {
675             x_UpdateCacheUp(pos);
676         }
677     }
678     _ASSERT(x_CacheOffset() < x_CacheSize());
679     _ASSERT(GetPos() == pos);
680 }
681 
682 
x_UpdateSeg(TSeqPos pos)683 void CSeqVector_CI::x_UpdateSeg(TSeqPos pos)
684 {
685     if ( m_Seg.IsInvalid() ) {
686         x_InitSeg(pos);
687     }
688     else if ( m_Seg.GetPosition() > pos ) {
689         // segment is ahead
690         do {
691             x_DecSeg();
692         } while ( m_Seg && m_Seg.GetLength() == 0 ); // skip 0 length segments
693         if ( !m_Seg || m_Seg.GetPosition() > pos ) {
694             // too far
695             x_InitSeg(pos);
696         }
697     }
698     else if ( m_Seg.GetEndPosition() <= pos ) {
699         // segment is behind
700         do {
701             x_IncSeg();
702         } while ( m_Seg && m_Seg.GetLength() == 0 ); // skip 0 length segments
703         if ( !m_Seg || m_Seg.GetEndPosition() <= pos ) {
704             // too far
705             x_InitSeg(pos);
706         }
707     }
708     if ( !m_Seg && pos == x_GetSize() ) {
709         // it's ok to position to the very end
710         return;
711     }
712     if ( !m_Seg || pos<m_Seg.GetPosition() || pos>=m_Seg.GetEndPosition() ) {
713         NCBI_THROW_FMT(CSeqVectorException, eDataError,
714                        "CSeqVector_CI: cannot locate segment at "<<pos);
715     }
716     _ASSERT(m_Seg && pos>=m_Seg.GetPosition() && pos<m_Seg.GetEndPosition());
717 }
718 
719 
GetSeqData(string & buffer,TSeqPos count)720 void CSeqVector_CI::GetSeqData(string& buffer, TSeqPos count)
721 {
722     buffer.erase();
723     TSeqPos pos = GetPos();
724     _ASSERT(pos <= x_GetSize());
725     count = min(count, x_GetSize() - pos);
726     if ( !count ) {
727         return;
728     }
729 
730     if ( m_TSE && !CanGetRange(pos, pos+count) ) {
731         NCBI_THROW_FMT(CSeqVectorException, eDataError,
732                        "CSeqVector_CI::GetSeqData: "
733                        "cannot get seq-data in range: "
734                        <<pos<<"-"<<pos+count);
735     }
736 
737     buffer.reserve(count);
738     while ( count ) {
739         TCache_I cache = m_Cache;
740         TCache_I cache_end = m_CacheEnd;
741         TSeqPos chunk_count = min(count, TSeqPos(cache_end - cache));
742         _ASSERT(chunk_count > 0);
743         TCache_I chunk_end = cache + chunk_count;
744         buffer.append(cache, chunk_end);
745         count -= chunk_count;
746         //if ( count == 0 ) break;
747         if ( chunk_end == cache_end ) {
748             x_NextCacheSeg();
749         }
750         else {
751             m_Cache = chunk_end;
752         }
753     }
754 }
755 
756 
x_NextCacheSeg()757 void CSeqVector_CI::x_NextCacheSeg()
758 {
759     _ASSERT(m_SeqMap);
760     TSeqPos pos = x_CacheEndPos();
761     TSeqPos size = x_GetSize();
762     if ( pos >= size ) {
763         if ( x_CachePos() < pos ) {
764             x_SwapCache();
765             x_ResetCache();
766             m_CachePos = pos;
767             return;
768         }
769         else {
770             // Can not go further
771             NCBI_THROW(CSeqVectorException, eOutOfRange,
772                        "Can not update cache: iterator beyond end");
773         }
774     }
775     // save current cache in backup
776     _ASSERT(x_CacheSize());
777     x_SwapCache();
778     // update segment if needed
779     x_UpdateSeg(pos);
780     if ( !m_Seg ) {
781         // end of sequence
782         NCBI_THROW_FMT(CSeqVectorException, eDataError,
783                        "CSeqVector_CI: invalid sequence length: "
784                        <<pos<<" <> "<<size);
785     }
786     // Try to re-use backup cache
787     if ( pos < x_CacheEndPos() && pos >= x_CachePos() ) {
788         m_Cache = m_CacheData.get() + pos - x_CachePos();
789     }
790     else {
791         // can not use backup cache
792         x_ResetCache();
793         x_UpdateCacheUp(pos);
794         _ASSERT(GetPos() == pos);
795         _ASSERT(x_CacheSize());
796         _ASSERT(x_CachePos() == pos);
797     }
798 }
799 
800 
x_PrevCacheSeg()801 void CSeqVector_CI::x_PrevCacheSeg()
802 {
803     _ASSERT(m_SeqMap);
804     TSeqPos pos = x_CachePos();
805     if ( pos-- == 0 ) {
806         // Can not go further
807         NCBI_THROW(CSeqVectorException, eOutOfRange,
808                    "Can not update cache: iterator beyond start");
809     }
810     TSeqPos size = x_GetSize();
811     // save current cache in backup
812     x_SwapCache();
813     // update segment if needed
814     if ( m_Seg.IsInvalid() ) {
815         x_InitSeg(pos);
816     }
817     else {
818         while ( m_Seg && m_Seg.GetPosition() > pos ) {
819             x_DecSeg();
820         }
821     }
822     if ( !m_Seg ) {
823         NCBI_THROW_FMT(CSeqVectorException, eDataError,
824                        "CSeqVector_CI: invalid sequence length: "
825                        <<pos<<" <> "<<size);
826     }
827     // Try to re-use backup cache
828     if ( pos >= x_CachePos()  &&  pos < x_CacheEndPos() ) {
829         m_Cache = m_CacheData.get() + pos - x_CachePos();
830     }
831     else {
832         // can not use backup cache
833         x_ResetCache();
834         x_UpdateCacheDown(pos);
835         _ASSERT(GetPos() == pos);
836         _ASSERT(x_CacheSize());
837         _ASSERT(x_CacheEndPos() == pos+1);
838     }
839 }
840 
841 
SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer)842 void CSeqVector_CI::SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer)
843 {
844     if ( randomizer != m_Randomizer ) {
845         TSeqPos pos = GetPos();
846         m_Randomizer = randomizer;
847         x_ResetBackup();
848         if ( x_CacheSize() ) {
849             x_ResetCache();
850             if ( m_Seg ) {
851                 x_SetPos(pos);
852             }
853         }
854     }
855 }
856 
857 
x_InitRandomizer(CRandom & random_gen)858 void CSeqVector_CI::x_InitRandomizer(CRandom& random_gen)
859 {
860     CRef<INcbi2naRandomizer> randomizer(new CNcbi2naRandomizer(random_gen));
861     SetRandomizeAmbiguities(randomizer);
862 }
863 
864 
SetRandomizeAmbiguities(void)865 void CSeqVector_CI::SetRandomizeAmbiguities(void)
866 {
867     CRandom random_gen;
868     x_InitRandomizer(random_gen);
869 }
870 
871 
SetRandomizeAmbiguities(Uint4 seed)872 void CSeqVector_CI::SetRandomizeAmbiguities(Uint4 seed)
873 {
874     CRandom random_gen(seed);
875     x_InitRandomizer(random_gen);
876 }
877 
878 
SetRandomizeAmbiguities(CRandom & random_gen)879 void CSeqVector_CI::SetRandomizeAmbiguities(CRandom& random_gen)
880 {
881     x_InitRandomizer(random_gen);
882 }
883 
884 
SetNoAmbiguities(void)885 void CSeqVector_CI::SetNoAmbiguities(void)
886 {
887     SetRandomizeAmbiguities(null);
888 }
889 
890 
891 END_SCOPE(objects)
892 END_NCBI_SCOPE
893