1 /* $Id: seq_vector.cpp 513580 2016-09-13 11:58:16Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Sequence data container for object manager
30 *
31 */
32
33
34 #include <ncbi_pch.hpp>
35 #include <objmgr/seq_vector.hpp>
36 #include <objmgr/seq_vector_ci.hpp>
37 #include <corelib/ncbimtx.hpp>
38 #include <objmgr/impl/data_source.hpp>
39 #include <objects/seq/seqport_util.hpp>
40 #include <objects/seqloc/Seq_loc.hpp>
41 #include <objmgr/seq_map.hpp>
42 #include <objmgr/objmgr_exception.hpp>
43 #include <objmgr/impl/seq_vector_cvt.hpp>
44 #include <algorithm>
45 #include <map>
46 #include <vector>
47 #include <util/random_gen.hpp>
48
49 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)50 BEGIN_SCOPE(objects)
51
52
53 ////////////////////////////////////////////////////////////////////
54 //
55 // CNcbi2naRandomizer::
56 //
57
// Out-of-line destructor of the randomizer interface; intentionally empty.
INcbi2naRandomizer::~INcbi2naRandomizer(void)
{
}
61
62
// Build the lookup tables used by RandomizeData().
//
// For every 4-bit code na4 (0..15) the set bits are counted; code 0 is
// handled as if all four bits were set.
//  - Exactly one bit set: m_FixedTable[na4] holds the index of that bit.
//  - Otherwise: m_FixedTable[na4] is kRandomValue and
//    m_RandomTable[na4][0..kRandomDataSize-1] is filled with base indices
//    (0..3) chosen among the set bits using numbers drawn from 'gen'.
//
// @param gen  Random number generator used to fill the random tables.
CNcbi2naRandomizer::CNcbi2naRandomizer(CRandom& gen)
{
    unsigned int bases[4]; // Count of each base in the random distribution
    for (int na4 = 0; na4 < 16; na4++) {
        int bit_count = 0;
        char set_bit = 0;
        for (int bit = 0; bit < 4; bit++) {
            // na4 == 0 is special case (gap) should be treated as 0xf
            if ( !na4 || (na4 & (1 << bit)) ) {
                bit_count++;
                bases[bit] = 1;
                set_bit = (char)bit;
            }
            else {
                bases[bit] = 0;
            }
        }
        if (bit_count == 1) {
            // Single base
            m_FixedTable[na4] = set_bit;
            continue;
        }
        m_FixedTable[na4] = kRandomValue;
        // Ambiguity: create random distribution with possible bases
        // Each allowed base (bases[bit] == 1) gets the same quota of slots;
        // disallowed bases stay at 0.
        for (int bit = 0; bit < 4; bit++) {
            bases[bit] *= kRandomDataSize/bit_count +
                kRandomDataSize % bit_count;
        }
        // Fill the table from the last slot to the first, drawing one
        // random number per slot and consuming one unit of the chosen
        // base's remaining quota.
        for (int i = kRandomDataSize - 1; i >= 0; i--) {
            CRandom::TValue rnd = gen.GetRand(0, i);
            for (int base = 0; base < 4; base++) {
                // Skip exhausted/disallowed bases and bases whose remaining
                // quota is smaller than rnd.
                if (!bases[base] || rnd > bases[base]) {
                    rnd -= bases[base];
                    continue;
                }
                m_RandomTable[na4][i] = (char)base;
                bases[base]--;
                break;
            }
        }
    }
}
105
106
// Destructor; intentionally empty.
CNcbi2naRandomizer::~CNcbi2naRandomizer(void)
{
}
110
111
RandomizeData(char * data,size_t count,TSeqPos pos)112 void CNcbi2naRandomizer::RandomizeData(char* data,
113 size_t count,
114 TSeqPos pos)
115 {
116 for (char* stop = data + count; data < stop; ++data, ++pos) {
117 int base4na = *data;
118 char base2na = m_FixedTable[base4na];
119 if ( base2na == kRandomValue ) {
120 // Ambiguity, use random value
121 base2na = m_RandomTable[base4na][(pos & kRandomizerPosMask)];
122 }
123 *data = base2na;
124 }
125 }
126
127
128 ////////////////////////////////////////////////////////////////////
129 //
130 // CSeqVector::
131 //
132
133
CSeqVector(void)134 CSeqVector::CSeqVector(void)
135 : m_Size(0)
136 {
137 }
138
139
// Copy constructor: copies scope, sequence map, TSE, size, molecule type,
// strand and coding from 'vec'. The cached iterator (m_Iterator) is not
// copied.
// NOTE(review): m_Randomizer is not copied here either, so the copy has no
// randomizer even if 'vec' has one -- confirm this is intended.
CSeqVector::CSeqVector(const CSeqVector& vec)
    : m_Scope(vec.m_Scope),
      m_SeqMap(vec.m_SeqMap),
      m_TSE(vec.m_TSE),
      m_Size(vec.m_Size),
      m_Mol(vec.m_Mol),
      m_Strand(vec.m_Strand),
      m_Coding(vec.m_Coding)
{
}
150
151
// Construct a vector over the sequence of a bioseq handle.
//
// Scope, sequence map and TSE handle are taken from 'bioseq'; length and
// molecule type are read from the handle as well. The coding starts as
// e_not_set and is then selected through SetCoding(coding).
//
// @param bioseq  Handle of the sequence to read.
// @param coding  Requested vector coding.
// @param strand  Strand to read.
CSeqVector::CSeqVector(const CBioseq_Handle& bioseq,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(bioseq.GetScope()),
      m_SeqMap(&bioseq.GetSeqMap()),
      m_TSE(bioseq.GetTSE_Handle()),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    m_Size = bioseq.GetBioseqLength();
    m_Mol = bioseq.GetSequenceType();
    SetCoding(coding);
}
164
165
// Construct a vector over a sequence map, resolved in 'scope'.
//
// No TSE handle is set by this constructor. Length and molecule type are
// taken from the sequence map; the coding starts as e_not_set and is then
// selected through SetCoding(coding).
//
// @param seqMap  Sequence map to read.
// @param scope   Scope passed to the map when computing its length.
// @param coding  Requested vector coding.
// @param strand  Strand to read.
CSeqVector::CSeqVector(const CSeqMap& seqMap, CScope& scope,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(&scope),
      m_SeqMap(&seqMap),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    m_Size = m_SeqMap->GetLength(m_Scope);
    m_Mol = m_SeqMap->GetMol();
    SetCoding(coding);
}
177
178
// Construct a vector over a sequence map, using the scope of 'top_tse'.
//
// The TSE handle is stored in m_TSE. Length and molecule type are taken
// from the sequence map; the coding starts as e_not_set and is then
// selected through SetCoding(coding).
//
// @param seqMap   Sequence map to read.
// @param top_tse  TSE handle providing the scope; stored in m_TSE.
// @param coding   Requested vector coding.
// @param strand   Strand to read.
CSeqVector::CSeqVector(const CSeqMap& seqMap, const CTSE_Handle& top_tse,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(top_tse.GetScope()),
      m_SeqMap(&seqMap),
      m_TSE(top_tse),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    m_Size = m_SeqMap->GetLength(m_Scope);
    m_Mol = m_SeqMap->GetMol();
    SetCoding(coding);
}
191
192
// Construct a vector over a Seq-loc, resolved in 'scope'.
//
// The sequence map is obtained from CSeqMap::GetSeqMapForSeq_loc(). If the
// location has a single id and the scope returns a valid bioseq handle for
// it, that handle's TSE is stored in m_TSE; otherwise m_TSE stays unset.
// Length and molecule type come from the sequence map; the coding starts
// as e_not_set and is then selected through SetCoding(coding).
//
// @param loc     Location to read.
// @param scope   Scope used to resolve the location.
// @param coding  Requested vector coding.
// @param strand  Strand to read.
CSeqVector::CSeqVector(const CSeq_loc& loc, CScope& scope,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(&scope),
      m_SeqMap(CSeqMap::GetSeqMapForSeq_loc(loc, &scope)),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    if ( const CSeq_id* id = loc.GetId() ) {
        if ( CBioseq_Handle bh = scope.GetBioseqHandle(*id) ) {
            m_TSE = bh.GetTSE_Handle();
        }
    }
    m_Size = m_SeqMap->GetLength(m_Scope);
    m_Mol = m_SeqMap->GetMol();
    SetCoding(coding);
}
209
210
// Construct a vector over a Seq-loc, using the scope of 'top_tse'.
//
// The sequence map is obtained from CSeqMap::GetSeqMapForSeq_loc() with the
// TSE's scope, and the TSE handle is stored in m_TSE. Length and molecule
// type come from the sequence map; the coding starts as e_not_set and is
// then selected through SetCoding(coding).
//
// @param loc      Location to read.
// @param top_tse  TSE handle providing the scope; stored in m_TSE.
// @param coding   Requested vector coding.
// @param strand   Strand to read.
CSeqVector::CSeqVector(const CSeq_loc& loc, const CTSE_Handle& top_tse,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(top_tse.GetScope()),
      m_SeqMap(CSeqMap::GetSeqMapForSeq_loc(loc, &top_tse.GetScope())),
      m_TSE(top_tse),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    m_Size = m_SeqMap->GetLength(m_Scope);
    m_Mol = m_SeqMap->GetMol();
    SetCoding(coding);
}
223
224
// Construct a vector directly over a CBioseq object.
//
// A sequence map is created from the bioseq with
// CSeqMap::CreateSeqMapForBioseq(); no TSE handle is set. The length comes
// from the map (computed with 'scope'), the molecule type from the bioseq's
// Seq-inst. The coding starts as e_not_set and is then selected through
// SetCoding(coding).
//
// @param bioseq  Sequence object to read.
// @param scope   Scope pointer stored in m_Scope and passed to GetLength().
// @param coding  Requested vector coding.
// @param strand  Strand to read.
CSeqVector::CSeqVector(const CBioseq& bioseq,
                       CScope* scope,
                       EVectorCoding coding, ENa_strand strand)
    : m_Scope(scope),
      m_SeqMap(CSeqMap::CreateSeqMapForBioseq(bioseq)),
      m_Strand(strand),
      m_Coding(CSeq_data::e_not_set)
{
    m_Size = m_SeqMap->GetLength(scope);
    m_Mol = bioseq.GetInst().GetMol();
    SetCoding(coding);
}
237
238
// Destructor; intentionally empty (members clean up after themselves).
CSeqVector::~CSeqVector(void)
{
}
242
243
// Copy assignment.
//
// Under the vector's mutex, copies scope, sequence map, TSE, size,
// molecule type, strand and coding from 'vec' and drops the cached
// iterator. Self-assignment is a no-op.
//
// @param vec  Vector to copy from.
// @return     Reference to this vector.
CSeqVector& CSeqVector::operator= (const CSeqVector& vec)
{
    if ( this == &vec ) {
        return *this;
    }
    TMutexGuard guard(GetMutex());
    m_Scope  = vec.m_Scope;
    m_SeqMap = vec.m_SeqMap;
    m_TSE    = vec.m_TSE;
    m_Size   = vec.m_Size;
    m_Mol    = vec.m_Mol;
    m_Strand = vec.m_Strand;
    m_Coding = vec.m_Coding;
    // The cached iterator refers to the previous state
    m_Iterator.reset();
    return *this;
}
259
260
x_CreateIterator(TSeqPos pos) const261 CSeqVector_CI* CSeqVector::x_CreateIterator(TSeqPos pos) const
262 {
263 CSeqVector_CI* iter;
264 m_Iterator.reset(iter = new CSeqVector_CI(*this, pos));
265 return iter;
266 }
267
268
x_ResetIterator(void) const269 void CSeqVector::x_ResetIterator(void) const
270 {
271 if ( m_Iterator.get() ) {
272 TMutexGuard guard(GetMutex());
273 m_Iterator.reset();
274 }
275 }
276
277
GetGapSizeForward(TSeqPos pos) const278 TSeqPos CSeqVector::GetGapSizeForward(TSeqPos pos) const
279 {
280 TMutexGuard guard(GetMutex());
281 return x_GetIterator(pos).GetGapSizeForward();
282 }
283
284
GetGapSeq_literal(TSeqPos pos) const285 CConstRef<CSeq_literal> CSeqVector::GetGapSeq_literal(TSeqPos pos) const
286 {
287 TMutexGuard guard(GetMutex());
288 return x_GetIterator(pos).GetGapSeq_literal();
289 }
290
291
CanGetRange(TSeqPos start,TSeqPos stop) const292 bool CSeqVector::CanGetRange(TSeqPos start, TSeqPos stop) const
293 {
294 try {
295 TMutexGuard guard(GetMutex());
296 return x_GetIterator(start).CanGetRange(start, stop);
297 }
298 catch ( CException& /*ignored*/ ) {
299 return false;
300 }
301 }
302
303
// Fill 'buffer' with sequence data for the range start..stop.
//
// Under the vector's mutex, the internal iterator is positioned at 'start'
// and its GetSeqData() does the work.
//
// @param start   First position of the range.
// @param stop    End position of the range.
// @param buffer  Output buffer, filled by the iterator.
void CSeqVector::GetSeqData(TSeqPos start, TSeqPos stop, string& buffer) const
{
    TMutexGuard guard(GetMutex());
    x_GetIterator(start).GetSeqData(start, stop, buffer);
}
309
310
GetPackedSeqData(string & dst_str,TSeqPos src_pos,TSeqPos src_end)311 void CSeqVector::GetPackedSeqData(string& dst_str,
312 TSeqPos src_pos,
313 TSeqPos src_end)
314 {
315 dst_str.erase();
316 src_end = min(src_end, size());
317 if ( src_pos >= src_end ) {
318 return;
319 }
320
321 if ( m_TSE && !CanGetRange(src_pos, src_end) ) {
322 NCBI_THROW_FMT(CSeqVectorException, eDataError,
323 "CSeqVector::GetPackedSeqData: "
324 "cannot get seq-data in range: "
325 <<src_pos<<"-"<<src_end);
326 }
327
328 TCoding dst_coding = GetCoding();
329 switch ( dst_coding ) {
330 case CSeq_data::e_Iupacna:
331 case CSeq_data::e_Ncbi8na:
332 case CSeq_data::e_Iupacaa:
333 case CSeq_data::e_Ncbieaa:
334 case CSeq_data::e_Ncbi8aa:
335 case CSeq_data::e_Ncbistdaa:
336 x_GetPacked8SeqData(dst_str, src_pos, src_end);
337 break;
338 case CSeq_data::e_Ncbi4na:
339 x_GetPacked4naSeqData(dst_str, src_pos, src_end);
340 break;
341 case CSeq_data::e_Ncbi2na:
342 x_GetPacked2naSeqData(dst_str, src_pos, src_end);
343 break;
344 default:
345 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
346 "Can not pack data using the selected coding: "<<
347 GetCoding());
348 }
349 }
350
// Size, in residues, of the temporary unpacked buffers used by the append
// helpers below. x_Append8To2() packs four residues per byte into a buffer
// of kBufferSize/4 bytes, hence the divisibility requirement.
static const size_t kBufferSize = 1024; // must be multiple of 4
352
353 static inline
x_Append8To8(string & dst_str,const string & src_str,size_t src_pos,size_t count)354 void x_Append8To8(string& dst_str, const string& src_str,
355 size_t src_pos, size_t count)
356 {
357 _ASSERT(src_pos+count >= src_pos); // check for overflow
358 _ASSERT(src_pos+count <= src_str.size());
359 if ( count ) {
360 dst_str.append(src_str.data()+src_pos, count);
361 }
362 }
363
364
365 static inline
x_Append8To8(string & dst_str,const vector<char> & src_str,size_t src_pos,size_t count)366 void x_Append8To8(string& dst_str, const vector<char>& src_str,
367 size_t src_pos, size_t count)
368 {
369 _ASSERT(src_pos+count >= src_pos); // check for overflow
370 _ASSERT(src_pos+count <= src_str.size());
371 if ( count ) {
372 dst_str.append(&src_str[src_pos], count);
373 }
374 }
375
376
// Append 'count' copies of the gap character 'gap' to 'dst_str'.
// Nothing is done when 'count' is zero.
static inline
void x_AppendGapTo8(string& dst_str, size_t count, char gap)
{
    if ( count == 0 ) {
        return;
    }
    dst_str.append(count, gap);
}
384
385
386 static
x_Append8To4(string & dst,char & dst_c,TSeqPos dst_pos,const char * src,size_t count)387 void x_Append8To4(string& dst, char& dst_c, TSeqPos dst_pos,
388 const char* src, size_t count)
389 {
390 _ASSERT(src+count >= src); // check for overflow
391 if ( !count ) {
392 return;
393 }
394 if ( dst_pos & 1 ) {
395 dst += char((dst_c<<4)|*src);
396 dst_c = 0;
397 ++dst_pos;
398 ++src;
399 --count;
400 }
401 for ( ; count >= 2; dst_pos += 2, src += 2, count -= 2 ) {
402 dst += char((src[0]<<4)|src[1]);
403 }
404 if ( count&1 ) {
405 dst_c = *src;
406 }
407 }
408
409
// Append 'count' 4-bit values from the packed vector 'src' (two values per
// byte, first value in the high nibble), starting at value index 'src_pos',
// to the packed output 'dst'.
//
// 'dst_pos' is the number of values already emitted. When it is odd, the
// pending high nibble is held in 'dst_c'; a value left over at the end is
// stored in 'dst_c'. Two cases are distinguished by the parity of src_pos
// relative to dst_pos: if they differ, every output byte is assembled from
// two neighbouring source bytes; if they match, whole source bytes are
// copied.
static
void x_Append4To4(string& dst, char& dst_c, TSeqPos dst_pos,
                  const vector<char>& src, TSeqPos src_pos,
                  TSeqPos count)
{
    _ASSERT(src_pos+count >= src_pos); // check for overflow
    _ASSERT(src_pos+count <= src.size()*2);
    if ( !count ) {
        return;
    }
    if ( (src_pos^dst_pos) & 1 ) {
        // misaligned data -> dst_str
        if ( dst_pos & 1 ) {
            // align dst_pos
            // src_pos is even here: take the high nibble of the source byte
            dst += char((dst_c<<4)|((src[src_pos>>1]>>4)&15));
            dst_c = 0;
            ++dst_pos;
            ++src_pos;
            --count;
        }
        _ASSERT((src_pos&1));
        size_t pos = src_pos>>1;
        // Output byte = low nibble of src[pos] + high nibble of src[pos+1]
        for ( ; count >= 2; dst_pos += 2, pos += 1, count -= 2 ) {
            dst += char(((src[pos]<<4)&0xf0)|((src[pos+1]>>4)&0x0f));
        }
        if ( count&1 ) {
            _ASSERT((src_pos&1));
            // Last value is the low nibble of the current source byte
            dst_c = (src[pos])&15;
        }
    }
    else {
        // aligned data -> dst_str
        if ( dst_pos & 1 ) {
            // align dst_pos
            // src_pos is odd here: take the low nibble of the source byte
            dst += char((dst_c<<4)|((src[src_pos>>1])&15));
            dst_c = 0;
            ++dst_pos;
            ++src_pos;
            --count;
        }
        _ASSERT(!(src_pos&1));
        _ASSERT(!(dst_pos&1));
        size_t octets = count>>1;
        size_t pos = src_pos>>1;
        if ( octets ) {
            // Whole bytes can be copied verbatim
            dst.append(&src[pos], octets);
        }
        if ( count&1 ) {
            _ASSERT(!(src_pos&1));
            // Last value is the high nibble of the next source byte
            dst_c = (src[pos+octets]>>4)&15;
        }
    }
}
463
464
465 static
x_AppendGapTo4(string & dst_str,char & dst_c,TSeqPos dst_pos,TSeqPos count,char gap)466 void x_AppendGapTo4(string& dst_str, char& dst_c, TSeqPos dst_pos,
467 TSeqPos count, char gap)
468 {
469 if ( !count ) {
470 return;
471 }
472 if ( dst_pos & 1 ) {
473 // align dst_pos
474 dst_str += char((dst_c << 4)|gap);
475 dst_c = 0;
476 ++dst_pos;
477 --count;
478 }
479 _ASSERT(!(dst_pos&1));
480 size_t octets = count>>1;
481 if ( octets ) {
482 dst_str.append(octets, char((gap<<4)|gap));
483 }
484 if ( count&1 ) {
485 dst_c = gap;
486 }
487 }
488
489
// Pack 'count' 2-bit values (one per byte in 'buffer') into 'dst_str',
// four values per byte, first value in the highest bits.
//
// 'dst_pos' is the number of values already emitted. The values of a
// partially filled byte are carried in the low bits of 'dst_c'; on return
// 'dst_c' holds the values that did not complete a byte (or 0).
// After the partial byte is completed, at most kBufferSize values may
// remain, because they are packed through a kBufferSize/4-byte buffer.
static
void x_Append8To2(string& dst_str, char& dst_c, TSeqPos dst_pos,
                  const char* buffer, TSeqPos count)
{
    if ( !count ) {
        return;
    }
    _ASSERT(dst_str.size() == dst_pos>>2);
    const char* unpacked = buffer;
    if ( dst_pos&3 ) {
        // Continue the partially filled byte carried in dst_c
        char c = dst_c;
        for ( ; count && (dst_pos&3); --count, ++dst_pos ) {
            c = char((c<<2)|*unpacked++);
        }
        if ( (dst_pos&3) == 0 ) {
            // Byte completed
            dst_str += c;
            dst_c = 0;
        }
        else {
            // Input exhausted before the byte was complete
            dst_c = c;
        }
        if ( !count ) {
            return;
        }
    }
    _ASSERT((dst_pos&3) == 0);
    _ASSERT(dst_str.size() == dst_pos>>2);
    char packed_buffer[kBufferSize/4];
    char* packed_end = packed_buffer;
    // Pack complete groups of four values
    for ( ; count >= 4; count -= 4, unpacked += 4 ) {
        *packed_end++ = char(
            (unpacked[0]<<6)|(unpacked[1]<<4)|(unpacked[2]<<2)|unpacked[3] );
    }
    dst_str.append(packed_buffer, packed_end);
    // Keep the remaining 0..3 values in dst_c, right-aligned
    switch ( count ) {
    case 1:
        dst_c = unpacked[0];
        break;
    case 2:
        dst_c = char((unpacked[0]<<2)|unpacked[1]);
        break;
    case 3:
        dst_c = char((unpacked[0]<<4)|(unpacked[1]<<2)|unpacked[2]);
        break;
    default:
        dst_c = 0;
        break;
    }
}
539
540
// Append 'count' 2-bit values from the packed vector 'src' (four values per
// byte), starting at value index 'src_pos', to the packed output 'dst'.
//
// 'dst_pos' is the number of values already emitted; the values of a
// partially filled output byte are carried in the low bits of 'dst_c'.
// If source and destination positions differ modulo 4, the data is
// unpacked chunk by chunk with copy_2bit() and repacked by x_Append8To2().
// Otherwise the partial byte is completed from the matching source byte
// and whole bytes are copied.
static
void x_Append2To2(string& dst, char& dst_c, TSeqPos dst_pos,
                  const vector<char>& src, TSeqPos src_pos,
                  TSeqPos count)
{
    _ASSERT(src_pos+count >= src_pos); // check for overflow
    _ASSERT(src_pos+count <= src.size()*4);
    if ( !count ) {
        return;
    }
    if ( (src_pos^dst_pos) & 3 ) {
        // misaligned src -> dst
        char buffer[kBufferSize];
        while ( count ) {
            // if count is larger than buffer size make sure
            // that the next dst_pos is aligned to 4.
            TSeqPos chunk = min(count, TSeqPos(kBufferSize-(dst_pos&3)));
            copy_2bit(buffer, chunk, src, src_pos);
            // Array buffer[] is properly initialized in copy_2bit()
            // but Clang static analyzer fails to notice it
            // and issues false warning inside x_Append8To2() call.
            x_Append8To2(dst, dst_c, dst_pos, buffer, chunk);
            dst_pos += chunk;
            src_pos += chunk;
            count -= chunk;
        }
        return;
    }

    // aligned src -> dst
    if ( dst_pos&3 ) {
        // align dst_pos
        // 'add' values are missing to complete the pending output byte;
        // they are the low 2*add bits of the current source byte.
        TSeqPos add = 4-(dst_pos&3);
        char c = char((dst_c<<(add*2))|(src[src_pos>>2]&((1<<(add*2))-1)));
        if ( count < add ) {
            // Not enough input to complete the byte: keep only 'count'
            // of the added values in dst_c.
            dst_c = char(c >> (2*(add-count)));
            return;
        }
        dst += c;
        dst_c = 0;
        src_pos += add;
        // Dead increment: dst_pos is not used anymore
        //dst_pos += add;
        count -= add;
    }
    _ASSERT(!(src_pos&3));
    size_t octets = count>>2;
    size_t pos = src_pos>>2;
    if ( octets ) {
        // Whole bytes can be copied verbatim
        dst.append(&src[pos], octets);
    }
    size_t rem = count&3;
    if ( rem ) {
        _ASSERT(!(src_pos&3));
        // Keep the top 'rem' values of the next source byte, right-aligned
        dst_c = char((src[pos+octets]&255)>>(2*(4-rem)));
    }
}
598
599
600 static
x_AppendRandomTo2(string & dst_str,char & dst_c,TSeqPos dst_pos,TSeqPos src_pos,TSeqPos count,INcbi2naRandomizer & randomizer,char gap)601 void x_AppendRandomTo2(string& dst_str, char& dst_c, TSeqPos dst_pos,
602 TSeqPos src_pos, TSeqPos count,
603 INcbi2naRandomizer& randomizer, char gap)
604 {
605 _ASSERT(src_pos+count >= src_pos); // check for overflow
606 char buffer[kBufferSize];
607 while ( count ) {
608 _ASSERT(dst_str.size() == dst_pos>>2);
609 // if count is larger than buffer size make sure
610 // that the next dst_pos is aligned to 4.
611 TSeqPos chunk = min(count, TSeqPos(kBufferSize-(dst_pos&3)));
612 fill_n(buffer, chunk, gap);
613 randomizer.RandomizeData(buffer, chunk, src_pos);
614 x_Append8To2(dst_str, dst_c, dst_pos, buffer, chunk);
615 count -= chunk;
616 src_pos += chunk;
617 dst_pos += chunk;
618 _ASSERT(dst_str.size() == dst_pos>>2);
619 }
620 }
621
622
623 static
x_AppendAnyTo8(string & dst_str,const CSeq_data & data,TSeqPos dataPos,TSeqPos total_count,const char * table=0,bool reverse=false)624 void x_AppendAnyTo8(string& dst_str,
625 const CSeq_data& data, TSeqPos dataPos,
626 TSeqPos total_count,
627 const char* table = 0, bool reverse = false)
628 {
629 _ASSERT(dataPos+total_count >= dataPos); // check for overflow
630 char buffer[kBufferSize];
631 CSeq_data::E_Choice src_coding = data.Which();
632 if ( reverse ) {
633 dataPos += total_count;
634 }
635 while ( total_count ) {
636 TSeqPos count = min(total_count, TSeqPos(sizeof(buffer)));
637 if ( reverse ) {
638 dataPos -= count;
639 }
640 switch ( src_coding ) {
641 case CSeq_data::e_Iupacna:
642 copy_8bit_any(buffer, count, data.GetIupacna().Get(), dataPos,
643 table, reverse);
644 break;
645 case CSeq_data::e_Iupacaa:
646 copy_8bit_any(buffer, count, data.GetIupacaa().Get(), dataPos,
647 table, reverse);
648 break;
649 case CSeq_data::e_Ncbi2na:
650 copy_2bit_any(buffer, count, data.GetNcbi2na().Get(), dataPos,
651 table, reverse);
652 break;
653 case CSeq_data::e_Ncbi4na:
654 copy_4bit_any(buffer, count, data.GetNcbi4na().Get(), dataPos,
655 table, reverse);
656 break;
657 case CSeq_data::e_Ncbi8na:
658 copy_8bit_any(buffer, count, data.GetNcbi8na().Get(), dataPos,
659 table, reverse);
660 break;
661 case CSeq_data::e_Ncbi8aa:
662 copy_8bit_any(buffer, count, data.GetNcbi8aa().Get(), dataPos,
663 table, reverse);
664 break;
665 case CSeq_data::e_Ncbieaa:
666 copy_8bit_any(buffer, count, data.GetNcbieaa().Get(), dataPos,
667 table, reverse);
668 break;
669 case CSeq_data::e_Ncbistdaa:
670 copy_8bit_any(buffer, count, data.GetNcbistdaa().Get(), dataPos,
671 table, reverse);
672 break;
673 default:
674 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
675 "Invalid data coding: "<<src_coding);
676 }
677 dst_str.append(buffer, count);
678 if ( !reverse ) {
679 dataPos += count;
680 }
681 total_count -= count;
682 }
683 }
684
685
686 static
x_AppendAnyTo4(string & dst_str,char & dst_c,TSeqPos dst_pos,const CSeq_data & data,TSeqPos dataPos,TSeqPos total_count,const char * table,bool reverse)687 void x_AppendAnyTo4(string& dst_str, char& dst_c, TSeqPos dst_pos,
688 const CSeq_data& data, TSeqPos dataPos,
689 TSeqPos total_count,
690 const char* table, bool reverse)
691 {
692 _ASSERT(dataPos+total_count >= dataPos); // check for overflow
693 _ASSERT(table || reverse);
694 char buffer[kBufferSize];
695 CSeq_data::E_Choice src_coding = data.Which();
696 if ( reverse ) {
697 dataPos += total_count;
698 }
699 while ( total_count ) {
700 TSeqPos count = min(total_count, TSeqPos(sizeof(buffer)));
701 if ( reverse ) {
702 dataPos -= count;
703 }
704 switch ( src_coding ) {
705 case CSeq_data::e_Iupacna:
706 copy_8bit_any(buffer, count, data.GetIupacna().Get(), dataPos,
707 table, reverse);
708 break;
709 case CSeq_data::e_Iupacaa:
710 copy_8bit_any(buffer, count, data.GetIupacaa().Get(), dataPos,
711 table, reverse);
712 break;
713 case CSeq_data::e_Ncbi2na:
714 copy_2bit_any(buffer, count, data.GetNcbi2na().Get(), dataPos,
715 table, reverse);
716 break;
717 case CSeq_data::e_Ncbi4na:
718 copy_4bit_any(buffer, count, data.GetNcbi4na().Get(), dataPos,
719 table, reverse);
720 break;
721 case CSeq_data::e_Ncbi8na:
722 copy_8bit_any(buffer, count, data.GetNcbi8na().Get(), dataPos,
723 table, reverse);
724 break;
725 case CSeq_data::e_Ncbi8aa:
726 copy_8bit_any(buffer, count, data.GetNcbi8aa().Get(), dataPos,
727 table, reverse);
728 break;
729 case CSeq_data::e_Ncbieaa:
730 copy_8bit_any(buffer, count, data.GetNcbieaa().Get(), dataPos,
731 table, reverse);
732 break;
733 case CSeq_data::e_Ncbistdaa:
734 copy_8bit_any(buffer, count, data.GetNcbistdaa().Get(), dataPos,
735 table, reverse);
736 break;
737 default:
738 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
739 "Invalid data coding: "<<src_coding);
740 }
741 x_Append8To4(dst_str, dst_c, dst_pos, buffer, count);
742 if ( !reverse ) {
743 dataPos += count;
744 }
745 dst_pos += count;
746 total_count -= count;
747 }
748 }
749
750
751 static
x_AppendAnyTo2(string & dst_str,char & dst_c,TSeqPos dst_pos,const CSeq_data & data,TSeqPos dataPos,TSeqPos total_count,const char * table,bool reverse,INcbi2naRandomizer * randomizer,TSeqPos randomizer_pos)752 void x_AppendAnyTo2(string& dst_str, char& dst_c, TSeqPos dst_pos,
753 const CSeq_data& data, TSeqPos dataPos,
754 TSeqPos total_count,
755 const char* table, bool reverse,
756 INcbi2naRandomizer* randomizer, TSeqPos randomizer_pos)
757 {
758 _ASSERT(dataPos+total_count >= dataPos); // check for overflow
759 _ASSERT(table || reverse || randomizer);
760 char buffer[kBufferSize];
761 CSeq_data::E_Choice src_coding = data.Which();
762 if ( reverse ) {
763 dataPos += total_count;
764 }
765 while ( total_count ) {
766 TSeqPos count = min(total_count, TSeqPos(sizeof(buffer)));
767 if ( reverse ) {
768 dataPos -= count;
769 }
770 switch ( src_coding ) {
771 case CSeq_data::e_Iupacna:
772 copy_8bit_any(buffer, count, data.GetIupacna().Get(), dataPos,
773 table, reverse);
774 break;
775 case CSeq_data::e_Iupacaa:
776 copy_8bit_any(buffer, count, data.GetIupacaa().Get(), dataPos,
777 table, reverse);
778 break;
779 case CSeq_data::e_Ncbi2na:
780 copy_2bit_any(buffer, count, data.GetNcbi2na().Get(), dataPos,
781 table, reverse);
782 break;
783 case CSeq_data::e_Ncbi4na:
784 copy_4bit_any(buffer, count, data.GetNcbi4na().Get(), dataPos,
785 table, reverse);
786 break;
787 case CSeq_data::e_Ncbi8na:
788 copy_8bit_any(buffer, count, data.GetNcbi8na().Get(), dataPos,
789 table, reverse);
790 break;
791 case CSeq_data::e_Ncbi8aa:
792 copy_8bit_any(buffer, count, data.GetNcbi8aa().Get(), dataPos,
793 table, reverse);
794 break;
795 case CSeq_data::e_Ncbieaa:
796 copy_8bit_any(buffer, count, data.GetNcbieaa().Get(), dataPos,
797 table, reverse);
798 break;
799 case CSeq_data::e_Ncbistdaa:
800 copy_8bit_any(buffer, count, data.GetNcbistdaa().Get(), dataPos,
801 table, reverse);
802 break;
803 default:
804 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
805 "Invalid data coding: "<<src_coding);
806 }
807 if ( randomizer ) {
808 randomizer->RandomizeData(buffer, count, randomizer_pos);
809 }
810 x_Append8To2(dst_str, dst_c, dst_pos, buffer, count);
811 if ( !reverse ) {
812 dataPos += count;
813 }
814 dst_pos += count;
815 randomizer_pos += count;
816 total_count -= count;
817 }
818 }
819
820
// Append sequence data for [src_pos, src_end) to 'dst_str' using an 8-bit
// coding (one byte per residue).
//
// The sequence map is walked segment by segment starting at 'src_pos'.
// Gap segments are filled with GetGapChar(). Data segments are converted
// to the current coding via sx_GetConvertTable() when the source coding
// differs or the segment is on the minus strand; otherwise the bytes are
// appended directly.
//
// @throws CSeqVectorException (eCodingError) if no conversion table exists
//         between the segment coding and the vector coding.
void CSeqVector::x_GetPacked8SeqData(string& dst_str,
                                     TSeqPos src_pos,
                                     TSeqPos src_end)
{
    ECaseConversion case_conversion = eCaseConversion_none;
    SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
    sel.SetStrand(m_Strand);
    if ( m_TSE ) {
        sel.SetLinkUsedTSE(m_TSE);
    }
    CSeqMap_CI seg(m_SeqMap, m_Scope.GetScopeOrNull(), sel, src_pos);

    dst_str.reserve(src_end-src_pos);
    TCoding dst_coding = GetCoding();
    TSeqPos dst_pos = 0;
    while ( src_pos < src_end ) {
        _ASSERT(dst_str.size() == dst_pos);
        // Residues to take from the current segment
        TSeqPos count = min(src_end-src_pos, seg.GetEndPosition()-src_pos);
        if ( seg.GetType() == CSeqMap::eSeqGap ) {
            x_AppendGapTo8(dst_str, count, GetGapChar());
        }
        else {
            const CSeq_data& data = seg.GetRefData();
            bool reverse = seg.GetRefMinusStrand();
            TCoding src_coding = data.Which();

            const char* table = 0;
            if ( dst_coding != src_coding || reverse ||
                 case_conversion != eCaseConversion_none ) {
                table = sx_GetConvertTable(src_coding, dst_coding,
                                           reverse, case_conversion);
                if ( !table && src_coding != dst_coding ) {
                    NCBI_THROW_FMT(CSeqVectorException, eCodingError,
                                   "Incompatible sequence codings: "<<
                                   src_coding<<" -> "<<dst_coding);
                }
            }

            // Offset of the requested residues inside the referenced data
            TSeqPos dataPos;
            if ( reverse ) {
                // Revert segment offset
                dataPos = seg.GetRefEndPosition() -
                    (src_pos - seg.GetPosition()) - count;
            }
            else {
                dataPos = seg.GetRefPosition() +
                    (src_pos - seg.GetPosition());
            }

            if ( ( !table || table == sm_TrivialTable) && !reverse ) {
                // No conversion needed: 8-bit source codings are appended
                // directly, packed ones go through the generic unpacker.
                switch ( src_coding ) {
                case CSeq_data::e_Iupacna:
                    x_Append8To8(dst_str, data.GetIupacna().Get(),
                                 dataPos, count);
                    break;
                case CSeq_data::e_Iupacaa:
                    x_Append8To8(dst_str, data.GetIupacaa().Get(),
                                 dataPos, count);
                    break;
                case CSeq_data::e_Ncbi8na:
                    x_Append8To8(dst_str, data.GetNcbi8na().Get(),
                                 dataPos, count);
                    break;
                case CSeq_data::e_Ncbi8aa:
                    x_Append8To8(dst_str, data.GetNcbi8aa().Get(),
                                 dataPos, count);
                    break;
                case CSeq_data::e_Ncbieaa:
                    x_Append8To8(dst_str, data.GetNcbieaa().Get(),
                                 dataPos, count);
                    break;
                case CSeq_data::e_Ncbistdaa:
                    x_Append8To8(dst_str, data.GetNcbistdaa().Get(),
                                 dataPos, count);
                    break;
                default:
                    x_AppendAnyTo8(dst_str, data, dataPos, count);
                    break;
                }
            }
            else {
                x_AppendAnyTo8(dst_str, data, dataPos, count, table, reverse);
            }
        }
        ++seg;
        dst_pos += count;
        src_pos += count;
        _ASSERT(dst_str.size() == dst_pos);
    }
}
911
912
// Append sequence data for [src_pos, src_end) to 'dst_str' packed as two
// 4-bit values per byte.
//
// The sequence map is walked segment by segment starting at 'src_pos'.
// Gap segments are filled with GetGapChar(). Data segments that need a
// non-trivial conversion table or are on the minus strand are unpacked and
// repacked through x_AppendAnyTo4(); all others are copied from the
// segment's Ncbi4na data by x_Append4To4(). If the total number of values
// is odd, the last byte holds the final value in its high nibble.
//
// @throws CSeqVectorException (eCodingError) if no conversion table exists
//         between the segment coding and the vector coding.
void CSeqVector::x_GetPacked4naSeqData(string& dst_str,
                                       TSeqPos src_pos,
                                       TSeqPos src_end)
{
    ECaseConversion case_conversion = eCaseConversion_none;
    SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
    sel.SetStrand(m_Strand);
    if ( m_TSE ) {
        sel.SetLinkUsedTSE(m_TSE);
    }
    CSeqMap_CI seg(m_SeqMap, m_Scope.GetScopeOrNull(), sel, src_pos);

    dst_str.reserve((src_end-src_pos+1)>>1);
    TCoding dst_coding = GetCoding();
    TSeqPos dst_pos = 0;
    // Pending high nibble while dst_pos is odd
    char dst_c = 0;
    while ( src_pos < src_end ) {
        _ASSERT(dst_str.size() == dst_pos>>1);
        // Residues to take from the current segment
        TSeqPos count = min(src_end-src_pos, seg.GetEndPosition()-src_pos);
        if ( seg.GetType() == CSeqMap::eSeqGap ) {
            x_AppendGapTo4(dst_str, dst_c, dst_pos, count, GetGapChar());
        }
        else {
            const CSeq_data& data = seg.GetRefData();
            bool reverse = seg.GetRefMinusStrand();
            TCoding src_coding = data.Which();

            const char* table = 0;
            if ( dst_coding != src_coding || reverse ||
                 case_conversion != eCaseConversion_none ) {
                table = sx_GetConvertTable(src_coding, dst_coding,
                                           reverse, case_conversion);
                if ( !table && src_coding != dst_coding ) {
                    NCBI_THROW_FMT(CSeqVectorException, eCodingError,
                                   "Incompatible sequence codings: "<<
                                   src_coding<<" -> "<<dst_coding);
                }
            }

            if ( (table && table != sm_TrivialTable) || reverse ) {
                TSeqPos dataPos;
                if ( reverse ) {
                    // Revert segment offset
                    dataPos = seg.GetRefEndPosition() -
                        (src_pos - seg.GetPosition()) - count;
                }
                else {
                    dataPos = seg.GetRefPosition() +
                        (src_pos - seg.GetPosition());
                }
                x_AppendAnyTo4(dst_str, dst_c, dst_pos,
                               data, dataPos, count, table, reverse);
            }
            else {
                // NOTE(review): this path reads data.GetNcbi4na() directly;
                // it presumably relies on sm_TrivialTable being returned
                // only when the source is Ncbi4na -- confirm against
                // sx_GetConvertTable().
                TSeqPos dataPos = seg.GetRefPosition() +
                    (src_pos - seg.GetPosition());
                x_Append4To4(dst_str, dst_c, dst_pos,
                             data.GetNcbi4na().Get(), dataPos, count);
            }
        }
        ++seg;
        dst_pos += count;
        src_pos += count;
        _ASSERT(dst_str.size() == dst_pos>>1);
    }
    if ( dst_pos&1 ) {
        // Flush the pending value into the high nibble of a final byte
        dst_str += char(dst_c<<4);
    }
}
982
983
// Append sequence data for [src_pos, src_end) to 'dst_str' packed as four
// 2-bit values per byte.
//
// The sequence map is walked segment by segment starting at 'src_pos'.
// Gap segments require m_Randomizer: they are filled by x_AppendRandomTo2()
// using the Ncbi4na gap value. For data segments whose coding is not
// Ncbi2na, and when a randomizer is set, the data is first converted to
// Ncbi4na and then reduced to 2 bits by the randomizer. Segments that need
// neither table, reversal nor randomizer are copied by x_Append2To2().
// A final partial byte is left-aligned (padded with zero bits).
//
// @throws CSeqVectorException (eCodingError) if a gap is met without a
//         randomizer, or if no conversion table exists for a segment.
void CSeqVector::x_GetPacked2naSeqData(string& dst_str,
                                       TSeqPos src_pos,
                                       TSeqPos src_end)
{
    ECaseConversion case_conversion = eCaseConversion_none;
    SSeqMapSelector sel(CSeqMap::fDefaultFlags, kMax_UInt);
    sel.SetStrand(m_Strand);
    if ( m_TSE ) {
        sel.SetLinkUsedTSE(m_TSE);
    }
    CSeqMap_CI seg(m_SeqMap, m_Scope.GetScopeOrNull(), sel, src_pos);

    dst_str.reserve((src_end-src_pos+3)>>2);
    _ASSERT(GetCoding() == CSeq_data::e_Ncbi2na);
    TSeqPos dst_pos = 0;
    // Values of the partially filled output byte, right-aligned
    char dst_c = 0;
    while ( src_pos < src_end ) {
        _ASSERT(dst_str.size() == dst_pos>>2);
        // Residues to take from the current segment
        TSeqPos count = min(src_end-src_pos, seg.GetEndPosition()-src_pos);
        if ( seg.GetType() == CSeqMap::eSeqGap ) {
            if ( !m_Randomizer ) {
                NCBI_THROW(CSeqVectorException, eCodingError,
                           "Cannot fill NCBI2na gap without randomizer");
            }
            x_AppendRandomTo2(dst_str, dst_c, dst_pos, src_pos, count,
                              *m_Randomizer,
                              sx_GetGapChar(CSeq_data::e_Ncbi4na,
                                            eCaseConversion_none));
        }
        else {
            const CSeq_data& data = seg.GetRefData();
            bool reverse = seg.GetRefMinusStrand();
            TCoding src_coding = data.Which();
            TCoding dst_coding = CSeq_data::e_Ncbi2na;
            INcbi2naRandomizer* randomizer = 0;
            if ( src_coding != dst_coding && m_Randomizer) {
                // Convert to Ncbi4na first; the randomizer then maps the
                // 4-bit codes to 2-bit values.
                randomizer = m_Randomizer.GetPointer();
                _ASSERT(randomizer);
                dst_coding = CSeq_data::e_Ncbi4na;
            }

            const char* table = 0;
            if ( dst_coding != src_coding || reverse ||
                 case_conversion != eCaseConversion_none ) {
                table = sx_GetConvertTable(src_coding, dst_coding,
                                           reverse, case_conversion);
                if ( !table && src_coding != dst_coding ) {
                    NCBI_THROW_FMT(CSeqVectorException, eCodingError,
                                   "Incompatible sequence codings: "<<
                                   src_coding<<" -> "<<dst_coding);
                }
            }

            if ( (table && table != sm_TrivialTable) || reverse
                 || randomizer ) {
                TSeqPos dataPos;
                if ( reverse ) {
                    // Revert segment offset
                    dataPos = seg.GetRefEndPosition() -
                        (src_pos - seg.GetPosition()) - count;
                }
                else {
                    dataPos = seg.GetRefPosition() +
                        (src_pos - seg.GetPosition());
                }
                _ASSERT((!randomizer && dst_coding == CSeq_data::e_Ncbi2na) ||
                        (randomizer && dst_coding == CSeq_data::e_Ncbi4na));
                x_AppendAnyTo2(dst_str, dst_c, dst_pos,
                               data, dataPos, count, table, reverse,
                               randomizer, src_pos);
            }
            else {
                _ASSERT(dst_coding == CSeq_data::e_Ncbi2na);
                TSeqPos dataPos = seg.GetRefPosition() +
                    (src_pos - seg.GetPosition());
                x_Append2To2(dst_str, dst_c, dst_pos,
                             data.GetNcbi2na().Get(), dataPos, count);
            }
        }
        ++seg;
        dst_pos += count;
        src_pos += count;
        _ASSERT(dst_str.size() == dst_pos>>2);
    }
    if ( dst_pos&3 ) {
        // Shift the pending values to the top of the final byte:
        // (-dst_pos)&3 is the number of missing values in that byte.
        dst_str += char(dst_c << 2*TSeqPos(-TSignedSeqPos(dst_pos)&3));
    }
}
1072
1073
1074 CSeqVectorTypes::TResidue
sx_GetGapChar(TCoding coding,ECaseConversion case_cvt)1075 CSeqVectorTypes::sx_GetGapChar(TCoding coding, ECaseConversion case_cvt)
1076 {
1077 switch (coding) {
1078 case CSeq_data::e_Iupacna: // DNA - N
1079 return case_cvt == eCaseConversion_lower? 'n': 'N';
1080
1081 case CSeq_data::e_Ncbi8na: // DNA - bit representation
1082 case CSeq_data::e_Ncbi4na:
1083 return 0; // all bits set == any base
1084
1085 case CSeq_data::e_Ncbieaa: // Proteins - X
1086 case CSeq_data::e_Ncbi8aa: // Protein - numeric representation
1087 return '-';
1088 case CSeq_data::e_Iupacaa:
1089 return case_cvt == eCaseConversion_lower? 'x': 'X';
1090
1091 case CSeq_data::e_Ncbistdaa:
1092 return 0;
1093
1094 case CSeq_data::e_not_set:
1095 return 0; // It's not good to throw an exception here
1096
1097 case CSeq_data::e_Ncbi2na: // Codings without gap symbols
1098 // Exception is not good here because it conflicts with CSeqVector_CI.
1099 return 0xff;
1100
1101 case CSeq_data::e_Ncbipaa: //### Not sure about this
1102 case CSeq_data::e_Ncbipna: //### Not sure about this
1103 default:
1104 NCBI_THROW_FMT(CSeqVectorException, eCodingError,
1105 "Can not indicate gap using the selected coding: "<<
1106 coding);
1107 }
1108 }
1109
1110
1111 DEFINE_STATIC_FAST_MUTEX(s_ConvertTableMutex2);
1112
1113 const char*
sx_GetConvertTable(TCoding src,TCoding dst,bool reverse,ECaseConversion case_cvt)1114 CSeqVectorTypes::sx_GetConvertTable(TCoding src, TCoding dst,
1115 bool reverse, ECaseConversion case_cvt)
1116 {
1117 CFastMutexGuard guard(s_ConvertTableMutex2);
1118 typedef pair<TCoding, TCoding> TMainConversion;
1119 typedef pair<bool, ECaseConversion> TConversionFlags;
1120 typedef pair<TMainConversion, TConversionFlags> TConversionKey;
1121 typedef vector<char> TConversionTable;
1122 typedef map<TConversionKey, TConversionTable> TTables;
1123 static CSafeStatic<TTables> tables;
1124
1125 TConversionKey key;
1126 key.first = TMainConversion(src, dst);
1127 key.second = TConversionFlags(reverse, case_cvt);
1128 TTables::iterator it = tables->find(key);
1129 if ( it != tables->end() ) {
1130 // already created, but may be a stand-in
1131 switch (it->second.size()) {
1132 case 0: return 0; // error -- incompatible codings or the like
1133 case 1: return sm_TrivialTable;
1134 default: return &it->second[0];
1135 }
1136 }
1137 TConversionTable& table = (*tables)[key];
1138 if ( !CSeqportUtil::IsCodeAvailable(src) ||
1139 !CSeqportUtil::IsCodeAvailable(dst) ) {
1140 // invalid types
1141 return 0;
1142 }
1143
1144 const size_t COUNT = kMax_UChar+1;
1145 const unsigned kInvalidCode = kMax_UChar;
1146
1147 pair<unsigned, unsigned> srcIndex = CSeqportUtil::GetCodeIndexFromTo(src);
1148 if ( srcIndex.second >= COUNT ) {
1149 // too large range
1150 return 0;
1151 }
1152
1153 if ( reverse ) {
1154 // check if src needs complement conversion
1155 try {
1156 CSeqportUtil::GetIndexComplement(src, srcIndex.first);
1157 }
1158 catch ( exception& /*noComplement*/ ) {
1159 reverse = false;
1160 }
1161 }
1162 if ( case_cvt != eCaseConversion_none ) {
1163 // check if dst is text format
1164 if ( dst != CSeq_data::e_Iupacaa &&
1165 dst != CSeq_data::e_Iupacna &&
1166 dst != CSeq_data::e_Ncbieaa ) {
1167 case_cvt = eCaseConversion_none;
1168 }
1169 }
1170
1171 if ( dst != src ) {
1172 pair<unsigned, unsigned> dstIndex =
1173 CSeqportUtil::GetCodeIndexFromTo(dst);
1174 if ( dstIndex.second >= COUNT ) {
1175 // too large range
1176 return 0;
1177 }
1178
1179 try {
1180 // check for types compatibility
1181 CSeqportUtil::GetMapToIndex(src, dst, srcIndex.first);
1182 }
1183 catch ( exception& /*badType*/ ) {
1184 // incompatible types
1185 return 0;
1186 }
1187 }
1188 else if ( !reverse && case_cvt == eCaseConversion_none ) {
1189 // no need to convert at all
1190 return 0;
1191 }
1192
1193 table.resize(COUNT, char(kInvalidCode));
1194 bool different = false;
1195 for ( unsigned i = srcIndex.first; i <= srcIndex.second; ++i ) {
1196 try {
1197 unsigned code = i;
1198 if ( reverse ) {
1199 code = CSeqportUtil::GetIndexComplement(src, code);
1200 }
1201 if ( dst != src ) {
1202 code = CSeqportUtil::GetMapToIndex(src, dst, code);
1203 }
1204 code = min(kInvalidCode, code);
1205 if ( case_cvt == eCaseConversion_upper ) {
1206 code = toupper((unsigned char) code);
1207 }
1208 else if( case_cvt == eCaseConversion_lower ) {
1209 code = tolower((unsigned char) code);
1210 }
1211 if ( code != i ) {
1212 different = true;
1213 }
1214 table[i] = char(code);
1215 }
1216 catch ( exception& /*noConversion or noComplement*/ ) {
1217 different = true;
1218 }
1219 }
1220 if ( !different ) {
1221 table.resize(1);
1222 return sm_TrivialTable;
1223 }
1224 return &table[0];
1225 }
1226
1227
1228 const char CSeqVectorTypes::sm_TrivialTable[256] = {
1229 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
1230 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
1231 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
1232 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
1233 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
1234 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
1235 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
1236 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
1237 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
1238 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
1239 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
1240 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
1241 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
1242 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
1243 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
1244 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
1245 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
1246 '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
1247 '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
1248 '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
1249 '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
1250 '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
1251 '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
1252 '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
1253 '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
1254 '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
1255 '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
1256 '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
1257 '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
1258 '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
1259 '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
1260 '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
1261 };
1262
1263
SetStrand(ENa_strand strand)1264 void CSeqVector::SetStrand(ENa_strand strand)
1265 {
1266 if ( strand != m_Strand ) {
1267 m_Strand = strand;
1268 x_ResetIterator();
1269 }
1270 }
1271
1272
SetCoding(TCoding coding)1273 void CSeqVector::SetCoding(TCoding coding)
1274 {
1275 if (m_Coding != coding) {
1276 m_Coding = coding;
1277 x_ResetIterator();
1278 }
1279 }
1280
1281
SetIupacCoding(void)1282 void CSeqVector::SetIupacCoding(void)
1283 {
1284 SetCoding(IsProtein()? CSeq_data::e_Iupacaa: CSeq_data::e_Iupacna);
1285 }
1286
1287
SetNcbiCoding(void)1288 void CSeqVector::SetNcbiCoding(void)
1289 {
1290 SetCoding(IsProtein()? CSeq_data::e_Ncbistdaa: CSeq_data::e_Ncbi4na);
1291 }
1292
1293
SetCoding(EVectorCoding coding)1294 void CSeqVector::SetCoding(EVectorCoding coding)
1295 {
1296 switch ( coding ) {
1297 case CBioseq_Handle::eCoding_Iupac:
1298 SetIupacCoding();
1299 break;
1300 case CBioseq_Handle::eCoding_Ncbi:
1301 SetNcbiCoding();
1302 break;
1303 default:
1304 SetCoding(CSeq_data::e_not_set);
1305 break;
1306 }
1307 }
1308
1309
SetRandomizeAmbiguities(void)1310 void CSeqVector::SetRandomizeAmbiguities(void)
1311 {
1312 CRandom random_gen;
1313 x_InitRandomizer(random_gen);
1314 }
1315
1316
SetRandomizeAmbiguities(Uint4 seed)1317 void CSeqVector::SetRandomizeAmbiguities(Uint4 seed)
1318 {
1319 CRandom random_gen(seed);
1320 x_InitRandomizer(random_gen);
1321 }
1322
1323
SetRandomizeAmbiguities(CRandom & random_gen)1324 void CSeqVector::SetRandomizeAmbiguities(CRandom& random_gen)
1325 {
1326 x_InitRandomizer(random_gen);
1327 }
1328
1329
x_InitRandomizer(CRandom & random_gen)1330 void CSeqVector::x_InitRandomizer(CRandom& random_gen)
1331 {
1332 CRef<INcbi2naRandomizer> randomizer(new CNcbi2naRandomizer(random_gen));
1333 SetRandomizeAmbiguities(randomizer);
1334 }
1335
1336
SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer)1337 void CSeqVector::SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer)
1338 {
1339 if ( m_Randomizer != randomizer ) {
1340 m_Randomizer = randomizer;
1341 x_ResetIterator();
1342 }
1343 }
1344
1345
SetNoAmbiguities(void)1346 void CSeqVector::SetNoAmbiguities(void)
1347 {
1348 SetRandomizeAmbiguities(null);
1349 }
1350
1351
1352 END_SCOPE(objects)
1353 END_NCBI_SCOPE
1354