1 /*
2
3 HyPhy - Hypothesis Testing Using Phylogenies.
4
5 Copyright (C) 1997-now
6 Core Developers:
7 Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8 Art FY Poon (apoon42@uwo.ca)
9 Steven Weaver (sweaver@temple.edu)
10
11 Module Developers:
12 Lance Hepler (nlhepler@gmail.com)
13 Martin Smith (martin.audacis@gmail.com)
14
15 Significant contributions from:
16 Spencer V Muse (muse@stat.ncsu.edu)
17 Simon DW Frost (sdf22@cam.ac.uk)
18
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
38 */
39
40 #include "global_things.h"
41 #include "translation_table.h"
42 #include "function_templates.h"
43
// Number of longs in the on-stack scratch buffer used by
// _TranslationTable::MultiTokenResolutions; also the largest number of
// ambiguous resolutions that function will enumerate before reporting an error.
#define HYPHY_SITE_DEFAULT_BUFFER_SIZE 256

// Default-constructed table (the default constructor sets baseLength to 4).
_TranslationTable hy_default_translation_table;

using namespace hy_global;

// Built-in alphabets, addressed by position in GetDefaultTable:
//   0 -> "ACGT" (DNA), 1 -> "ACGU" (RNA),
//   2 -> "ACDEFGHIKLMNPQRSTVWY" (protein), 3 -> "01" (binary).
_List _TranslationTable::_list_of_default_tables(_List() < "ACGT" < "ACGU" <
                                                 "ACDEFGHIKLMNPQRSTVWY" < "01");
52
GetDefaultTable(long tableType)53 const _String &_TranslationTable::GetDefaultTable(long tableType) {
54
55 switch (tableType) {
56 case HY_TRANSLATION_TABLE_BINARY:
57 return *(_String *)_list_of_default_tables(3);
58 case HY_TRANSLATION_TABLE_RNA:
59 return *(_String *)_list_of_default_tables(1);
60 case HY_TRANSLATION_TABLE_PROTEIN:
61 return *(_String *)_list_of_default_tables(2);
62 case HY_TRANSLATION_TABLE_DNA:
63 return *(_String *)_list_of_default_tables(0);
64 }
65
66 return kEmptyString;
67 }
68
_TranslationTable(void)69 _TranslationTable::_TranslationTable(void) {
70 baseLength = 4;
71 checkTable = NULL;
72 }
73
74 //_________________________________________________________
_TranslationTable(unsigned char baseL)75 _TranslationTable::_TranslationTable(unsigned char baseL) {
76 baseLength = (baseL == 20) ? 20 : 4;
77 checkTable = NULL;
78 }
79
80 //_________________________________________________________
_TranslationTable(_TranslationTable const & t)81 _TranslationTable::_TranslationTable(_TranslationTable const &t) {
82 *this = t;
83 }
84
85 //_________________________________________________________
operator =(_TranslationTable const & t)86 _TranslationTable const & _TranslationTable::operator = (_TranslationTable const &t) {
87 if (this != &t) {
88 tokensAdded = t.tokensAdded;
89 baseLength = t.baseLength;
90 baseSet = t.baseSet;
91 translationsAdded << t.translationsAdded;
92 checkTable = NULL;
93 }
94 return *this;
95 }
96
97 //_________________________________________________________
_TranslationTable(_String & alphabet)98 _TranslationTable::_TranslationTable(_String &alphabet) {
99 baseLength = alphabet.length();
100 checkTable = NULL;
101 if (_list_of_default_tables.FindObject(&alphabet) < 0L) {
102 AddBaseSet(alphabet);
103 }
104 }
105
106 //_________________________________________________________
makeDynamic(void) const107 BaseRef _TranslationTable::makeDynamic(void) const {
108 _TranslationTable *r = new _TranslationTable;
109 r->baseLength = baseLength;
110 r->tokensAdded.Duplicate(&tokensAdded);
111 r->baseSet.Duplicate(&baseSet);
112 r->translationsAdded.Duplicate(&translationsAdded);
113 r->checkTable = NULL;
114 return r;
115 }
116
117 //_________________________________________________________
Duplicate(BaseRefConst source)118 void _TranslationTable::Duplicate(BaseRefConst source) {
119 _TranslationTable const *s = (_TranslationTable const *)source;
120 tokensAdded.Duplicate(&s->tokensAdded);
121 baseSet.Duplicate(&s->baseSet);
122 translationsAdded.Duplicate(&s->translationsAdded);
123 if (checkTable) {
124 free(checkTable);
125 };
126 checkTable = NULL;
127 }
128
129 //_________________________________________________________
TokenCode(char token) const130 long _TranslationTable::TokenCode(char token) const {
131 // standard translations
132 long receptacle[256], resolution_count = TokenResolutions(token, receptacle);
133
134 long theCode = 0L;
135
136 for (unsigned long i = 0; i < resolution_count; i++) {
137 theCode |= (1L << receptacle[i]); // set the right bit
138 }
139
140 return theCode;
141 }
142
143 //_________________________________________________________
AmbigToLetter(long * split,unsigned long resolutions) const144 char _TranslationTable::AmbigToLetter(long *split,
145 unsigned long resolutions) const
146 // assumes a non-unique translation of split
147 // for unique - use ConvertCodeToLetters
148 {
149 long encoding = 0L;
150
151 for (unsigned long k = 0UL; k < resolutions; k++) {
152 encoding |= (1L << split[k]);
153 }
154
155 if (baseSet.length() == 0)
156 // one of the standard alphabers
157 {
158 if (baseLength == 4)
159 // nucleotides
160 {
161 switch (encoding) {
162 case 3:
163 return 'M';
164 case 5:
165 return 'S';
166 case 6:
167 return 'R';
168 case 7:
169 return 'V';
170 case 9:
171 return 'W';
172 case 10:
173 return 'Y';
174 case 11:
175 return 'H';
176 case 12:
177 return 'K';
178 case 14:
179 return 'B';
180 }
181 } else if (baseLength == 20)
182 // amino acids
183 {
184 switch (encoding) {
185 case 2052:
186 return 'B';
187 case 8200:
188 return 'Z';
189 }
190 }
191 } else if (tokensAdded.length()) {
192 long lookup = translationsAdded.Find(encoding);
193 // linear search for (binary) translations
194 if (lookup >= 0L) {
195 return tokensAdded.char_at(lookup);
196 }
197 }
198 return '?';
199 }
200
201 //_________________________________________________________
SplitTokenCode(long code,long * receptacle) const202 void _TranslationTable::SplitTokenCode(long code, long *receptacle) const {
203 unsigned long shifter = 1L;
204 for (unsigned int i = 0; i < baseLength; i++) {
205 receptacle[i] = ((code & shifter) != 0) ? 1L : 0L;
206 shifter >>= 1;
207 }
208 }
209
210 //_________________________________________________________
LengthOfAlphabet(void) const211 long _TranslationTable::LengthOfAlphabet(void) const {
212 return baseSet.length() ? baseSet.length() : baseLength;
213 }
214
215 //_________________________________________________________
216
ExpandToken(char token) const217 const _String _TranslationTable::ExpandToken(char token) const {
218 long buf[256];
219
220 long resolution_count = TokenResolutions(token, buf);
221 _String const *base_set = &GetAlphabetString();
222 _StringBuffer expansion(base_set->length());
223
224 for (long tc = 0; tc < resolution_count; tc++) {
225 expansion << base_set->char_at(buf[tc]);
226 }
227
228 return expansion;
229 }
230
231 //_________________________________________________________
232
MultiTokenResolutions(_String const & tokens,long * receptacle,bool gapToOnes) const233 long _TranslationTable::MultiTokenResolutions(_String const &tokens,
234 long *receptacle,
235 bool gapToOnes) const {
236
237 if (tokens.length() == 1UL) {
238 return TokenResolutions(tokens.char_at (0UL), receptacle, gapToOnes);
239 } else {
240
241 long *large_store, large_store_static[HYPHY_SITE_DEFAULT_BUFFER_SIZE];
242
243 if ((baseLength + 1)* tokens.length() >=
244 HYPHY_SITE_DEFAULT_BUFFER_SIZE) {
245 large_store = new long[baseLength * tokens.length() + tokens.length()];
246 } else {
247 large_store = large_store_static;
248 }
249
250 /*
251 large_store is a linear array which stores the following data
252
253 [0,unitLength) -- the number of resolutions for the i-th character
254
255 [unitLength,unitLength + baseLength] -- the actual resolutions for the 1st
256 char [unitLength + baseLength, unitLength + 2*baseLength] -- the actual
257 resolutions for the 2nd char
258 ...
259 */
260
261 long resolution_count = 1L;
262
263 for (unsigned long char_index = 0; char_index < tokens.length();
264 char_index++) {
265 large_store[char_index] = TokenResolutions(
266 tokens.char_at(char_index),
267 large_store + tokens.length() + baseLength * char_index, gapToOnes);
268 if (gapToOnes && large_store[char_index] == 0) {
269 large_store[char_index] = baseLength;
270 InitializeArray(large_store + tokens.length() + baseLength * char_index,
271 baseLength, 1L);
272 }
273 resolution_count *=
274 large_store[char_index] > 0 ? large_store[char_index] : 0;
275 }
276
277 if (resolution_count == 1L) {
278 for (unsigned long char_index = 0; char_index < tokens.length();
279 char_index++) {
280 large_store[char_index] =
281 large_store[tokens.length() + baseLength * char_index];
282 }
283
284 if (receptacle) {
285 receptacle[0] = CombineDigits(large_store, tokens.length(), baseLength);
286 } else {
287 resolution_count =
288 CombineDigits(large_store, tokens.length(), baseLength);
289 }
290 } else {
291 if (receptacle) {
292 // handle cases of 2 and 3 characters separately since they are the most
293 // common
294
295 if (resolution_count > HYPHY_SITE_DEFAULT_BUFFER_SIZE) {
296 HandleApplicationError(
297 (_String("Too many ambiguous states in call to ") &
298 _String(__PRETTY_FUNCTION__).Enquote()));
299 return -1L;
300 }
301
302 if (tokens.length() == 3) {
303 long digits[3],
304 *resolution_arrays[3] = {large_store + tokens.length(),
305 large_store + tokens.length() +
306 baseLength,
307 large_store + tokens.length() +
308 2 * baseLength},
309 resolutions_index = 0L;
310
311 for ( long digit1 = 0L; digit1 < large_store[0]; digit1++) {
312 for ( long digit2 = 0L; digit2 < large_store[1]; digit2++) {
313 for ( long digit3 = 0L; digit3 < large_store[2];
314 digit3++) {
315 receptacle[resolutions_index++] =
316 resolution_arrays[0][digit1] * baseLength * baseLength +
317 resolution_arrays[1][digit2] * baseLength +
318 resolution_arrays[2][digit3];
319 }
320 }
321 }
322
323 } else {
324 if (tokens.length() == 2) {
325 long *resolution_arrays[2] = {large_store + tokens.length(),
326 large_store + tokens.length() +
327 baseLength},
328 resolutions_index = 0L;
329
330 for ( long digit1 = 0L; digit1 < large_store[0]; digit1++) {
331 for ( long digit2 = 0L; digit2 < large_store[1];
332 digit2++) {
333 receptacle[resolutions_index++] =
334 resolution_arrays[0][digit1] * baseLength +
335 resolution_arrays[1][digit2];
336 }
337 }
338 } else { // more than 3 tokens [rare!]
339
340 if (tokens.length() >= 32) {
341 HandleApplicationError(
342 _String("The token string is too long in call to ") &
343 _String(__PRETTY_FUNCTION__).Enquote());
344 return -1L;
345 }
346
347 long digits[32]{}, resolutions_index = 0L;
348
349 do {
350 // assemble the current token, backwards
351 long this_resolution = 0L, weight = 1L;
352 for (long digit = tokens.length() - 1; digit >= 0; digit--) {
353 this_resolution +=
354 weight * *(large_store + tokens.length() +
355 baseLength * digit + digits[digit]);
356 weight *= tokens.length();
357 }
358
359 receptacle[resolutions_index++] = this_resolution;
360
361 for (long digit = tokens.length() - 1; digit >= 0; digit--) {
362 if (++digits[digit] < large_store[digit]) {
363 break;
364 }
365 if (digit > 0) {
366 digits[digit] = 0L;
367 }
368 }
369
370 } while (digits[0] < large_store[0]);
371 }
372 }
373 } else {
374 resolution_count = -1L;
375 }
376 }
377
378 if (large_store != large_store_static) {
379 delete[] large_store;
380 }
381
382 return resolution_count;
383 }
384 }
385
386 //_________________________________________________________
387
TokenResolutions(char token,long * receptacle,bool gapToOnes) const388 long _TranslationTable::TokenResolutions(char token, long *receptacle,
389 bool gapToOnes) const {
390
391 long custom_code = tokensAdded.length() ? tokensAdded.Find(token) : -1;
392 long resolution_counter = -1L;
393
394 if (custom_code != -1) {
395 resolution_counter = 0L;
396 unsigned long shifter = 1L;
397 for (unsigned long i = 0UL; i < baseLength; i++) {
398 if ((custom_code & shifter) != 0) {
399 receptacle[resolution_counter++] = i;
400 }
401 shifter >>= 1;
402 }
403 } else {
404
405 if (baseSet.length()) {
406
407 long base_char = baseSet.Find(token);
408 // OPTIMIZE FLAG linear search:
409 // SLKP 20071002 should really be a 256 char lookup table
410
411 if (base_char != -1) {
412 resolution_counter = 1;
413 receptacle[0] = base_char;
414 }
415 } else {
416
417 if (baseLength == 4) {
418
419 switch (token) {
420 case 'A':
421 resolution_counter = 1L;
422 receptacle[0] = 0;
423 break;
424
425 case 'C':
426 resolution_counter = 1L;
427 receptacle[0] = 1;
428 break;
429
430 case 'G':
431 resolution_counter = 1L;
432 receptacle[0] = 2;
433 break;
434
435 case 'T':
436 case 'U':
437 resolution_counter = 1L;
438 receptacle[0] = 3;
439 break;
440
441 case 'Y':
442 resolution_counter = 2L;
443 receptacle[0] = 1;
444 receptacle[1] = 3;
445 break;
446
447 case 'R':
448 resolution_counter = 2L;
449 receptacle[0] = 0;
450 receptacle[1] = 2;
451 break;
452
453 case 'W':
454 resolution_counter = 2L;
455 receptacle[0] = 0;
456 receptacle[1] = 3;
457 break;
458
459 case 'S':
460 resolution_counter = 2L;
461 receptacle[0] = 1;
462 receptacle[1] = 2;
463 break;
464
465 case 'K':
466 resolution_counter = 2L;
467 receptacle[0] = 2;
468 receptacle[1] = 3;
469 break;
470
471 case 'M':
472 resolution_counter = 2L;
473 receptacle[0] = 0;
474 receptacle[1] = 1;
475 break;
476
477 case 'B':
478 resolution_counter = 3L;
479 receptacle[0] = 1;
480 receptacle[1] = 2;
481 receptacle[2] = 3;
482 break;
483
484 case 'D':
485 resolution_counter = 3L;
486 receptacle[0] = 0;
487 receptacle[1] = 2;
488 receptacle[2] = 3;
489 break;
490
491 case 'H':
492 resolution_counter = 3L;
493 receptacle[0] = 0;
494 receptacle[1] = 1;
495 receptacle[2] = 3;
496 break;
497
498 case 'V':
499 resolution_counter = 3L;
500 receptacle[0] = 0;
501 receptacle[1] = 1;
502 receptacle[2] = 2;
503 break;
504
505 case 'X':
506 case 'N':
507 case '?':
508 case '.':
509 case '*':
510 resolution_counter = 4L;
511 receptacle[0] = 0;
512 receptacle[1] = 1;
513 receptacle[2] = 2;
514 receptacle[3] = 3;
515 break;
516
517 case '-':
518 resolution_counter = 0L;
519 break;
520 }
521 } else {
522 if (baseLength == 20) {
523
524 switch (token) {
525 case 'A':
526 resolution_counter = 1L;
527 receptacle[0] = 0;
528 break;
529
530 case 'B':
531 resolution_counter = 2L;
532 receptacle[0] = 2;
533 receptacle[1] = 11;
534 break;
535
536 case 'C':
537 resolution_counter = 1L;
538 receptacle[0] = 1;
539 break;
540
541 case 'D':
542 resolution_counter = 1L;
543 receptacle[0] = 2;
544 break;
545
546 case 'E':
547 resolution_counter = 1L;
548 receptacle[0] = 3;
549 break;
550
551 case 'F':
552 resolution_counter = 1L;
553 receptacle[0] = 4;
554 break;
555
556 case 'G':
557 resolution_counter = 1L;
558 receptacle[0] = 5;
559 break;
560
561 case 'H':
562 resolution_counter = 1L;
563 receptacle[0] = 6;
564 break;
565
566 case 'I':
567 resolution_counter = 1L;
568 receptacle[0] = 7;
569 break;
570
571 case 'K':
572 resolution_counter = 1L;
573 receptacle[0] = 8;
574 break;
575
576 case 'L':
577 resolution_counter = 1L;
578 receptacle[0] = 9;
579 break;
580
581 case 'M':
582 resolution_counter = 1L;
583 receptacle[0] = 10;
584 break;
585
586 case 'N':
587 resolution_counter = 1L;
588 receptacle[0] = 11;
589 break;
590
591 case 'P':
592 resolution_counter = 1L;
593 receptacle[0] = 12;
594 break;
595
596 case 'Q':
597 resolution_counter = 1L;
598 receptacle[0] = 13;
599 break;
600
601 case 'R':
602 resolution_counter = 1L;
603 receptacle[0] = 14;
604 break;
605
606 case 'S':
607 resolution_counter = 1L;
608 receptacle[0] = 15;
609 break;
610
611 case 'T':
612 resolution_counter = 1L;
613 receptacle[0] = 16;
614 break;
615
616 case 'V':
617 resolution_counter = 1L;
618 receptacle[0] = 17;
619 break;
620
621 case 'W':
622 resolution_counter = 1L;
623 receptacle[0] = 18;
624 break;
625
626 case 'Y':
627 resolution_counter = 1L;
628 receptacle[0] = 19;
629 break;
630
631 case 'Z':
632 resolution_counter = 2L;
633 receptacle[0] = 3;
634 receptacle[1] = 13;
635 break;
636
637 case 'X':
638 case '?':
639 case '.':
640 case '*': {
641 resolution_counter = 20L;
642 for (unsigned long j = 0UL; j < 20UL; j++) {
643 receptacle[j] = j;
644 }
645 } break;
646 case '-': {
647 resolution_counter = 0L;
648 } break;
649 }
650 } else
651 // binary
652 {
653
654 switch (token) {
655 case '0':
656 resolution_counter = 1L;
657 receptacle[0] = 0;
658 break;
659
660 case '1':
661 resolution_counter = 1L;
662 receptacle[0] = 1;
663 break;
664
665 case 'X':
666 case '?':
667 case '.':
668 case '*': {
669 resolution_counter = 2L;
670 receptacle[0] = 0;
671 receptacle[1] = 1;
672 } break;
673 case '-': {
674 resolution_counter = 0L;
675 } break;
676 }
677 }
678 }
679 }
680 }
681
682 if (resolution_counter == 0L && gapToOnes) {
683 for (unsigned long i = 0UL; i < baseLength; i++) {
684 receptacle[i] = i;
685 }
686 return baseLength;
687 }
688
689 return resolution_counter;
690 }
691
692 //_________________________________________________________
PrepareForChecks(void)693 void _TranslationTable::PrepareForChecks(void) {
694 if (checkTable == NULL) {
695 checkTable = (char *)MemAllocate(256);
696 }
697
698 InitializeArray(checkTable, 256, (char)0);
699
700 _String checkSymbols;
701 // if (baseLength == 4)
702 // checkSymbols = _String("ACGTUYRWSKMBDHVXN?O-.")&tokensAdded;
703 if (baseSet.length()) {
704 checkSymbols = baseSet & tokensAdded;
705 } else if (baseLength == 2) {
706 checkSymbols = _String("01*?-.") & tokensAdded;
707 } else {
708 checkSymbols = _String("ABCDEFGHIJKLMNOPQRSTUVWXYZ*?-.") & tokensAdded;
709 }
710
711 for (long i = 0; i < checkSymbols.length(); i++) {
712 checkTable[(unsigned char)checkSymbols(i)] = (char)1;
713 }
714 }
715
716 //_________________________________________________________
IsCharLegal(char c)717 bool _TranslationTable::IsCharLegal(char c) {
718 if (!checkTable) {
719 PrepareForChecks();
720 }
721 return checkTable[(unsigned char)c];
722 }
723
GetAlphabetString(void) const724 const _String &_TranslationTable::GetAlphabetString(void) const {
725 if (baseSet.length()) {
726 return baseSet;
727 }
728
729 if (baseLength == 4) {
730 return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA);
731 } else if (baseLength == 20) {
732 return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_PROTEIN);
733 } else {
734 return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY);
735 }
736
737 return kEmptyString;
738 }
739
740 //___________________________________________
741
AddTokenCode(char token,_String const & code)742 void _TranslationTable::AddTokenCode(char token, _String const &code) {
743 long f, newCode = 0;
744
745 bool killBS = false;
746
747 if (baseSet.length() == 0)
748 // fill in baseSet for standard alphabets
749 {
750 if (baseLength == 4) {
751 baseSet = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA);
752 } else if (baseLength == 20) {
753 baseSet =
754 _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_PROTEIN);
755 } else {
756 baseSet = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY);
757 }
758 killBS = true;
759 }
760
761 if (baseSet.length()) {
762 long shifter = 1;
763 for (int j = 0; j < baseSet.length(); j++, shifter *= 2)
764 if (code.Find(baseSet.get_char(j)) >= 0) {
765 newCode += shifter;
766 }
767 }
768
769 f = baseSet.Find(token);
770
771 if (killBS) {
772 baseSet = kEmptyString;
773 }
774
775 if (f >= 0) {
776 return;
777 }
778 // see if the character being added is a base
779 // character; those cannot be redefined
780
781 f = tokensAdded.Find(token, 0, -1);
782 // new definition or redefinition?
783
784 if (f == -1) { // new
785 tokensAdded = tokensAdded & token;
786 translationsAdded << 0;
787 f = tokensAdded.length() - 1;
788 }
789
790 translationsAdded.list_data[f] = newCode;
791 }
792
793 //_________________________________________________________
794
AddBaseSet(_String const & code)795 void _TranslationTable::AddBaseSet(_String const &code) {
796 baseSet = code;
797 baseSet.StripQuotes();
798 baseLength = baseSet.length();
799 if (baseLength > HY_WIDTH_OF_LONG) {
800 // longer than the bit size of 'long'
801 // can't handle those
802 HandleApplicationError(_String("Alphabets with more than ") &
803 HY_WIDTH_OF_LONG & " characters are not supported");
804 }
805 }
806
807 //_________________________________________________________
808
GetSkipChar(void)809 char _TranslationTable::GetSkipChar(void) {
810 if (baseSet.length() == 0 && translationsAdded.lLength == 0) {
811 return '?'; // this is the default
812 }
813
814 // see if there is a symbol
815 // which maps to all '1'
816
817 long all = 0, ul = baseSet.length() ? baseSet.length() : baseLength,
818 shifter = 1;
819
820 for (long f = 0; f < ul; f++, shifter <<= 1) {
821 all |= shifter;
822 }
823
824 if ((all = translationsAdded.Find(all)) == -1) {
825 return '?';
826 } else {
827 return tokensAdded[all];
828 }
829 }
830
831 //_________________________________________________________
832
GetGapChar(void) const833 char _TranslationTable::GetGapChar(void) const {
834 if (baseSet.length() == 0 && translationsAdded.lLength == 0) {
835 return '-'; // default gap character
836 }
837
838 long f = translationsAdded.Find(0L);
839
840 return f >= 0 ? tokensAdded[f] : '\0';
841 }
842
843 //_________________________________________________________
844 const _String
ConvertCodeToLetters(long code,unsigned char base) const845 _TranslationTable::ConvertCodeToLetters(long code, unsigned char base) const {
846
847 _String res ((unsigned long)base);
848
849 if (code >= 0) {
850 // OPTIMIZE FLAG; repeated memory allocation/deallocation
851 if (baseSet.length())
852 for (long k = 1; k <= base; k++, code /= baseLength) {
853 res.set_char(base - k,baseSet.char_at(code % baseLength));
854 }
855 else if (baseLength == 4) {
856 for (long k = 1; k <= base; k++, code /= baseLength) {
857 switch (code % baseLength) {
858 case 0:
859 res[base - k] = 'A';
860 break;
861 case 1:
862 res[base - k] = 'C';
863 break;
864 case 2:
865 res[base - k] = 'G';
866 break;
867 case 3:
868 res[base - k] = 'T';
869 break;
870 }
871 }
872 } else if (baseLength == 20) {
873 for (long k = 1; k <= base; k++, code /= baseLength) {
874 char out = code % baseLength;
875 if (out == 0) {
876 res[base - k] = 'A';
877 } else if (out <= 7) {
878 res[base - k] = 'B' + out;
879 } else if (out <= 11) {
880 res[base - k] = 'C' + out;
881 } else if (out <= 16) {
882 res[base - k] = 'D' + out;
883 } else if (out <= 18) {
884 res[base - k] = 'E' + out;
885 } else {
886 res[base - k] = 'Y';
887 }
888 }
889 } else if (baseLength == 2)
890 for (long k = 1; k <= base; k++, code /= baseLength) {
891 switch (code % baseLength) {
892 case 0:
893 res[base - k] = '0';
894 break;
895 case 1:
896 res[base - k] = '1';
897 break;
898 }
899 }
900 } else {
901 char c = GetGapChar();
902 for (long k = 0L; k < base; k++) {
903 res.set_char(k,c);
904 }
905 }
906 return res;
907 }
908
909 //_________________________________________________________
910
operator ==(const _TranslationTable & rhs) const911 bool _TranslationTable::operator == (const _TranslationTable& rhs) const {
912
913 if (baseSet.length() == rhs.baseSet.length()) {
914 if (baseSet.empty()) { // standard alphabet
915 if (baseLength != rhs.baseLength) {
916 return false;
917 }
918 } else if (baseSet != rhs.baseSet) {
919 return false;
920 }
921
922 if (tokensAdded.length() == rhs.tokensAdded.length()) {
923
924 for (unsigned i = 0; i < tokensAdded.length(); i++) {
925 if (ExpandToken (tokensAdded.get_char(i)) != rhs.ExpandToken (tokensAdded.get_char(i))) {
926 return false;
927 }
928 }
929
930 return true;
931 }
932
933 }
934 return false;
935
936 }
937
938 //_________________________________________________________
939
940 _TranslationTable *
MergeTables(_TranslationTable const * table2) const941 _TranslationTable::MergeTables(_TranslationTable const *table2) const
942 // merge the translation tables if they are compatible, return the result,
943 // otherwise return nil
944 {
945 if (baseSet.length() == table2->baseSet.length()) {
946 if (baseSet.empty()) { // standard alphabet
947 if (baseLength != table2->baseLength) {
948 return nil;
949 }
950 } else if (baseSet != table2->baseSet) {
951 return nil;
952 }
953
954 _TranslationTable *result = new _TranslationTable(*this);
955 if (table2->tokensAdded.length()) {
956 for (long i = 0; i < table2->tokensAdded.length(); i++) {
957 long f = tokensAdded.Find(table2->tokensAdded[i]);
958 if (f == -1) {
959 result->tokensAdded = result->tokensAdded & table2->tokensAdded[i];
960 // SLKP 20071002 added the next line;
961 // was not adding the translation for the new token
962 result->translationsAdded << table2->translationsAdded(i);
963 } else if (translationsAdded.list_data[f] !=
964 table2->translationsAdded.list_data[i]) {
965 DeleteObject(result);
966 return nil;
967 }
968 }
969 return result;
970 } else {
971 return result;
972 }
973 }
974 return nil;
975 }
976
977
978