1 /*
2 
3 HyPhy - Hypothesis Testing Using Phylogenies.
4 
5 Copyright (C) 1997-now
6 Core Developers:
7    Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8    Art FY Poon    (apoon42@uwo.ca)
9    Steven Weaver (sweaver@temple.edu)
10 
11 Module Developers:
12         Lance Hepler (nlhepler@gmail.com)
13         Martin Smith (martin.audacis@gmail.com)
14 
15 Significant contributions from:
16   Spencer V Muse (muse@stat.ncsu.edu)
17   Simon DW Frost (sdf22@cam.ac.uk)
18 
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26 
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29 
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38 */
39 
40 #include "global_things.h"
41 #include "translation_table.h"
42 #include "function_templates.h"
43 
// Length of the stack scratch array used by
// _TranslationTable::MultiTokenResolutions; the same value also caps the
// number of resolutions that function will enumerate into its output array.
#define HYPHY_SITE_DEFAULT_BUFFER_SIZE 256

// Default-constructed table; the default constructor sets baseLength to 4.
_TranslationTable hy_default_translation_table;

using namespace hy_global;

// Built-in alphabets. The order matters: GetDefaultTable reads them by
// position (0 = DNA, 1 = RNA, 2 = protein, 3 = binary).
_List _TranslationTable::_list_of_default_tables(_List() < "ACGT" < "ACGU" <
                                                 "ACDEFGHIKLMNPQRSTVWY" < "01");
52 
GetDefaultTable(long tableType)53 const _String &_TranslationTable::GetDefaultTable(long tableType) {
54 
55   switch (tableType) {
56   case HY_TRANSLATION_TABLE_BINARY:
57     return *(_String *)_list_of_default_tables(3);
58   case HY_TRANSLATION_TABLE_RNA:
59     return *(_String *)_list_of_default_tables(1);
60   case HY_TRANSLATION_TABLE_PROTEIN:
61     return *(_String *)_list_of_default_tables(2);
62   case HY_TRANSLATION_TABLE_DNA:
63     return *(_String *)_list_of_default_tables(0);
64   }
65 
66   return kEmptyString;
67 }
68 
_TranslationTable(void)69 _TranslationTable::_TranslationTable(void) {
70   baseLength = 4;
71   checkTable = NULL;
72 }
73 
74 //_________________________________________________________
_TranslationTable(unsigned char baseL)75 _TranslationTable::_TranslationTable(unsigned char baseL) {
76   baseLength = (baseL == 20) ? 20 : 4;
77   checkTable = NULL;
78 }
79 
80 //_________________________________________________________
_TranslationTable(_TranslationTable const & t)81 _TranslationTable::_TranslationTable(_TranslationTable const &t) {
82    *this = t;
83 }
84 
85 //_________________________________________________________
operator =(_TranslationTable const & t)86 _TranslationTable const & _TranslationTable::operator = (_TranslationTable const &t) {
87    if (this != &t) {
88        tokensAdded = t.tokensAdded;
89        baseLength = t.baseLength;
90        baseSet = t.baseSet;
91        translationsAdded << t.translationsAdded;
92        checkTable = NULL;
93     }
94     return *this;
95  }
96 
97 //_________________________________________________________
_TranslationTable(_String & alphabet)98 _TranslationTable::_TranslationTable(_String &alphabet) {
99   baseLength = alphabet.length();
100   checkTable = NULL;
101   if (_list_of_default_tables.FindObject(&alphabet) < 0L) {
102     AddBaseSet(alphabet);
103   }
104 }
105 
106 //_________________________________________________________
makeDynamic(void) const107 BaseRef _TranslationTable::makeDynamic(void) const {
108   _TranslationTable *r = new _TranslationTable;
109   r->baseLength = baseLength;
110   r->tokensAdded.Duplicate(&tokensAdded);
111   r->baseSet.Duplicate(&baseSet);
112   r->translationsAdded.Duplicate(&translationsAdded);
113   r->checkTable = NULL;
114   return r;
115 }
116 
117 //_________________________________________________________
Duplicate(BaseRefConst source)118 void _TranslationTable::Duplicate(BaseRefConst source) {
119   _TranslationTable const *s = (_TranslationTable const *)source;
120   tokensAdded.Duplicate(&s->tokensAdded);
121   baseSet.Duplicate(&s->baseSet);
122   translationsAdded.Duplicate(&s->translationsAdded);
123   if (checkTable) {
124     free(checkTable);
125   };
126   checkTable = NULL;
127 }
128 
129 //_________________________________________________________
TokenCode(char token) const130 long _TranslationTable::TokenCode(char token) const {
131   // standard translations
132   long receptacle[256], resolution_count = TokenResolutions(token, receptacle);
133 
134   long theCode = 0L;
135 
136   for (unsigned long i = 0; i < resolution_count; i++) {
137     theCode |= (1L << receptacle[i]); // set the right bit
138   }
139 
140   return theCode;
141 }
142 
143 //_________________________________________________________
AmbigToLetter(long * split,unsigned long resolutions) const144 char _TranslationTable::AmbigToLetter(long *split,
145                                       unsigned long resolutions) const
146 // assumes a non-unique translation of split
147 // for unique - use ConvertCodeToLetters
148 {
149   long encoding = 0L;
150 
151   for (unsigned long k = 0UL; k < resolutions; k++) {
152     encoding |= (1L << split[k]);
153   }
154 
155   if (baseSet.length() == 0)
156   // one of the standard alphabers
157   {
158     if (baseLength == 4)
159     // nucleotides
160     {
161       switch (encoding) {
162       case 3:
163         return 'M';
164       case 5:
165         return 'S';
166       case 6:
167         return 'R';
168       case 7:
169         return 'V';
170       case 9:
171         return 'W';
172       case 10:
173         return 'Y';
174       case 11:
175         return 'H';
176       case 12:
177         return 'K';
178       case 14:
179         return 'B';
180       }
181     } else if (baseLength == 20)
182     // amino acids
183     {
184       switch (encoding) {
185       case 2052:
186         return 'B';
187       case 8200:
188         return 'Z';
189       }
190     }
191   } else if (tokensAdded.length()) {
192     long lookup = translationsAdded.Find(encoding);
193     // linear search for (binary) translations
194     if (lookup >= 0L) {
195       return tokensAdded.char_at(lookup);
196     }
197   }
198   return '?';
199 }
200 
201 //_________________________________________________________
SplitTokenCode(long code,long * receptacle) const202 void _TranslationTable::SplitTokenCode(long code, long *receptacle) const {
203   unsigned long shifter = 1L;
204   for (unsigned int i = 0; i < baseLength; i++) {
205     receptacle[i] = ((code & shifter) != 0) ? 1L : 0L;
206     shifter >>= 1;
207   }
208 }
209 
210 //_________________________________________________________
LengthOfAlphabet(void) const211 long _TranslationTable::LengthOfAlphabet(void) const {
212   return baseSet.length() ? baseSet.length() : baseLength;
213 }
214 
215 //_________________________________________________________
216 
ExpandToken(char token) const217 const _String _TranslationTable::ExpandToken(char token) const {
218   long buf[256];
219 
220   long resolution_count = TokenResolutions(token, buf);
221   _String const *base_set = &GetAlphabetString();
222   _StringBuffer expansion(base_set->length());
223 
224   for (long tc = 0; tc < resolution_count; tc++) {
225     expansion << base_set->char_at(buf[tc]);
226   }
227 
228   return expansion;
229 }
230 
231 //_________________________________________________________
232 
MultiTokenResolutions(_String const & tokens,long * receptacle,bool gapToOnes) const233 long _TranslationTable::MultiTokenResolutions(_String const &tokens,
234                                               long *receptacle,
235                                               bool gapToOnes) const {
236 
237   if (tokens.length() == 1UL) {
238     return TokenResolutions(tokens.char_at (0UL), receptacle, gapToOnes);
239   } else {
240 
241     long *large_store, large_store_static[HYPHY_SITE_DEFAULT_BUFFER_SIZE];
242 
243     if ((baseLength + 1)* tokens.length()  >=
244         HYPHY_SITE_DEFAULT_BUFFER_SIZE) {
245       large_store = new long[baseLength * tokens.length() + tokens.length()];
246     } else {
247       large_store = large_store_static;
248     }
249 
250     /*
251      large_store is a linear array which stores the following data
252 
253      [0,unitLength) -- the number of resolutions for the i-th character
254 
255      [unitLength,unitLength + baseLength] -- the actual resolutions for the 1st
256      char [unitLength + baseLength, unitLength + 2*baseLength] -- the actual
257      resolutions for the 2nd char
258      ...
259      */
260 
261     long resolution_count = 1L;
262 
263     for (unsigned long char_index = 0; char_index < tokens.length();
264          char_index++) {
265       large_store[char_index] = TokenResolutions(
266           tokens.char_at(char_index),
267           large_store + tokens.length() + baseLength * char_index, gapToOnes);
268       if (gapToOnes && large_store[char_index] == 0) {
269         large_store[char_index] = baseLength;
270         InitializeArray(large_store + tokens.length() + baseLength * char_index,
271                         baseLength, 1L);
272       }
273       resolution_count *=
274           large_store[char_index] > 0 ? large_store[char_index] : 0;
275     }
276 
277     if (resolution_count == 1L) {
278       for (unsigned long char_index = 0; char_index < tokens.length();
279            char_index++) {
280         large_store[char_index] =
281             large_store[tokens.length() + baseLength * char_index];
282       }
283 
284       if (receptacle) {
285         receptacle[0] = CombineDigits(large_store, tokens.length(), baseLength);
286       } else {
287         resolution_count =
288             CombineDigits(large_store, tokens.length(), baseLength);
289       }
290     } else {
291       if (receptacle) {
292         // handle cases of 2 and 3 characters separately since they are the most
293         // common
294 
295         if (resolution_count > HYPHY_SITE_DEFAULT_BUFFER_SIZE) {
296           HandleApplicationError(
297               (_String("Too many ambiguous states in call to ") &
298                _String(__PRETTY_FUNCTION__).Enquote()));
299           return -1L;
300         }
301 
302         if (tokens.length() == 3) {
303           long digits[3],
304               *resolution_arrays[3] = {large_store + tokens.length(),
305                                        large_store + tokens.length() +
306                                            baseLength,
307                                        large_store + tokens.length() +
308                                            2 * baseLength},
309               resolutions_index = 0L;
310 
311           for ( long digit1 = 0L; digit1 < large_store[0]; digit1++) {
312             for ( long digit2 = 0L; digit2 < large_store[1]; digit2++) {
313               for ( long digit3 = 0L; digit3 < large_store[2];
314                    digit3++) {
315                 receptacle[resolutions_index++] =
316                     resolution_arrays[0][digit1] * baseLength * baseLength +
317                     resolution_arrays[1][digit2] * baseLength +
318                     resolution_arrays[2][digit3];
319               }
320             }
321           }
322 
323         } else {
324           if (tokens.length() == 2) {
325             long *resolution_arrays[2] = {large_store + tokens.length(),
326                                          large_store + tokens.length() +
327                                              baseLength},
328                 resolutions_index = 0L;
329 
330             for ( long digit1 = 0L; digit1 < large_store[0]; digit1++) {
331                 for ( long digit2 = 0L; digit2 < large_store[1];
332                    digit2++) {
333                 receptacle[resolutions_index++] =
334                     resolution_arrays[0][digit1] * baseLength +
335                     resolution_arrays[1][digit2];
336               }
337             }
338           } else { // more than 3 tokens [rare!]
339 
340             if (tokens.length() >= 32) {
341               HandleApplicationError(
342                   _String("The token string is too long in call to ") &
343                   _String(__PRETTY_FUNCTION__).Enquote());
344               return -1L;
345             }
346 
347             long digits[32]{}, resolutions_index = 0L;
348 
349             do {
350               // assemble the current token, backwards
351               long this_resolution = 0L, weight = 1L;
352               for (long digit = tokens.length() - 1; digit >= 0; digit--) {
353                 this_resolution +=
354                     weight * *(large_store + tokens.length() +
355                                baseLength * digit + digits[digit]);
356                 weight *= tokens.length();
357               }
358 
359               receptacle[resolutions_index++] = this_resolution;
360 
361               for (long digit = tokens.length() - 1; digit >= 0; digit--) {
362                 if (++digits[digit] < large_store[digit]) {
363                   break;
364                 }
365                 if (digit > 0) {
366                   digits[digit] = 0L;
367                 }
368               }
369 
370             } while (digits[0] < large_store[0]);
371           }
372         }
373       } else {
374         resolution_count = -1L;
375       }
376     }
377 
378     if (large_store != large_store_static) {
379       delete[] large_store;
380     }
381 
382     return resolution_count;
383   }
384 }
385 
386 //_________________________________________________________
387 
TokenResolutions(char token,long * receptacle,bool gapToOnes) const388 long _TranslationTable::TokenResolutions(char token, long *receptacle,
389                                          bool gapToOnes) const {
390 
391   long custom_code = tokensAdded.length() ? tokensAdded.Find(token) : -1;
392   long resolution_counter = -1L;
393 
394   if (custom_code != -1) {
395     resolution_counter = 0L;
396     unsigned long shifter = 1L;
397     for (unsigned long i = 0UL; i < baseLength; i++) {
398       if ((custom_code & shifter) != 0) {
399         receptacle[resolution_counter++] = i;
400       }
401       shifter >>= 1;
402     }
403   } else {
404 
405     if (baseSet.length()) {
406 
407       long base_char = baseSet.Find(token);
408       // OPTIMIZE FLAG linear search:
409       // SLKP 20071002 should really be a 256 char lookup table
410 
411       if (base_char != -1) {
412         resolution_counter = 1;
413         receptacle[0] = base_char;
414       }
415     } else {
416 
417       if (baseLength == 4) {
418 
419         switch (token) {
420         case 'A':
421           resolution_counter = 1L;
422           receptacle[0] = 0;
423           break;
424 
425         case 'C':
426           resolution_counter = 1L;
427           receptacle[0] = 1;
428           break;
429 
430         case 'G':
431           resolution_counter = 1L;
432           receptacle[0] = 2;
433           break;
434 
435         case 'T':
436         case 'U':
437           resolution_counter = 1L;
438           receptacle[0] = 3;
439           break;
440 
441         case 'Y':
442           resolution_counter = 2L;
443           receptacle[0] = 1;
444           receptacle[1] = 3;
445           break;
446 
447         case 'R':
448           resolution_counter = 2L;
449           receptacle[0] = 0;
450           receptacle[1] = 2;
451           break;
452 
453         case 'W':
454           resolution_counter = 2L;
455           receptacle[0] = 0;
456           receptacle[1] = 3;
457           break;
458 
459         case 'S':
460           resolution_counter = 2L;
461           receptacle[0] = 1;
462           receptacle[1] = 2;
463           break;
464 
465         case 'K':
466           resolution_counter = 2L;
467           receptacle[0] = 2;
468           receptacle[1] = 3;
469           break;
470 
471         case 'M':
472           resolution_counter = 2L;
473           receptacle[0] = 0;
474           receptacle[1] = 1;
475           break;
476 
477         case 'B':
478           resolution_counter = 3L;
479           receptacle[0] = 1;
480           receptacle[1] = 2;
481           receptacle[2] = 3;
482           break;
483 
484         case 'D':
485           resolution_counter = 3L;
486           receptacle[0] = 0;
487           receptacle[1] = 2;
488           receptacle[2] = 3;
489           break;
490 
491         case 'H':
492           resolution_counter = 3L;
493           receptacle[0] = 0;
494           receptacle[1] = 1;
495           receptacle[2] = 3;
496           break;
497 
498         case 'V':
499           resolution_counter = 3L;
500           receptacle[0] = 0;
501           receptacle[1] = 1;
502           receptacle[2] = 2;
503           break;
504 
505         case 'X':
506         case 'N':
507         case '?':
508         case '.':
509         case '*':
510           resolution_counter = 4L;
511           receptacle[0] = 0;
512           receptacle[1] = 1;
513           receptacle[2] = 2;
514           receptacle[3] = 3;
515           break;
516 
517         case '-':
518           resolution_counter = 0L;
519           break;
520         }
521       } else {
522         if (baseLength == 20) {
523 
524           switch (token) {
525           case 'A':
526             resolution_counter = 1L;
527             receptacle[0] = 0;
528             break;
529 
530           case 'B':
531             resolution_counter = 2L;
532             receptacle[0] = 2;
533             receptacle[1] = 11;
534             break;
535 
536           case 'C':
537             resolution_counter = 1L;
538             receptacle[0] = 1;
539             break;
540 
541           case 'D':
542             resolution_counter = 1L;
543             receptacle[0] = 2;
544             break;
545 
546           case 'E':
547             resolution_counter = 1L;
548             receptacle[0] = 3;
549             break;
550 
551           case 'F':
552             resolution_counter = 1L;
553             receptacle[0] = 4;
554             break;
555 
556           case 'G':
557             resolution_counter = 1L;
558             receptacle[0] = 5;
559             break;
560 
561           case 'H':
562             resolution_counter = 1L;
563             receptacle[0] = 6;
564             break;
565 
566           case 'I':
567             resolution_counter = 1L;
568             receptacle[0] = 7;
569             break;
570 
571           case 'K':
572             resolution_counter = 1L;
573             receptacle[0] = 8;
574             break;
575 
576           case 'L':
577             resolution_counter = 1L;
578             receptacle[0] = 9;
579             break;
580 
581           case 'M':
582             resolution_counter = 1L;
583             receptacle[0] = 10;
584             break;
585 
586           case 'N':
587             resolution_counter = 1L;
588             receptacle[0] = 11;
589             break;
590 
591           case 'P':
592             resolution_counter = 1L;
593             receptacle[0] = 12;
594             break;
595 
596           case 'Q':
597             resolution_counter = 1L;
598             receptacle[0] = 13;
599             break;
600 
601           case 'R':
602             resolution_counter = 1L;
603             receptacle[0] = 14;
604             break;
605 
606           case 'S':
607             resolution_counter = 1L;
608             receptacle[0] = 15;
609             break;
610 
611           case 'T':
612             resolution_counter = 1L;
613             receptacle[0] = 16;
614             break;
615 
616           case 'V':
617             resolution_counter = 1L;
618             receptacle[0] = 17;
619             break;
620 
621           case 'W':
622             resolution_counter = 1L;
623             receptacle[0] = 18;
624             break;
625 
626           case 'Y':
627             resolution_counter = 1L;
628             receptacle[0] = 19;
629             break;
630 
631           case 'Z':
632             resolution_counter = 2L;
633             receptacle[0] = 3;
634             receptacle[1] = 13;
635             break;
636 
637           case 'X':
638           case '?':
639           case '.':
640           case '*': {
641             resolution_counter = 20L;
642             for (unsigned long j = 0UL; j < 20UL; j++) {
643               receptacle[j] = j;
644             }
645           } break;
646           case '-': {
647             resolution_counter = 0L;
648           } break;
649           }
650         } else
651         // binary
652         {
653 
654           switch (token) {
655           case '0':
656             resolution_counter = 1L;
657             receptacle[0] = 0;
658             break;
659 
660           case '1':
661             resolution_counter = 1L;
662             receptacle[0] = 1;
663             break;
664 
665           case 'X':
666           case '?':
667           case '.':
668           case '*': {
669             resolution_counter = 2L;
670             receptacle[0] = 0;
671             receptacle[1] = 1;
672           } break;
673           case '-': {
674             resolution_counter = 0L;
675           } break;
676           }
677         }
678       }
679     }
680   }
681 
682   if (resolution_counter == 0L && gapToOnes) {
683     for (unsigned long i = 0UL; i < baseLength; i++) {
684       receptacle[i] = i;
685     }
686     return baseLength;
687   }
688 
689   return resolution_counter;
690 }
691 
692 //_________________________________________________________
PrepareForChecks(void)693 void _TranslationTable::PrepareForChecks(void) {
694   if (checkTable == NULL) {
695     checkTable = (char *)MemAllocate(256);
696   }
697 
698   InitializeArray(checkTable, 256, (char)0);
699 
700   _String checkSymbols;
701   //  if (baseLength == 4)
702   //      checkSymbols = _String("ACGTUYRWSKMBDHVXN?O-.")&tokensAdded;
703   if (baseSet.length()) {
704     checkSymbols = baseSet & tokensAdded;
705   } else if (baseLength == 2) {
706     checkSymbols = _String("01*?-.") & tokensAdded;
707   } else {
708     checkSymbols = _String("ABCDEFGHIJKLMNOPQRSTUVWXYZ*?-.") & tokensAdded;
709   }
710 
711   for (long i = 0; i < checkSymbols.length(); i++) {
712     checkTable[(unsigned char)checkSymbols(i)] = (char)1;
713   }
714 }
715 
716 //_________________________________________________________
IsCharLegal(char c)717 bool _TranslationTable::IsCharLegal(char c) {
718   if (!checkTable) {
719     PrepareForChecks();
720   }
721   return checkTable[(unsigned char)c];
722 }
723 
GetAlphabetString(void) const724 const _String &_TranslationTable::GetAlphabetString(void) const {
725   if (baseSet.length()) {
726     return baseSet;
727   }
728 
729   if (baseLength == 4) {
730     return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA);
731   } else if (baseLength == 20) {
732     return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_PROTEIN);
733   } else {
734     return _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY);
735   }
736 
737   return kEmptyString;
738 }
739 
740 //___________________________________________
741 
AddTokenCode(char token,_String const & code)742 void _TranslationTable::AddTokenCode(char token, _String const &code) {
743   long f, newCode = 0;
744 
745   bool killBS = false;
746 
747   if (baseSet.length() == 0)
748   // fill in baseSet for standard alphabets
749   {
750     if (baseLength == 4) {
751       baseSet = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA);
752     } else if (baseLength == 20) {
753       baseSet =
754           _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_PROTEIN);
755     } else {
756       baseSet = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY);
757     }
758     killBS = true;
759   }
760 
761   if (baseSet.length()) {
762     long shifter = 1;
763     for (int j = 0; j < baseSet.length(); j++, shifter *= 2)
764       if (code.Find(baseSet.get_char(j)) >= 0) {
765         newCode += shifter;
766       }
767   }
768 
769   f = baseSet.Find(token);
770 
771   if (killBS) {
772     baseSet = kEmptyString;
773   }
774 
775   if (f >= 0) {
776     return;
777   }
778   // see if the character being added is a base
779   // character; those cannot be redefined
780 
781   f = tokensAdded.Find(token, 0, -1);
782   // new definition or redefinition?
783 
784   if (f == -1) { // new
785     tokensAdded = tokensAdded & token;
786     translationsAdded << 0;
787     f = tokensAdded.length() - 1;
788   }
789 
790   translationsAdded.list_data[f] = newCode;
791 }
792 
793 //_________________________________________________________
794 
AddBaseSet(_String const & code)795 void _TranslationTable::AddBaseSet(_String const &code) {
796   baseSet = code;
797   baseSet.StripQuotes();
798   baseLength = baseSet.length();
799   if (baseLength > HY_WIDTH_OF_LONG) {
800     // longer than the bit size of 'long'
801     // can't handle those
802     HandleApplicationError(_String("Alphabets with more than ") &
803                            HY_WIDTH_OF_LONG & " characters are not supported");
804   }
805 }
806 
807 //_________________________________________________________
808 
GetSkipChar(void)809 char _TranslationTable::GetSkipChar(void) {
810   if (baseSet.length() == 0 && translationsAdded.lLength == 0) {
811     return '?'; // this is the default
812   }
813 
814   // see if there is a symbol
815   // which maps to all '1'
816 
817   long all = 0, ul = baseSet.length() ? baseSet.length() : baseLength,
818        shifter = 1;
819 
820   for (long f = 0; f < ul; f++, shifter <<= 1) {
821     all |= shifter;
822   }
823 
824   if ((all = translationsAdded.Find(all)) == -1) {
825     return '?';
826   } else {
827     return tokensAdded[all];
828   }
829 }
830 
831 //_________________________________________________________
832 
GetGapChar(void) const833 char _TranslationTable::GetGapChar(void) const {
834   if (baseSet.length() == 0 && translationsAdded.lLength == 0) {
835     return '-'; // default gap character
836   }
837 
838   long f = translationsAdded.Find(0L);
839 
840   return f >= 0 ? tokensAdded[f] : '\0';
841 }
842 
843 //_________________________________________________________
844 const _String
ConvertCodeToLetters(long code,unsigned char base) const845 _TranslationTable::ConvertCodeToLetters(long code, unsigned char base) const {
846 
847   _String res ((unsigned long)base);
848 
849   if (code >= 0) {
850     // OPTIMIZE FLAG; repeated memory allocation/deallocation
851     if (baseSet.length())
852       for (long k = 1; k <= base; k++, code /= baseLength) {
853         res.set_char(base - k,baseSet.char_at(code % baseLength));
854       }
855     else if (baseLength == 4) {
856       for (long k = 1; k <= base; k++, code /= baseLength) {
857         switch (code % baseLength) {
858         case 0:
859           res[base - k] = 'A';
860           break;
861         case 1:
862           res[base - k] = 'C';
863           break;
864         case 2:
865           res[base - k] = 'G';
866           break;
867         case 3:
868           res[base - k] = 'T';
869           break;
870         }
871       }
872     } else if (baseLength == 20) {
873       for (long k = 1; k <= base; k++, code /= baseLength) {
874         char out = code % baseLength;
875         if (out == 0) {
876           res[base - k] = 'A';
877         } else if (out <= 7) {
878           res[base - k] = 'B' + out;
879         } else if (out <= 11) {
880           res[base - k] = 'C' + out;
881         } else if (out <= 16) {
882           res[base - k] = 'D' + out;
883         } else if (out <= 18) {
884           res[base - k] = 'E' + out;
885         } else {
886           res[base - k] = 'Y';
887         }
888       }
889     } else if (baseLength == 2)
890       for (long k = 1; k <= base; k++, code /= baseLength) {
891         switch (code % baseLength) {
892         case 0:
893           res[base - k] = '0';
894           break;
895         case 1:
896           res[base - k] = '1';
897           break;
898         }
899       }
900   } else {
901     char c = GetGapChar();
902     for (long k = 0L; k < base; k++) {
903       res.set_char(k,c);
904     }
905   }
906   return res;
907 }
908 
909 //_________________________________________________________
910 
operator ==(const _TranslationTable & rhs) const911 bool _TranslationTable::operator == (const _TranslationTable& rhs) const {
912 
913     if (baseSet.length() == rhs.baseSet.length()) {
914         if (baseSet.empty()) { // standard alphabet
915             if (baseLength != rhs.baseLength) {
916                 return false;
917             }
918         } else if (baseSet != rhs.baseSet) {
919             return false;
920         }
921 
922         if (tokensAdded.length() == rhs.tokensAdded.length()) {
923 
924             for (unsigned i = 0; i < tokensAdded.length(); i++) {
925                 if (ExpandToken (tokensAdded.get_char(i)) != rhs.ExpandToken (tokensAdded.get_char(i))) {
926                     return false;
927                 }
928             }
929 
930             return true;
931         }
932 
933     }
934     return false;
935 
936 }
937 
938 //_________________________________________________________
939 
940 _TranslationTable *
MergeTables(_TranslationTable const * table2) const941 _TranslationTable::MergeTables(_TranslationTable const *table2) const
942 // merge the translation tables if they are compatible, return the result,
943 // otherwise return nil
944 {
945   if (baseSet.length() == table2->baseSet.length()) {
946     if (baseSet.empty()) { // standard alphabet
947       if (baseLength != table2->baseLength) {
948         return nil;
949       }
950     } else if (baseSet != table2->baseSet) {
951       return nil;
952     }
953 
954     _TranslationTable *result = new _TranslationTable(*this);
955     if (table2->tokensAdded.length()) {
956       for (long i = 0; i < table2->tokensAdded.length(); i++) {
957         long f = tokensAdded.Find(table2->tokensAdded[i]);
958         if (f == -1) {
959           result->tokensAdded = result->tokensAdded & table2->tokensAdded[i];
960           // SLKP 20071002 added the next line;
961           // was not adding the translation for the new token
962           result->translationsAdded << table2->translationsAdded(i);
963         } else if (translationsAdded.list_data[f] !=
964                    table2->translationsAdded.list_data[i]) {
965           DeleteObject(result);
966           return nil;
967         }
968       }
969       return result;
970     } else {
971       return result;
972     }
973   }
974   return nil;
975 }
976 
977 
978