1 /* 2 * Copyright (C) 2010 Regents of the University of Michigan 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation, either version 3 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #ifndef _BASE_ASCII_MAP_H 19 #define _BASE_ASCII_MAP_H 20 21 #include "StringBasics.h" 22 23 /// Map between characters and the associated base type. 24 class BaseAsciiMap 25 { 26 public: 27 /// Value associated with 'N' in the ascii to base map (bad read). 28 static const int baseNIndex = 004; 29 /// Value associated with any non-base character in the ascii to base 30 /// map (unknown, bad data). 31 static const int baseXIndex = 005; 32 33 // Two arrays for converting back and forth between base pair character 34 // value (ASCII) to a base integer in the range 0..3. Note there is actually 35 // a value 4 and 5, for 'N' (indelible) and 'M' (unknown to me). 36 // 37 /// Convert from int representation to the base. 38 static const char int2base[]; 39 /// Convert from int representation to colorspace representation. 40 static const char int2colorSpace[]; 41 static unsigned char base2complement[]; 42 43 /// The type of space (color or base) to use in the mapping. 44 enum SPACE_TYPE { 45 /// Base decision on the first raw seq character/type has yet 46 /// to be determined. 47 UNKNOWN, 48 BASE_SPACE, ///< Bases only (A,C,G,T,N). 49 COLOR_SPACE ///< Color space only (0,1,2,3,.). 50 }; 51 52 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 53 /// both base and color space. 54 /// 'A'/'a'/'0' -> 0; 'C'/'c'/'1' -> 1; 'G'/'g'/'2' -> 2; 'T'/'t'/'3' -> 3; 55 /// 'N'/'n'/'4' -> 4; anything else -> 5. 56 static unsigned char baseColor2int[256+1]; // base space read (ATCG) 57 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 58 /// just base space (ACTGNactgn). 59 /// 'A'/'a' -> 0; 'C'/'c' -> 1; 'G'/'g' -> 2; 'T'/'t' -> 3; 60 /// 'N'/'n' -> 4; anything else -> 5. 61 static unsigned char base2int[256+1]; // base space read (ATCG) 62 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 63 /// just color space (0123). 64 /// '0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3; '4' -> 4; anything else -> 5. 65 static unsigned char color2int[256+1]; // base space read (ATCG) 66 67 public: 68 BaseAsciiMap(); 69 ~BaseAsciiMap(); 70 71 /// Set the base type based on the passed in option. setBaseMapType(SPACE_TYPE spaceType)72 inline void setBaseMapType(SPACE_TYPE spaceType) 73 { 74 resetPrimerCount(); 75 //First check to see if it is in base space. 76 switch (spaceType) 77 { 78 case BASE_SPACE: 79 // base space. 80 myBase2IntMapPtr = base2int; 81 break; 82 case COLOR_SPACE: 83 // color space. 84 myBase2IntMapPtr = color2int; 85 break; 86 default: 87 // Unknown map type, zero the pointer. 88 myBase2IntMapPtr = NULL; 89 break; 90 } 91 }; 92 93 /// Returns the baseIndex value for the character passed in. getBaseIndex(const char & letter)94 inline int getBaseIndex(const char& letter) 95 { 96 if (myBase2IntMapPtr == NULL) 97 { 98 // Check to see if we have hit the number of primer bases. 99 if (myPrimerCount < myNumPrimerBases) 100 { 101 // Still expecting primer bases, so lookup 102 // the letter in the base map. 103 ++myPrimerCount; 104 return(base2int[(int)letter]); 105 } 106 107 // Have already processed all the primers, so determine 108 // whether this is base or color space. 109 110 // Need to determime the base type. 111 setBaseMapType(letter); 112 113 // If it is still null, return invalid. Will be set when the first 114 // letter is either color or base. 115 if (myBase2IntMapPtr == NULL) 116 { 117 return(baseXIndex); 118 } 119 } 120 121 // Also check if configured as color space that the primers are correct. 122 if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases)) 123 { 124 // Still expecting primer bases, so lookup 125 // the letter in the base map. 126 ++myPrimerCount; 127 return(base2int[(int)letter]); 128 } 129 130 return myBase2IntMapPtr[(int)letter]; 131 } 132 133 /// Return the space type that is currently set. getSpaceType()134 inline SPACE_TYPE getSpaceType() 135 { 136 if (myBase2IntMapPtr == base2int) 137 { 138 return(BASE_SPACE); 139 } 140 else if (myBase2IntMapPtr == color2int) 141 { 142 return(COLOR_SPACE); 143 } 144 else 145 { 146 return(UNKNOWN); 147 } 148 } 149 150 /// Set the number of primer bases expected before the actual 151 /// base/color space type occurs for the rest of the entries. setNumPrimerBases(int numPrimerBases)152 void setNumPrimerBases(int numPrimerBases) 153 { 154 myNumPrimerBases = numPrimerBases; 155 } 156 157 /// Reset the number of primers to 0. resetPrimerCount()158 void resetPrimerCount() 159 { 160 myPrimerCount = 0; 161 }; 162 163 /// Reset the base mapping type to UNKNOWN. resetBaseMapType()164 void resetBaseMapType() 165 { 166 myBase2IntMapPtr = NULL; 167 resetPrimerCount(); 168 }; 169 170 private: 171 // Set the base type based on the passed in letter. 172 // If the letter is in neither the color space or the base space, both 173 // will be allowed. setBaseMapType(const char & letter)174 inline void setBaseMapType(const char& letter) 175 { 176 //First check to see if it is in base space. 177 if (base2int[(int)letter] != baseXIndex) 178 { 179 // This is a valid base space index, so it is base space. 180 myBase2IntMapPtr = base2int; 181 } 182 else if (color2int[(int)letter] != baseXIndex) 183 { 184 // This is a valid color space index, so it is base space. 185 myBase2IntMapPtr = color2int; 186 } 187 else 188 { 189 // Unknown map type, zero the pointer. 190 myBase2IntMapPtr = NULL; 191 } 192 }; 193 194 195 // The number of primer bases to expect for a color-space file. 196 unsigned int myNumPrimerBases; 197 198 // This is the number of primer bases that have been seen since 199 // the map type was set/reset. 200 unsigned int myPrimerCount; 201 202 unsigned char* myBase2IntMapPtr; 203 }; 204 205 #endif 206