1 /*
2  *  Copyright (C) 2010  Regents of the University of Michigan
3  *
4  *   This program is free software: you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation, either version 3 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef _BASE_ASCII_MAP_H
19 #define _BASE_ASCII_MAP_H
20 
21 #include "StringBasics.h"
22 
23 /// Map between characters and the associated base type.
class BaseAsciiMap
{
public:
    /// Value associated with 'N' in the ascii to base map (bad read).
    static const int baseNIndex = 4;
    /// Value associated with any non-base character in the ascii to base
    /// map (unknown, bad data).
    static const int baseXIndex = 5;

    // Lookup tables for converting between a base pair character (ASCII)
    // and a base integer in the range 0..3.  Two further values are used:
    // baseNIndex (4) for 'N' and baseXIndex (5) for any non-base character.
    //
    /// Convert from int representation to the base.
    static const char int2base[];
    /// Convert from int representation to colorspace representation.
    static const char int2colorSpace[];
    /// NOTE(review): presumably maps a base character to its complement;
    /// the table contents are defined outside this header -- confirm.
    static unsigned char base2complement[];

    /// The type of space (color or base) to use in the mapping.
    enum SPACE_TYPE {
        /// Base decision on the first raw seq character/type has yet
        /// to be determined.
        UNKNOWN,
        BASE_SPACE, ///< Bases only (A,C,G,T,N).
        COLOR_SPACE ///< Color space only (0,1,2,3,.).
    };

    /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
    /// both base and color space.
    /// 'A'/'a'/'0' -> 0; 'C'/'c'/'1' -> 1; 'G'/'g'/'2' -> 2; 'T'/'t'/'3' -> 3;
    /// 'N'/'n'/'4' -> 4; anything else -> 5.
    static unsigned char baseColor2int[256+1];
    /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
    /// just base space (ACTGNactgn).
    /// 'A'/'a' -> 0;  'C'/'c' -> 1;  'G'/'g' -> 2;  'T'/'t' -> 3;
    /// 'N'/'n' -> 4; anything else -> 5.
    static unsigned char base2int[256+1];
    /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
    /// just color space (0123).
    /// '0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3; '4' -> 4; anything else -> 5.
    static unsigned char color2int[256+1];

public:
    BaseAsciiMap();
    ~BaseAsciiMap();

    /// Set the base type based on the passed in option.
    /// The primer count is reset to 0 as part of the call.
    /// \param spaceType BASE_SPACE selects the base2int table, COLOR_SPACE
    /// selects the color2int table; any other value (UNKNOWN) clears the
    /// selection so that getBaseIndex() determines it from the data.
    inline void setBaseMapType(SPACE_TYPE spaceType)
    {
        resetPrimerCount();
        switch (spaceType)
        {
            case BASE_SPACE:
                myBase2IntMapPtr = base2int;
                break;
            case COLOR_SPACE:
                myBase2IntMapPtr = color2int;
                break;
            default:
                // Unknown map type, zero the pointer.
                myBase2IntMapPtr = NULL;
                break;
        }
    }

    /// Returns the baseIndex value for the character passed in.
    ///
    /// While the map type is unknown or set to color space, the first
    /// myNumPrimerBases characters looked up since the last reset are
    /// treated as primer bases and are always translated with the base
    /// space table (base2int).  If the map type is still unknown once the
    /// primers have been consumed, it is determined from this character.
    /// \param letter character to translate.
    /// \return the table value for the character, or baseXIndex if the
    /// map type could not be determined from it.
    inline int getBaseIndex(const char& letter)
    {
        // Plain char may be signed, in which case bytes >= 0x80 are negative
        // and would index before the start of the tables.  Convert to
        // unsigned char so the index is always in the range 0..255.
        const unsigned char index = static_cast<unsigned char>(letter);

        if (myBase2IntMapPtr == NULL)
        {
            // Check to see if we have hit the number of primer bases.
            if (myPrimerCount < myNumPrimerBases)
            {
                // Still expecting primer bases, so lookup
                // the letter in the base map.
                ++myPrimerCount;
                return(base2int[index]);
            }

            // Have already processed all the primers, so determine
            // whether this is base or color space from this letter.
            setBaseMapType(letter);

            // If it is still null, return invalid.  Will be set when the first
            // letter is either color or base.
            if (myBase2IntMapPtr == NULL)
            {
                return(baseXIndex);
            }
        }

        // Also check if configured as color space that the primers are correct.
        if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases))
        {
            // Still expecting primer bases, so lookup
            // the letter in the base map.
            ++myPrimerCount;
            return(base2int[index]);
        }

        return myBase2IntMapPtr[index];
    }

    /// Return the space type that is currently set.
    /// \return BASE_SPACE or COLOR_SPACE according to the table currently
    /// selected, UNKNOWN if no table is selected.
    inline SPACE_TYPE getSpaceType()
    {
        if (myBase2IntMapPtr == base2int)
        {
            return(BASE_SPACE);
        }
        if (myBase2IntMapPtr == color2int)
        {
            return(COLOR_SPACE);
        }
        return(UNKNOWN);
    }

    /// Set the number of primer bases expected before the actual
    /// base/color space type occurs for the rest of the entries.
    /// \param numPrimerBases number of primer bases to expect.  The value is
    /// stored in an unsigned member, so a negative argument is converted to
    /// a very large count.
    void setNumPrimerBases(int numPrimerBases)
    {
        myNumPrimerBases = numPrimerBases;
    }

    /// Reset the number of primers seen to 0.
    void resetPrimerCount()
    {
        myPrimerCount = 0;
    }

    /// Reset the base mapping type to UNKNOWN and the primer count to 0.
    void resetBaseMapType()
    {
        myBase2IntMapPtr = NULL;
        resetPrimerCount();
    }

private:
    // Set the base type based on the passed in letter.
    // Base space is checked first, then color space.  If the letter is in
    // neither the color space nor the base space, the type is left unknown
    // (NULL pointer) so that a later letter can determine it.
    inline void setBaseMapType(const char& letter)
    {
        // Convert through unsigned char so that a negative char value
        // cannot produce an out-of-range table index.
        const unsigned char index = static_cast<unsigned char>(letter);

        // First check to see if it is in base space.
        if (base2int[index] != baseXIndex)
        {
            // This is a valid base space index, so it is base space.
            myBase2IntMapPtr = base2int;
        }
        else if (color2int[index] != baseXIndex)
        {
            // This is a valid color space index, so it is color space.
            myBase2IntMapPtr = color2int;
        }
        else
        {
            // Unknown map type, zero the pointer.
            myBase2IntMapPtr = NULL;
        }
    }


    // The number of primer bases to expect for a color-space file.
    unsigned int myNumPrimerBases;

    // This is the number of primer bases that have been seen since
    // the map type was set/reset.
    unsigned int myPrimerCount;

    // Table currently used by getBaseIndex(): base2int, color2int, or NULL
    // while the space type is unknown.
    unsigned char* myBase2IntMapPtr;
};
204 
205 #endif
206