1 /*
2  *  Copyright (C) 2010  Regents of the University of Michigan
3  *
4  *   This program is free software: you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation, either version 3 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __GLF_RECORD_H__
19 #define __GLF_RECORD_H__
20 
21 #include <map>
22 #include <stdint.h>
23 
24 #include "InputFile.h"
25 #include "CharBuffer.h"
26 
27 /// This class allows a user to easily get/set the fields in a GLF record.
28 class GlfRecord
29 {
30 public:
31     /// Constructor
32     GlfRecord();
33 
34     /// Destructor
35     ~GlfRecord();
36 
37 //     // Copy Constructor
38 //     GlfRecord(const GlfRecord& record);
39 
40 //     // Overload operator = to copy the passed in record into this record.
41 //     GlfRecord & operator = (const GlfRecord& record);
42 
43 //     // Overload operator = to copy the passed in record into this record.
44 //     bool copy(const GlfRecord& record);
45 
46     /// Clear this record back to the default setting.
47     void reset();
48 
49     /// Read the record from the specified file (file MUST be in
50     /// the correct position for reading a record).
51     /// \param filePtr file to read from that is in the correct position.
52     /// \return true if the record was successfully read from the file (even
53     /// if it is an endMarker), false if it was not successfully read.
54     bool read(IFILE filePtr);
55 
56     /// Write the record to the specified file.
57     /// \param filePtr file to write to that is in the correct position.
58     /// \return true if the record was successfully written to the
59     /// file, false if not.
60     bool write(IFILE filePtr) const;
61 
62     /// Print the reference section in a readable format.
63     void print() const;
64 
65     /// @name Generic Accessors for Record Types 1 & 2
66     //@{
67     /// Set the record type and reference base.
68     /// \param rtypeRef record type & reference base. Formatted as:
69     /// record_type<<4|numeric_ref_base.
70     /// \return true if the record type and reference base were successfully
71     /// set, false if not.
72     bool setRtypeRef(uint8_t rtypeRef);
73 
74     /// Set the record type.
75     /// \param recType record type: 1 - simple likelihood record,
76     /// 2 - indel likelihood record, 0 - end maker
77     /// \return true if the record type was successfully set, false if not.
78     bool setRecordType(uint8_t recType);
79 
80     /// Set the reference base from an integer value.
81     /// \param refBase integer representation of the reference base.
82     /// \anchor BaseCharacterIntMap
83     /// <table>
84     /// <tr><th>Int Value</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td><td>15</td></tr>
85     /// <tr><th>Character Base</th><td>X</td><td>A</td><td>C</td><td>M</td><td>G</td><td>R</td><td>S</td><td>V</td><td>T</td><td>W</td><td>Y</td><td>H</td><td>K</td><td>D</td><td>B</td><td>N</td></tr>
86     /// </table>
87     /// \return true if the reference base was successfully set, false if not.
88     bool setRefBaseInt(uint8_t refBase);
89 
90     // TODO   bool setRefBaseChar(char refBase);
91 
92     /// Set the offset from the precedent record.
93     /// 0-based coordinate of the record minus the coordinate of the
94     /// precedent record. For the first record in a reference sequence,
95     /// the previous coordinate is 0.
96     /// For insertions between x & x+1, the coordinate is x.
97     /// For deletions between x & y, the coordinate is x.
98     /// \param offset offset from the precedent record.
99     /// \return true if successfully set, false if not.
100     bool setOffset(uint32_t offset);
101 
102     /// Set the minimum likelihood and the read depth.
103     /// \param minDepth minimum likelihood and read depth. Formatted as:
104     /// min_lk<<24|read_dpeth. (min_lk capped at 255)
105     /// \return true if successfully set, false if not.
106     bool setMinDepth(uint32_t minDepth);
107 
108     /// Set the minimum likelihood.
109     /// \param minLk minimum likelihood (capped at 255).
110     /// \return true if successfully set, false if not.
111     bool setMinLk(uint8_t minLk);
112 
113     /// Set the the read depth.
114     /// \param readDepth read depth.
115     /// \return true if successfully set, false if not.
116     bool setReadDepth(uint32_t readDepth);
117 
118     /// Set the RMS of mapping qualities of reads covering the site.
119     /// \param rmsMapQ RMS of mapping qualities
120     /// \return true if successfully set, false if not.
121     bool setRmsMapQ(uint8_t rmsMapQ);
122 
123     /// Return the record type.
124     /// \return record type for this record: 0 - endMarker,
125     /// 1 - simple likelihood, 2 - indel likelihood
getRecordType()126     inline int getRecordType() const
127     {
128         return(myRecTypeRefBase >> REC_TYPE_SHIFT);
129     }
130 
131     /// Return the reference base as an integer.
132     /// \return integer representation of the reference base.
133     /// See: \ref BaseCharacterIntMap
getRefBase()134     inline int getRefBase() const
135     {
136         return(myRecTypeRefBase & REF_BASE_MASK);
137     }
138 
139     /// Return the reference base as a character.
140     /// \return character representation of the reference base.
141     char getRefBaseChar() const;
142 
143     /// Return the offset from the precedent record.
144     /// \return offset from the precedent record.
145     uint32_t getOffset() const;
146 
147     /// Return the minimum likelihood and read depth.  Formatted as:
148     /// min_lk<<24|read_dpeth. (min_lk capped at 255)
149     /// \return minimum likelihood and read depth
150     uint32_t getMinDepth() const;
151 
152     /// Return the minimum likelihood
153     /// \return minimum likelihood
154     uint8_t getMinLk() const;
155 
156     /// Return the read depth.
157     /// \return read depth
158     uint32_t getReadDepth() const;
159 
160     /// Return the RMS of mapping qualities of reads covering the site.
161     /// \return RMS of maping qualities.
162     uint8_t getRmsMapQ() const;
163 
164     //@}
165 
166     /// @name Record Type 1 Accessors
167     /// Record Type 1: Simple Likelihood Record
168     //@{
169     //bool setType1(all fields for type 1);
170 
171     /// Set the likelihood for the specified genotype.
172     /// Throws an exception if index is out of range.
173     /// \param index index for the genotype for which the likelihood is
174     /// being set.
175     /// \anchor GenotypeIndexTable
176     /// <table>
177     /// <tr><th>Index</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td></tr>
178     /// <tr><th>Genotype</th><td>AA</td><td>AC</td><td>AG</td><td>AT</td><td>CC</td><td>CG</td><td>CT</td><td>GG</td><td>GT</td><td>TT</td></tr>
179     /// </table>
180     /// \param value likelihood for the genotype at the specified index.
181     /// \return true if successfully set, false if not.
182     bool setLk(int index, uint8_t value);
183 
184     //bool getType1(all fields for type 1);
185 
186     /// Get the likelihood for the specified genotype index.
187     /// Throws an exception if index is out of range.
188     /// \param index index of the genotype for which the likelihood should
189     /// be returned.  See: \ref GenotypeIndexTable
190     /// \return likelihood of the specified index.
191     uint8_t getLk(int index);
192     //@}
193 
194     /// @name Record Type 2 Accessors
195     /// Record Type2: Indel Likelihood Record
196     //@{
197 //     bool setType2(all fields for type 2);
198 
199     /// Set the likelihood of the first homozygous indel allele.
200     /// \param lk likelihood of the 1st homozygous indel allele (capped at 255)
201     /// \return true if successfully set, false if not.
202     bool setLkHom1(uint8_t lk);
203 
204     /// Set the likelihood of the 2nd homozygous indel allele.
205     /// \param lk likelihood of the 2nd homozygous indel allele (capped at 255)
206     /// \return true if successfully set, false if not.
207     bool setLkHom2(uint8_t lk);
208 
209     /// Set the likelihood of a heterozygote.
210     /// \param lk likelihood of a heterozygote (capped at 255)
211     /// \return true if successfully set, false if not.
212     bool setLkHet(uint8_t lk);
213 
214     /// Set the sequence of the first indel allele if the
215     /// first indel is an insertion.
216     /// \param indelSeq sequence of the first indel allele (insertion).
217     /// \return true if successfully set, false if not.
218     bool setInsertionIndel1(const std::string& indelSeq);
219 
220     /// Set the sequence of the first indel allele if the
221     /// first indel is an deletion.
222     /// \param indelSeq sequence of the first indel allele (deletion).
223     /// \return true if successfully set, false if not.
224     bool setDeletionIndel1(const std::string& indelSeq);
225 
226     /// Set the sequence of the 2nd indel allele if the
227     /// 2nd indel is an insertion.
228     /// \param indelSeq sequence of the 2nd indel allele (insertion).
229     /// \return true if successfully set, false if not.
230     bool setInsertionIndel2(const std::string& indelSeq);
231 
232     /// Set the sequence of the 2nd indel allele if the
233     /// 2nd indel is an deletion.
234     /// \param indelSeq sequence of the 2nd indel allele (deletion).
235     /// \return true if successfully set, false if not.
236     bool setDeletionIndel2(const std::string& indelSeq);
237 
238     //     bool setType2(all fields for type 2);
239 
240     /// Return the likelihood of the 1st homozygous indel allele.
241     /// \return likelihood of the 1st homozygous indel allele.
242     uint8_t getLkHom1();
243 
244     /// Return the likelihood of the 2nd homozygous indel allele.
245     /// \return likelihood of the 2nd homozygous indel allele.
246     uint8_t getLkHom2();
247 
248     /// Return the likelihood of a heterozygote.
249     /// \return likelihood of a hetereozygote.
250     uint8_t getLkHet();
251 
252     /// Get the sequence and length (+:ins, -:del) of the 1st indel allele.
253     /// \param indelSeq string to set with the sequence of the 1st indel allele
254     /// \return length of the 1st indel allele
255     /// (positive=insertion; negative=deletion; 0=no-indel)
256     int16_t getIndel1(std::string& indelSeq);
257 
258     /// Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
259     /// \param indelSeq string to set with the sequence of the 2nd indel allele
260     /// \return length of the 2nd indel allele
261     /// (positive=insertion; negative=deletion; 0=no-indel)
262     int16_t getIndel2(std::string& indelSeq);
263     //@}
264 
265 private:
266     // Read a record of record type 1.
267     void readType1(IFILE filePtr);
268 
269     // Read a record of record type 2.
270     void readType2(IFILE filePtr);
271 
272 
273     // Write the rtyperef field.
274     void writeRtypeRef(IFILE filePtr) const;
275 
276 
277     // Write a record of record type 1.
278     void writeType1(IFILE filePtr) const;
279 
280     // Write a record of record type 2.
281     void writeType2(IFILE filePtr) const;
282 
283     // Contains record_type and ref_base.
284     uint8_t myRecTypeRefBase;
285 
286     static const uint8_t REC_TYPE_SHIFT = 4;
287     static const uint8_t REF_BASE_MASK = 0xF;
288     static const uint8_t REC_TYPE_MASK = 0xF0;
289 
290     static const uint32_t MIN_LK_SHIFT = 24;
291     static const uint32_t READ_DEPTH_MASK = 0xFFFFFF;
292     static const uint32_t MIN_LK_MASK = 0xFF000000;
293 
294     static const char REF_BASE_MAX = 15;
295     static std::string REF_BASE_CHAR;
296 
297     static const int NUM_REC1_LIKELIHOOD = 10;
298 
299     struct
300     {
301         uint32_t offset;
302         uint32_t min_depth;
303         uint8_t rmsMapQ;
304         uint8_t lk[GlfRecord::NUM_REC1_LIKELIHOOD];
305     } myRec1Base;
306 
307     static const int REC1_BASE_SIZE = 19;
308 
309     struct
310     {
311         uint32_t offset;
312         uint32_t min_depth;
313         uint8_t rmsMapQ;
314         uint8_t lkHom1;
315         uint8_t lkHom2;
316         uint8_t lkHet;
317         int16_t indelLen1;
318         int16_t indelLen2;
319     } myRec2Base;
320 
321     // TODO rest of rec 2.
322     CharBuffer myIndelSeq1;
323     CharBuffer myIndelSeq2;
324 
325     static const int REC2_BASE_SIZE = 16;
326 
327 };
328 
329 #endif
330