1 /* 2 * Copyright (C) 2010 Regents of the University of Michigan 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation, either version 3 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #ifndef __GLF_RECORD_H__ 19 #define __GLF_RECORD_H__ 20 21 #include <map> 22 #include <stdint.h> 23 24 #include "InputFile.h" 25 #include "CharBuffer.h" 26 27 /// This class allows a user to easily get/set the fields in a GLF record. 28 class GlfRecord 29 { 30 public: 31 /// Constructor 32 GlfRecord(); 33 34 /// Destructor 35 ~GlfRecord(); 36 37 // // Copy Constructor 38 // GlfRecord(const GlfRecord& record); 39 40 // // Overload operator = to copy the passed in record into this record. 41 // GlfRecord & operator = (const GlfRecord& record); 42 43 // // Overload operator = to copy the passed in record into this record. 44 // bool copy(const GlfRecord& record); 45 46 /// Clear this record back to the default setting. 47 void reset(); 48 49 /// Read the record from the specified file (file MUST be in 50 /// the correct position for reading a record). 51 /// \param filePtr file to read from that is in the correct position. 52 /// \return true if the record was successfully read from the file (even 53 /// if it is an endMarker), false if it was not successfully read. 54 bool read(IFILE filePtr); 55 56 /// Write the record to the specified file. 57 /// \param filePtr file to write to that is in the correct position. 58 /// \return true if the record was successfully written to the 59 /// file, false if not. 60 bool write(IFILE filePtr) const; 61 62 /// Print the reference section in a readable format. 63 void print() const; 64 65 /// @name Generic Accessors for Record Types 1 & 2 66 //@{ 67 /// Set the record type and reference base. 68 /// \param rtypeRef record type & reference base. Formatted as: 69 /// record_type<<4|numeric_ref_base. 70 /// \return true if the record type and reference base were successfully 71 /// set, false if not. 72 bool setRtypeRef(uint8_t rtypeRef); 73 74 /// Set the record type. 75 /// \param recType record type: 1 - simple likelihood record, 76 /// 2 - indel likelihood record, 0 - end maker 77 /// \return true if the record type was successfully set, false if not. 78 bool setRecordType(uint8_t recType); 79 80 /// Set the reference base from an integer value. 81 /// \param refBase integer representation of the reference base. 82 /// \anchor BaseCharacterIntMap 83 /// <table> 84 /// <tr><th>Int Value</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td><td>15</td></tr> 85 /// <tr><th>Character Base</th><td>X</td><td>A</td><td>C</td><td>M</td><td>G</td><td>R</td><td>S</td><td>V</td><td>T</td><td>W</td><td>Y</td><td>H</td><td>K</td><td>D</td><td>B</td><td>N</td></tr> 86 /// </table> 87 /// \return true if the reference base was successfully set, false if not. 88 bool setRefBaseInt(uint8_t refBase); 89 90 // TODO bool setRefBaseChar(char refBase); 91 92 /// Set the offset from the precedent record. 93 /// 0-based coordinate of the record minus the coordinate of the 94 /// precedent record. For the first record in a reference sequence, 95 /// the previous coordinate is 0. 96 /// For insertions between x & x+1, the coordinate is x. 97 /// For deletions between x & y, the coordinate is x. 98 /// \param offset offset from the precedent record. 99 /// \return true if successfully set, false if not. 100 bool setOffset(uint32_t offset); 101 102 /// Set the minimum likelihood and the read depth. 103 /// \param minDepth minimum likelihood and read depth. Formatted as: 104 /// min_lk<<24|read_dpeth. (min_lk capped at 255) 105 /// \return true if successfully set, false if not. 106 bool setMinDepth(uint32_t minDepth); 107 108 /// Set the minimum likelihood. 109 /// \param minLk minimum likelihood (capped at 255). 110 /// \return true if successfully set, false if not. 111 bool setMinLk(uint8_t minLk); 112 113 /// Set the the read depth. 114 /// \param readDepth read depth. 115 /// \return true if successfully set, false if not. 116 bool setReadDepth(uint32_t readDepth); 117 118 /// Set the RMS of mapping qualities of reads covering the site. 119 /// \param rmsMapQ RMS of mapping qualities 120 /// \return true if successfully set, false if not. 121 bool setRmsMapQ(uint8_t rmsMapQ); 122 123 /// Return the record type. 124 /// \return record type for this record: 0 - endMarker, 125 /// 1 - simple likelihood, 2 - indel likelihood getRecordType()126 inline int getRecordType() const 127 { 128 return(myRecTypeRefBase >> REC_TYPE_SHIFT); 129 } 130 131 /// Return the reference base as an integer. 132 /// \return integer representation of the reference base. 133 /// See: \ref BaseCharacterIntMap getRefBase()134 inline int getRefBase() const 135 { 136 return(myRecTypeRefBase & REF_BASE_MASK); 137 } 138 139 /// Return the reference base as a character. 140 /// \return character representation of the reference base. 141 char getRefBaseChar() const; 142 143 /// Return the offset from the precedent record. 144 /// \return offset from the precedent record. 145 uint32_t getOffset() const; 146 147 /// Return the minimum likelihood and read depth. Formatted as: 148 /// min_lk<<24|read_dpeth. (min_lk capped at 255) 149 /// \return minimum likelihood and read depth 150 uint32_t getMinDepth() const; 151 152 /// Return the minimum likelihood 153 /// \return minimum likelihood 154 uint8_t getMinLk() const; 155 156 /// Return the read depth. 157 /// \return read depth 158 uint32_t getReadDepth() const; 159 160 /// Return the RMS of mapping qualities of reads covering the site. 161 /// \return RMS of maping qualities. 162 uint8_t getRmsMapQ() const; 163 164 //@} 165 166 /// @name Record Type 1 Accessors 167 /// Record Type 1: Simple Likelihood Record 168 //@{ 169 //bool setType1(all fields for type 1); 170 171 /// Set the likelihood for the specified genotype. 172 /// Throws an exception if index is out of range. 173 /// \param index index for the genotype for which the likelihood is 174 /// being set. 175 /// \anchor GenotypeIndexTable 176 /// <table> 177 /// <tr><th>Index</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td></tr> 178 /// <tr><th>Genotype</th><td>AA</td><td>AC</td><td>AG</td><td>AT</td><td>CC</td><td>CG</td><td>CT</td><td>GG</td><td>GT</td><td>TT</td></tr> 179 /// </table> 180 /// \param value likelihood for the genotype at the specified index. 181 /// \return true if successfully set, false if not. 182 bool setLk(int index, uint8_t value); 183 184 //bool getType1(all fields for type 1); 185 186 /// Get the likelihood for the specified genotype index. 187 /// Throws an exception if index is out of range. 188 /// \param index index of the genotype for which the likelihood should 189 /// be returned. See: \ref GenotypeIndexTable 190 /// \return likelihood of the specified index. 191 uint8_t getLk(int index); 192 //@} 193 194 /// @name Record Type 2 Accessors 195 /// Record Type2: Indel Likelihood Record 196 //@{ 197 // bool setType2(all fields for type 2); 198 199 /// Set the likelihood of the first homozygous indel allele. 200 /// \param lk likelihood of the 1st homozygous indel allele (capped at 255) 201 /// \return true if successfully set, false if not. 202 bool setLkHom1(uint8_t lk); 203 204 /// Set the likelihood of the 2nd homozygous indel allele. 205 /// \param lk likelihood of the 2nd homozygous indel allele (capped at 255) 206 /// \return true if successfully set, false if not. 207 bool setLkHom2(uint8_t lk); 208 209 /// Set the likelihood of a heterozygote. 210 /// \param lk likelihood of a heterozygote (capped at 255) 211 /// \return true if successfully set, false if not. 212 bool setLkHet(uint8_t lk); 213 214 /// Set the sequence of the first indel allele if the 215 /// first indel is an insertion. 216 /// \param indelSeq sequence of the first indel allele (insertion). 217 /// \return true if successfully set, false if not. 218 bool setInsertionIndel1(const std::string& indelSeq); 219 220 /// Set the sequence of the first indel allele if the 221 /// first indel is an deletion. 222 /// \param indelSeq sequence of the first indel allele (deletion). 223 /// \return true if successfully set, false if not. 224 bool setDeletionIndel1(const std::string& indelSeq); 225 226 /// Set the sequence of the 2nd indel allele if the 227 /// 2nd indel is an insertion. 228 /// \param indelSeq sequence of the 2nd indel allele (insertion). 229 /// \return true if successfully set, false if not. 230 bool setInsertionIndel2(const std::string& indelSeq); 231 232 /// Set the sequence of the 2nd indel allele if the 233 /// 2nd indel is an deletion. 234 /// \param indelSeq sequence of the 2nd indel allele (deletion). 235 /// \return true if successfully set, false if not. 236 bool setDeletionIndel2(const std::string& indelSeq); 237 238 // bool setType2(all fields for type 2); 239 240 /// Return the likelihood of the 1st homozygous indel allele. 241 /// \return likelihood of the 1st homozygous indel allele. 242 uint8_t getLkHom1(); 243 244 /// Return the likelihood of the 2nd homozygous indel allele. 245 /// \return likelihood of the 2nd homozygous indel allele. 246 uint8_t getLkHom2(); 247 248 /// Return the likelihood of a heterozygote. 249 /// \return likelihood of a hetereozygote. 250 uint8_t getLkHet(); 251 252 /// Get the sequence and length (+:ins, -:del) of the 1st indel allele. 253 /// \param indelSeq string to set with the sequence of the 1st indel allele 254 /// \return length of the 1st indel allele 255 /// (positive=insertion; negative=deletion; 0=no-indel) 256 int16_t getIndel1(std::string& indelSeq); 257 258 /// Get the sequence and length (+:ins, -:del) of the 2nd indel allele. 259 /// \param indelSeq string to set with the sequence of the 2nd indel allele 260 /// \return length of the 2nd indel allele 261 /// (positive=insertion; negative=deletion; 0=no-indel) 262 int16_t getIndel2(std::string& indelSeq); 263 //@} 264 265 private: 266 // Read a record of record type 1. 267 void readType1(IFILE filePtr); 268 269 // Read a record of record type 2. 270 void readType2(IFILE filePtr); 271 272 273 // Write the rtyperef field. 274 void writeRtypeRef(IFILE filePtr) const; 275 276 277 // Write a record of record type 1. 278 void writeType1(IFILE filePtr) const; 279 280 // Write a record of record type 2. 281 void writeType2(IFILE filePtr) const; 282 283 // Contains record_type and ref_base. 284 uint8_t myRecTypeRefBase; 285 286 static const uint8_t REC_TYPE_SHIFT = 4; 287 static const uint8_t REF_BASE_MASK = 0xF; 288 static const uint8_t REC_TYPE_MASK = 0xF0; 289 290 static const uint32_t MIN_LK_SHIFT = 24; 291 static const uint32_t READ_DEPTH_MASK = 0xFFFFFF; 292 static const uint32_t MIN_LK_MASK = 0xFF000000; 293 294 static const char REF_BASE_MAX = 15; 295 static std::string REF_BASE_CHAR; 296 297 static const int NUM_REC1_LIKELIHOOD = 10; 298 299 struct 300 { 301 uint32_t offset; 302 uint32_t min_depth; 303 uint8_t rmsMapQ; 304 uint8_t lk[GlfRecord::NUM_REC1_LIKELIHOOD]; 305 } myRec1Base; 306 307 static const int REC1_BASE_SIZE = 19; 308 309 struct 310 { 311 uint32_t offset; 312 uint32_t min_depth; 313 uint8_t rmsMapQ; 314 uint8_t lkHom1; 315 uint8_t lkHom2; 316 uint8_t lkHet; 317 int16_t indelLen1; 318 int16_t indelLen2; 319 } myRec2Base; 320 321 // TODO rest of rec 2. 322 CharBuffer myIndelSeq1; 323 CharBuffer myIndelSeq2; 324 325 static const int REC2_BASE_SIZE = 16; 326 327 }; 328 329 #endif 330