1 /*
2  *  Copyright (C) 2010  Regents of the University of Michigan
3  *
4  *   This program is free software: you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation, either version 3 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdlib.h>
19 #include "GlfRecord.h"
20 #include "GlfException.h"
21 #include "StringBasics.h"
22 
23 std::string GlfRecord::REF_BASE_CHAR = "XACMGRSVTQYHKSVN";
24 
GlfRecord()25 GlfRecord::GlfRecord()
26 {
27     reset();
28 }
29 
30 
~GlfRecord()31 GlfRecord::~GlfRecord()
32 {
33     reset();
34 }
35 
36 
37 // Reset the record for a new entry, clearing out previous values.
reset()38 void GlfRecord::reset()
39 {
40     myRecTypeRefBase = 0;
41     myRec1Base.offset = 0;
42     myRec1Base.min_depth = 0;
43     myRec1Base.rmsMapQ = 0;
44     for(int i = 0; i < 10; i++)
45     {
46         myRec1Base.lk[i] = 0;
47     }
48 
49     myRec2Base.offset = 0;
50     myRec2Base.min_depth = 0;
51     myRec2Base.rmsMapQ = 0;
52     myRec2Base.lkHom1 = 0;
53     myRec2Base.lkHom2 = 0;
54     myRec2Base.lkHet = 0;
55     myRec2Base.indelLen1 = 0;
56     myRec2Base.indelLen2 = 0;
57 
58     myIndelSeq1.reset();
59     myIndelSeq2.reset();
60 }
61 
62 
63 // Read the record from the specified file.  Assumes the file is in
64 // the correct position for reading the record.
read(IFILE filePtr)65 bool GlfRecord::read(IFILE filePtr)
66 {
67     // Read the record type and reference base.
68     int numRead = 0;
69     int byteLen = sizeof(uint8_t);
70     numRead = ifread(filePtr, &myRecTypeRefBase, byteLen);
71     if(numRead != byteLen)
72     {
73         String errorMsg = "Failed to read the record type & reference base (";
74         errorMsg += byteLen;
75         errorMsg += " bytes).  Only read ";
76         errorMsg += numRead;
77         errorMsg += " bytes.";
78         std::string errorString = errorMsg.c_str();
79         throw(GlfException(GlfStatus::FAIL_IO, errorString));
80         return(false);
81     }
82 
83     // TODO, split up by types of records...
84     switch(getRecordType())
85     {
86         case 0:
87             //  Last record.
88             // Nothing more to read.
89             break;
90         case 1:
91             // Read type 1.
92             readType1(filePtr);
93             break;
94         case 2:
95             // Read type 2.
96             readType2(filePtr);
97             break;
98         default:
99             String errorMsg = "Failed to read the record: unknown type: ";
100             errorMsg += getRecordType();
101             std::string errorString = errorMsg.c_str();
102             throw(GlfException(GlfStatus::INVALID, errorString));
103             return(false);
104             break;
105     };
106 
107     // Successfully read, return success.
108     return(true);
109 }
110 
111 
112 // Write the record to the specified file.
write(IFILE filePtr) const113 bool GlfRecord::write(IFILE filePtr) const
114 {
115     // TODO, split up by types of records...
116     switch(getRecordType())
117     {
118         case 0:
119             writeRtypeRef(filePtr);
120             break;
121         case 1:
122             // write type 1.
123             writeType1(filePtr);
124             break;
125         case 2:
126             // write type 2.
127             writeType2(filePtr);
128             break;
129         default:
130             // unknown type, return error.
131             String errorMsg = "Failed to write the record: unknown type: ";
132             errorMsg += getRecordType();
133             std::string errorString = errorMsg.c_str();
134             throw(GlfException(GlfStatus::INVALID, errorString));
135             return(false);
136             break;
137     };
138 
139     return(true);
140 }
141 
142 
print() const143 void GlfRecord::print() const
144 {
145     std::cout << "record_type: " << getRecordType()
146               << "; ref_base: " << getRefBase()
147               << "; ref_base_char: " << getRefBaseChar()
148               << "\n";
149 
150     // TODO, split up by types of records...
151     switch(getRecordType())
152     {
153         case 0:
154             break;
155         case 1:
156             // print type 1.
157             std::cout << "\toffset: " << myRec1Base.offset
158                       << "; min_lk: " << (myRec1Base.min_depth >> 24)
159                       << "; read_depth: " << (myRec1Base.min_depth & 0xFFFFFF)
160                       << "; rmsMapQ: " << (int)myRec1Base.rmsMapQ;
161             for(int i = 0; i < 10; ++i)
162             {
163                 std::cout << "; lk[" << i << "]: " << (int)myRec1Base.lk[i];
164             }
165 
166             std::cout << "\n";
167             break;
168         case 2:
169             // print type 2.
170             std::cout << "\toffset: " << myRec2Base.offset
171                       << "; min_lk: " << (myRec2Base.min_depth >> 24)
172                       << "; read_depth: " << (myRec2Base.min_depth & 0xFFFFF)
173                       << "; rmsMapQ: " << (int)myRec2Base.rmsMapQ
174                       << "; lkHom1: " << (int)myRec2Base.lkHom1
175                       << "; lkHom2: " << (int)myRec2Base.lkHom2
176                       << "; lkHet: " << (int)myRec2Base.lkHet
177                       << "; indelLen1: " << myRec2Base.indelLen1
178                       << "; indelLen2: " << myRec2Base.indelLen2
179                       << "; myIndelSeq1: " << myIndelSeq1.c_str()
180                       << "; myIndelSeq2: " << myIndelSeq2.c_str()
181                       << "\n";
182             break;
183         default:
184             break;
185     };
186 }
187 
setRtypeRef(uint8_t rtypeRef)188 bool GlfRecord::setRtypeRef(uint8_t rtypeRef)
189 {
190     myRecTypeRefBase = rtypeRef;
191     return(true);
192 }
193 
setRecordType(uint8_t recType)194 bool GlfRecord::setRecordType(uint8_t recType)
195 {
196     myRecTypeRefBase =
197         (myRecTypeRefBase & REF_BASE_MASK) | (recType << REC_TYPE_SHIFT);
198     return(true);
199 }
200 
setRefBaseInt(uint8_t refBase)201 bool GlfRecord::setRefBaseInt(uint8_t refBase)
202 {
203     myRecTypeRefBase =
204         (myRecTypeRefBase & REC_TYPE_MASK) | (refBase & REF_BASE_MASK);
205     return(true);
206 }
207 
208 // bool GlfRecord::setRefBaseChar(char refBase)
209 // {
210 
211 //     uint8_t refBaseInt = REF_BASE_CHAR_TO_INT[refBase];
212 //     return(setRefBaseInt(refBaseInt));
213 // }
214 
setOffset(uint32_t offset)215 bool GlfRecord::setOffset(uint32_t offset)
216 {
217     myRec1Base.offset = offset;
218     myRec2Base.offset = offset;
219     return(true);
220 }
221 
setMinDepth(uint32_t minDepth)222 bool GlfRecord::setMinDepth(uint32_t minDepth)
223 {
224     myRec1Base.min_depth = minDepth;
225     myRec2Base.min_depth = minDepth;
226     return(true);
227 }
228 
setMinLk(uint8_t minLk)229 bool GlfRecord::setMinLk(uint8_t minLk)
230 {
231     setMinDepth((myRec1Base.min_depth & READ_DEPTH_MASK) |
232                 (minLk << MIN_LK_SHIFT));
233     return(true);
234 }
235 
setReadDepth(uint32_t readDepth)236 bool GlfRecord::setReadDepth(uint32_t readDepth)
237 {
238     setMinDepth((myRec1Base.min_depth & MIN_LK_MASK) |
239                 (readDepth & READ_DEPTH_MASK));
240     return(true);
241 }
242 
setRmsMapQ(uint8_t rmsMapQ)243 bool GlfRecord::setRmsMapQ(uint8_t rmsMapQ)
244 {
245     myRec1Base.rmsMapQ = rmsMapQ;
246     myRec2Base.rmsMapQ = rmsMapQ;
247     return(true);
248 }
249 
250 // Accessors to get the gneric values.
getRefBaseChar() const251 char GlfRecord::getRefBaseChar() const
252 {
253     int index = myRecTypeRefBase & REF_BASE_MASK;
254     if((index > REF_BASE_MAX) || (index < 0))
255     {
256         // TODO throw exception.
257         return('N');
258     }
259     return(REF_BASE_CHAR[index]);
260 }
261 
262 
getOffset() const263 uint32_t GlfRecord::getOffset() const
264 {
265     if(getRecordType() == 1)
266     {
267         return(myRec1Base.offset);
268     }
269     else if(getRecordType() == 2)
270     {
271         return(myRec2Base.offset);
272     }
273     throw(GlfException(GlfStatus::UNKNOWN,
274                        "Tried to call getOffset for Record not of type 1 or 2."));
275     return(0);
276 }
277 
getMinDepth() const278 uint32_t GlfRecord::getMinDepth() const
279 {
280     if(getRecordType() == 1)
281     {
282         return(myRec1Base.min_depth);
283     }
284     else if(getRecordType() == 2)
285     {
286         return(myRec2Base.min_depth);
287     }
288     throw(GlfException(GlfStatus::UNKNOWN,
289                        "Tried to call getMinDepth for Record not of type 1 or 2."));
290     return(0);
291 }
292 
getMinLk() const293 uint8_t GlfRecord::getMinLk() const
294 {
295     if(getRecordType() == 1)
296     {
297         return(myRec1Base.min_depth >> MIN_LK_SHIFT);
298     }
299     else if(getRecordType() == 2)
300     {
301         return(myRec2Base.min_depth >> MIN_LK_SHIFT);
302     }
303     throw(GlfException(GlfStatus::UNKNOWN,
304                        "Tried to call getMinLk for Record not of type 1 or 2."));
305     return(0);
306 }
307 
getReadDepth() const308 uint32_t GlfRecord::getReadDepth() const
309 {
310     if(getRecordType() == 1)
311     {
312         return(myRec1Base.min_depth & READ_DEPTH_MASK);
313     }
314     else if(getRecordType() == 2)
315     {
316         return(myRec2Base.min_depth & READ_DEPTH_MASK);
317     }
318     throw(GlfException(GlfStatus::UNKNOWN,
319                        "Tried to call getReadDepth for Record not of type 1 or 2."));
320     return(0);
321 }
322 
getRmsMapQ() const323 uint8_t GlfRecord::getRmsMapQ() const
324 {
325     if(getRecordType() == 1)
326     {
327         return(myRec1Base.rmsMapQ);
328     }
329     else if(getRecordType() == 2)
330     {
331         return(myRec2Base.rmsMapQ);
332     }
333     throw(GlfException(GlfStatus::UNKNOWN,
334                        "Tried to call getRmsMapQ for Record not of type 1 or 2."));
335     return(0);
336 }
337 
338 
339 // Accessors for getting record type 1
setLk(int index,uint8_t value)340 bool GlfRecord::setLk(int index, uint8_t value)
341 {
342     if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
343     {
344         //  Out of range.
345         throw(GlfException(GlfStatus::UNKNOWN,
346                            "Trying to set Record Type 1 likelihood position< 0 or >= 10."));
347         return(false);
348     }
349 
350     // In range.
351     myRec1Base.lk[index] = value;
352     return(true);
353 }
354 
getLk(int index)355 uint8_t GlfRecord::getLk(int index)
356 {
357     if(getRecordType() != 1)
358     {
359         throw(GlfException(GlfStatus::UNKNOWN,
360                            "Tried to call getLk for Record not of type 1."));
361         return(0);
362     }
363     if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
364     {
365         throw(GlfException(GlfStatus::UNKNOWN,
366                            "Tried to call getLk for index < 0 or >= 10."));
367         return(0);
368     }
369     return(myRec1Base.lk[index]);
370 }
371 
372 
373 // Accessors for getting record type 2
setLkHom1(uint8_t lk)374 bool GlfRecord::setLkHom1(uint8_t lk)
375 {
376     myRec2Base.lkHom1 = lk;
377     return(true);
378 }
379 
setLkHom2(uint8_t lk)380 bool GlfRecord::setLkHom2(uint8_t lk)
381 {
382     myRec2Base.lkHom2 = lk;
383     return(true);
384 }
385 
setLkHet(uint8_t lk)386 bool GlfRecord::setLkHet(uint8_t lk)
387 {
388     myRec2Base.lkHet = lk;
389     return(true);
390 }
391 
setInsertionIndel1(const std::string & indelSeq)392 bool GlfRecord::setInsertionIndel1(const std::string& indelSeq)
393 {
394     myRec2Base.indelLen1 = indelSeq.length();
395     myIndelSeq1 = indelSeq;
396     return(true);
397 }
398 
setDeletionIndel1(const std::string & indelSeq)399 bool GlfRecord::setDeletionIndel1(const std::string& indelSeq)
400 {
401     myRec2Base.indelLen1 = 0 - (indelSeq.length());
402     myIndelSeq1 = indelSeq;
403     return(true);
404 }
405 
setInsertionIndel2(const std::string & indelSeq)406 bool GlfRecord::setInsertionIndel2(const std::string& indelSeq)
407 {
408     myRec2Base.indelLen2 = indelSeq.length();
409     myIndelSeq2 = indelSeq;
410     return(true);
411 }
412 
setDeletionIndel2(const std::string & indelSeq)413 bool GlfRecord::setDeletionIndel2(const std::string& indelSeq)
414 {
415     myRec2Base.indelLen2 = 0 - (indelSeq.length());
416     myIndelSeq2 = indelSeq;
417     return(true);
418 }
419 
getLkHom1()420 uint8_t GlfRecord::getLkHom1()
421 {
422     if(getRecordType() != 2)
423     {
424         throw(GlfException(GlfStatus::UNKNOWN,
425                            "Tried to call getLkHom1 for Record not of type 2."));
426         return(0);
427     }
428     return(myRec2Base.lkHom1);
429 }
430 
getLkHom2()431 uint8_t GlfRecord::getLkHom2()
432 {
433     if(getRecordType() != 2)
434     {
435         throw(GlfException(GlfStatus::UNKNOWN,
436                            "Tried to call getLkHom2 for Record not of type 2."));
437         return(0);
438     }
439     return(myRec2Base.lkHom2);
440 }
441 
getLkHet()442 uint8_t GlfRecord::getLkHet()
443 {
444     if(getRecordType() != 2)
445     {
446         throw(GlfException(GlfStatus::UNKNOWN,
447                            "Tried to call getLkHet for Record not of type 2."));
448         return(0);
449     }
450     return(myRec2Base.lkHet);
451 }
452 
getIndel1(std::string & indelSeq)453 int16_t GlfRecord::getIndel1(std::string& indelSeq)
454 {
455     if(getRecordType() != 2)
456     {
457         throw(GlfException(GlfStatus::UNKNOWN,
458                            "Tried to call getIndel1 for Record not of type 2."));
459         return(0);
460     }
461     indelSeq = myIndelSeq1.c_str();
462     return(myRec2Base.indelLen1);
463 }
464 
getIndel2(std::string & indelSeq)465 int16_t GlfRecord::getIndel2(std::string& indelSeq)
466 {
467     if(getRecordType() != 2)
468     {
469         throw(GlfException(GlfStatus::UNKNOWN,
470                            "Tried to call getIndel2 for Record not of type 2."));
471         return(0);
472     }
473     indelSeq = myIndelSeq2.c_str();
474     return(myRec2Base.indelLen2);
475 }
476 
477 
readType1(IFILE filePtr)478 void GlfRecord::readType1(IFILE filePtr)
479 {
480     // Read record type 1 information.
481     int numRead = 0;
482     numRead = ifread(filePtr, &myRec1Base, REC1_BASE_SIZE);
483     if(numRead != REC1_BASE_SIZE)
484     {
485         String errorMsg = "Failed to read record of type 1 (";
486         errorMsg += REC1_BASE_SIZE;
487         errorMsg += " bytes).  Only read ";
488         errorMsg += numRead;
489         errorMsg += " bytes.";
490         std::string errorString = errorMsg.c_str();
491         throw(GlfException(GlfStatus::FAIL_IO, errorString));
492     }
493 
494     // Record type 1 is fixed size and has no additional variable length
495     // fields, so done reading.
496 }
497 
498 
readType2(IFILE filePtr)499 void GlfRecord::readType2(IFILE filePtr)
500 {
501     // Read record type 2 information.
502     int numRead = 0;
503     numRead = ifread(filePtr, &myRec2Base, REC2_BASE_SIZE);
504     if(numRead != REC2_BASE_SIZE)
505     {
506         String errorMsg = "Failed to read record of type 2 base info (";
507         errorMsg += REC2_BASE_SIZE;
508         errorMsg += " bytes).  Only read ";
509         errorMsg += numRead;
510         errorMsg += " bytes.";
511         std::string errorString = errorMsg.c_str();
512         throw(GlfException(GlfStatus::FAIL_IO, errorString));
513     }
514 
515     // Record type 2 has 2 additional variable length fields.  Read those
516     // fields.
517     int16_t len = abs(myRec2Base.indelLen1);
518     numRead = myIndelSeq1.readFromFile(filePtr, len);
519     if(numRead != len)
520     {
521         String errorMsg = "Failed to read record of type 2, 1st indel sequence (";
522         errorMsg += len;
523         errorMsg += " bytes).  Only read ";
524         errorMsg += numRead;
525         errorMsg += " bytes.";
526         std::string errorString = errorMsg.c_str();
527         throw(GlfException(GlfStatus::FAIL_IO, errorString));
528     }
529     len = abs(myRec2Base.indelLen2);
530     numRead = myIndelSeq2.readFromFile(filePtr, len);
531     if(numRead != len)
532     {
533         String errorMsg = "Failed to read record of type 2, 2nd indel sequence (";
534         errorMsg += len;
535         errorMsg += " bytes).  Only read ";
536         errorMsg += numRead;
537         errorMsg += " bytes.";
538         std::string errorString = errorMsg.c_str();
539         throw(GlfException(GlfStatus::FAIL_IO, errorString));
540     }
541 }
542 
543 
writeRtypeRef(IFILE filePtr) const544 void GlfRecord::writeRtypeRef(IFILE filePtr) const
545 {
546     int byteLen = sizeof(myRecTypeRefBase);
547     int numWrite =
548         ifwrite(filePtr, &myRecTypeRefBase, byteLen);
549     if(numWrite != byteLen)
550     {
551         String errorMsg =
552             "Failed to write the length of the record type and reference base (";
553         errorMsg += byteLen;
554         errorMsg += " bytes).  Only wrote ";
555         errorMsg += numWrite;
556         errorMsg += " bytes.";
557         std::string errorString = errorMsg.c_str();
558         throw(GlfException(GlfStatus::FAIL_IO, errorString));
559     }
560 }
561 
562 
writeType1(IFILE filePtr) const563 void GlfRecord::writeType1(IFILE filePtr) const
564 {
565     // Write the generic record field that all records have.
566     writeRtypeRef(filePtr);
567 
568     // Record type 1 is fixed size and has no additional variable length
569     // fields, so just write the base info.
570     int numWrite = ifwrite(filePtr, &myRec1Base, REC1_BASE_SIZE);
571     if(numWrite != REC1_BASE_SIZE)
572     {
573         // failed to write.
574         String errorMsg = "Failed to write record of type 1 (";
575         errorMsg += REC1_BASE_SIZE;
576         errorMsg += " bytes).  Only wrote ";
577         errorMsg += numWrite;
578         errorMsg += " bytes.";
579         std::string errorString = errorMsg.c_str();
580         throw(GlfException(GlfStatus::FAIL_IO, errorString));
581     }
582 
583 
584     // Done writing the record.
585 }
586 
587 
writeType2(IFILE filePtr) const588 void GlfRecord::writeType2(IFILE filePtr) const
589 {
590     // Write the generic record field that all records have.
591     writeRtypeRef(filePtr);
592 
593     // Write the record type 2 base info.
594     int numWrite = ifwrite(filePtr, &myRec2Base, REC2_BASE_SIZE);
595     if(numWrite != REC2_BASE_SIZE)
596     {
597         // failed to write.
598         String errorMsg = "Failed to write record of type 2 base info (";
599         errorMsg += REC2_BASE_SIZE;
600         errorMsg += " bytes).  Only wrote ";
601         errorMsg += numWrite;
602         errorMsg += " bytes.";
603         std::string errorString = errorMsg.c_str();
604         throw(GlfException(GlfStatus::FAIL_IO, errorString));
605     }
606 
607     // Record type 2 has 2 additional variable length fields.  Write those
608     // fields.
609     int len = myIndelSeq1.length();
610     numWrite = ifwrite(filePtr, myIndelSeq1.c_str(), len);
611     if(numWrite != len)
612     {
613         // failed to write.
614         String errorMsg = "Failed to write record of type 2, 1st indel sequence (";
615         errorMsg += len;
616         errorMsg += " bytes).  Only wrote ";
617         errorMsg += numWrite;
618         errorMsg += " bytes.";
619         std::string errorString = errorMsg.c_str();
620         throw(GlfException(GlfStatus::FAIL_IO, errorString));
621     }
622     len = myIndelSeq2.length();
623     numWrite = ifwrite(filePtr, myIndelSeq2.c_str(), len);
624     if(numWrite != len)
625     {
626         // failed to write.
627         String errorMsg = "Failed to write record of type 2, 2nd indel sequence (";
628         errorMsg += len;
629         errorMsg += " bytes).  Only wrote ";
630         errorMsg += numWrite;
631         errorMsg += " bytes.";
632         std::string errorString = errorMsg.c_str();
633         throw(GlfException(GlfStatus::FAIL_IO, errorString));
634     }
635 
636     // Done writing the record.
637 }
638