1 /*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <stdlib.h>
19 #include "GlfRecord.h"
20 #include "GlfException.h"
21 #include "StringBasics.h"
22
23 std::string GlfRecord::REF_BASE_CHAR = "XACMGRSVTQYHKSVN";
24
GlfRecord()25 GlfRecord::GlfRecord()
26 {
27 reset();
28 }
29
30
~GlfRecord()31 GlfRecord::~GlfRecord()
32 {
33 reset();
34 }
35
36
37 // Reset the record for a new entry, clearing out previous values.
reset()38 void GlfRecord::reset()
39 {
40 myRecTypeRefBase = 0;
41 myRec1Base.offset = 0;
42 myRec1Base.min_depth = 0;
43 myRec1Base.rmsMapQ = 0;
44 for(int i = 0; i < 10; i++)
45 {
46 myRec1Base.lk[i] = 0;
47 }
48
49 myRec2Base.offset = 0;
50 myRec2Base.min_depth = 0;
51 myRec2Base.rmsMapQ = 0;
52 myRec2Base.lkHom1 = 0;
53 myRec2Base.lkHom2 = 0;
54 myRec2Base.lkHet = 0;
55 myRec2Base.indelLen1 = 0;
56 myRec2Base.indelLen2 = 0;
57
58 myIndelSeq1.reset();
59 myIndelSeq2.reset();
60 }
61
62
63 // Read the record from the specified file. Assumes the file is in
64 // the correct position for reading the record.
read(IFILE filePtr)65 bool GlfRecord::read(IFILE filePtr)
66 {
67 // Read the record type and reference base.
68 int numRead = 0;
69 int byteLen = sizeof(uint8_t);
70 numRead = ifread(filePtr, &myRecTypeRefBase, byteLen);
71 if(numRead != byteLen)
72 {
73 String errorMsg = "Failed to read the record type & reference base (";
74 errorMsg += byteLen;
75 errorMsg += " bytes). Only read ";
76 errorMsg += numRead;
77 errorMsg += " bytes.";
78 std::string errorString = errorMsg.c_str();
79 throw(GlfException(GlfStatus::FAIL_IO, errorString));
80 return(false);
81 }
82
83 // TODO, split up by types of records...
84 switch(getRecordType())
85 {
86 case 0:
87 // Last record.
88 // Nothing more to read.
89 break;
90 case 1:
91 // Read type 1.
92 readType1(filePtr);
93 break;
94 case 2:
95 // Read type 2.
96 readType2(filePtr);
97 break;
98 default:
99 String errorMsg = "Failed to read the record: unknown type: ";
100 errorMsg += getRecordType();
101 std::string errorString = errorMsg.c_str();
102 throw(GlfException(GlfStatus::INVALID, errorString));
103 return(false);
104 break;
105 };
106
107 // Successfully read, return success.
108 return(true);
109 }
110
111
112 // Write the record to the specified file.
write(IFILE filePtr) const113 bool GlfRecord::write(IFILE filePtr) const
114 {
115 // TODO, split up by types of records...
116 switch(getRecordType())
117 {
118 case 0:
119 writeRtypeRef(filePtr);
120 break;
121 case 1:
122 // write type 1.
123 writeType1(filePtr);
124 break;
125 case 2:
126 // write type 2.
127 writeType2(filePtr);
128 break;
129 default:
130 // unknown type, return error.
131 String errorMsg = "Failed to write the record: unknown type: ";
132 errorMsg += getRecordType();
133 std::string errorString = errorMsg.c_str();
134 throw(GlfException(GlfStatus::INVALID, errorString));
135 return(false);
136 break;
137 };
138
139 return(true);
140 }
141
142
print() const143 void GlfRecord::print() const
144 {
145 std::cout << "record_type: " << getRecordType()
146 << "; ref_base: " << getRefBase()
147 << "; ref_base_char: " << getRefBaseChar()
148 << "\n";
149
150 // TODO, split up by types of records...
151 switch(getRecordType())
152 {
153 case 0:
154 break;
155 case 1:
156 // print type 1.
157 std::cout << "\toffset: " << myRec1Base.offset
158 << "; min_lk: " << (myRec1Base.min_depth >> 24)
159 << "; read_depth: " << (myRec1Base.min_depth & 0xFFFFFF)
160 << "; rmsMapQ: " << (int)myRec1Base.rmsMapQ;
161 for(int i = 0; i < 10; ++i)
162 {
163 std::cout << "; lk[" << i << "]: " << (int)myRec1Base.lk[i];
164 }
165
166 std::cout << "\n";
167 break;
168 case 2:
169 // print type 2.
170 std::cout << "\toffset: " << myRec2Base.offset
171 << "; min_lk: " << (myRec2Base.min_depth >> 24)
172 << "; read_depth: " << (myRec2Base.min_depth & 0xFFFFF)
173 << "; rmsMapQ: " << (int)myRec2Base.rmsMapQ
174 << "; lkHom1: " << (int)myRec2Base.lkHom1
175 << "; lkHom2: " << (int)myRec2Base.lkHom2
176 << "; lkHet: " << (int)myRec2Base.lkHet
177 << "; indelLen1: " << myRec2Base.indelLen1
178 << "; indelLen2: " << myRec2Base.indelLen2
179 << "; myIndelSeq1: " << myIndelSeq1.c_str()
180 << "; myIndelSeq2: " << myIndelSeq2.c_str()
181 << "\n";
182 break;
183 default:
184 break;
185 };
186 }
187
setRtypeRef(uint8_t rtypeRef)188 bool GlfRecord::setRtypeRef(uint8_t rtypeRef)
189 {
190 myRecTypeRefBase = rtypeRef;
191 return(true);
192 }
193
setRecordType(uint8_t recType)194 bool GlfRecord::setRecordType(uint8_t recType)
195 {
196 myRecTypeRefBase =
197 (myRecTypeRefBase & REF_BASE_MASK) | (recType << REC_TYPE_SHIFT);
198 return(true);
199 }
200
setRefBaseInt(uint8_t refBase)201 bool GlfRecord::setRefBaseInt(uint8_t refBase)
202 {
203 myRecTypeRefBase =
204 (myRecTypeRefBase & REC_TYPE_MASK) | (refBase & REF_BASE_MASK);
205 return(true);
206 }
207
208 // bool GlfRecord::setRefBaseChar(char refBase)
209 // {
210
211 // uint8_t refBaseInt = REF_BASE_CHAR_TO_INT[refBase];
212 // return(setRefBaseInt(refBaseInt));
213 // }
214
setOffset(uint32_t offset)215 bool GlfRecord::setOffset(uint32_t offset)
216 {
217 myRec1Base.offset = offset;
218 myRec2Base.offset = offset;
219 return(true);
220 }
221
setMinDepth(uint32_t minDepth)222 bool GlfRecord::setMinDepth(uint32_t minDepth)
223 {
224 myRec1Base.min_depth = minDepth;
225 myRec2Base.min_depth = minDepth;
226 return(true);
227 }
228
setMinLk(uint8_t minLk)229 bool GlfRecord::setMinLk(uint8_t minLk)
230 {
231 setMinDepth((myRec1Base.min_depth & READ_DEPTH_MASK) |
232 (minLk << MIN_LK_SHIFT));
233 return(true);
234 }
235
setReadDepth(uint32_t readDepth)236 bool GlfRecord::setReadDepth(uint32_t readDepth)
237 {
238 setMinDepth((myRec1Base.min_depth & MIN_LK_MASK) |
239 (readDepth & READ_DEPTH_MASK));
240 return(true);
241 }
242
setRmsMapQ(uint8_t rmsMapQ)243 bool GlfRecord::setRmsMapQ(uint8_t rmsMapQ)
244 {
245 myRec1Base.rmsMapQ = rmsMapQ;
246 myRec2Base.rmsMapQ = rmsMapQ;
247 return(true);
248 }
249
250 // Accessors to get the gneric values.
getRefBaseChar() const251 char GlfRecord::getRefBaseChar() const
252 {
253 int index = myRecTypeRefBase & REF_BASE_MASK;
254 if((index > REF_BASE_MAX) || (index < 0))
255 {
256 // TODO throw exception.
257 return('N');
258 }
259 return(REF_BASE_CHAR[index]);
260 }
261
262
getOffset() const263 uint32_t GlfRecord::getOffset() const
264 {
265 if(getRecordType() == 1)
266 {
267 return(myRec1Base.offset);
268 }
269 else if(getRecordType() == 2)
270 {
271 return(myRec2Base.offset);
272 }
273 throw(GlfException(GlfStatus::UNKNOWN,
274 "Tried to call getOffset for Record not of type 1 or 2."));
275 return(0);
276 }
277
getMinDepth() const278 uint32_t GlfRecord::getMinDepth() const
279 {
280 if(getRecordType() == 1)
281 {
282 return(myRec1Base.min_depth);
283 }
284 else if(getRecordType() == 2)
285 {
286 return(myRec2Base.min_depth);
287 }
288 throw(GlfException(GlfStatus::UNKNOWN,
289 "Tried to call getMinDepth for Record not of type 1 or 2."));
290 return(0);
291 }
292
getMinLk() const293 uint8_t GlfRecord::getMinLk() const
294 {
295 if(getRecordType() == 1)
296 {
297 return(myRec1Base.min_depth >> MIN_LK_SHIFT);
298 }
299 else if(getRecordType() == 2)
300 {
301 return(myRec2Base.min_depth >> MIN_LK_SHIFT);
302 }
303 throw(GlfException(GlfStatus::UNKNOWN,
304 "Tried to call getMinLk for Record not of type 1 or 2."));
305 return(0);
306 }
307
getReadDepth() const308 uint32_t GlfRecord::getReadDepth() const
309 {
310 if(getRecordType() == 1)
311 {
312 return(myRec1Base.min_depth & READ_DEPTH_MASK);
313 }
314 else if(getRecordType() == 2)
315 {
316 return(myRec2Base.min_depth & READ_DEPTH_MASK);
317 }
318 throw(GlfException(GlfStatus::UNKNOWN,
319 "Tried to call getReadDepth for Record not of type 1 or 2."));
320 return(0);
321 }
322
getRmsMapQ() const323 uint8_t GlfRecord::getRmsMapQ() const
324 {
325 if(getRecordType() == 1)
326 {
327 return(myRec1Base.rmsMapQ);
328 }
329 else if(getRecordType() == 2)
330 {
331 return(myRec2Base.rmsMapQ);
332 }
333 throw(GlfException(GlfStatus::UNKNOWN,
334 "Tried to call getRmsMapQ for Record not of type 1 or 2."));
335 return(0);
336 }
337
338
339 // Accessors for getting record type 1
setLk(int index,uint8_t value)340 bool GlfRecord::setLk(int index, uint8_t value)
341 {
342 if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
343 {
344 // Out of range.
345 throw(GlfException(GlfStatus::UNKNOWN,
346 "Trying to set Record Type 1 likelihood position< 0 or >= 10."));
347 return(false);
348 }
349
350 // In range.
351 myRec1Base.lk[index] = value;
352 return(true);
353 }
354
getLk(int index)355 uint8_t GlfRecord::getLk(int index)
356 {
357 if(getRecordType() != 1)
358 {
359 throw(GlfException(GlfStatus::UNKNOWN,
360 "Tried to call getLk for Record not of type 1."));
361 return(0);
362 }
363 if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
364 {
365 throw(GlfException(GlfStatus::UNKNOWN,
366 "Tried to call getLk for index < 0 or >= 10."));
367 return(0);
368 }
369 return(myRec1Base.lk[index]);
370 }
371
372
373 // Accessors for getting record type 2
setLkHom1(uint8_t lk)374 bool GlfRecord::setLkHom1(uint8_t lk)
375 {
376 myRec2Base.lkHom1 = lk;
377 return(true);
378 }
379
setLkHom2(uint8_t lk)380 bool GlfRecord::setLkHom2(uint8_t lk)
381 {
382 myRec2Base.lkHom2 = lk;
383 return(true);
384 }
385
setLkHet(uint8_t lk)386 bool GlfRecord::setLkHet(uint8_t lk)
387 {
388 myRec2Base.lkHet = lk;
389 return(true);
390 }
391
setInsertionIndel1(const std::string & indelSeq)392 bool GlfRecord::setInsertionIndel1(const std::string& indelSeq)
393 {
394 myRec2Base.indelLen1 = indelSeq.length();
395 myIndelSeq1 = indelSeq;
396 return(true);
397 }
398
setDeletionIndel1(const std::string & indelSeq)399 bool GlfRecord::setDeletionIndel1(const std::string& indelSeq)
400 {
401 myRec2Base.indelLen1 = 0 - (indelSeq.length());
402 myIndelSeq1 = indelSeq;
403 return(true);
404 }
405
setInsertionIndel2(const std::string & indelSeq)406 bool GlfRecord::setInsertionIndel2(const std::string& indelSeq)
407 {
408 myRec2Base.indelLen2 = indelSeq.length();
409 myIndelSeq2 = indelSeq;
410 return(true);
411 }
412
setDeletionIndel2(const std::string & indelSeq)413 bool GlfRecord::setDeletionIndel2(const std::string& indelSeq)
414 {
415 myRec2Base.indelLen2 = 0 - (indelSeq.length());
416 myIndelSeq2 = indelSeq;
417 return(true);
418 }
419
getLkHom1()420 uint8_t GlfRecord::getLkHom1()
421 {
422 if(getRecordType() != 2)
423 {
424 throw(GlfException(GlfStatus::UNKNOWN,
425 "Tried to call getLkHom1 for Record not of type 2."));
426 return(0);
427 }
428 return(myRec2Base.lkHom1);
429 }
430
getLkHom2()431 uint8_t GlfRecord::getLkHom2()
432 {
433 if(getRecordType() != 2)
434 {
435 throw(GlfException(GlfStatus::UNKNOWN,
436 "Tried to call getLkHom2 for Record not of type 2."));
437 return(0);
438 }
439 return(myRec2Base.lkHom2);
440 }
441
getLkHet()442 uint8_t GlfRecord::getLkHet()
443 {
444 if(getRecordType() != 2)
445 {
446 throw(GlfException(GlfStatus::UNKNOWN,
447 "Tried to call getLkHet for Record not of type 2."));
448 return(0);
449 }
450 return(myRec2Base.lkHet);
451 }
452
getIndel1(std::string & indelSeq)453 int16_t GlfRecord::getIndel1(std::string& indelSeq)
454 {
455 if(getRecordType() != 2)
456 {
457 throw(GlfException(GlfStatus::UNKNOWN,
458 "Tried to call getIndel1 for Record not of type 2."));
459 return(0);
460 }
461 indelSeq = myIndelSeq1.c_str();
462 return(myRec2Base.indelLen1);
463 }
464
getIndel2(std::string & indelSeq)465 int16_t GlfRecord::getIndel2(std::string& indelSeq)
466 {
467 if(getRecordType() != 2)
468 {
469 throw(GlfException(GlfStatus::UNKNOWN,
470 "Tried to call getIndel2 for Record not of type 2."));
471 return(0);
472 }
473 indelSeq = myIndelSeq2.c_str();
474 return(myRec2Base.indelLen2);
475 }
476
477
readType1(IFILE filePtr)478 void GlfRecord::readType1(IFILE filePtr)
479 {
480 // Read record type 1 information.
481 int numRead = 0;
482 numRead = ifread(filePtr, &myRec1Base, REC1_BASE_SIZE);
483 if(numRead != REC1_BASE_SIZE)
484 {
485 String errorMsg = "Failed to read record of type 1 (";
486 errorMsg += REC1_BASE_SIZE;
487 errorMsg += " bytes). Only read ";
488 errorMsg += numRead;
489 errorMsg += " bytes.";
490 std::string errorString = errorMsg.c_str();
491 throw(GlfException(GlfStatus::FAIL_IO, errorString));
492 }
493
494 // Record type 1 is fixed size and has no additional variable length
495 // fields, so done reading.
496 }
497
498
readType2(IFILE filePtr)499 void GlfRecord::readType2(IFILE filePtr)
500 {
501 // Read record type 2 information.
502 int numRead = 0;
503 numRead = ifread(filePtr, &myRec2Base, REC2_BASE_SIZE);
504 if(numRead != REC2_BASE_SIZE)
505 {
506 String errorMsg = "Failed to read record of type 2 base info (";
507 errorMsg += REC2_BASE_SIZE;
508 errorMsg += " bytes). Only read ";
509 errorMsg += numRead;
510 errorMsg += " bytes.";
511 std::string errorString = errorMsg.c_str();
512 throw(GlfException(GlfStatus::FAIL_IO, errorString));
513 }
514
515 // Record type 2 has 2 additional variable length fields. Read those
516 // fields.
517 int16_t len = abs(myRec2Base.indelLen1);
518 numRead = myIndelSeq1.readFromFile(filePtr, len);
519 if(numRead != len)
520 {
521 String errorMsg = "Failed to read record of type 2, 1st indel sequence (";
522 errorMsg += len;
523 errorMsg += " bytes). Only read ";
524 errorMsg += numRead;
525 errorMsg += " bytes.";
526 std::string errorString = errorMsg.c_str();
527 throw(GlfException(GlfStatus::FAIL_IO, errorString));
528 }
529 len = abs(myRec2Base.indelLen2);
530 numRead = myIndelSeq2.readFromFile(filePtr, len);
531 if(numRead != len)
532 {
533 String errorMsg = "Failed to read record of type 2, 2nd indel sequence (";
534 errorMsg += len;
535 errorMsg += " bytes). Only read ";
536 errorMsg += numRead;
537 errorMsg += " bytes.";
538 std::string errorString = errorMsg.c_str();
539 throw(GlfException(GlfStatus::FAIL_IO, errorString));
540 }
541 }
542
543
writeRtypeRef(IFILE filePtr) const544 void GlfRecord::writeRtypeRef(IFILE filePtr) const
545 {
546 int byteLen = sizeof(myRecTypeRefBase);
547 int numWrite =
548 ifwrite(filePtr, &myRecTypeRefBase, byteLen);
549 if(numWrite != byteLen)
550 {
551 String errorMsg =
552 "Failed to write the length of the record type and reference base (";
553 errorMsg += byteLen;
554 errorMsg += " bytes). Only wrote ";
555 errorMsg += numWrite;
556 errorMsg += " bytes.";
557 std::string errorString = errorMsg.c_str();
558 throw(GlfException(GlfStatus::FAIL_IO, errorString));
559 }
560 }
561
562
writeType1(IFILE filePtr) const563 void GlfRecord::writeType1(IFILE filePtr) const
564 {
565 // Write the generic record field that all records have.
566 writeRtypeRef(filePtr);
567
568 // Record type 1 is fixed size and has no additional variable length
569 // fields, so just write the base info.
570 int numWrite = ifwrite(filePtr, &myRec1Base, REC1_BASE_SIZE);
571 if(numWrite != REC1_BASE_SIZE)
572 {
573 // failed to write.
574 String errorMsg = "Failed to write record of type 1 (";
575 errorMsg += REC1_BASE_SIZE;
576 errorMsg += " bytes). Only wrote ";
577 errorMsg += numWrite;
578 errorMsg += " bytes.";
579 std::string errorString = errorMsg.c_str();
580 throw(GlfException(GlfStatus::FAIL_IO, errorString));
581 }
582
583
584 // Done writing the record.
585 }
586
587
writeType2(IFILE filePtr) const588 void GlfRecord::writeType2(IFILE filePtr) const
589 {
590 // Write the generic record field that all records have.
591 writeRtypeRef(filePtr);
592
593 // Write the record type 2 base info.
594 int numWrite = ifwrite(filePtr, &myRec2Base, REC2_BASE_SIZE);
595 if(numWrite != REC2_BASE_SIZE)
596 {
597 // failed to write.
598 String errorMsg = "Failed to write record of type 2 base info (";
599 errorMsg += REC2_BASE_SIZE;
600 errorMsg += " bytes). Only wrote ";
601 errorMsg += numWrite;
602 errorMsg += " bytes.";
603 std::string errorString = errorMsg.c_str();
604 throw(GlfException(GlfStatus::FAIL_IO, errorString));
605 }
606
607 // Record type 2 has 2 additional variable length fields. Write those
608 // fields.
609 int len = myIndelSeq1.length();
610 numWrite = ifwrite(filePtr, myIndelSeq1.c_str(), len);
611 if(numWrite != len)
612 {
613 // failed to write.
614 String errorMsg = "Failed to write record of type 2, 1st indel sequence (";
615 errorMsg += len;
616 errorMsg += " bytes). Only wrote ";
617 errorMsg += numWrite;
618 errorMsg += " bytes.";
619 std::string errorString = errorMsg.c_str();
620 throw(GlfException(GlfStatus::FAIL_IO, errorString));
621 }
622 len = myIndelSeq2.length();
623 numWrite = ifwrite(filePtr, myIndelSeq2.c_str(), len);
624 if(numWrite != len)
625 {
626 // failed to write.
627 String errorMsg = "Failed to write record of type 2, 2nd indel sequence (";
628 errorMsg += len;
629 errorMsg += " bytes). Only wrote ";
630 errorMsg += numWrite;
631 errorMsg += " bytes.";
632 std::string errorString = errorMsg.c_str();
633 throw(GlfException(GlfStatus::FAIL_IO, errorString));
634 }
635
636 // Done writing the record.
637 }
638