1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include <time.h>
23
24 #include <U2Core/DNAAlphabet.h>
25 #include <U2Core/DNAChromatogramObject.h>
26 #include <U2Core/DNAInfo.h>
27 #include <U2Core/DNASequenceObject.h>
28 #include <U2Core/GObjectRelationRoles.h>
29 #include <U2Core/GObjectTypes.h>
30 #include <U2Core/IOAdapter.h>
31 #include <U2Core/L10n.h>
32 #include <U2Core/TextObject.h>
33 #include <U2Core/TextUtils.h>
34 #include <U2Core/U2AlphabetUtils.h>
35 #include <U2Core/U2DbiUtils.h>
36 #include <U2Core/U2ObjectDbi.h>
37 #include <U2Core/U2OpStatus.h>
38 #include <U2Core/U2SafePoints.h>
39 #include <U2Core/U2SequenceUtils.h>
40
41 #include "ABIFormat.h"
42 #include "IOLibUtils.h"
43
44 /* TRANSLATOR U2::ABIFormat */
45
46 namespace U2 {
47
ABIFormat(QObject * p)48 ABIFormat::ABIFormat(QObject *p)
49 : DocumentFormat(p, BaseDocumentFormats::ABIF, DocumentFormatFlag_SupportStreaming, {"ab1", "abi", "abif"}) {
50 formatName = tr("ABIF");
51 formatDescription = tr("A chromatogram file format");
52 supportedObjectTypes += GObjectTypes::SEQUENCE;
53 supportedObjectTypes += GObjectTypes::CHROMATOGRAM;
54 }
55
checkRawData(const QByteArray & rawData,const GUrl &) const56 FormatCheckResult ABIFormat::checkRawData(const QByteArray &rawData, const GUrl &) const {
57 const char *data = rawData.constData();
58 int size = rawData.size();
59
60 if (size <= 4 || data[0] != 'A' || data[1] != 'B' || data[2] != 'I' || data[3] != 'F') {
61 /*
62 * Maybe we've got a file in MacBinary format in which case
63 * we'll have an extra offset 128 bytes to add.
64 */
65 data += 128;
66 size -= 128;
67 if (size <= 4 || data[0] != 'A' || data[1] != 'B' || data[2] != 'I' || data[3] != 'F') {
68 return FormatDetection_NotMatched;
69 }
70 }
71 bool hasBinaryBlocks = TextUtils::contains(TextUtils::BINARY, data, size);
72 return hasBinaryBlocks ? FormatDetection_Matched : FormatDetection_NotMatched;
73 }
74
75 #define MAX_SUPPORTED_ABIF_SIZE 10 * 1024 * 1024
76
loadDocument(IOAdapter * io,const U2DbiRef & dbiRef,const QVariantMap & fs,U2OpStatus & os)77 Document *ABIFormat::loadDocument(IOAdapter *io, const U2DbiRef &dbiRef, const QVariantMap &fs, U2OpStatus &os) {
78 QByteArray readBuff;
79 QByteArray block(BUFF_SIZE, 0);
80 quint64 len = 0;
81 while ((len = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
82 readBuff.append(QByteArray(block.data(), len));
83 CHECK_EXT(readBuff.size() <= MAX_SUPPORTED_ABIF_SIZE, os.setError(L10N::errorFileTooLarge(io->getURL())), nullptr);
84 }
85
86 SeekableBuf sf;
87 sf.head = readBuff.constData();
88 sf.pos = 0;
89 sf.size = readBuff.size();
90 Document *doc = parseABI(dbiRef, &sf, io, fs, os);
91 CHECK_OP(os, nullptr)
92 CHECK_EXT(doc != nullptr, os.setError(tr("Not a valid ABIF file: %1").arg(io->toString())), nullptr);
93 return doc;
94 }
95
loadSequence(IOAdapter * io,U2OpStatus & os)96 DNASequence *ABIFormat::loadSequence(IOAdapter *io, U2OpStatus &os) {
97 if (io->isEof()) {
98 return nullptr;
99 }
100
101 CHECK_EXT((io != nullptr) && (io->isOpen() == true), os.setError(L10N::badArgument("IO adapter")), nullptr);
102 QByteArray readBuff;
103 QByteArray block(BUFF_SIZE, 0);
104 quint64 len = 0;
105 while ((len = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
106 readBuff.append(QByteArray(block.data(), len));
107 CHECK_EXT(readBuff.size() <= MAX_SUPPORTED_ABIF_SIZE, os.setError(L10N::errorFileTooLarge(io->getURL())), nullptr);
108 }
109
110 SeekableBuf sf;
111 sf.head = readBuff.constData();
112 sf.pos = 0;
113 sf.size = readBuff.size();
114
115 DNASequence *seq = new DNASequence();
116 DNAChromatogram cd;
117
118 if (!loadABIObjects(&sf, (*seq), cd)) {
119 os.setError(tr("Failed to load sequence from ABI file %1").arg(io->toString()));
120 }
121
122 return seq;
123 }
124
125 /*
126 * Copyright (c) Medical Research Council 1994. All rights reserved.
127 *
128 * Permission to use, copy, modify and distribute this software and its
129 * documentation for any purpose is hereby granted without fee, provided that
130 * this copyright and notice appears in all copies.
131 *
132 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
133 * as part of the Staden Package at the MRC Laboratory of Molecular
134 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
135 *
136 * MRC disclaims all warranties with regard to this software.
137 */
138
139 /*
140 * Title: seqIOABI
141 *
142 * File: seqIOABI.c
143 * Purpose: Reading (not writing) of ABI sequences
144 * Last update: Fri Sep 02, 1994
145 */
146
147 /*
148 * The ABI magic number - "ABIF"
149 */
150 #define ABI_MAGIC ((int)((((('A' << 8) + 'B') << 8) + 'I') << 8) + 'F')
151
152 /*
153 * The index is located towards the end of the ABI trace file.
154 * It's location is given by a longword at a fixed place.
155 */
156 #define IndexPO 26
157
158 #define IndexEntryLength 28
159
160 /*
161 * Here are some labels we will be looking for, four chars packed
162 * into an int_4
163 */
164 #define LABEL(a) ((int)((((((a)[0] << 8) + (a)[1]) << 8) + (a)[2]) << 8) + (a)[3])
165 #define DataEntryLabel LABEL("DATA")
166 #define BaseEntryLabel LABEL("PBAS")
167 #define BasePosEntryLabel LABEL("PLOC")
168 #define SpacingEntryLabel LABEL("SPAC")
169 #define SignalEntryLabel LABEL("S/N%")
170 #define FWO_Label LABEL("FWO_")
171 #define MCHNLabel LABEL("MCHN")
172 #define PDMFLabel LABEL("PDMF")
173 #define SMPLLabel LABEL("SMPL")
174 #define PPOSLabel LABEL("PPOS")
175 #define CMNTLabel LABEL("CMNT")
176 #define GelNameLabel LABEL("GELN")
177 #define LANELabel LABEL("LANE")
178 #define RUNDLabel LABEL("RUND")
179 #define RUNTLabel LABEL("RUNT")
180 #define MTXFLabel LABEL("MTXF")
181 #define SPACLabel LABEL("SPAC")
182 #define SVERLabel LABEL("SVER")
183 #define MODLLabel LABEL("MODL")
184 #define BaseConfLabel LABEL("PCON")
185
186 #define baseIndex(B) ((B) == 'C' ? 0 : (B) == 'A' ? 1 \
187 : (B) == 'G' ? 2 \
188 : 3)
189
190 /*
191 * Gets the offset of the ABI index.
192 * Returns -1 for failure, 0 for success.
193 */
getABIIndexOffset(SeekableBuf * fp,uint * indexO)194 static int getABIIndexOffset(SeekableBuf *fp, uint *indexO) {
195 uint magic = 0;
196
197 /*
198 * Initialise header_fudge.
199 *
200 * This is usually zero, but maybe we've transfered a file in MacBinary
201 * format in which case we'll have an extra 128 bytes to add to all
202 * our fseeks.
203 */
204 be_read_int_4(fp, &magic);
205 if (magic != ABI_MAGIC) {
206 fp->head += 128;
207 fp->size -= 128;
208 }
209
210 if ((SeekBuf(fp, IndexPO, 0) != 0) || (!be_read_int_4(fp, indexO)))
211 return -1;
212 else
213 return 0;
214 }
215
216 /*
217 * From the ABI results file connected to `fp' whose index starts
218 * at byte offset `indexO', return in `val' the `lw'th long word
219 * from the `count'th entry labelled `label'.
220 * The result is 0 for failure, or index offset for success.
221 */
getABIIndexEntryLW(SeekableBuf * fp,int indexO,uint label,uint count,int lw,uint * val)222 int getABIIndexEntryLW(SeekableBuf *fp, int indexO, uint label, uint count, int lw, uint *val) {
223 int entryNum = -1;
224 int i;
225 uint entryLabel, entryLw1;
226
227 do {
228 entryNum++;
229
230 if (SeekBuf(fp, indexO + (entryNum * IndexEntryLength), 0) != 0)
231 return 0;
232
233 if (!be_read_int_4(fp, &entryLabel))
234 return 0;
235
236 if (!be_read_int_4(fp, &entryLw1))
237 return 0;
238 } while (!(entryLabel == label && entryLw1 == count));
239
240 for (i = 2; i <= lw; i++) {
241 if (!be_read_int_4(fp, val))
242 return 0;
243 }
244
245 return indexO + (entryNum * IndexEntryLength);
246 }
247
248 /*
249 * From the ABI results file connected to `fp' whose index starts
250 * at byte offset `indexO', return in `val' the `sw'th short word
251 * from the `count'th entry labelled `label'.
252 * The result is 0 for failure, or index offset for success.
253 */
getABIIndexEntrySW(SeekableBuf * fp,int indexO,uint label,uint count,int sw,ushort * val)254 int getABIIndexEntrySW(SeekableBuf *fp, int indexO, uint label, uint count, int sw, ushort *val) {
255 int entryNum = -1;
256 int i;
257 uint entryLabel, entryLw1;
258
259 do {
260 entryNum++;
261
262 if (SeekBuf(fp, indexO + (entryNum * IndexEntryLength), 0) != 0)
263 return 0;
264
265 if (!be_read_int_4(fp, &entryLabel))
266 return 0;
267
268 if (!be_read_int_4(fp, &entryLw1))
269 return 0;
270 } while (!(entryLabel == label && entryLw1 == count));
271
272 for (i = 4; i <= sw; i++) {
273 if (!be_read_int_2(fp, val))
274 return 0;
275 }
276
277 return indexO + (entryNum * IndexEntryLength);
278 }
279
280 /*
281 * Get an "ABI String". These strings are either pointed to by the index
282 * offset, or held in the offset itself when the string is <= 4 characters.
283 * The "type" of the index entry is either 0x12 (a pascal string in which
284 * case the first byte of the string determines its length) or a 0x02 (a
285 * C-style string with length coming from the abi index).
286 *
287 * "string" will be max 256 bytes for the pascal string, but is of unknown
288 * (and hence potentially buggy) length for C-strings. For now we live with
289 * it as this entire file needs rewriting from scratch anyway.
290 *
291 * Returns -1 for failure, string length for success.
292 */
getABIString(SeekableBuf * fp,int indexO,uint label,uint count,char * string)293 int getABIString(SeekableBuf *fp, int indexO, uint label, uint count, char *string) {
294 uint off;
295 uint len;
296 quint16 type;
297
298 off = getABIIndexEntrySW(fp, indexO, label, count, 4, &type);
299 if (!off)
300 return -1;
301
302 if ((off = getABIIndexEntryLW(fp, indexO, label, count, 4, &len))) {
303 uchar len2 = 0;
304
305 if (!len)
306 return 0;
307
308 /* Determine offset */
309 if (len <= 4)
310 off += 20;
311 else
312 getABIIndexEntryLW(fp, indexO, label, count, 5, &off);
313
314 /* Read length byte */
315 if (type == 0x12) {
316 SeekBuf(fp, off, 0);
317 be_read_int_1(fp, &len2);
318 } else {
319 len2 = len;
320 }
321
322 /* Read data (max 255 bytes) */
323 fp->read(string, len2);
324 string[len2] = 0;
325
326 return len2;
327 } else {
328 return -1;
329 }
330 }
331
332 /*
333 * Get an "ABI Int_1". This is raw 1-byte integer data pointed to by the
334 * offset, or held in the offset itself when the data is <= 4 characters.
335 *
336 * If indexO is 0 then we do not search for (or indeed use) label and count,
337 * but simply assume that we are already at the correct offset and read from
338 * here. (NB: This negates the length <= 4 check.)
339 *
340 * Returns -1 for failure, length desired for success (it'll only fill out
341 * up to max_data_len elements, but it gives an indication of whether there
342 * was more to come).
343 */
getABIint1(SeekableBuf * fp,int indexO,uint label,uint count,uchar * data,int max_data_len)344 int getABIint1(SeekableBuf *fp, int indexO, uint label, uint count, uchar *data, int max_data_len) {
345 uint off;
346 uint len, len2;
347
348 if (indexO) {
349 if (!(off = getABIIndexEntryLW(fp, indexO, label, count, 4, &len)))
350 return -1;
351
352 if (!len)
353 return 0;
354
355 /* Determine offset */
356 if (len <= 4)
357 off += 20;
358 else
359 getABIIndexEntryLW(fp, indexO, label, count, 5, &off);
360
361 len2 = qMin((uint)max_data_len, len);
362
363 SeekBuf(fp, off, 0);
364 } else {
365 len = len2 = max_data_len;
366 }
367
368 fp->read((char *)data, len2);
369
370 return len;
371 }
372
373 /*
374 * Get an "ABI Int_2". This is raw 2-byte integer data pointed to by the
375 * offset, or held in the offset itself when the data is <= 4 characters.
376 *
377 * Returns -1 for failure, length desired for success (it'll only fill out
378 * up to max_data_len elements, but it gives an indication of whether there
379 * was more to come).
380 */
getABIint2(SeekableBuf * fp,int indexO,uint label,uint count,ushort * data,int max_data_len)381 int getABIint2(SeekableBuf *fp, int indexO, uint label, uint count, ushort *data, int max_data_len) {
382 int len, l2;
383 int i;
384
385 len = getABIint1(fp, indexO, label, count, (uchar *)data, max_data_len * 2);
386 if (-1 == len)
387 return -1;
388
389 len /= 2;
390 l2 = qMin(len, max_data_len);
391 for (i = 0; i < l2; i++) {
392 data[i] = be_int2((uchar *)(data + i));
393 }
394
395 return len;
396 }
397
398 /*
399 * Get an "ABI Int_4". This is raw 4-byte integer data pointed to by the
400 * offset, or held in the offset itself when the data is <= 4 characters.
401 *
402 * Returns -1 for failure, length desired for success (it'll only fill out
403 * up to max_data_len elements, but it gives an indication of whether there
404 * was more to come).
405 */
getABIint4(SeekableBuf * fp,int indexO,uint label,uint count,uint * data,int max_data_len)406 int getABIint4(SeekableBuf *fp, int indexO, uint label, uint count, uint *data, int max_data_len) {
407 int len, l2;
408 int i;
409
410 len = getABIint1(fp, indexO, label, count, (uchar *)data, max_data_len * 4);
411 if (-1 == len)
412 return -1;
413
414 len /= 4;
415 l2 = qMin(len, max_data_len);
416 for (i = 0; i < l2; i++) {
417 data[i] = be_int4((uchar *)(data + i));
418 }
419
420 return len;
421 }
422
replace_nl(char * string)423 static void replace_nl(char *string) {
424 char *cp;
425
426 for (cp = string; *cp; cp++) {
427 if (*cp == '\n')
428 *cp = ' ';
429 }
430 }
431
parseABI(const U2DbiRef & dbiRef,SeekableBuf * fp,IOAdapter * io,const QVariantMap & fs,U2OpStatus & os)432 Document *ABIFormat::parseABI(const U2DbiRef &dbiRef, SeekableBuf *fp, IOAdapter *io, const QVariantMap &fs, U2OpStatus &os) {
433 DbiOperationsBlock opBlock(dbiRef, os);
434 CHECK_OP(os, nullptr);
435 DNASequence dna;
436 DNAChromatogram cd;
437
438 if (!loadABIObjects(fp, dna, cd)) {
439 return nullptr;
440 }
441 if (dna.getName().isEmpty()) {
442 dna.setName("Sequence");
443 }
444
445 QList<GObject *> objects;
446 QVariantMap hints;
447 QString folder = fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();
448 hints.insert(DBI_FOLDER_HINT, folder);
449 if (dna.alphabet == nullptr) {
450 dna.alphabet = U2AlphabetUtils::findBestAlphabet(dna.seq);
451 CHECK_EXT(dna.alphabet != nullptr, os.setError(tr("Undefined sequence alphabet")), nullptr);
452 }
453 U2EntityRef ref = U2SequenceUtils::import(os, dbiRef, folder, dna, dna.alphabet->getId());
454 CHECK_OP(os, nullptr);
455 U2SequenceObject *seqObj = new U2SequenceObject(dna.getName(), ref);
456 objects.append(seqObj);
457
458 DNAChromatogramObject *chromObj = DNAChromatogramObject::createInstance(cd, "Chromatogram", dbiRef, os, hints);
459 CHECK_OP(os, nullptr);
460 objects.append(chromObj);
461
462 QString seqComment = dna.info.value(DNAInfo::COMMENT).toStringList().join("\n");
463 TextObject *textObj = TextObject::createInstance(seqComment, "Info", dbiRef, os, hints);
464 CHECK_OP(os, nullptr);
465 objects.append(textObj);
466
467 Document *doc = new Document(this, io->getFactory(), io->getURL(), dbiRef, objects, fs);
468 chromObj->addObjectRelation(GObjectRelation(GObjectReference(seqObj), ObjectRole_Sequence));
469 return doc;
470 }
471
loadABIObjects(SeekableBuf * fp,DNASequence & dna,DNAChromatogram & cd)472 bool ABIFormat::loadABIObjects(SeekableBuf *fp, DNASequence &dna, DNAChromatogram &cd) {
473 uint numPoints, numBases;
474 uint signalO;
475 int no_bases = 0;
476 int sections = READ_ALL;
477
478 uint fwo_; /* base -> lane mapping */
479 uint indexO; /* File offset where the index is */
480 uint baseO; /* File offset where the bases are stored */
481 uint dataCO; /* File offset where the C trace is stored */
482 uint dataAO; /* File offset where the A trace is stored */
483 uint dataGO; /* File offset where the G trace is stored */
484 uint dataTO; /* File offset where the T trace is stored */
485 uint offset; /* Generic offset */
486 uint offset2; /* Generic offset */
487 uint offset3; /* Generic offset */
488 uint offset4; /* Generic offset */
489
490 /* DATA block numbers for traces, in order of FWO_ */
491 int DataCount[4] = {9, 10, 11, 12};
492
493 QString sequenceName;
494 QString sequenceComment;
495 QByteArray sequence;
496 DNAQuality quality;
497
498 /* Get the index offset */
499 if (-1 == getABIIndexOffset(fp, &indexO))
500 return false;
501
502 /* Get the number of points */
503 if (!getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[0], 3, &numPoints))
504 return false;
505
506 /* Get the number of bases */
507 if (!getABIIndexEntryLW(fp, indexO, BaseEntryLabel, 1, 3, &numBases)) {
508 no_bases = 1;
509 numBases = 0;
510 }
511
512 /* Allocate the sequence */
513 /* Allocate space for the bases - 1 extra for the ->base field so
514 * that we can treat it as a NULL terminated string.
515 */
516 if (sections & READ_BASES) {
517 cd.prob_A.resize(numBases + 1);
518 cd.prob_C.resize(numBases + 1);
519 cd.prob_G.resize(numBases + 1);
520 cd.prob_T.resize(numBases + 1);
521 cd.baseCalls.resize(numBases + 1);
522 }
523
524 if (sections & READ_SAMPLES) {
525 cd.A.resize(numPoints + 1);
526 cd.C.resize(numPoints + 1);
527 cd.G.resize(numPoints + 1);
528 cd.T.resize(numPoints + 1);
529 }
530
531 /* Get the Filter Wheel Order (FWO_) field ... */
532 if (!getABIIndexEntryLW(fp, indexO, FWO_Label, 1, 5, &fwo_)) {
533 /* Guess at CAGT */
534 fwo_ = 0x43414754;
535 }
536
537 /*
538 * The order of the DATA fields is determined by the field FWO_
539 * Juggle around with data pointers to get it right
540 */
541 if (sections & READ_SAMPLES) {
542 uint *dataxO[4];
543
544 dataxO[0] = &dataCO;
545 dataxO[1] = &dataAO;
546 dataxO[2] = &dataGO;
547 dataxO[3] = &dataTO;
548
549 /*Get the positions of the four traces */
550 if (!(getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[0], 5, dataxO[baseIndex((char)(fwo_ >> 24 & 255))]) &&
551 getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[1], 5, dataxO[baseIndex((char)(fwo_ >> 16 & 255))]) &&
552 getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[2], 5, dataxO[baseIndex((char)(fwo_ >> 8 & 255))]) &&
553 getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[3], 5, dataxO[baseIndex((char)(fwo_ & 255))]))) {
554 return false;
555 }
556 }
557
558 /*************************************************************
559 * Read the traces and bases information
560 *************************************************************/
561
562 if (sections & READ_SAMPLES) {
563 /* Read in the C trace */
564 if (SeekBuf(fp, dataCO, 0) == -1)
565 return false;
566 getABIint2(fp, 0, 0, 0, cd.C.data(), numPoints);
567
568 /* Read in the A trace */
569 if (SeekBuf(fp, dataAO, 0) == -1)
570 return false;
571 getABIint2(fp, 0, 0, 0, cd.A.data(), numPoints);
572
573 /* Read in the G trace */
574 if (SeekBuf(fp, dataGO, 0) == -1)
575 return false;
576 getABIint2(fp, 0, 0, 0, cd.G.data(), numPoints);
577
578 /* Read in the T trace */
579 if (SeekBuf(fp, dataTO, 0) == -1)
580 return false;
581 getABIint2(fp, 0, 0, 0, cd.T.data(), numPoints);
582
583 /* Compute highest trace peak */
584 /*for (i=0; i < numPoints; i++) {
585 if (read->maxTraceVal < read->traceA[i])
586 read->maxTraceVal = read->traceA[i];
587 if (read->maxTraceVal < read->traceC[i])
588 read->maxTraceVal = read->traceC[i];
589 if (read->maxTraceVal < read->traceG[i])
590 read->maxTraceVal = read->traceG[i];
591 if (read->maxTraceVal < read->traceT[i])
592 read->maxTraceVal = read->traceT[i];
593 }*/
594 }
595
596 if (no_bases || !(sections & READ_BASES)) {
597 goto skip_bases;
598 }
599
600 /* Read in base confidence values */
601 {
602 QVector<uchar> conf(numBases);
603 int res = getABIint1(fp, indexO, BaseConfLabel, 2, conf.data(), numBases);
604
605 /* Read in the bases */
606 if (!(getABIIndexEntryLW(fp, indexO, BaseEntryLabel, 1, 5, &baseO) && (SeekBuf(fp, baseO, 0) == 0))) {
607 return false;
608 }
609
610 sequence = QByteArray(numBases, 0);
611 if (!fp->read(sequence.data(), numBases)) {
612 return false;
613 }
614
615 QByteArray qualCodes(numBases, 0);
616
617 if (res != -1) {
618 for (uint i = 0; i < numBases; i++) {
619 qualCodes[i] = DNAQuality::encode(conf[i], DNAQualityType_Sanger);
620 switch (sequence[i]) {
621 case 'A':
622 case 'a':
623 cd.prob_A[i] = conf[i];
624 cd.prob_C[i] = 0;
625 cd.prob_G[i] = 0;
626 cd.prob_T[i] = 0;
627 break;
628
629 case 'C':
630 case 'c':
631 cd.prob_A[i] = 0;
632 cd.prob_C[i] = conf[i];
633 cd.prob_G[i] = 0;
634 cd.prob_T[i] = 0;
635 break;
636
637 case 'G':
638 case 'g':
639 cd.prob_A[i] = 0;
640 cd.prob_C[i] = 0;
641 cd.prob_G[i] = conf[i];
642 cd.prob_T[i] = 0;
643 break;
644
645 case 'T':
646 case 't':
647 cd.prob_A[i] = 0;
648 cd.prob_C[i] = 0;
649 cd.prob_G[i] = 0;
650 cd.prob_T[i] = conf[i];
651 break;
652
653 default:
654 cd.prob_A[i] = 0;
655 cd.prob_C[i] = 0;
656 cd.prob_G[i] = 0;
657 cd.prob_T[i] = 0;
658 break;
659 }
660 }
661 }
662
663 quality.setQualCodes(qualCodes);
664 }
665
666 /* Read in the base positions */
667 if (-1 == getABIint2(fp, indexO, BasePosEntryLabel, 1, cd.baseCalls.data(), numBases)) {
668 return false;
669 }
670
671 /*
672 * Check for corrupted traces where the bases are positioned on sample
673 * coordinates which do not exist. Witnessed on some MegaBACE files.
674 */
675 if (cd.baseCalls[numBases - 1] > numPoints) {
676 int n = cd.baseCalls[numBases - 1] + 1;
677 cd.A.resize(n);
678 cd.C.resize(n);
679 cd.G.resize(n);
680 cd.T.resize(n);
681 numPoints = n;
682 }
683
684 skip_bases:
685 /*************************************************************
686 * Gather useful information - the comments field
687 *************************************************************/
688 if (sections & READ_COMMENTS) {
689 char buffer[257];
690 char commstr[256];
691 int spacing;
692 ushort i2;
693 uint i4;
694
695 /* The ABI comments */
696 int clen = getABIString(fp, indexO, CMNTLabel, 1, commstr);
697 if (clen != -1) {
698 commstr[clen] = 0;
699 char *commstrp = commstr;
700 char *p;
701 do {
702 if ((p = strchr(commstrp, '\n'))) {
703 *p++ = 0;
704 }
705 sequenceComment.append(QString("ABI Comment: %1\n").arg(commstrp));
706 } while ((commstrp = p));
707 }
708
709 /* Get Sample Name Offset */
710 if (-1 != getABIString(fp, indexO, SMPLLabel, 1, buffer)) {
711 replace_nl(buffer);
712 sequenceComment.append(QString("Sample: %1\n").arg(buffer));
713 sequenceName.append(buffer);
714 }
715
716 /* LANE */
717 if (-1 != getABIint2(fp, indexO, LANELabel, 1, &i2, 1)) {
718 sequenceComment.append(QString("LANE=%1\n").arg(i2));
719 }
720
721 /* Get Signal Strength Offset */
722 if (getABIIndexEntryLW(fp, indexO, SignalEntryLabel, 1, 5, &signalO)) {
723 short C, A, G, T;
724 short *base[4];
725 base[0] = &C;
726 base[1] = &A;
727 base[2] = &G;
728 base[3] = &T;
729
730 if (SeekBuf(fp, signalO, 0) != -1 &&
731 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ >> 24 & 255))]) &&
732 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ >> 16 & 255))]) &&
733 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ >> 8 & 255))]) &&
734 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ & 255))])) {
735 sequenceComment.append(QString("SIGN=A=%1,C=%2,G=%3,T=%4\n").arg(A).arg(C).arg(G).arg(T));
736 }
737 }
738
739 /* Get the spacing.. it's a float but don't worry yet */
740 float fspacing = 0;
741 if (-1 != getABIint4(fp, indexO, SpacingEntryLabel, 1, (uint *)&spacing, 1)) {
742 fspacing = int_to_float(spacing);
743 sequenceComment.append(QString("SPAC=%1\n").arg(fspacing)); //-6.2f",
744 }
745 /* Correction for when spacing is negative. Why does this happen? */
746 if (fspacing <= 0) {
747 if (numBases > 1) {
748 if (sections & READ_BASES)
749 fspacing = (float)(cd.baseCalls[numBases - 1] - cd.baseCalls[0]) / (float)(numBases - 1);
750 else
751 fspacing = (float)numPoints / (float)numBases;
752 } else {
753 fspacing = 1;
754 }
755 }
756
757 /* Get primer position */
758 if (getABIIndexEntryLW(fp, indexO, PPOSLabel, 1, 5, (uint *)&i4)) {
759 /* ppos stores in MBShort of pointer */
760 sequenceComment.append(QString("PRIM=%1\n").arg(i4 >> 16));
761 }
762
763 /* RUND/RUNT */
764 if (getABIIndexEntryLW(fp, indexO, RUNDLabel, 1, 5, &offset) &&
765 getABIIndexEntryLW(fp, indexO, RUNDLabel, 2, 5, &offset2) &&
766 getABIIndexEntryLW(fp, indexO, RUNTLabel, 1, 5, &offset3) &&
767 getABIIndexEntryLW(fp, indexO, RUNTLabel, 2, 5, &offset4)) {
768 // char buffer[1025];
769 char buffer_s[1025];
770 char buffer_e[1025];
771 struct tm t;
772 uint rund_s, rund_e, runt_s, runt_e;
773
774 rund_s = offset;
775 rund_e = offset2;
776 runt_s = offset3;
777 runt_e = offset4;
778
779 // sprintf(buffer, "%04d%02d%02d.%02d%02d%02d - %04d%02d%02d.%02d%02d%02d",
780 // rund_s >> 16, (rund_s >> 8) & 0xff, rund_s & 0xff,
781 // runt_s >> 24, (runt_s >> 16) & 0xff, (runt_s >> 8) & 0xff,
782 // rund_e >> 16, (rund_e >> 8) & 0xff, rund_e & 0xff,
783 // runt_e >> 24, (runt_e >> 16) & 0xff, (runt_e >> 8) & 0xff);
784 QString rundBuffer = QString("%1%2%3.%4%5%6 - %7%8%9.%10%11%12")
785 .arg((rund_s >> 16), 4, 10, QLatin1Char('0'))
786 .arg((rund_s >> 8) & 0xff, 2, 10, QLatin1Char('0'))
787 .arg((rund_s & 0xff), 2, 10, QLatin1Char('0'))
788 .arg(runt_s >> 24, 2, 10, QLatin1Char('0'))
789 .arg((runt_s >> 16) & 0xff, 2, 10, QLatin1Char('0'))
790 .arg((runt_s >> 8) & 0xff, 2, 10, QLatin1Char('0'))
791 .arg(rund_e >> 16, 4, 10, QLatin1Char('0'))
792 .arg((rund_e >> 8) & 0xff, 2, 10, QLatin1Char('0'))
793 .arg(rund_e & 0xff, 2, 10, QLatin1Char('0'))
794 .arg(runt_e >> 24, 2, 10, QLatin1Char('0'))
795 .arg((runt_e >> 16) & 0xff, 2, 10, QLatin1Char('0'))
796 .arg((runt_e >> 8) & 0xff, 2, 10, QLatin1Char('0'));
797
798 memset(&t, 0, sizeof(t));
799 t.tm_mday = rund_s & 0xff;
800 t.tm_mon = ((rund_s >> 8) & 0xff) - 1;
801 t.tm_year = (rund_s >> 16) - 1900;
802 t.tm_hour = runt_s >> 24;
803 t.tm_min = (runt_s >> 16) & 0xff;
804 t.tm_sec = (runt_s >> 8) & 0xff;
805 t.tm_isdst = -1;
806 /*
807 * Convert struct tm to time_t. We ignore the time_t value, but
808 * the conversion process will update the tm_wday element of
809 * struct tm.
810 */
811 mktime(&t);
812 strftime(buffer_s, 1024, "%a %d %b %H:%M:%S %Y", &t);
813
814 t.tm_mday = rund_e & 0xff;
815 t.tm_mon = ((rund_e >> 8) & 0xff) - 1;
816 t.tm_year = (rund_e >> 16) - 1900;
817 t.tm_hour = runt_e >> 24;
818 t.tm_min = (runt_e >> 16) & 0xff;
819 t.tm_sec = (runt_e >> 8) & 0xff;
820 t.tm_isdst = -1;
821 /*
822 * Convert struct tm to time_t. We ignore the time_t value, but
823 * the conversion process will update the tm_wday element of
824 * struct tm.
825 */
826 mktime(&t);
827 strftime(buffer_e, 1024, "%a %d %b %H:%M:%S %Y", &t);
828
829 sequenceComment.append(QString("DATE=%1 to %2\nRUND=%3\n").arg(buffer_s).arg(buffer_e).arg(rundBuffer));
830 }
831
832 /* Get Dye Primer Offset */
833 if (-1 != getABIString(fp, indexO, PDMFLabel, 1, buffer)) {
834 replace_nl(buffer);
835 sequenceComment.append(QString("DYEP=%1\n").arg(buffer));
836 }
837
838 /* Get Machine Name Offset */
839 if (-1 != getABIString(fp, indexO, MCHNLabel, 1, buffer)) {
840 replace_nl(buffer);
841 sequenceComment.append(QString("MACH=%1\n").arg(buffer));
842 }
843
844 /* Machine model */
845 if (-1 != getABIString(fp, indexO, MODLLabel, 1, buffer)) {
846 replace_nl(buffer);
847 sequenceComment.append(QString("MODL=%1\n").arg(buffer));
848 }
849
850 /* Matrix file */
851 if (-1 != getABIString(fp, indexO, MTXFLabel, 1, buffer)) {
852 replace_nl(buffer);
853 sequenceComment.append(QString("MTXF=%1\n").arg(buffer));
854 }
855
856 /* Base calling version */
857 if (-1 != getABIString(fp, indexO, SPACLabel, 2, buffer)) {
858 replace_nl(buffer);
859 sequenceComment.append(QString("BCAL=%1\n").arg(buffer));
860 }
861
862 /* Software versions */
863 if (-1 != getABIString(fp, indexO, SVERLabel, 1, buffer)) {
864 replace_nl(buffer);
865 sequenceComment.append(QString("VER1=%1\n").arg(buffer));
866 }
867 if (-1 != getABIString(fp, indexO, SVERLabel, 2, buffer)) {
868 replace_nl(buffer);
869 sequenceComment.append(QString("VER2=%1\n").arg(buffer));
870 }
871
872 /* Get Gel Name Offset */
873 if (-1 != getABIString(fp, indexO, GelNameLabel, 1, buffer)) {
874 replace_nl(buffer);
875 sequenceComment.append(QString("GELN=%1\n").arg(buffer));
876 }
877 }
878
879 /* SUCCESS */
880 cd.name = sequenceName.isEmpty() ? "chromatogram" : sequenceName + " chromatogram";
881 cd.hasQV = true;
882 cd.seqLength = sequence.size();
883 assert(sequence.size() == int(numBases));
884 cd.traceLength = numPoints;
885 assert(cd.A.size() == int(numPoints + 1));
886
887 dna.setName(sequenceName);
888 dna.seq = sequence;
889 dna.info.insert(DNAInfo::COMMENT, sequenceComment.split("\n"));
890 dna.quality = quality;
891
892 return true;
893 }
894
895 } // namespace U2
896