1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include <time.h>
23 
24 #include <U2Core/DNAAlphabet.h>
25 #include <U2Core/DNAChromatogramObject.h>
26 #include <U2Core/DNAInfo.h>
27 #include <U2Core/DNASequenceObject.h>
28 #include <U2Core/GObjectRelationRoles.h>
29 #include <U2Core/GObjectTypes.h>
30 #include <U2Core/IOAdapter.h>
31 #include <U2Core/L10n.h>
32 #include <U2Core/TextObject.h>
33 #include <U2Core/TextUtils.h>
34 #include <U2Core/U2AlphabetUtils.h>
35 #include <U2Core/U2DbiUtils.h>
36 #include <U2Core/U2ObjectDbi.h>
37 #include <U2Core/U2OpStatus.h>
38 #include <U2Core/U2SafePoints.h>
39 #include <U2Core/U2SequenceUtils.h>
40 
41 #include "ABIFormat.h"
42 #include "IOLibUtils.h"
43 
44 /* TRANSLATOR U2::ABIFormat */
45 
46 namespace U2 {
47 
ABIFormat(QObject * p)48 ABIFormat::ABIFormat(QObject *p)
49     : DocumentFormat(p, BaseDocumentFormats::ABIF, DocumentFormatFlag_SupportStreaming, {"ab1", "abi", "abif"}) {
50     formatName = tr("ABIF");
51     formatDescription = tr("A chromatogram file format");
52     supportedObjectTypes += GObjectTypes::SEQUENCE;
53     supportedObjectTypes += GObjectTypes::CHROMATOGRAM;
54 }
55 
checkRawData(const QByteArray & rawData,const GUrl &) const56 FormatCheckResult ABIFormat::checkRawData(const QByteArray &rawData, const GUrl &) const {
57     const char *data = rawData.constData();
58     int size = rawData.size();
59 
60     if (size <= 4 || data[0] != 'A' || data[1] != 'B' || data[2] != 'I' || data[3] != 'F') {
61         /*
62          * Maybe we've got a file in MacBinary format in which case
63          * we'll have an extra offset 128 bytes to add.
64          */
65         data += 128;
66         size -= 128;
67         if (size <= 4 || data[0] != 'A' || data[1] != 'B' || data[2] != 'I' || data[3] != 'F') {
68             return FormatDetection_NotMatched;
69         }
70     }
71     bool hasBinaryBlocks = TextUtils::contains(TextUtils::BINARY, data, size);
72     return hasBinaryBlocks ? FormatDetection_Matched : FormatDetection_NotMatched;
73 }
74 
75 #define MAX_SUPPORTED_ABIF_SIZE 10 * 1024 * 1024
76 
loadDocument(IOAdapter * io,const U2DbiRef & dbiRef,const QVariantMap & fs,U2OpStatus & os)77 Document *ABIFormat::loadDocument(IOAdapter *io, const U2DbiRef &dbiRef, const QVariantMap &fs, U2OpStatus &os) {
78     QByteArray readBuff;
79     QByteArray block(BUFF_SIZE, 0);
80     quint64 len = 0;
81     while ((len = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
82         readBuff.append(QByteArray(block.data(), len));
83         CHECK_EXT(readBuff.size() <= MAX_SUPPORTED_ABIF_SIZE, os.setError(L10N::errorFileTooLarge(io->getURL())), nullptr);
84     }
85 
86     SeekableBuf sf;
87     sf.head = readBuff.constData();
88     sf.pos = 0;
89     sf.size = readBuff.size();
90     Document *doc = parseABI(dbiRef, &sf, io, fs, os);
91     CHECK_OP(os, nullptr)
92     CHECK_EXT(doc != nullptr, os.setError(tr("Not a valid ABIF file: %1").arg(io->toString())), nullptr);
93     return doc;
94 }
95 
loadSequence(IOAdapter * io,U2OpStatus & os)96 DNASequence *ABIFormat::loadSequence(IOAdapter *io, U2OpStatus &os) {
97     if (io->isEof()) {
98         return nullptr;
99     }
100 
101     CHECK_EXT((io != nullptr) && (io->isOpen() == true), os.setError(L10N::badArgument("IO adapter")), nullptr);
102     QByteArray readBuff;
103     QByteArray block(BUFF_SIZE, 0);
104     quint64 len = 0;
105     while ((len = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
106         readBuff.append(QByteArray(block.data(), len));
107         CHECK_EXT(readBuff.size() <= MAX_SUPPORTED_ABIF_SIZE, os.setError(L10N::errorFileTooLarge(io->getURL())), nullptr);
108     }
109 
110     SeekableBuf sf;
111     sf.head = readBuff.constData();
112     sf.pos = 0;
113     sf.size = readBuff.size();
114 
115     DNASequence *seq = new DNASequence();
116     DNAChromatogram cd;
117 
118     if (!loadABIObjects(&sf, (*seq), cd)) {
119         os.setError(tr("Failed to load sequence from ABI file %1").arg(io->toString()));
120     }
121 
122     return seq;
123 }
124 
125 /*
126  * Copyright (c) Medical Research Council 1994. All rights reserved.
127  *
128  * Permission to use, copy, modify and distribute this software and its
129  * documentation for any purpose is hereby granted without fee, provided that
130  * this copyright and notice appears in all copies.
131  *
132  * This file was written by James Bonfield, Simon Dear, Rodger Staden,
133  * as part of the Staden Package at the MRC Laboratory of Molecular
134  * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
135  *
136  * MRC disclaims all warranties with regard to this software.
137  */
138 
139 /*
140  * Title:    seqIOABI
141  *
142  * File:     seqIOABI.c
143  * Purpose:  Reading (not writing) of ABI sequences
144  * Last update: Fri Sep 02, 1994
145  */
146 
147 /*
148  * The ABI magic number - "ABIF"
149  */
150 #define ABI_MAGIC ((int)((((('A' << 8) + 'B') << 8) + 'I') << 8) + 'F')
151 
152 /*
153  * The index is located towards the end of the ABI trace file.
154  * It's location is given by a longword at a fixed place.
155  */
156 #define IndexPO 26
157 
158 #define IndexEntryLength 28
159 
160 /*
161  * Here are some labels we will be looking for, four chars packed
162  * into an int_4
163  */
164 #define LABEL(a) ((int)((((((a)[0] << 8) + (a)[1]) << 8) + (a)[2]) << 8) + (a)[3])
165 #define DataEntryLabel LABEL("DATA")
166 #define BaseEntryLabel LABEL("PBAS")
167 #define BasePosEntryLabel LABEL("PLOC")
168 #define SpacingEntryLabel LABEL("SPAC")
169 #define SignalEntryLabel LABEL("S/N%")
170 #define FWO_Label LABEL("FWO_")
171 #define MCHNLabel LABEL("MCHN")
172 #define PDMFLabel LABEL("PDMF")
173 #define SMPLLabel LABEL("SMPL")
174 #define PPOSLabel LABEL("PPOS")
175 #define CMNTLabel LABEL("CMNT")
176 #define GelNameLabel LABEL("GELN")
177 #define LANELabel LABEL("LANE")
178 #define RUNDLabel LABEL("RUND")
179 #define RUNTLabel LABEL("RUNT")
180 #define MTXFLabel LABEL("MTXF")
181 #define SPACLabel LABEL("SPAC")
182 #define SVERLabel LABEL("SVER")
183 #define MODLLabel LABEL("MODL")
184 #define BaseConfLabel LABEL("PCON")
185 
186 #define baseIndex(B) ((B) == 'C' ? 0 : (B) == 'A' ? 1 \
187                                    : (B) == 'G'   ? 2 \
188                                                   : 3)
189 
190 /*
191  * Gets the offset of the ABI index.
192  * Returns -1 for failure, 0 for success.
193  */
getABIIndexOffset(SeekableBuf * fp,uint * indexO)194 static int getABIIndexOffset(SeekableBuf *fp, uint *indexO) {
195     uint magic = 0;
196 
197     /*
198      * Initialise header_fudge.
199      *
200      * This is usually zero, but maybe we've transfered a file in MacBinary
201      * format in which case we'll have an extra 128 bytes to add to all
202      * our fseeks.
203      */
204     be_read_int_4(fp, &magic);
205     if (magic != ABI_MAGIC) {
206         fp->head += 128;
207         fp->size -= 128;
208     }
209 
210     if ((SeekBuf(fp, IndexPO, 0) != 0) || (!be_read_int_4(fp, indexO)))
211         return -1;
212     else
213         return 0;
214 }
215 
216 /*
217  * From the ABI results file connected to `fp' whose index starts
218  * at byte offset `indexO', return in `val' the `lw'th long word
219  * from the `count'th entry labelled `label'.
220  * The result is 0 for failure, or index offset for success.
221  */
getABIIndexEntryLW(SeekableBuf * fp,int indexO,uint label,uint count,int lw,uint * val)222 int getABIIndexEntryLW(SeekableBuf *fp, int indexO, uint label, uint count, int lw, uint *val) {
223     int entryNum = -1;
224     int i;
225     uint entryLabel, entryLw1;
226 
227     do {
228         entryNum++;
229 
230         if (SeekBuf(fp, indexO + (entryNum * IndexEntryLength), 0) != 0)
231             return 0;
232 
233         if (!be_read_int_4(fp, &entryLabel))
234             return 0;
235 
236         if (!be_read_int_4(fp, &entryLw1))
237             return 0;
238     } while (!(entryLabel == label && entryLw1 == count));
239 
240     for (i = 2; i <= lw; i++) {
241         if (!be_read_int_4(fp, val))
242             return 0;
243     }
244 
245     return indexO + (entryNum * IndexEntryLength);
246 }
247 
248 /*
249  * From the ABI results file connected to `fp' whose index starts
250  * at byte offset `indexO', return in `val' the `sw'th short word
251  * from the `count'th entry labelled `label'.
252  * The result is 0 for failure, or index offset for success.
253  */
getABIIndexEntrySW(SeekableBuf * fp,int indexO,uint label,uint count,int sw,ushort * val)254 int getABIIndexEntrySW(SeekableBuf *fp, int indexO, uint label, uint count, int sw, ushort *val) {
255     int entryNum = -1;
256     int i;
257     uint entryLabel, entryLw1;
258 
259     do {
260         entryNum++;
261 
262         if (SeekBuf(fp, indexO + (entryNum * IndexEntryLength), 0) != 0)
263             return 0;
264 
265         if (!be_read_int_4(fp, &entryLabel))
266             return 0;
267 
268         if (!be_read_int_4(fp, &entryLw1))
269             return 0;
270     } while (!(entryLabel == label && entryLw1 == count));
271 
272     for (i = 4; i <= sw; i++) {
273         if (!be_read_int_2(fp, val))
274             return 0;
275     }
276 
277     return indexO + (entryNum * IndexEntryLength);
278 }
279 
280 /*
281  * Get an "ABI String". These strings are either pointed to by the index
282  * offset, or held in the offset itself when the string is <= 4 characters.
283  * The "type" of the index entry is either 0x12 (a pascal string in which
284  * case the first byte of the string determines its length) or a 0x02 (a
285  * C-style string with length coming from the abi index).
286  *
287  * "string" will be max 256 bytes for the pascal string, but is of unknown
288  * (and hence potentially buggy) length for C-strings. For now we live with
289  * it as this entire file needs rewriting from scratch anyway.
290  *
291  * Returns -1 for failure, string length for success.
292  */
getABIString(SeekableBuf * fp,int indexO,uint label,uint count,char * string)293 int getABIString(SeekableBuf *fp, int indexO, uint label, uint count, char *string) {
294     uint off;
295     uint len;
296     quint16 type;
297 
298     off = getABIIndexEntrySW(fp, indexO, label, count, 4, &type);
299     if (!off)
300         return -1;
301 
302     if ((off = getABIIndexEntryLW(fp, indexO, label, count, 4, &len))) {
303         uchar len2 = 0;
304 
305         if (!len)
306             return 0;
307 
308         /* Determine offset */
309         if (len <= 4)
310             off += 20;
311         else
312             getABIIndexEntryLW(fp, indexO, label, count, 5, &off);
313 
314         /* Read length byte */
315         if (type == 0x12) {
316             SeekBuf(fp, off, 0);
317             be_read_int_1(fp, &len2);
318         } else {
319             len2 = len;
320         }
321 
322         /* Read data (max 255 bytes) */
323         fp->read(string, len2);
324         string[len2] = 0;
325 
326         return len2;
327     } else {
328         return -1;
329     }
330 }
331 
332 /*
333  * Get an "ABI Int_1". This is raw 1-byte integer data pointed to by the
334  * offset, or held in the offset itself when the data is <= 4 characters.
335  *
336  * If indexO is 0 then we do not search for (or indeed use) label and count,
337  * but simply assume that we are already at the correct offset and read from
338  * here. (NB: This negates the length <= 4 check.)
339  *
340  * Returns -1 for failure, length desired for success (it'll only fill out
341  * up to max_data_len elements, but it gives an indication of whether there
342  * was more to come).
343  */
getABIint1(SeekableBuf * fp,int indexO,uint label,uint count,uchar * data,int max_data_len)344 int getABIint1(SeekableBuf *fp, int indexO, uint label, uint count, uchar *data, int max_data_len) {
345     uint off;
346     uint len, len2;
347 
348     if (indexO) {
349         if (!(off = getABIIndexEntryLW(fp, indexO, label, count, 4, &len)))
350             return -1;
351 
352         if (!len)
353             return 0;
354 
355         /* Determine offset */
356         if (len <= 4)
357             off += 20;
358         else
359             getABIIndexEntryLW(fp, indexO, label, count, 5, &off);
360 
361         len2 = qMin((uint)max_data_len, len);
362 
363         SeekBuf(fp, off, 0);
364     } else {
365         len = len2 = max_data_len;
366     }
367 
368     fp->read((char *)data, len2);
369 
370     return len;
371 }
372 
373 /*
374  * Get an "ABI Int_2". This is raw 2-byte integer data pointed to by the
375  * offset, or held in the offset itself when the data is <= 4 characters.
376  *
377  * Returns -1 for failure, length desired for success (it'll only fill out
378  * up to max_data_len elements, but it gives an indication of whether there
379  * was more to come).
380  */
getABIint2(SeekableBuf * fp,int indexO,uint label,uint count,ushort * data,int max_data_len)381 int getABIint2(SeekableBuf *fp, int indexO, uint label, uint count, ushort *data, int max_data_len) {
382     int len, l2;
383     int i;
384 
385     len = getABIint1(fp, indexO, label, count, (uchar *)data, max_data_len * 2);
386     if (-1 == len)
387         return -1;
388 
389     len /= 2;
390     l2 = qMin(len, max_data_len);
391     for (i = 0; i < l2; i++) {
392         data[i] = be_int2((uchar *)(data + i));
393     }
394 
395     return len;
396 }
397 
398 /*
399  * Get an "ABI Int_4". This is raw 4-byte integer data pointed to by the
400  * offset, or held in the offset itself when the data is <= 4 characters.
401  *
402  * Returns -1 for failure, length desired for success (it'll only fill out
403  * up to max_data_len elements, but it gives an indication of whether there
404  * was more to come).
405  */
getABIint4(SeekableBuf * fp,int indexO,uint label,uint count,uint * data,int max_data_len)406 int getABIint4(SeekableBuf *fp, int indexO, uint label, uint count, uint *data, int max_data_len) {
407     int len, l2;
408     int i;
409 
410     len = getABIint1(fp, indexO, label, count, (uchar *)data, max_data_len * 4);
411     if (-1 == len)
412         return -1;
413 
414     len /= 4;
415     l2 = qMin(len, max_data_len);
416     for (i = 0; i < l2; i++) {
417         data[i] = be_int4((uchar *)(data + i));
418     }
419 
420     return len;
421 }
422 
replace_nl(char * string)423 static void replace_nl(char *string) {
424     char *cp;
425 
426     for (cp = string; *cp; cp++) {
427         if (*cp == '\n')
428             *cp = ' ';
429     }
430 }
431 
parseABI(const U2DbiRef & dbiRef,SeekableBuf * fp,IOAdapter * io,const QVariantMap & fs,U2OpStatus & os)432 Document *ABIFormat::parseABI(const U2DbiRef &dbiRef, SeekableBuf *fp, IOAdapter *io, const QVariantMap &fs, U2OpStatus &os) {
433     DbiOperationsBlock opBlock(dbiRef, os);
434     CHECK_OP(os, nullptr);
435     DNASequence dna;
436     DNAChromatogram cd;
437 
438     if (!loadABIObjects(fp, dna, cd)) {
439         return nullptr;
440     }
441     if (dna.getName().isEmpty()) {
442         dna.setName("Sequence");
443     }
444 
445     QList<GObject *> objects;
446     QVariantMap hints;
447     QString folder = fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();
448     hints.insert(DBI_FOLDER_HINT, folder);
449     if (dna.alphabet == nullptr) {
450         dna.alphabet = U2AlphabetUtils::findBestAlphabet(dna.seq);
451         CHECK_EXT(dna.alphabet != nullptr, os.setError(tr("Undefined sequence alphabet")), nullptr);
452     }
453     U2EntityRef ref = U2SequenceUtils::import(os, dbiRef, folder, dna, dna.alphabet->getId());
454     CHECK_OP(os, nullptr);
455     U2SequenceObject *seqObj = new U2SequenceObject(dna.getName(), ref);
456     objects.append(seqObj);
457 
458     DNAChromatogramObject *chromObj = DNAChromatogramObject::createInstance(cd, "Chromatogram", dbiRef, os, hints);
459     CHECK_OP(os, nullptr);
460     objects.append(chromObj);
461 
462     QString seqComment = dna.info.value(DNAInfo::COMMENT).toStringList().join("\n");
463     TextObject *textObj = TextObject::createInstance(seqComment, "Info", dbiRef, os, hints);
464     CHECK_OP(os, nullptr);
465     objects.append(textObj);
466 
467     Document *doc = new Document(this, io->getFactory(), io->getURL(), dbiRef, objects, fs);
468     chromObj->addObjectRelation(GObjectRelation(GObjectReference(seqObj), ObjectRole_Sequence));
469     return doc;
470 }
471 
loadABIObjects(SeekableBuf * fp,DNASequence & dna,DNAChromatogram & cd)472 bool ABIFormat::loadABIObjects(SeekableBuf *fp, DNASequence &dna, DNAChromatogram &cd) {
473     uint numPoints, numBases;
474     uint signalO;
475     int no_bases = 0;
476     int sections = READ_ALL;
477 
478     uint fwo_; /* base -> lane mapping */
479     uint indexO; /* File offset where the index is */
480     uint baseO; /* File offset where the bases are stored */
481     uint dataCO; /* File offset where the C trace is stored */
482     uint dataAO; /* File offset where the A trace is stored */
483     uint dataGO; /* File offset where the G trace is stored */
484     uint dataTO; /* File offset where the T trace is stored */
485     uint offset; /* Generic offset */
486     uint offset2; /* Generic offset */
487     uint offset3; /* Generic offset */
488     uint offset4; /* Generic offset */
489 
490     /* DATA block numbers for traces, in order of FWO_ */
491     int DataCount[4] = {9, 10, 11, 12};
492 
493     QString sequenceName;
494     QString sequenceComment;
495     QByteArray sequence;
496     DNAQuality quality;
497 
498     /* Get the index offset */
499     if (-1 == getABIIndexOffset(fp, &indexO))
500         return false;
501 
502     /* Get the number of points */
503     if (!getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[0], 3, &numPoints))
504         return false;
505 
506     /* Get the number of bases */
507     if (!getABIIndexEntryLW(fp, indexO, BaseEntryLabel, 1, 3, &numBases)) {
508         no_bases = 1;
509         numBases = 0;
510     }
511 
512     /* Allocate the sequence */
513     /* Allocate space for the bases - 1 extra for the ->base field so
514      * that we can treat it as a NULL terminated string.
515      */
516     if (sections & READ_BASES) {
517         cd.prob_A.resize(numBases + 1);
518         cd.prob_C.resize(numBases + 1);
519         cd.prob_G.resize(numBases + 1);
520         cd.prob_T.resize(numBases + 1);
521         cd.baseCalls.resize(numBases + 1);
522     }
523 
524     if (sections & READ_SAMPLES) {
525         cd.A.resize(numPoints + 1);
526         cd.C.resize(numPoints + 1);
527         cd.G.resize(numPoints + 1);
528         cd.T.resize(numPoints + 1);
529     }
530 
531     /* Get the Filter Wheel Order (FWO_) field ... */
532     if (!getABIIndexEntryLW(fp, indexO, FWO_Label, 1, 5, &fwo_)) {
533         /* Guess at CAGT */
534         fwo_ = 0x43414754;
535     }
536 
537     /*
538      * The order of the DATA fields is determined by the field FWO_
539      * Juggle around with data pointers to get it right
540      */
541     if (sections & READ_SAMPLES) {
542         uint *dataxO[4];
543 
544         dataxO[0] = &dataCO;
545         dataxO[1] = &dataAO;
546         dataxO[2] = &dataGO;
547         dataxO[3] = &dataTO;
548 
549         /*Get the positions of the four traces */
550         if (!(getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[0], 5, dataxO[baseIndex((char)(fwo_ >> 24 & 255))]) &&
551               getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[1], 5, dataxO[baseIndex((char)(fwo_ >> 16 & 255))]) &&
552               getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[2], 5, dataxO[baseIndex((char)(fwo_ >> 8 & 255))]) &&
553               getABIIndexEntryLW(fp, indexO, DataEntryLabel, DataCount[3], 5, dataxO[baseIndex((char)(fwo_ & 255))]))) {
554             return false;
555         }
556     }
557 
558     /*************************************************************
559      * Read the traces and bases information
560      *************************************************************/
561 
562     if (sections & READ_SAMPLES) {
563         /* Read in the C trace */
564         if (SeekBuf(fp, dataCO, 0) == -1)
565             return false;
566         getABIint2(fp, 0, 0, 0, cd.C.data(), numPoints);
567 
568         /* Read in the A trace */
569         if (SeekBuf(fp, dataAO, 0) == -1)
570             return false;
571         getABIint2(fp, 0, 0, 0, cd.A.data(), numPoints);
572 
573         /* Read in the G trace */
574         if (SeekBuf(fp, dataGO, 0) == -1)
575             return false;
576         getABIint2(fp, 0, 0, 0, cd.G.data(), numPoints);
577 
578         /* Read in the T trace */
579         if (SeekBuf(fp, dataTO, 0) == -1)
580             return false;
581         getABIint2(fp, 0, 0, 0, cd.T.data(), numPoints);
582 
583         /* Compute highest trace peak */
584         /*for (i=0; i < numPoints; i++) {
585             if (read->maxTraceVal < read->traceA[i])
586                 read->maxTraceVal = read->traceA[i];
587             if (read->maxTraceVal < read->traceC[i])
588                 read->maxTraceVal = read->traceC[i];
589             if (read->maxTraceVal < read->traceG[i])
590                 read->maxTraceVal = read->traceG[i];
591             if (read->maxTraceVal < read->traceT[i])
592                 read->maxTraceVal = read->traceT[i];
593         }*/
594     }
595 
596     if (no_bases || !(sections & READ_BASES)) {
597         goto skip_bases;
598     }
599 
600     /* Read in base confidence values */
601     {
602         QVector<uchar> conf(numBases);
603         int res = getABIint1(fp, indexO, BaseConfLabel, 2, conf.data(), numBases);
604 
605         /* Read in the bases */
606         if (!(getABIIndexEntryLW(fp, indexO, BaseEntryLabel, 1, 5, &baseO) && (SeekBuf(fp, baseO, 0) == 0))) {
607             return false;
608         }
609 
610         sequence = QByteArray(numBases, 0);
611         if (!fp->read(sequence.data(), numBases)) {
612             return false;
613         }
614 
615         QByteArray qualCodes(numBases, 0);
616 
617         if (res != -1) {
618             for (uint i = 0; i < numBases; i++) {
619                 qualCodes[i] = DNAQuality::encode(conf[i], DNAQualityType_Sanger);
620                 switch (sequence[i]) {
621                     case 'A':
622                     case 'a':
623                         cd.prob_A[i] = conf[i];
624                         cd.prob_C[i] = 0;
625                         cd.prob_G[i] = 0;
626                         cd.prob_T[i] = 0;
627                         break;
628 
629                     case 'C':
630                     case 'c':
631                         cd.prob_A[i] = 0;
632                         cd.prob_C[i] = conf[i];
633                         cd.prob_G[i] = 0;
634                         cd.prob_T[i] = 0;
635                         break;
636 
637                     case 'G':
638                     case 'g':
639                         cd.prob_A[i] = 0;
640                         cd.prob_C[i] = 0;
641                         cd.prob_G[i] = conf[i];
642                         cd.prob_T[i] = 0;
643                         break;
644 
645                     case 'T':
646                     case 't':
647                         cd.prob_A[i] = 0;
648                         cd.prob_C[i] = 0;
649                         cd.prob_G[i] = 0;
650                         cd.prob_T[i] = conf[i];
651                         break;
652 
653                     default:
654                         cd.prob_A[i] = 0;
655                         cd.prob_C[i] = 0;
656                         cd.prob_G[i] = 0;
657                         cd.prob_T[i] = 0;
658                         break;
659                 }
660             }
661         }
662 
663         quality.setQualCodes(qualCodes);
664     }
665 
666     /* Read in the base positions */
667     if (-1 == getABIint2(fp, indexO, BasePosEntryLabel, 1, cd.baseCalls.data(), numBases)) {
668         return false;
669     }
670 
671     /*
672      * Check for corrupted traces where the bases are positioned on sample
673      * coordinates which do not exist. Witnessed on some MegaBACE files.
674      */
675     if (cd.baseCalls[numBases - 1] > numPoints) {
676         int n = cd.baseCalls[numBases - 1] + 1;
677         cd.A.resize(n);
678         cd.C.resize(n);
679         cd.G.resize(n);
680         cd.T.resize(n);
681         numPoints = n;
682     }
683 
684 skip_bases:
685     /*************************************************************
686      * Gather useful information - the comments field
687      *************************************************************/
688     if (sections & READ_COMMENTS) {
689         char buffer[257];
690         char commstr[256];
691         int spacing;
692         ushort i2;
693         uint i4;
694 
695         /* The ABI comments */
696         int clen = getABIString(fp, indexO, CMNTLabel, 1, commstr);
697         if (clen != -1) {
698             commstr[clen] = 0;
699             char *commstrp = commstr;
700             char *p;
701             do {
702                 if ((p = strchr(commstrp, '\n'))) {
703                     *p++ = 0;
704                 }
705                 sequenceComment.append(QString("ABI Comment: %1\n").arg(commstrp));
706             } while ((commstrp = p));
707         }
708 
709         /* Get Sample Name Offset */
710         if (-1 != getABIString(fp, indexO, SMPLLabel, 1, buffer)) {
711             replace_nl(buffer);
712             sequenceComment.append(QString("Sample: %1\n").arg(buffer));
713             sequenceName.append(buffer);
714         }
715 
716         /* LANE */
717         if (-1 != getABIint2(fp, indexO, LANELabel, 1, &i2, 1)) {
718             sequenceComment.append(QString("LANE=%1\n").arg(i2));
719         }
720 
721         /* Get Signal Strength Offset */
722         if (getABIIndexEntryLW(fp, indexO, SignalEntryLabel, 1, 5, &signalO)) {
723             short C, A, G, T;
724             short *base[4];
725             base[0] = &C;
726             base[1] = &A;
727             base[2] = &G;
728             base[3] = &T;
729 
730             if (SeekBuf(fp, signalO, 0) != -1 &&
731                 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ >> 24 & 255))]) &&
732                 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ >> 16 & 255))]) &&
733                 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ >> 8 & 255))]) &&
734                 be_read_int_2(fp, (ushort *)base[baseIndex((char)(fwo_ & 255))])) {
735                 sequenceComment.append(QString("SIGN=A=%1,C=%2,G=%3,T=%4\n").arg(A).arg(C).arg(G).arg(T));
736             }
737         }
738 
739         /* Get the spacing.. it's a float but don't worry yet */
740         float fspacing = 0;
741         if (-1 != getABIint4(fp, indexO, SpacingEntryLabel, 1, (uint *)&spacing, 1)) {
742             fspacing = int_to_float(spacing);
743             sequenceComment.append(QString("SPAC=%1\n").arg(fspacing));  //-6.2f",
744         }
745         /* Correction for when spacing is negative. Why does this happen? */
746         if (fspacing <= 0) {
747             if (numBases > 1) {
748                 if (sections & READ_BASES)
749                     fspacing = (float)(cd.baseCalls[numBases - 1] - cd.baseCalls[0]) / (float)(numBases - 1);
750                 else
751                     fspacing = (float)numPoints / (float)numBases;
752             } else {
753                 fspacing = 1;
754             }
755         }
756 
757         /* Get primer position */
758         if (getABIIndexEntryLW(fp, indexO, PPOSLabel, 1, 5, (uint *)&i4)) {
759             /* ppos stores in MBShort of pointer */
760             sequenceComment.append(QString("PRIM=%1\n").arg(i4 >> 16));
761         }
762 
763         /* RUND/RUNT */
764         if (getABIIndexEntryLW(fp, indexO, RUNDLabel, 1, 5, &offset) &&
765             getABIIndexEntryLW(fp, indexO, RUNDLabel, 2, 5, &offset2) &&
766             getABIIndexEntryLW(fp, indexO, RUNTLabel, 1, 5, &offset3) &&
767             getABIIndexEntryLW(fp, indexO, RUNTLabel, 2, 5, &offset4)) {
768             // char buffer[1025];
769             char buffer_s[1025];
770             char buffer_e[1025];
771             struct tm t;
772             uint rund_s, rund_e, runt_s, runt_e;
773 
774             rund_s = offset;
775             rund_e = offset2;
776             runt_s = offset3;
777             runt_e = offset4;
778 
779             //                 sprintf(buffer, "%04d%02d%02d.%02d%02d%02d - %04d%02d%02d.%02d%02d%02d",
780             //                     rund_s >> 16, (rund_s >> 8) & 0xff, rund_s & 0xff,
781             //                     runt_s >> 24, (runt_s >> 16) & 0xff, (runt_s >> 8) & 0xff,
782             //                     rund_e >> 16, (rund_e >> 8) & 0xff, rund_e & 0xff,
783             //                     runt_e >> 24, (runt_e >> 16) & 0xff, (runt_e >> 8) & 0xff);
784             QString rundBuffer = QString("%1%2%3.%4%5%6 - %7%8%9.%10%11%12")
785                                      .arg((rund_s >> 16), 4, 10, QLatin1Char('0'))
786                                      .arg((rund_s >> 8) & 0xff, 2, 10, QLatin1Char('0'))
787                                      .arg((rund_s & 0xff), 2, 10, QLatin1Char('0'))
788                                      .arg(runt_s >> 24, 2, 10, QLatin1Char('0'))
789                                      .arg((runt_s >> 16) & 0xff, 2, 10, QLatin1Char('0'))
790                                      .arg((runt_s >> 8) & 0xff, 2, 10, QLatin1Char('0'))
791                                      .arg(rund_e >> 16, 4, 10, QLatin1Char('0'))
792                                      .arg((rund_e >> 8) & 0xff, 2, 10, QLatin1Char('0'))
793                                      .arg(rund_e & 0xff, 2, 10, QLatin1Char('0'))
794                                      .arg(runt_e >> 24, 2, 10, QLatin1Char('0'))
795                                      .arg((runt_e >> 16) & 0xff, 2, 10, QLatin1Char('0'))
796                                      .arg((runt_e >> 8) & 0xff, 2, 10, QLatin1Char('0'));
797 
798             memset(&t, 0, sizeof(t));
799             t.tm_mday = rund_s & 0xff;
800             t.tm_mon = ((rund_s >> 8) & 0xff) - 1;
801             t.tm_year = (rund_s >> 16) - 1900;
802             t.tm_hour = runt_s >> 24;
803             t.tm_min = (runt_s >> 16) & 0xff;
804             t.tm_sec = (runt_s >> 8) & 0xff;
805             t.tm_isdst = -1;
806             /*
807              * Convert struct tm to time_t. We ignore the time_t value, but
808              * the conversion process will update the tm_wday element of
809              * struct tm.
810              */
811             mktime(&t);
812             strftime(buffer_s, 1024, "%a %d %b %H:%M:%S %Y", &t);
813 
814             t.tm_mday = rund_e & 0xff;
815             t.tm_mon = ((rund_e >> 8) & 0xff) - 1;
816             t.tm_year = (rund_e >> 16) - 1900;
817             t.tm_hour = runt_e >> 24;
818             t.tm_min = (runt_e >> 16) & 0xff;
819             t.tm_sec = (runt_e >> 8) & 0xff;
820             t.tm_isdst = -1;
821             /*
822              * Convert struct tm to time_t. We ignore the time_t value, but
823              * the conversion process will update the tm_wday element of
824              * struct tm.
825              */
826             mktime(&t);
827             strftime(buffer_e, 1024, "%a %d %b %H:%M:%S %Y", &t);
828 
829             sequenceComment.append(QString("DATE=%1 to %2\nRUND=%3\n").arg(buffer_s).arg(buffer_e).arg(rundBuffer));
830         }
831 
832         /* Get Dye Primer Offset */
833         if (-1 != getABIString(fp, indexO, PDMFLabel, 1, buffer)) {
834             replace_nl(buffer);
835             sequenceComment.append(QString("DYEP=%1\n").arg(buffer));
836         }
837 
838         /* Get Machine Name Offset */
839         if (-1 != getABIString(fp, indexO, MCHNLabel, 1, buffer)) {
840             replace_nl(buffer);
841             sequenceComment.append(QString("MACH=%1\n").arg(buffer));
842         }
843 
844         /* Machine model */
845         if (-1 != getABIString(fp, indexO, MODLLabel, 1, buffer)) {
846             replace_nl(buffer);
847             sequenceComment.append(QString("MODL=%1\n").arg(buffer));
848         }
849 
850         /* Matrix file */
851         if (-1 != getABIString(fp, indexO, MTXFLabel, 1, buffer)) {
852             replace_nl(buffer);
853             sequenceComment.append(QString("MTXF=%1\n").arg(buffer));
854         }
855 
856         /* Base calling version */
857         if (-1 != getABIString(fp, indexO, SPACLabel, 2, buffer)) {
858             replace_nl(buffer);
859             sequenceComment.append(QString("BCAL=%1\n").arg(buffer));
860         }
861 
862         /* Software versions */
863         if (-1 != getABIString(fp, indexO, SVERLabel, 1, buffer)) {
864             replace_nl(buffer);
865             sequenceComment.append(QString("VER1=%1\n").arg(buffer));
866         }
867         if (-1 != getABIString(fp, indexO, SVERLabel, 2, buffer)) {
868             replace_nl(buffer);
869             sequenceComment.append(QString("VER2=%1\n").arg(buffer));
870         }
871 
872         /* Get Gel Name Offset */
873         if (-1 != getABIString(fp, indexO, GelNameLabel, 1, buffer)) {
874             replace_nl(buffer);
875             sequenceComment.append(QString("GELN=%1\n").arg(buffer));
876         }
877     }
878 
879     /* SUCCESS */
880     cd.name = sequenceName.isEmpty() ? "chromatogram" : sequenceName + " chromatogram";
881     cd.hasQV = true;
882     cd.seqLength = sequence.size();
883     assert(sequence.size() == int(numBases));
884     cd.traceLength = numPoints;
885     assert(cd.A.size() == int(numPoints + 1));
886 
887     dna.setName(sequenceName);
888     dna.seq = sequence;
889     dna.info.insert(DNAInfo::COMMENT, sequenceComment.split("\n"));
890     dna.quality = quality;
891 
892     return true;
893 }
894 
895 }  // namespace U2
896