1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
4 This file is part of Velvet.
5
6 Velvet is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 Velvet is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Velvet; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <time.h>
25 #include <sys/time.h>
26
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30
31 #include "globals.h"
32 #include "readSet.h"
33 #include "splay.h"
34 #include "tightString.h"
35 #include "utility.h"
36 #include "kmer.h"
37 #include "kmerOccurenceTable.h"
38 #include "recycleBin.h"
39 #include "binarySequences.h"
40
41 static RecycleBin * maskMemory = NULL;
42
allocateMask()43 static Mask *allocateMask()
44 {
45 if (maskMemory == NULL)
46 maskMemory = newRecycleBin(sizeof(Mask), 10000);
47
48 return (Mask *) allocatePointer(maskMemory);
49 }
50
newMask(Coordinate position)51 static Mask * newMask(Coordinate position)
52 {
53 Mask * mask = allocateMask();
54 mask->start = position;
55 mask->finish = position;
56 mask->next = NULL;
57 return mask;
58 }
59
60 // DEBUG
61 boolean debug = false;
62
63 #define HASH_BUCKETS_NB 16777216
64
65 #ifdef _OPENMP
66
67 #define NB_PUSH 32
68 #define BUFFER_SIZE 4096
69
70 static StringBuffer **annotationBuffer = NULL;
71 static StringBuffer **annotationBufferW = NULL;
72 static int *nbPush = NULL;
73 static boolean producing = 1;
74
initAnnotationBuffers(void)75 static void initAnnotationBuffers(void)
76 {
77 int n;
78 int i;
79
80 n = omp_get_max_threads();
81 annotationBuffer = callocOrExit(n, StringBuffer*);
82 annotationBufferW = callocOrExit(n, StringBuffer*);
83 nbPush = callocOrExit(n, int);
84
85 for (i = 0; i < n; i++)
86 {
87 annotationBuffer[i] = newStringBuffer(BUFFER_SIZE);
88 annotationBufferW[i] = newStringBuffer(BUFFER_SIZE);
89 }
90 }
91
destroyAnnotationBuffers(void)92 static void destroyAnnotationBuffers(void)
93 {
94 int n;
95 int i;
96
97 n = omp_get_max_threads();
98
99 for (i = 0; i < n; i++)
100 {
101 destroyStringBuffer(annotationBuffer[i], 1);
102 destroyStringBuffer(annotationBufferW[i], 1);
103 }
104
105 free(annotationBuffer);
106 free(annotationBufferW);
107 free(nbPush);
108 annotationBuffer = NULL;
109 annotationBufferW = NULL;
110 nbPush = NULL;
111 }
112
pushBufferCommit(int thread)113 static void pushBufferCommit(int thread)
114 {
115 StringBuffer *tmp;
116 char *s;
117
118 s = annotationBufferW[thread]->str;
119 do
120 {
121 #pragma omp flush(s)
122 }
123 while (*s);
124 tmp = annotationBufferW[thread];
125 annotationBufferW[thread] = annotationBuffer[thread];
126 annotationBuffer[thread] = tmp;
127 tmp = annotationBufferW[thread];
128 #pragma omp flush(tmp)
129 }
130
pushBuffer(int thread)131 static void pushBuffer(int thread)
132 {
133 if (++nbPush[thread] == NB_PUSH)
134 {
135 nbPush[thread] = 0;
136 pushBufferCommit(thread);
137 }
138 }
139
writeBuffers(FILE * outFile,int nbThreads)140 static void writeBuffers(FILE *outFile, int nbThreads)
141 {
142 int i;
143
144 for (i = 0; i < nbThreads; i++)
145 {
146 StringBuffer *b;
147 char *s;
148
149 b = annotationBufferW[i];
150 #pragma omp flush(b)
151 s = b->str;
152 #pragma omp flush(s)
153 if (*s)
154 {
155 velvetFprintf(outFile, "%s", annotationBufferW[i]->str);
156 resetStringBuffer(annotationBufferW[i]);
157 }
158 }
159 }
160
bufferWritter(FILE * outFile)161 static void bufferWritter(FILE *outFile)
162 {
163 int n;
164
165 n = omp_get_max_threads();
166 #pragma omp flush(producing)
167 while (producing)
168 {
169 writeBuffers(outFile, n);
170 #pragma omp flush(producing)
171 }
172 writeBuffers(outFile, n);
173 }
174
appendLine(char * line,int thread)175 static void appendLine(char *line, int thread)
176 {
177 appendStringBuffer(annotationBuffer[thread], line);
178 }
179 #else
180
181 #define BUFFER_SIZE 1024
182
183 StringBuffer *annotationBuffer = NULL;
184
appendLine(char * line,int thread)185 static void appendLine(char *line, int thread)
186 {
187 appendStringBuffer(annotationBuffer, line);
188 }
189 #endif
190
191 struct splayTable_st {
192 SplayTree **table;
193 #ifdef _OPENMP
194 omp_lock_t *tableLocks;
195 #endif
196 KmerOccurenceTable *kmerOccurenceTable;
197 int WORDLENGTH;
198 boolean double_strand;
199 };
200
newSplayTable(int WORDLENGTH,boolean double_strand)201 SplayTable *newSplayTable(int WORDLENGTH, boolean double_strand)
202 {
203 SplayTable *splayTable = mallocOrExit(1, SplayTable);
204 splayTable->WORDLENGTH = WORDLENGTH;
205 splayTable->table = callocOrExit(HASH_BUCKETS_NB, SplayTree *);
206 splayTable->kmerOccurenceTable = NULL;
207 splayTable->double_strand = double_strand;
208 #ifdef _OPENMP
209 splayTable->tableLocks = mallocOrExit(HASH_BUCKETS_NB, omp_lock_t);
210 int i;
211 #pragma omp parallel for
212 for (i = 0; i < HASH_BUCKETS_NB; i++)
213 omp_init_lock(splayTable->tableLocks + i);
214 initSplayTreeMemory();
215 #endif
216 return splayTable;
217 }
218
destroySplayTable(SplayTable * splayTable)219 void destroySplayTable(SplayTable * splayTable)
220 {
221 velvetLog("Destroying splay table\n");
222
223 destroyAllSplayTrees();
224 free(splayTable->table);
225 destroyKmerOccurenceTable(splayTable->kmerOccurenceTable);
226 free(splayTable);
227
228 velvetLog("Splay table destroyed\n");
229 }
230
hash_kmer(Kmer * kmer)231 static KmerKey hash_kmer(Kmer * kmer)
232 {
233 #if KMER_LONGLONGS
234 KmerKey key = kmer->longlongs[0];
235
236 #if KMER_LONGLONGS > 1
237 key ^= kmer->longlongs[1];
238 #endif
239 #if KMER_LONGLONGS > 2
240 key ^= kmer->longlongs[2];
241 #endif
242
243 key = (~key) + (key << 21);
244 key = key ^ (key >> 24);
245 key = (key + (key << 3)) + (key << 8);
246 key = key ^ (key >> 14);
247 key = (key + (key << 2)) + (key << 4);
248 key = key ^ (key >> 28);
249 key = key + (key << 31);
250
251 return key % HASH_BUCKETS_NB;
252 #elif KMER_LONGS
253 KmerKey key = kmer->longs;
254
255 key += ~(key << 15);
256 key ^= (key >> 10);
257 key += (key << 3);
258 key ^= (key >> 6);
259 key += ~(key << 11);
260 key ^= (key >> 16);
261
262 return key % HASH_BUCKETS_NB;
263
264 #elif KMER_INTS
265 return kmer->ints % HASH_BUCKETS_NB;
266 #elif KMER_CHARS
267 return kmer->chars % HASH_BUCKETS_NB;
268 #endif
269 }
270
getNearestHSPIndex(Coordinate position,IDnum * sequenceIDs,Coordinate sequenceLength)271 static Coordinate getNearestHSPIndex(Coordinate position, IDnum * sequenceIDs, Coordinate sequenceLength) {
272 Coordinate back_offset = -1;
273 Coordinate front_offset = -1;
274
275 for (back_offset = 1; position - back_offset > 0; back_offset++)
276 if (sequenceIDs[position - back_offset])
277 break;
278
279 for (front_offset = 1; position + front_offset < sequenceLength; front_offset++)
280 if (sequenceIDs[position + front_offset])
281 break;
282
283 if (back_offset == position && position + front_offset == sequenceLength)
284 return -1;
285 else if (back_offset == position)
286 return position + front_offset;
287 else if (front_offset + position == sequenceLength)
288 return position - back_offset;
289 else
290 return back_offset < front_offset? position - back_offset : position + front_offset;
291 }
292
getMostAppropriateHit(Coordinate readCoord,Coordinate readLength,boolean direct,KmerOccurence * kmerOccurence,IDnum mapCount,IDnum * mapSequenceID,Coordinate * mapCoord,int wordLength)293 static KmerOccurence * getMostAppropriateHit(Coordinate readCoord, Coordinate readLength, boolean direct, KmerOccurence * kmerOccurence, IDnum mapCount, IDnum * mapSequenceID, Coordinate * mapCoord, int wordLength) {
294 KmerOccurence * current;
295 KmerOccurence * best = NULL;
296 Coordinate expectedPosition;
297 Coordinate positionError;
298 IDnum mapIndex;
299
300 // If only one hit
301 if (!getNextKmerOccurence(kmerOccurence))
302 return kmerOccurence;
303
304 // If multiple hits by unmapped read
305 if (mapCount == 0)
306 return NULL;
307
308 // Compare cases
309 for (current = kmerOccurence; current; current = getNextKmerOccurence(current)) {
310 for (mapIndex = 0; mapIndex < mapCount; mapIndex++) {
311
312 // If wrong sequence or unconsistent orientation
313 if ((direct && getKmerOccurenceNodeID(current) != mapSequenceID[mapIndex])
314 || (!direct && getKmerOccurenceNodeID(current) != -mapSequenceID[mapIndex]))
315 continue;
316
317 // Compute where it is supposed to land on reference
318 if (mapSequenceID[mapIndex] < 0)
319 expectedPosition = mapCoord[mapIndex] + readLength - readCoord - 1;
320 else
321 expectedPosition = mapCoord[mapIndex] + readCoord - wordLength + 1;
322
323 // Compute positional error
324 positionError = getKmerOccurencePosition(current) - expectedPosition;
325
326 // If potential hit record
327 if (positionError < 1 && positionError > -1) {
328 if (best)
329 // If competing hit, give up
330 return NULL;
331 else
332 // Record current hit
333 best = current;
334 }
335 }
336 }
337
338 return best;
339 }
340
341 static inline boolean
doFindOrInsertOccurenceInSplayTree(Kmer * kmer,IDnum * seqID,Coordinate * position,SplayTable * table)342 doFindOrInsertOccurenceInSplayTree(Kmer * kmer, IDnum * seqID,
343 Coordinate * position, SplayTable *table)
344 {
345 #ifdef _OPENMP
346 const KmerKey kmerHash = hash_kmer(kmer);
347 boolean ret;
348
349 omp_set_lock(table->tableLocks + kmerHash);
350 ret = findOrInsertOccurenceInSplayTree(kmer, seqID, position,
351 table->table + kmerHash);
352 omp_unset_lock(table->tableLocks + kmerHash);
353
354 return ret;
355 #else
356 return findOrInsertOccurenceInSplayTree(kmer, seqID, position,
357 &table->table[hash_kmer(kmer)]);
358 #endif
359 }
360
361
findOrInsertOccurenceInSplayTable(Kmer * kmer,IDnum * seqID,Coordinate * position,SplayTable * table,IDnum * sequenceIDs,Coordinate * coords,Coordinate readIndex,Coordinate readLength,boolean direct)362 static boolean findOrInsertOccurenceInSplayTable(Kmer * kmer, IDnum * seqID,
363 Coordinate * position,
364 SplayTable * table, IDnum * sequenceIDs,
365 Coordinate * coords, Coordinate readIndex, Coordinate readLength, boolean direct)
366 {
367 KmerOccurence * hit;
368 Coordinate HSPIndex;
369
370 // Check if previous anchor
371 if (sequenceIDs && sequenceIDs[readIndex]) {
372 if (direct)
373 *seqID = sequenceIDs[readIndex];
374 else
375 *seqID = -sequenceIDs[readIndex];
376 if (sequenceIDs[readIndex] > 0)
377 *position = coords[readIndex] + readIndex;
378 else
379 *position = coords[readIndex] - readIndex + readLength - 1;
380
381 return true;
382 }
383 else if (coords && coords[readIndex])
384 // If in buffer zone:
385 return doFindOrInsertOccurenceInSplayTree(kmer, seqID, position, table);
386
387
388 if (debug)
389 abort();
390 // Look up first in reference sequence k-mers
391 if (table->kmerOccurenceTable
392 && (hit = findKmerInKmerOccurenceTable(kmer, table->kmerOccurenceTable))) {
393 if (!getNextKmerOccurence(hit)) {
394 *seqID = getKmerOccurenceNodeID(hit);
395 *position = getKmerOccurencePosition(hit);
396 return true;
397 } else if ((HSPIndex = getNearestHSPIndex(*position, sequenceIDs, readLength)) > 0) {
398 hit = getMostAppropriateHit(readIndex, readLength, direct, hit, 1, &(sequenceIDs[HSPIndex]), &(coords[HSPIndex]), table->WORDLENGTH);
399 if (hit) {
400 *seqID = getKmerOccurenceNodeID(hit);
401 *position = getKmerOccurencePosition(hit);
402 return true;
403 }
404
405 }
406 }
407
408 // If not, go through the novel k-mers
409 return doFindOrInsertOccurenceInSplayTree(kmer, seqID, position, table);
410 }
411
printAnnotations(IDnum * sequenceIDs,Coordinate * coords,TightString * array,SplayTable * table,FILE * file,boolean second_in_pair,IDnum seqID)412 static void printAnnotations(IDnum *sequenceIDs, Coordinate * coords,
413 TightString * array, SplayTable * table,
414 FILE * file, boolean second_in_pair, IDnum seqID)
415 {
416 Coordinate readNucleotideIndex = 0;
417 Coordinate writeNucleotideIndex = 0;
418 Kmer word;
419 Kmer antiWord;
420 boolean annotationClosed = true;
421 IDnum sequenceID;
422 Coordinate coord;
423 boolean found;
424 Coordinate position = 0;
425 Coordinate start = 0;
426 Coordinate finish = 0;
427 IDnum referenceSequenceID = 0;
428 Nucleotide nucleotide;
429 char lineBuffer[MAXLINE];
430 TightString * tString = getTightStringInArray(array, seqID - 1);
431 int thread = 0;
432
433 clearKmer(&word);
434 clearKmer(&antiWord);
435
436 #ifdef _OPENMP
437 thread = omp_get_thread_num();
438 #endif
439
440 if (debug)
441 abort();
442
443 sprintf(lineBuffer, "ROADMAP %li\n", (long)seqID);
444 appendLine(lineBuffer, thread);
445
446 // Neglect any string shorter than WORDLENGTH :
447 if (getLength(tString) < table->WORDLENGTH) {
448 #ifdef _OPENMP
449 pushBuffer(thread);
450 #else
451 velvetFprintf(file, "%s", annotationBuffer->str);
452 resetStringBuffer(annotationBuffer);
453 #endif
454 return;
455 }
456
457 // Fill in the initial word :
458 for (readNucleotideIndex = 0;
459 readNucleotideIndex < table->WORDLENGTH - 1;
460 readNucleotideIndex++) {
461 nucleotide = getNucleotide(readNucleotideIndex, tString);
462 pushNucleotide(&word, nucleotide);
463 #ifdef COLOR
464 reversePushNucleotide(&antiWord, nucleotide);
465 #else
466 reversePushNucleotide(&antiWord, 3 - nucleotide);
467 #endif
468 }
469
470 while (readNucleotideIndex < getLength(tString)) {
471 // Shift word:
472 nucleotide = getNucleotide(readNucleotideIndex, tString);
473 pushNucleotide(&word, nucleotide);
474 #ifdef COLOR
475 reversePushNucleotide(&antiWord, nucleotide);
476 #else
477 reversePushNucleotide(&antiWord, 3 - nucleotide);
478 #endif
479
480 sequenceID = seqID;
481 coord = writeNucleotideIndex;
482
483 if (table->double_strand) {
484 if (compareKmers(&word, &antiWord) <= 0) {
485 found =
486 findOrInsertOccurenceInSplayTable(&word,
487 &sequenceID,
488 &coord,
489 table,
490 sequenceIDs,
491 coords,
492 readNucleotideIndex,
493 getLength(tString),
494 true);
495 } else {
496 sequenceID = -sequenceID;
497 found =
498 findOrInsertOccurenceInSplayTable(&antiWord,
499 &sequenceID,
500 &coord,
501 table,
502 sequenceIDs,
503 coords,
504 readNucleotideIndex,
505 getLength(tString),
506 false);
507 sequenceID = -sequenceID;
508 }
509 } else {
510 if (!second_in_pair) {
511 found =
512 findOrInsertOccurenceInSplayTable(&word,
513 &sequenceID,
514 &coord,
515 table,
516 sequenceIDs,
517 coords,
518 readNucleotideIndex,
519 getLength(tString),
520 true);
521 } else {
522 sequenceID = -sequenceID;
523 found =
524 findOrInsertOccurenceInSplayTable(&antiWord,
525 &sequenceID,
526 &coord,
527 table,
528 sequenceIDs,
529 coords,
530 readNucleotideIndex,
531 getLength(tString),
532 false);
533 sequenceID = -sequenceID;
534 }
535 }
536
537 if (!found) {
538 writeNucleotideIndex++;
539 if (!annotationClosed) {
540 sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
541 (long) referenceSequenceID, (long long) position,
542 (long long) start, (long long) finish);
543 appendLine(lineBuffer, thread);
544 }
545 annotationClosed = true;
546 }
547 // Otherwise create/complete annotation:
548 else {
549 // Forbidden k-mer
550 if (sequenceID == 0) {
551 break;
552 }
553 // Closed/inexistant annotation
554 else if (annotationClosed) {
555 referenceSequenceID = sequenceID;
556 position = writeNucleotideIndex;
557 start = finish = coord;
558
559 if (referenceSequenceID > 0)
560 finish++;
561 else
562 finish--;
563
564 annotationClosed = false;
565 }
566 // Open annotation
567 else if (sequenceID == referenceSequenceID
568 && coord == finish) {
569 if (referenceSequenceID > 0)
570 finish++;
571 else
572 finish--;
573 }
574 // Previous non corresponding annotation
575 else {
576 sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
577 (long) referenceSequenceID, (long long) position,
578 (long long) start, (long long) finish);
579 appendLine(lineBuffer, thread);
580
581 referenceSequenceID = sequenceID;
582 position = writeNucleotideIndex;
583 start = finish = coord;
584
585 if (referenceSequenceID > 0)
586 finish++;
587 else
588 finish--;
589 }
590 }
591
592 readNucleotideIndex++;
593 }
594
595 if (!annotationClosed) {
596 sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
597 (long) referenceSequenceID, (long long) position,
598 (long long) start, (long long) finish);
599 appendLine(lineBuffer, thread);
600 }
601 #ifdef _OPENMP
602 pushBuffer(thread);
603 #else
604 velvetFprintf(file, "%s", annotationBuffer->str);
605 resetStringBuffer(annotationBuffer);
606 #endif
607
608 return;
609 }
610
computeClearHSPs(TightString * array,boolean second_in_pair,SplayTable * table,IDnum ** sequenceIDs,Coordinate ** coords,IDnum * mapReferenceIDs,Coordinate * mapCoords,Coordinate mapCount,IDnum seqID)611 static void computeClearHSPs(TightString * array, boolean second_in_pair, SplayTable * table, IDnum ** sequenceIDs, Coordinate ** coords, IDnum * mapReferenceIDs, Coordinate * mapCoords, Coordinate mapCount, IDnum seqID) {
612 Coordinate readNucleotideIndex = 0;
613 Kmer word;
614 Kmer antiWord;
615 Kmer polyA;
616 Nucleotide nucleotide;
617 KmerOccurence * hit;
618
619 int penalty;
620 TightString * tString;
621 Coordinate length;
622
623 clearKmer(&polyA);
624 tString = getTightStringInArray(array, seqID - 1);
625 length = getLength(tString);
626 *sequenceIDs = callocOrExit(length, IDnum);
627 *coords = callocOrExit(length, Coordinate);
628
629 // First pass for unambiguous hits
630 // Fill in the initial word :
631 clearKmer(&word);
632 clearKmer(&antiWord);
633 for (readNucleotideIndex = 0;
634 readNucleotideIndex < table->WORDLENGTH - 1;
635 readNucleotideIndex++) {
636 nucleotide = getNucleotide(readNucleotideIndex, tString);
637 pushNucleotide(&word, nucleotide);
638 #ifdef COLOR
639 reversePushNucleotide(&antiWord, nucleotide);
640 #else
641 reversePushNucleotide(&antiWord, 3 - nucleotide);
642 #endif
643 }
644
645 // Kill silly poly-T beginnings
646 while (readNucleotideIndex < getLength(tString) && (compareKmers(&antiWord, &polyA) == 0 || compareKmers(&word, &polyA) == 0)) {
647 nucleotide = getNucleotide(readNucleotideIndex++, tString);
648 pushNucleotide(&word, nucleotide);
649 #ifdef COLOR
650 reversePushNucleotide(&antiWord, nucleotide);
651 #else
652 reversePushNucleotide(&antiWord, 3 - nucleotide);
653 #endif
654 }
655
656 while (readNucleotideIndex < getLength(tString)) {
657 // Shift word:
658 nucleotide = getNucleotide(readNucleotideIndex, tString);
659 pushNucleotide(&word, nucleotide);
660
661 #ifdef COLOR
662 reversePushNucleotide(&antiWord, nucleotide);
663 #else
664 reversePushNucleotide(&antiWord, 3 - nucleotide);
665 #endif
666
667 if (table->double_strand) {
668 if (compareKmers(&word, &antiWord) <= 0) {
669 hit = findKmerInKmerOccurenceTable(&word, table->kmerOccurenceTable);
670
671 if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), true, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
672 (*sequenceIDs)[readNucleotideIndex] = getKmerOccurenceNodeID(hit);
673 } else {
674 hit = findKmerInKmerOccurenceTable(&antiWord, table->kmerOccurenceTable);
675
676 if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), false, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
677 (*sequenceIDs)[readNucleotideIndex] = -getKmerOccurenceNodeID(hit);
678 }
679 } else {
680 if (!second_in_pair) {
681 hit = findKmerInKmerOccurenceTable(&word, table->kmerOccurenceTable);
682
683 if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), true, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
684 (*sequenceIDs)[readNucleotideIndex] = getKmerOccurenceNodeID(hit);
685 } else {
686 hit = findKmerInKmerOccurenceTable(&antiWord, table->kmerOccurenceTable);
687
688 if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), false, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
689 (*sequenceIDs)[readNucleotideIndex] = -getKmerOccurenceNodeID(hit);
690 }
691 }
692
693 if ((*sequenceIDs)[readNucleotideIndex]) {
694 if ((*sequenceIDs)[readNucleotideIndex] > 0)
695 (*coords)[readNucleotideIndex] = getKmerOccurencePosition(hit) - readNucleotideIndex;
696 else
697 (*coords)[readNucleotideIndex] = getKmerOccurencePosition(hit) + readNucleotideIndex - getLength(tString) + 1;
698 }
699
700 // Barrier to flip-flopping
701 if ((*sequenceIDs)[readNucleotideIndex - 1] != 0
702 && ((*sequenceIDs)[readNucleotideIndex] != (*sequenceIDs)[readNucleotideIndex - 1]
703 || (*coords)[readNucleotideIndex] != (*coords)[readNucleotideIndex - 1])) {
704 // Break in continuity... skip k positions
705 (*sequenceIDs)[readNucleotideIndex] = 0;
706 (*coords)[readNucleotideIndex] = -1;
707 readNucleotideIndex++;
708
709 for (penalty = 0; penalty < table->WORDLENGTH - 1 && readNucleotideIndex < getLength(tString); penalty++) {
710 nucleotide = getNucleotide(readNucleotideIndex, tString);
711 pushNucleotide(&word, nucleotide);
712
713 #ifdef COLOR
714 reversePushNucleotide(&antiWord, nucleotide);
715 #else
716 reversePushNucleotide(&antiWord, 3 - nucleotide);
717 #endif
718 (*sequenceIDs)[readNucleotideIndex] = 0;
719 (*coords)[readNucleotideIndex] = -1;
720 readNucleotideIndex++;
721 }
722 } else
723 readNucleotideIndex++;
724
725 }
726
727 free(mapReferenceIDs);
728 free(mapCoords);
729 }
730
inputSequenceIntoSplayTable(TightString * array,SplayTable * table,FILE * file,boolean second_in_pair,IDnum * mapReferenceIDs,Coordinate * mapCoords,Coordinate mapCount,IDnum seqID)731 void inputSequenceIntoSplayTable(TightString * array,
732 SplayTable * table,
733 FILE * file,
734 boolean second_in_pair,
735 IDnum * mapReferenceIDs, Coordinate * mapCoords, Coordinate mapCount,
736 IDnum seqID)
737 {
738 IDnum * sequenceIDs = NULL;
739 Coordinate * coords = NULL;
740
741 //debug = (seqID == 29405);
742
743 // If appropriate, get the HSPs on reference sequences
744 if (table->kmerOccurenceTable)
745 computeClearHSPs(array, second_in_pair, table, &sequenceIDs, &coords, mapReferenceIDs, mapCoords, mapCount, seqID);
746
747 // Go through read, eventually with annotations
748 printAnnotations(sequenceIDs, coords, array, table, file, second_in_pair, seqID);
749
750 // Clean up
751 if (sequenceIDs) {
752 free(sequenceIDs);
753 free(coords);
754 }
755 }
756
inputReferenceIntoSplayTable(TightString * tString,SplayTable * table,FILE * file,IDnum seqID,Mask * mask)757 void inputReferenceIntoSplayTable(TightString * tString,
758 SplayTable * table, FILE * file, IDnum seqID, Mask * mask)
759 {
760 IDnum currentIndex;
761 Coordinate readNucleotideIndex = 0;
762 Coordinate kmerIndex = 0;
763 Kmer word;
764 Kmer antiWord;
765 Nucleotide nucleotide;
766 Mask * currentMask = mask;
767 #ifdef _OPENMP
768 char lineBuffer[MAXLINE];
769 #endif
770
771 clearKmer(&word);
772 clearKmer(&antiWord);
773
774 currentIndex = seqID;
775 #ifdef _OPENMP
776 sprintf(lineBuffer, "ROADMAP %li\n", (long)currentIndex);
777 appendLine(lineBuffer, omp_get_thread_num());
778 #else
779 velvetFprintf(file, "ROADMAP %li\n", (long)currentIndex);
780 #endif
781
782 // Neglect any string shorter than WORDLENGTH :
783 if (getLength(tString) < table->WORDLENGTH) {
784 return;
785 }
786
787 // Fill in the initial word :
788 for (readNucleotideIndex = 0;
789 readNucleotideIndex < table->WORDLENGTH - 1;
790 readNucleotideIndex++) {
791 nucleotide = getNucleotide(readNucleotideIndex, tString);
792 pushNucleotide(&word, nucleotide);
793 if (table->double_strand) {
794 #ifdef COLOR
795 reversePushNucleotide(&antiWord, nucleotide);
796 #else
797 reversePushNucleotide(&antiWord, 3 - nucleotide);
798 #endif
799 }
800 }
801
802 while (readNucleotideIndex < getLength(tString)) {
803 // Shift word:
804 nucleotide = getNucleotide(readNucleotideIndex, tString);
805 pushNucleotide(&word, nucleotide);
806
807 if (table->double_strand) {
808 #ifdef COLOR
809 reversePushNucleotide(&antiWord, nucleotide);
810 #else
811 reversePushNucleotide(&antiWord, 3 - nucleotide);
812 #endif
813 }
814
815 // Check for gap masks:
816 if (currentMask && currentMask->start - table->WORDLENGTH + 1 <= readNucleotideIndex) {
817 while(currentMask && currentMask->finish + table->WORDLENGTH - 1 < readNucleotideIndex)
818 currentMask = currentMask->next;
819
820 if (currentMask && currentMask->finish + table->WORDLENGTH - 1 >= readNucleotideIndex) {
821 readNucleotideIndex++;
822 kmerIndex++;
823 continue;
824 }
825 }
826
827 // Record k-mer
828 if (table->double_strand) {
829 if (compareKmers(&word, &antiWord) <= 0)
830 recordKmerOccurence(&word, currentIndex,
831 kmerIndex,
832 table->kmerOccurenceTable);
833 else
834 recordKmerOccurence(&antiWord, -currentIndex,
835 kmerIndex,
836 table->kmerOccurenceTable);
837 } else {
838 recordKmerOccurence(&word, currentIndex,
839 kmerIndex,
840 table->kmerOccurenceTable);
841 }
842 readNucleotideIndex++;
843 kmerIndex++;
844 }
845
846 return;
847 }
848
countReferenceKmers(ReadSet * reads,int wordLength)849 static Coordinate countReferenceKmers(ReadSet * reads, int wordLength) {
850 IDnum readIndex;
851 Coordinate length = 0;
852
853
854 for (readIndex = 0; readIndex < reads->readCount && reads->categories[readIndex] == REFERENCE; readIndex++)
855 {
856 Coordinate tmpLength = getLength(getTightStringInArray(reads->tSequences, readIndex));
857 if (tmpLength >= wordLength)
858 length += tmpLength - wordLength + 1;
859 }
860
861 return length;
862 }
863
scanReferenceSequences(FILE * file,IDnum referenceSequenceCount)864 Mask ** scanReferenceSequences(FILE * file, IDnum referenceSequenceCount) {
865 Mask ** referenceMasks = callocOrExit(referenceSequenceCount, Mask*);
866 IDnum index;
867 char line[MAXLINE];
868 char c = '\0';
869
870 // Search sequences for masks
871 for (index = 0; index < referenceSequenceCount; index++) {
872 Mask * current = NULL;
873 Coordinate position = 0;
874 boolean openMask = false;
875
876 // Read through header
877 fgets(line, MAXLINE, file);
878
879 // Read through sequence
880 while ((c = getc(file))) {
881 if (c == EOF || c == '>')
882 break;
883 else if (c == '\r' || c == '\n')
884 continue;
885 else if (c == 'n' || c == 'N') {
886 if (openMask)
887 current->finish++;
888 else if (referenceMasks[index] == NULL) {
889 referenceMasks[index] = newMask(position);
890 current = referenceMasks[index];
891 } else {
892 current->next = newMask(position);
893 current = current->next;
894 }
895 openMask = true;
896 position++;
897 } else {
898 openMask = false;
899 position++;
900 }
901 }
902 }
903
904 if (c != '\0')
905 ungetc(c, file);
906 return referenceMasks;
907 }
908
scanBinaryReferenceSequences(SequencesReader * seqReadInfo,IDnum referenceSequenceCount)909 Mask ** scanBinaryReferenceSequences(SequencesReader *seqReadInfo, IDnum referenceSequenceCount) {
910 Mask ** referenceMasks = callocOrExit(referenceSequenceCount, Mask*);
911 IDnum index;
912 char line[MAXLINE];
913 char c = '\0';
914 FILE *file = fopen(seqReadInfo->m_namesFilename, "r");
915 if (file == NULL) {
916 exitErrorf(EXIT_FAILURE, true, "Couldn't read file %s", seqReadInfo->m_namesFilename);
917 } else {
918 velvetLog("Reading mapping info from %s\n", seqReadInfo->m_namesFilename);
919 }
920
921 // Search sequences for masks
922 for (index = 0; index < referenceSequenceCount; index++) {
923 Mask * current = NULL;
924 long start = 0;
925 long finish = 0;
926 long number;
927 long cat;
928
929 // Read through header
930 if ((c = getc(file)) != '>') {
931 exitErrorf(EXIT_FAILURE, false, "names line did not start with >");
932 }
933 fgets(line, MAXLINE, file);
934 sscanf(line, "%*[^\t]\t%li\t%li\n", &number, &cat);
935 // ensure is is a ref cat
936 if ((IDnum) number != index + 1) {
937 exitErrorf(EXIT_FAILURE, false, "sequence %ld != expected %ld", number, (long) index);
938 }
939 if ((Category) cat != REFERENCE) {
940 exitErrorf(EXIT_FAILURE, false, "unexpected category %ld", cat);
941 }
942
943 // Read through the reference maps
944 while ((c = getc(file))) {
945 if (c == EOF || c == '>') {
946 break;
947 }
948 ungetc(c, file);
949 fgets(line, MAXLINE, file);
950 sscanf(line, "%li\t%li\n", &start, &finish);
951 if (referenceMasks[index] == NULL) {
952 referenceMasks[index] = newMask(start);
953 referenceMasks[index]->finish = finish;
954 current = referenceMasks[index];
955 } else {
956 current->next = newMask(start);
957 current->next->finish = finish;
958 current = current->next;
959 }
960 }
961 ungetc(c, file);
962 }
963
964 fclose(file);
965 return referenceMasks;
966 }
967
inputSequenceArrayIntoSplayTableAndArchive(ReadSet * reads,SplayTable * table,char * filename,char * seqFilename)968 void inputSequenceArrayIntoSplayTableAndArchive(ReadSet * reads,
969 SplayTable * table,
970 char *filename, char* seqFilename)
971 {
972 IDnum index;
973 IDnum sequenceCount = reads->readCount;
974 TightString *array;
975 FILE *outfile = fopen(filename, "w");
976 FILE *seqFile = NULL;
977 IDnum kmerCount;
978 IDnum referenceSequenceCount = 0;
979 struct timeval start, end, diff;
980 SequencesReader seqReadInfo;
981 memset(&seqReadInfo, 0, sizeof(seqReadInfo));
982 if (isCreateBinary()) {
983 seqReadInfo.m_bIsBinary = true;
984 seqReadInfo.m_pFile = openCnySeqForRead(seqFilename, &seqReadInfo.m_unifiedSeqFileHeader);
985 if (!seqReadInfo.m_pFile) {
986 exitErrorf(EXIT_FAILURE, true, "Could not open %s", seqFilename);
987 }
988 seqReadInfo.m_namesFilename = mallocOrExit(strlen(seqFilename) + sizeof(".names"), char);
989 sprintf(seqReadInfo.m_namesFilename, "%s.names", seqFilename);
990 seqReadInfo.m_numCategories = seqReadInfo.m_unifiedSeqFileHeader.m_numCategories;
991 seqReadInfo.m_minSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_minSeqLen;
992 seqReadInfo.m_maxSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_maxSeqLen;
993 seqReadInfo.m_bIsRef = false;
994 seqReadInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, uint8_t );
995 seqReadInfo.m_pCurrentReadPtr = seqReadInfo.m_pReadBufEnd = 0;
996
997 resetCnySeqCurrentRead(&seqReadInfo);
998 } else {
999 seqReadInfo.m_bIsBinary = false;
1000 }
1001 IDnum ** mapReferenceIDs = NULL;
1002 Coordinate ** mapCoords = NULL;
1003 Coordinate * mapCount = NULL;
1004
1005 char line[MAXLINE];
1006 char c;
1007 IDnum seqID = 0;
1008 long long_var;
1009 long long longlong_var;
1010 Coordinate maxCount = 20;
1011 Coordinate counter = 0;
1012 // DEBUG
1013 Mask ** referenceMasks = NULL;
1014
1015 if (outfile == NULL)
1016 exitErrorf(EXIT_FAILURE, true, "Couldn't write to file %s", filename);
1017 else
1018 velvetLog("Writing into roadmap file %s...\n", filename);
1019
1020 // Count reference sequences
1021 for (index = 0; index < reads->readCount && reads->categories[index] == REFERENCE; index++)
1022 referenceSequenceCount++;
1023
1024 velvetFprintf(outfile, "%ld\t%ld\t%i\t%hi\n", (long) sequenceCount, (long) referenceSequenceCount, table->WORDLENGTH, (short) table->double_strand);
1025
1026 if (reads->tSequences == NULL)
1027 convertSequences(reads);
1028
1029 gettimeofday(&start, NULL);
1030 array = reads->tSequences;
1031
1032 #ifdef _OPENMP
1033 if (omp_get_max_threads() == 1)
1034 {
1035 omp_set_num_threads(2);
1036 omp_set_nested(0);
1037 }
1038 else
1039 omp_set_nested(1);
1040 initAnnotationBuffers();
1041 #else
1042 annotationBuffer = newStringBuffer(BUFFER_SIZE);
1043 #endif
1044
1045 if (referenceSequenceCount && (kmerCount = countReferenceKmers(reads, table->WORDLENGTH)) > 0) {
1046 table->kmerOccurenceTable = newKmerOccurenceTable(24 , table->WORDLENGTH);
1047 allocateKmerOccurences(kmerCount, table->kmerOccurenceTable);
1048 if (seqReadInfo.m_bIsBinary) {
1049 referenceMasks = scanBinaryReferenceSequences(&seqReadInfo, referenceSequenceCount);
1050 // binary seqs have no Ns so just advance past the references
1051 for (index = 0; index < referenceSequenceCount; index++) {
1052 TightString cmpString;
1053 cmpString.length = seqReadInfo.m_currentReadLength;
1054 cmpString.sequence = mallocOrExit((seqReadInfo.m_currentReadLength + 3) / 4, uint8_t );
1055 getCnySeqNucl(&seqReadInfo, cmpString.sequence);
1056 if (seqReadInfo.m_bIsRef) {
1057 seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
1058 // now the next ptr is advanced
1059 seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
1060 RefInfo refElem;
1061 uint32_t refIdx;
1062 for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
1063 // not actually used so just read past refs
1064 refElem.m_referenceID = readCnySeqUint32(&seqReadInfo);
1065 refElem.m_pos = readCnySeqUint32(&seqReadInfo);
1066 }
1067 }
1068 // optional test to ensure reference mapping seqIDs are in sync
1069 #if 0
1070 TightString *tString;
1071 tString = getTightStringInArray(array, index);
1072 if (getLength(tString) != seqReadInfo.m_currentReadLength) {
1073 velvetLog("Error: TightString len mismatch, %d != %ld\n", getLength(tString), seqReadInfo.m_currentReadLength);
1074 exit(1);
1075 }
1076 char *str = readTightString(tString);
1077 char *cmpStr = readTightString(&cmpString);
1078 if (strcmp(str, cmpStr) != 0) {
1079 printf("seq %s != cmp %s\n", str, cmpStr);
1080 exit(1);
1081 }
1082 free(str);
1083 free(cmpStr);
1084 #endif
1085 advanceCnySeqCurrentRead(&seqReadInfo);
1086 free(cmpString.sequence);
1087 }
1088 } else {
1089 seqFile = fopen(seqFilename, "r");
1090
1091 if (seqFile == NULL)
1092 exitErrorf(EXIT_FAILURE, true, "Couldn't write to file %s", seqFilename);
1093 else
1094 velvetLog("Reading mapping info from file %s\n", seqFilename);
1095
1096 seqReadInfo.m_pFile = seqFile;
1097
1098 // Skip through reference headers quickly
1099 referenceMasks = scanReferenceSequences(seqFile, referenceSequenceCount);
1100 }
1101
1102 #ifdef _OPENMP
1103 producing = 1;
1104 #pragma omp parallel sections
1105 {
1106 #pragma omp section
1107 {
1108 bufferWritter(outfile);
1109 }
1110 #pragma omp section
1111 {
1112 #pragma omp parallel for
1113 #endif
1114 for (index = 0; index < referenceSequenceCount; index++)
1115 inputReferenceIntoSplayTable(getTightStringInArray(array, index),
1116 table, outfile, index + 1, referenceMasks[index]);
1117
1118 #ifdef _OPENMP
1119 for (index = omp_get_max_threads() - 1; index >= 0; index--)
1120 pushBufferCommit(index);
1121 producing = 0;
1122 #pragma omp flush(producing)
1123 }
1124 }
1125 #endif
1126
1127 if (maskMemory)
1128 destroyRecycleBin(maskMemory);
1129 maskMemory = NULL;
1130 sortKmerOccurenceTable(table->kmerOccurenceTable);
1131 }
1132
1133 velvetLog("Inputting sequences...\n");
1134
1135 if (table->kmerOccurenceTable) {
1136 mapReferenceIDs = callocOrExit(sequenceCount + 1, IDnum*);
1137 mapCoords = callocOrExit(sequenceCount + 1, Coordinate *);
1138 mapCount = callocOrExit(sequenceCount + 1, Coordinate);
1139
1140 RefInfo *refArray = NULL;
1141 if (seqReadInfo.m_bIsBinary) {
1142 TightString cmpString;
1143 for (seqID = referenceSequenceCount + 1; seqID < sequenceCount + 1; seqID++) {
1144 cmpString.length = seqReadInfo.m_currentReadLength;
1145 cmpString.sequence = mallocOrExit((seqReadInfo.m_currentReadLength + 3) / 4, uint8_t );
1146 getCnySeqNucl(&seqReadInfo, cmpString.sequence);
1147 if (seqReadInfo.m_bIsRef) {
1148 seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
1149 // now the next ptr is advanced
1150 seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
1151 refArray = callocOrExit(seqReadInfo.m_refCnt, RefInfo);
1152 uint32_t refIdx;
1153 for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
1154 refArray[refIdx].m_referenceID = readCnySeqUint32(&seqReadInfo);
1155 refArray[refIdx].m_pos = readCnySeqUint32(&seqReadInfo);
1156 }
1157 }
1158 // optional test to ensure reference mapping seqIDs are in sync
1159 #if 0
1160 TightString *tString;
1161 tString = getTightStringInArray(array, seqID - 1);
1162 if (getLength(tString) != seqReadInfo.m_currentReadLength) {
1163 velvetLog("Error: TightString len mismatch, %d != %ld\n", getLength(tString), seqReadInfo.m_currentReadLength);
1164 exit(1);
1165 }
1166 char *str = readTightString(tString);
1167 char *cmpStr = readTightString(&cmpString);
1168 if (strcmp(str, cmpStr) != 0) {
1169 printf("seq %s != cmp %s\n", str, cmpStr);
1170 exit(1);
1171 }
1172 free(str);
1173 free(cmpStr);
1174 #endif
1175 free(cmpString.sequence);
1176
1177 // set prior count
1178 mapCount[seqID - 1] = counter;
1179 counter = 0;
1180 maxCount = 20;
1181 mapReferenceIDs[seqID] = callocOrExit(maxCount, IDnum);
1182 mapCoords[seqID] = callocOrExit(maxCount, Coordinate);
1183
1184 if (seqReadInfo.m_bIsRef) {
1185 while (counter < seqReadInfo.m_refCnt) {
1186 mapReferenceIDs[seqID][counter] = (IDnum) refArray[counter].m_referenceID;
1187 mapCoords[seqID][counter] = (Coordinate) refArray[counter].m_pos;
1188
1189 if (++counter == maxCount) {
1190 maxCount *= 2;
1191 mapReferenceIDs[seqID] = reallocOrExit(mapReferenceIDs[seqID], maxCount, IDnum);
1192 mapCoords[seqID] = reallocOrExit(mapCoords[seqID], maxCount, Coordinate);
1193 }
1194 }
1195 free(refArray);
1196 }
1197 advanceCnySeqCurrentRead(&seqReadInfo);
1198 }
1199 } else {
1200 // Parse file for mapping info
1201 while (seqFile && (c = getc(seqFile)) != EOF) {
1202
1203 if (c == '>') {
1204 mapCount[seqID] = counter;
1205 counter = 0;
1206 maxCount = 20;
1207 fgets(line, MAXLINE, seqFile);
1208 sscanf(line,"%*[^\t]\t%li\t", &long_var);
1209 seqID = (IDnum) long_var;
1210 mapReferenceIDs[seqID] = callocOrExit(maxCount, IDnum);
1211 mapCoords[seqID] = callocOrExit(maxCount, Coordinate);
1212 } else if (c == 'M') {
1213 fgets(line, MAXLINE, seqFile);
1214 sscanf(line,"\t%li\t%lli\n", &long_var, &longlong_var);
1215 mapReferenceIDs[seqID][counter] = (IDnum) long_var;
1216 mapCoords[seqID][counter] = (Coordinate) longlong_var;
1217
1218 if (++counter == maxCount) {
1219 maxCount *= 2;
1220 mapReferenceIDs[seqID] = reallocOrExit(mapReferenceIDs[seqID], maxCount, IDnum);
1221 mapCoords[seqID] = reallocOrExit(mapCoords[seqID], maxCount, Coordinate);
1222 }
1223 }
1224 }
1225 }
1226 }
1227 if (seqFile)
1228 fclose(seqFile);
1229
1230 if (seqReadInfo.m_bIsBinary) {
1231 if (seqReadInfo.m_pReadBuffer) {
1232 free(seqReadInfo.m_pReadBuffer);
1233 }
1234 fclose(seqReadInfo.m_pFile);
1235 }
1236
1237 #ifdef _OPENMP
1238 producing = 1;
1239 #pragma omp parallel sections
1240 {
1241 #pragma omp section
1242 {
1243 bufferWritter(outfile);
1244 }
1245 #pragma omp section
1246 {
1247 #pragma omp parallel for
1248 #endif
1249 for (index = referenceSequenceCount; index < sequenceCount; index++)
1250 {
1251 boolean second_in_pair;
1252
1253 // Progress report on screen
1254 if (index % 1000000 == 0) {
1255 velvetLog("Inputting sequence %li / %li\n",
1256 (long)index, (long)sequenceCount);
1257 fflush(stdout);
1258 }
1259
1260 // Test to make sure that all the reference reads are before all the other reads
1261 if (reads->categories[index] == REFERENCE) {
1262 velvetLog("Reference sequence placed after a non-reference read!\n");
1263 velvetLog(">> Please re-order the filenames in your command line so as "
1264 "to have the reference sequence files before all the others\n");
1265 #ifdef DEBUG
1266 abort();
1267 #endif
1268 exit(0);
1269 }
1270 second_in_pair = reads->categories[index] % 2 && isSecondInPair(reads, index);
1271
1272 // Hashing the reads
1273 if (table->kmerOccurenceTable)
1274 inputSequenceIntoSplayTable(array, table,
1275 outfile,
1276 second_in_pair, mapReferenceIDs[index + 1], mapCoords[index+1], mapCount[index+1], index + 1);
1277 else
1278 inputSequenceIntoSplayTable(array, table,
1279 outfile,
1280 second_in_pair, NULL, NULL, 0, index + 1);
1281 }
1282 #ifdef _OPENMP
1283 for (index = omp_get_max_threads() - 1; index >= 0; index--)
1284 pushBufferCommit(index);
1285 producing = 0;
1286 #pragma omp flush(producing)
1287 }
1288 }
1289 destroyAnnotationBuffers();
1290 #else
1291 destroyStringBuffer(annotationBuffer, 1);
1292 #endif
1293
1294 gettimeofday(&end, NULL);
1295 timersub(&end, &start, &diff);
1296 velvetLog(" === Sequences loaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
1297
1298 fclose(outfile);
1299
1300 if (mapReferenceIDs) {
1301 free(mapReferenceIDs);
1302 free(mapCoords);
1303 free(mapCount);
1304 }
1305 if (referenceMasks) {
1306 free(referenceMasks);
1307 }
1308 if (seqReadInfo.m_namesFilename) {
1309 free(seqReadInfo.m_namesFilename);
1310 }
1311 //free(reads->tSequences);
1312 //reads->tSequences = NULL;
1313 //destroyReadSet(reads);
1314 velvetLog("Done inputting sequences\n");
1315 }
1316