1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3 
4     This file is part of Velvet.
5 
6     Velvet is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     Velvet is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with Velvet; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <time.h>
25 #include <sys/time.h>
26 
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30 
31 #include "globals.h"
32 #include "readSet.h"
33 #include "splay.h"
34 #include "tightString.h"
35 #include "utility.h"
36 #include "kmer.h"
37 #include "kmerOccurenceTable.h"
38 #include "recycleBin.h"
39 #include "binarySequences.h"
40 
41 static RecycleBin * maskMemory = NULL;
42 
allocateMask()43 static Mask *allocateMask()
44 {
45 	if (maskMemory == NULL)
46 		maskMemory = newRecycleBin(sizeof(Mask), 10000);
47 
48 	return (Mask *) allocatePointer(maskMemory);
49 }
50 
newMask(Coordinate position)51 static Mask * newMask(Coordinate position)
52 {
53 	Mask * mask = allocateMask();
54 	mask->start = position;
55 	mask->finish = position;
56 	mask->next = NULL;
57 	return mask;
58 }
59 
60 // DEBUG
61 boolean debug = false;
62 
63 #define HASH_BUCKETS_NB 16777216
64 
65 #ifdef _OPENMP
66 
67 #define NB_PUSH 32
68 #define BUFFER_SIZE 4096
69 
70 static StringBuffer **annotationBuffer = NULL;
71 static StringBuffer **annotationBufferW = NULL;
72 static int *nbPush = NULL;
73 static boolean producing = 1;
74 
initAnnotationBuffers(void)75 static void initAnnotationBuffers(void)
76 {
77 	int n;
78 	int i;
79 
80 	n = omp_get_max_threads();
81 	annotationBuffer = callocOrExit(n, StringBuffer*);
82 	annotationBufferW = callocOrExit(n, StringBuffer*);
83 	nbPush = callocOrExit(n, int);
84 
85 	for (i = 0; i < n; i++)
86 	{
87 		annotationBuffer[i] = newStringBuffer(BUFFER_SIZE);
88 		annotationBufferW[i] = newStringBuffer(BUFFER_SIZE);
89 	}
90 }
91 
destroyAnnotationBuffers(void)92 static void destroyAnnotationBuffers(void)
93 {
94 	int n;
95 	int i;
96 
97 	n = omp_get_max_threads();
98 
99 	for (i = 0; i < n; i++)
100 	{
101 		destroyStringBuffer(annotationBuffer[i], 1);
102 		destroyStringBuffer(annotationBufferW[i], 1);
103 	}
104 
105 	free(annotationBuffer);
106 	free(annotationBufferW);
107 	free(nbPush);
108 	annotationBuffer = NULL;
109 	annotationBufferW = NULL;
110 	nbPush = NULL;
111 }
112 
pushBufferCommit(int thread)113 static void pushBufferCommit(int thread)
114 {
115 	StringBuffer *tmp;
116 	char *s;
117 
118 	s = annotationBufferW[thread]->str;
119 	do
120 	{
121 		#pragma omp flush(s)
122 	}
123 	while (*s);
124 	tmp = annotationBufferW[thread];
125 	annotationBufferW[thread] = annotationBuffer[thread];
126 	annotationBuffer[thread] = tmp;
127 	tmp = annotationBufferW[thread];
128 	#pragma omp flush(tmp)
129 }
130 
pushBuffer(int thread)131 static void pushBuffer(int thread)
132 {
133 	if (++nbPush[thread] == NB_PUSH)
134 	{
135 		nbPush[thread] = 0;
136 		pushBufferCommit(thread);
137 	}
138 }
139 
writeBuffers(FILE * outFile,int nbThreads)140 static void writeBuffers(FILE *outFile, int nbThreads)
141 {
142 	int i;
143 
144 	for (i = 0; i < nbThreads; i++)
145 	{
146 		StringBuffer *b;
147 		char *s;
148 
149 		b = annotationBufferW[i];
150 		#pragma omp flush(b)
151 		s = b->str;
152 		#pragma omp flush(s)
153 		if (*s)
154 		{
155 			velvetFprintf(outFile, "%s", annotationBufferW[i]->str);
156 			resetStringBuffer(annotationBufferW[i]);
157 		}
158 	}
159 }
160 
bufferWritter(FILE * outFile)161 static void bufferWritter(FILE *outFile)
162 {
163 	int n;
164 
165 	n = omp_get_max_threads();
166 	#pragma omp flush(producing)
167 	while (producing)
168 	{
169 		writeBuffers(outFile, n);
170 		#pragma omp flush(producing)
171 	}
172 	writeBuffers(outFile, n);
173 }
174 
appendLine(char * line,int thread)175 static void appendLine(char *line, int thread)
176 {
177 	appendStringBuffer(annotationBuffer[thread], line);
178 }
179 #else
180 
181 #define BUFFER_SIZE 1024
182 
183 StringBuffer *annotationBuffer = NULL;
184 
appendLine(char * line,int thread)185 static void appendLine(char *line, int thread)
186 {
187 	appendStringBuffer(annotationBuffer, line);
188 }
189 #endif
190 
191 struct splayTable_st {
192 	SplayTree **table;
193 #ifdef _OPENMP
194 	omp_lock_t *tableLocks;
195 #endif
196 	KmerOccurenceTable *kmerOccurenceTable;
197 	int WORDLENGTH;
198 	boolean double_strand;
199 };
200 
newSplayTable(int WORDLENGTH,boolean double_strand)201 SplayTable *newSplayTable(int WORDLENGTH, boolean double_strand)
202 {
203 	SplayTable *splayTable = mallocOrExit(1, SplayTable);
204 	splayTable->WORDLENGTH = WORDLENGTH;
205 	splayTable->table = callocOrExit(HASH_BUCKETS_NB, SplayTree *);
206 	splayTable->kmerOccurenceTable = NULL;
207 	splayTable->double_strand = double_strand;
208 #ifdef _OPENMP
209 	splayTable->tableLocks = mallocOrExit(HASH_BUCKETS_NB, omp_lock_t);
210 	int i;
211 	#pragma omp parallel for
212 	for (i = 0; i < HASH_BUCKETS_NB; i++)
213 		omp_init_lock(splayTable->tableLocks + i);
214 	initSplayTreeMemory();
215 #endif
216 	return splayTable;
217 }
218 
destroySplayTable(SplayTable * splayTable)219 void destroySplayTable(SplayTable * splayTable)
220 {
221 	velvetLog("Destroying splay table\n");
222 
223 	destroyAllSplayTrees();
224 	free(splayTable->table);
225 	destroyKmerOccurenceTable(splayTable->kmerOccurenceTable);
226 	free(splayTable);
227 
228 	velvetLog("Splay table destroyed\n");
229 }
230 
hash_kmer(Kmer * kmer)231 static KmerKey hash_kmer(Kmer * kmer)
232 {
233 #if KMER_LONGLONGS
234 	KmerKey key = kmer->longlongs[0];
235 
236 #if KMER_LONGLONGS > 1
237 	key ^= kmer->longlongs[1];
238 #endif
239 #if KMER_LONGLONGS > 2
240 	key ^= kmer->longlongs[2];
241 #endif
242 
243 	key = (~key) + (key << 21);
244 	key = key ^ (key >> 24);
245 	key = (key + (key << 3)) + (key << 8);
246 	key = key ^ (key >> 14);
247 	key = (key + (key << 2)) + (key << 4);
248 	key = key ^ (key >> 28);
249 	key = key + (key << 31);
250 
251 	return key % HASH_BUCKETS_NB;
252 #elif KMER_LONGS
253 	KmerKey key = kmer->longs;
254 
255 	key += ~(key << 15);
256 	key ^= (key >> 10);
257 	key += (key << 3);
258 	key ^= (key >> 6);
259 	key += ~(key << 11);
260 	key ^= (key >> 16);
261 
262 	return key % HASH_BUCKETS_NB;
263 
264 #elif KMER_INTS
265 	return kmer->ints % HASH_BUCKETS_NB;
266 #elif KMER_CHARS
267 	return kmer->chars % HASH_BUCKETS_NB;
268 #endif
269 }
270 
getNearestHSPIndex(Coordinate position,IDnum * sequenceIDs,Coordinate sequenceLength)271 static Coordinate getNearestHSPIndex(Coordinate position, IDnum * sequenceIDs, Coordinate sequenceLength) {
272 	Coordinate back_offset = -1;
273 	Coordinate front_offset = -1;
274 
275 	for (back_offset = 1; position - back_offset > 0; back_offset++)
276 		if (sequenceIDs[position - back_offset])
277 			break;
278 
279 	for (front_offset = 1; position + front_offset < sequenceLength; front_offset++)
280 		if (sequenceIDs[position + front_offset])
281 			break;
282 
283 	if (back_offset == position && position + front_offset == sequenceLength)
284 		return -1;
285 	else if (back_offset == position)
286 		return position + front_offset;
287 	else if (front_offset + position == sequenceLength)
288 		return position - back_offset;
289 	else
290 		return back_offset < front_offset? position - back_offset : position + front_offset;
291 }
292 
getMostAppropriateHit(Coordinate readCoord,Coordinate readLength,boolean direct,KmerOccurence * kmerOccurence,IDnum mapCount,IDnum * mapSequenceID,Coordinate * mapCoord,int wordLength)293 static KmerOccurence * getMostAppropriateHit(Coordinate readCoord, Coordinate readLength, boolean direct, KmerOccurence * kmerOccurence, IDnum mapCount, IDnum * mapSequenceID, Coordinate * mapCoord, int wordLength) {
294 	KmerOccurence * current;
295 	KmerOccurence * best = NULL;
296 	Coordinate expectedPosition;
297 	Coordinate positionError;
298 	IDnum mapIndex;
299 
300 	// If only one hit
301 	if (!getNextKmerOccurence(kmerOccurence))
302 		return kmerOccurence;
303 
304 	// If multiple hits by unmapped read
305 	if (mapCount == 0)
306 		return NULL;
307 
308 	// Compare cases
309 	for (current = kmerOccurence; current; current = getNextKmerOccurence(current)) {
310 		for (mapIndex = 0; mapIndex < mapCount; mapIndex++) {
311 
312 			// If wrong sequence or unconsistent orientation
313 			if ((direct && getKmerOccurenceNodeID(current) != mapSequenceID[mapIndex])
314 			    || (!direct && getKmerOccurenceNodeID(current) != -mapSequenceID[mapIndex]))
315 				continue;
316 
317 			// Compute where it is supposed to land on reference
318 			if (mapSequenceID[mapIndex] < 0)
319 				expectedPosition = mapCoord[mapIndex] + readLength - readCoord - 1;
320 			else
321 				expectedPosition = mapCoord[mapIndex] + readCoord - wordLength + 1;
322 
323 			// Compute positional error
324 			positionError = getKmerOccurencePosition(current) - expectedPosition;
325 
326 			// If potential hit record
327 			if (positionError < 1 && positionError > -1) {
328 				if (best)
329 					// If competing hit, give up
330 					return NULL;
331 				else
332 					// Record current hit
333 					best = current;
334 			}
335 		}
336 	}
337 
338 	return best;
339 }
340 
341 static inline boolean
doFindOrInsertOccurenceInSplayTree(Kmer * kmer,IDnum * seqID,Coordinate * position,SplayTable * table)342 doFindOrInsertOccurenceInSplayTree(Kmer * kmer, IDnum * seqID,
343 				   Coordinate * position, SplayTable *table)
344 {
345 #ifdef _OPENMP
346 	const KmerKey kmerHash = hash_kmer(kmer);
347 	boolean ret;
348 
349 	omp_set_lock(table->tableLocks + kmerHash);
350 	ret =  findOrInsertOccurenceInSplayTree(kmer, seqID, position,
351 						table->table + kmerHash);
352 	omp_unset_lock(table->tableLocks + kmerHash);
353 
354 	return ret;
355 #else
356 	return findOrInsertOccurenceInSplayTree(kmer, seqID, position,
357 						&table->table[hash_kmer(kmer)]);
358 #endif
359 }
360 
361 
findOrInsertOccurenceInSplayTable(Kmer * kmer,IDnum * seqID,Coordinate * position,SplayTable * table,IDnum * sequenceIDs,Coordinate * coords,Coordinate readIndex,Coordinate readLength,boolean direct)362 static boolean findOrInsertOccurenceInSplayTable(Kmer * kmer, IDnum * seqID,
363 						 Coordinate * position,
364 						 SplayTable * table, IDnum * sequenceIDs,
365 						 Coordinate * coords, Coordinate readIndex, Coordinate readLength, boolean direct)
366 {
367 	KmerOccurence * hit;
368 	Coordinate HSPIndex;
369 
370 	// Check if previous anchor
371 	if (sequenceIDs && sequenceIDs[readIndex]) {
372 		if (direct)
373 			*seqID = sequenceIDs[readIndex];
374 		else
375 			*seqID = -sequenceIDs[readIndex];
376 		if (sequenceIDs[readIndex] > 0)
377 			*position = coords[readIndex] + readIndex;
378 		else
379 			*position = coords[readIndex] - readIndex + readLength - 1;
380 
381 		return true;
382 	}
383 	else if (coords && coords[readIndex])
384 		// If in buffer zone:
385 		return doFindOrInsertOccurenceInSplayTree(kmer, seqID, position, table);
386 
387 
388 	if (debug)
389 		abort();
390 	// Look up first in reference sequence k-mers
391 	if (table->kmerOccurenceTable
392 	    && (hit = findKmerInKmerOccurenceTable(kmer, table->kmerOccurenceTable))) {
393 		if (!getNextKmerOccurence(hit)) {
394 			*seqID = getKmerOccurenceNodeID(hit);
395 			*position = getKmerOccurencePosition(hit);
396 			return true;
397 		} else if ((HSPIndex = getNearestHSPIndex(*position, sequenceIDs, readLength)) > 0) {
398 	    		hit = getMostAppropriateHit(readIndex, readLength, direct, hit, 1, &(sequenceIDs[HSPIndex]), &(coords[HSPIndex]), table->WORDLENGTH);
399 			if (hit) {
400 				*seqID = getKmerOccurenceNodeID(hit);
401 				*position = getKmerOccurencePosition(hit);
402 				return true;
403 			}
404 
405 		}
406 	}
407 
408 	// If not, go through the novel k-mers
409 	return doFindOrInsertOccurenceInSplayTree(kmer, seqID, position, table);
410 }
411 
printAnnotations(IDnum * sequenceIDs,Coordinate * coords,TightString * array,SplayTable * table,FILE * file,boolean second_in_pair,IDnum seqID)412 static void printAnnotations(IDnum *sequenceIDs, Coordinate * coords,
413 			     TightString * array, SplayTable * table,
414 			     FILE * file, boolean second_in_pair, IDnum seqID)
415 {
416 	Coordinate readNucleotideIndex = 0;
417 	Coordinate writeNucleotideIndex = 0;
418 	Kmer word;
419 	Kmer antiWord;
420 	boolean annotationClosed = true;
421 	IDnum sequenceID;
422 	Coordinate coord;
423 	boolean found;
424 	Coordinate position = 0;
425 	Coordinate start = 0;
426 	Coordinate finish = 0;
427 	IDnum referenceSequenceID = 0;
428 	Nucleotide nucleotide;
429 	char lineBuffer[MAXLINE];
430 	TightString * tString = getTightStringInArray(array, seqID - 1);
431 	int thread = 0;
432 
433 	clearKmer(&word);
434 	clearKmer(&antiWord);
435 
436 #ifdef _OPENMP
437 	thread = omp_get_thread_num();
438 #endif
439 
440 	if (debug)
441 		abort();
442 
443 	sprintf(lineBuffer, "ROADMAP %li\n", (long)seqID);
444 	appendLine(lineBuffer, thread);
445 
446 	// Neglect any string shorter than WORDLENGTH :
447 	if (getLength(tString) < table->WORDLENGTH) {
448 #ifdef _OPENMP
449 		pushBuffer(thread);
450 #else
451 		velvetFprintf(file, "%s", annotationBuffer->str);
452 		resetStringBuffer(annotationBuffer);
453 #endif
454 		return;
455 	}
456 
457 	// Fill in the initial word :
458 	for (readNucleotideIndex = 0;
459 	     readNucleotideIndex < table->WORDLENGTH - 1;
460 	     readNucleotideIndex++) {
461 		nucleotide = getNucleotide(readNucleotideIndex, tString);
462 		pushNucleotide(&word, nucleotide);
463 #ifdef COLOR
464 		reversePushNucleotide(&antiWord, nucleotide);
465 #else
466 		reversePushNucleotide(&antiWord, 3 - nucleotide);
467 #endif
468 	}
469 
470 	while (readNucleotideIndex < getLength(tString)) {
471 		// Shift word:
472 		nucleotide = getNucleotide(readNucleotideIndex, tString);
473 		pushNucleotide(&word, nucleotide);
474 #ifdef COLOR
475 		reversePushNucleotide(&antiWord, nucleotide);
476 #else
477 		reversePushNucleotide(&antiWord, 3 - nucleotide);
478 #endif
479 
480 		sequenceID = seqID;
481 		coord = writeNucleotideIndex;
482 
483 		if (table->double_strand) {
484 			if (compareKmers(&word, &antiWord) <= 0) {
485 				found =
486 				    findOrInsertOccurenceInSplayTable(&word,
487 								      &sequenceID,
488 								      &coord,
489 								      table,
490 								      sequenceIDs,
491 								      coords,
492 								      readNucleotideIndex,
493 								      getLength(tString),
494 								      true);
495 			} else {
496 				sequenceID = -sequenceID;
497 				found =
498 				    findOrInsertOccurenceInSplayTable(&antiWord,
499 								      &sequenceID,
500 								      &coord,
501 								      table,
502 								      sequenceIDs,
503 								      coords,
504 								      readNucleotideIndex,
505 								      getLength(tString),
506 								      false);
507 				sequenceID = -sequenceID;
508 			}
509 		} else {
510 			if (!second_in_pair) {
511 				found =
512 				    findOrInsertOccurenceInSplayTable(&word,
513 								      &sequenceID,
514 								      &coord,
515 								      table,
516 								      sequenceIDs,
517 								      coords,
518 								      readNucleotideIndex,
519 								      getLength(tString),
520 								      true);
521 			} else {
522 				sequenceID = -sequenceID;
523 				found =
524 				    findOrInsertOccurenceInSplayTable(&antiWord,
525 								      &sequenceID,
526 								      &coord,
527 								      table,
528 								      sequenceIDs,
529 								      coords,
530 								      readNucleotideIndex,
531 								      getLength(tString),
532 								      false);
533 				sequenceID = -sequenceID;
534 			}
535 		}
536 
537 		if (!found) {
538 			writeNucleotideIndex++;
539 			if (!annotationClosed) {
540 				sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
541 					(long) referenceSequenceID, (long long) position,
542 					(long long) start, (long long) finish);
543 				appendLine(lineBuffer, thread);
544 			}
545 			annotationClosed = true;
546 		}
547 		// Otherwise create/complete annotation:
548 		else {
549 			// Forbidden k-mer
550 			if (sequenceID == 0) {
551 				break;
552 			}
553 			// Closed/inexistant annotation
554 			else if (annotationClosed) {
555 				referenceSequenceID = sequenceID;
556 				position = writeNucleotideIndex;
557 				start = finish = coord;
558 
559 				if (referenceSequenceID > 0)
560 					finish++;
561 				else
562 					finish--;
563 
564 				annotationClosed = false;
565 			}
566 			// Open annotation
567 			else if (sequenceID == referenceSequenceID
568 				 && coord == finish) {
569 				if (referenceSequenceID > 0)
570 					finish++;
571 				else
572 					finish--;
573 			}
574 			// Previous non corresponding annotation
575 			else {
576 				sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
577 					(long) referenceSequenceID, (long long) position,
578 					(long long) start, (long long) finish);
579 				appendLine(lineBuffer, thread);
580 
581 				referenceSequenceID = sequenceID;
582 				position = writeNucleotideIndex;
583 				start = finish = coord;
584 
585 				if (referenceSequenceID > 0)
586 					finish++;
587 				else
588 					finish--;
589 			}
590 		}
591 
592 		readNucleotideIndex++;
593 	}
594 
595 	if (!annotationClosed) {
596 		sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
597 			(long) referenceSequenceID, (long long) position,
598 			(long long) start, (long long) finish);
599 		appendLine(lineBuffer, thread);
600 	}
601 #ifdef _OPENMP
602 	pushBuffer(thread);
603 #else
604 	velvetFprintf(file, "%s", annotationBuffer->str);
605 	resetStringBuffer(annotationBuffer);
606 #endif
607 
608 	return;
609 }
610 
computeClearHSPs(TightString * array,boolean second_in_pair,SplayTable * table,IDnum ** sequenceIDs,Coordinate ** coords,IDnum * mapReferenceIDs,Coordinate * mapCoords,Coordinate mapCount,IDnum seqID)611 static void computeClearHSPs(TightString * array, boolean second_in_pair, SplayTable * table, IDnum ** sequenceIDs, Coordinate ** coords, IDnum * mapReferenceIDs, Coordinate * mapCoords, Coordinate mapCount, IDnum seqID) {
612 	Coordinate readNucleotideIndex = 0;
613 	Kmer word;
614 	Kmer antiWord;
615 	Kmer polyA;
616 	Nucleotide nucleotide;
617 	KmerOccurence * hit;
618 
619 	int penalty;
620 	TightString * tString;
621 	Coordinate length;
622 
623 	clearKmer(&polyA);
624 	tString = getTightStringInArray(array, seqID - 1);
625 	length = getLength(tString);
626 	*sequenceIDs = callocOrExit(length, IDnum);
627 	*coords = callocOrExit(length, Coordinate);
628 
629 	// First pass for unambiguous hits
630 	// Fill in the initial word :
631 	clearKmer(&word);
632 	clearKmer(&antiWord);
633 	for (readNucleotideIndex = 0;
634 	     readNucleotideIndex < table->WORDLENGTH - 1;
635 	     readNucleotideIndex++) {
636 		nucleotide = getNucleotide(readNucleotideIndex, tString);
637 		pushNucleotide(&word, nucleotide);
638 #ifdef COLOR
639 		reversePushNucleotide(&antiWord, nucleotide);
640 #else
641 		reversePushNucleotide(&antiWord, 3 - nucleotide);
642 #endif
643 	}
644 
645 	// Kill silly poly-T beginnings
646 	while (readNucleotideIndex < getLength(tString) && (compareKmers(&antiWord, &polyA) == 0 || compareKmers(&word, &polyA) == 0)) {
647 		nucleotide = getNucleotide(readNucleotideIndex++, tString);
648 		pushNucleotide(&word, nucleotide);
649 #ifdef COLOR
650 		reversePushNucleotide(&antiWord, nucleotide);
651 #else
652 		reversePushNucleotide(&antiWord, 3 - nucleotide);
653 #endif
654 	}
655 
656 	while (readNucleotideIndex < getLength(tString)) {
657 		// Shift word:
658 		nucleotide = getNucleotide(readNucleotideIndex, tString);
659 		pushNucleotide(&word, nucleotide);
660 
661 #ifdef COLOR
662 		reversePushNucleotide(&antiWord, nucleotide);
663 #else
664 		reversePushNucleotide(&antiWord, 3 - nucleotide);
665 #endif
666 
667 		if (table->double_strand) {
668 			if (compareKmers(&word, &antiWord) <= 0) {
669 				hit = findKmerInKmerOccurenceTable(&word, table->kmerOccurenceTable);
670 
671 				if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), true, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
672 					(*sequenceIDs)[readNucleotideIndex] = getKmerOccurenceNodeID(hit);
673 			} else {
674 				hit = findKmerInKmerOccurenceTable(&antiWord, table->kmerOccurenceTable);
675 
676 				if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), false, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
677 					(*sequenceIDs)[readNucleotideIndex] = -getKmerOccurenceNodeID(hit);
678 			}
679 		} else {
680 			if (!second_in_pair) {
681 				hit = findKmerInKmerOccurenceTable(&word, table->kmerOccurenceTable);
682 
683 				if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), true, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
684 					(*sequenceIDs)[readNucleotideIndex] = getKmerOccurenceNodeID(hit);
685 			} else {
686 				hit = findKmerInKmerOccurenceTable(&antiWord, table->kmerOccurenceTable);
687 
688 				if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), false, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
689 					(*sequenceIDs)[readNucleotideIndex] = -getKmerOccurenceNodeID(hit);
690 			}
691 		}
692 
693 		if ((*sequenceIDs)[readNucleotideIndex]) {
694 			if ((*sequenceIDs)[readNucleotideIndex] > 0)
695 				(*coords)[readNucleotideIndex] = getKmerOccurencePosition(hit) - readNucleotideIndex;
696 			else
697 				(*coords)[readNucleotideIndex] = getKmerOccurencePosition(hit) + readNucleotideIndex - getLength(tString) + 1;
698 		}
699 
700 		// Barrier to flip-flopping
701 		if ((*sequenceIDs)[readNucleotideIndex - 1] != 0
702 		    && ((*sequenceIDs)[readNucleotideIndex] != (*sequenceIDs)[readNucleotideIndex - 1]
703 			|| (*coords)[readNucleotideIndex] != (*coords)[readNucleotideIndex - 1])) {
704 			// Break in continuity... skip k positions
705 			(*sequenceIDs)[readNucleotideIndex] = 0;
706 			(*coords)[readNucleotideIndex] = -1;
707 			readNucleotideIndex++;
708 
709 			for (penalty = 0; penalty < table->WORDLENGTH  - 1 && readNucleotideIndex < getLength(tString); penalty++) {
710 				nucleotide = getNucleotide(readNucleotideIndex, tString);
711 				pushNucleotide(&word, nucleotide);
712 
713 #ifdef COLOR
714 				reversePushNucleotide(&antiWord, nucleotide);
715 #else
716 				reversePushNucleotide(&antiWord, 3 - nucleotide);
717 #endif
718 				(*sequenceIDs)[readNucleotideIndex] = 0;
719 				(*coords)[readNucleotideIndex] = -1;
720 				readNucleotideIndex++;
721 			}
722 		} else
723 			readNucleotideIndex++;
724 
725 	}
726 
727 	free(mapReferenceIDs);
728 	free(mapCoords);
729 }
730 
inputSequenceIntoSplayTable(TightString * array,SplayTable * table,FILE * file,boolean second_in_pair,IDnum * mapReferenceIDs,Coordinate * mapCoords,Coordinate mapCount,IDnum seqID)731 void inputSequenceIntoSplayTable(TightString * array,
732 				 SplayTable * table,
733 				 FILE * file,
734 				 boolean second_in_pair,
735 				 IDnum * mapReferenceIDs, Coordinate * mapCoords, Coordinate mapCount,
736 				 IDnum seqID)
737 {
738 	IDnum * sequenceIDs = NULL;
739 	Coordinate * coords = NULL;
740 
741 	//debug = (seqID == 29405);
742 
743 	// If appropriate, get the HSPs on reference sequences
744 	if (table->kmerOccurenceTable)
745 		computeClearHSPs(array, second_in_pair, table, &sequenceIDs, &coords, mapReferenceIDs, mapCoords, mapCount, seqID);
746 
747 	// Go through read, eventually with annotations
748 	printAnnotations(sequenceIDs, coords, array, table, file, second_in_pair, seqID);
749 
750 	// Clean up
751 	if (sequenceIDs) {
752 		free(sequenceIDs);
753 		free(coords);
754 	}
755 }
756 
inputReferenceIntoSplayTable(TightString * tString,SplayTable * table,FILE * file,IDnum seqID,Mask * mask)757 void inputReferenceIntoSplayTable(TightString * tString,
758 				 SplayTable * table, FILE * file, IDnum seqID, Mask * mask)
759 {
760 	IDnum currentIndex;
761 	Coordinate readNucleotideIndex = 0;
762 	Coordinate kmerIndex = 0;
763 	Kmer word;
764 	Kmer antiWord;
765 	Nucleotide nucleotide;
766 	Mask * currentMask = mask;
767 #ifdef _OPENMP
768 	char lineBuffer[MAXLINE];
769 #endif
770 
771 	clearKmer(&word);
772 	clearKmer(&antiWord);
773 
774 	currentIndex = seqID;
775 #ifdef _OPENMP
776 	sprintf(lineBuffer, "ROADMAP %li\n", (long)currentIndex);
777 	appendLine(lineBuffer, omp_get_thread_num());
778 #else
779 	velvetFprintf(file, "ROADMAP %li\n", (long)currentIndex);
780 #endif
781 
782 	// Neglect any string shorter than WORDLENGTH :
783 	if (getLength(tString) < table->WORDLENGTH) {
784 		return;
785 	}
786 
787 	// Fill in the initial word :
788 	for (readNucleotideIndex = 0;
789 	     readNucleotideIndex < table->WORDLENGTH - 1;
790 	     readNucleotideIndex++) {
791 		nucleotide = getNucleotide(readNucleotideIndex, tString);
792 		pushNucleotide(&word, nucleotide);
793 		if (table->double_strand) {
794 #ifdef COLOR
795 			reversePushNucleotide(&antiWord, nucleotide);
796 #else
797 			reversePushNucleotide(&antiWord, 3 - nucleotide);
798 #endif
799 		}
800 	}
801 
802 	while (readNucleotideIndex < getLength(tString)) {
803 		// Shift word:
804 		nucleotide = getNucleotide(readNucleotideIndex, tString);
805 		pushNucleotide(&word, nucleotide);
806 
807 		if (table->double_strand) {
808 #ifdef COLOR
809 			reversePushNucleotide(&antiWord, nucleotide);
810 #else
811 			reversePushNucleotide(&antiWord, 3 - nucleotide);
812 #endif
813 		}
814 
815 		// Check for gap masks:
816 		if (currentMask && currentMask->start - table->WORDLENGTH + 1 <= readNucleotideIndex) {
817 			while(currentMask && currentMask->finish + table->WORDLENGTH - 1 < readNucleotideIndex)
818 				currentMask = currentMask->next;
819 
820 			if (currentMask && currentMask->finish + table->WORDLENGTH - 1 >= readNucleotideIndex) {
821 				readNucleotideIndex++;
822 				kmerIndex++;
823 				continue;
824 			}
825 		}
826 
827 		// Record k-mer
828 		if (table->double_strand) {
829 			if (compareKmers(&word, &antiWord) <= 0)
830 				recordKmerOccurence(&word, currentIndex,
831 						    kmerIndex,
832 						    table->kmerOccurenceTable);
833 			else
834 				recordKmerOccurence(&antiWord, -currentIndex,
835 							kmerIndex,
836 							table->kmerOccurenceTable);
837 		} else {
838 			recordKmerOccurence(&word, currentIndex,
839 					    kmerIndex,
840 					    table->kmerOccurenceTable);
841 		}
842 		readNucleotideIndex++;
843 		kmerIndex++;
844 	}
845 
846 	return;
847 }
848 
countReferenceKmers(ReadSet * reads,int wordLength)849 static Coordinate countReferenceKmers(ReadSet * reads, int wordLength) {
850 	IDnum readIndex;
851 	Coordinate length = 0;
852 
853 
854 	for (readIndex = 0; readIndex < reads->readCount && reads->categories[readIndex] == REFERENCE; readIndex++)
855 	{
856 		Coordinate tmpLength = getLength(getTightStringInArray(reads->tSequences, readIndex));
857 		if (tmpLength >= wordLength)
858 			length += tmpLength - wordLength + 1;
859 	}
860 
861 	return length;
862 }
863 
scanReferenceSequences(FILE * file,IDnum referenceSequenceCount)864 Mask ** scanReferenceSequences(FILE * file, IDnum referenceSequenceCount) {
865 	Mask ** referenceMasks = callocOrExit(referenceSequenceCount, Mask*);
866 	IDnum index;
867 	char line[MAXLINE];
868 	char c = '\0';
869 
870 	// Search sequences for masks
871 	for (index = 0; index < referenceSequenceCount; index++) {
872 		Mask * current = NULL;
873 		Coordinate position = 0;
874 		boolean openMask = false;
875 
876 		// Read through header
877 		fgets(line, MAXLINE, file);
878 
879 		// Read through sequence
880 		while ((c = getc(file))) {
881 			if (c == EOF || c == '>')
882 				break;
883 			else if (c == '\r' || c == '\n')
884 				continue;
885 			else if (c == 'n' || c == 'N') {
886 				if (openMask)
887 					current->finish++;
888 				else if (referenceMasks[index] == NULL) {
889 					referenceMasks[index] = newMask(position);
890 					current = referenceMasks[index];
891 				} else {
892 					current->next = newMask(position);
893 					current = current->next;
894 				}
895 				openMask = true;
896 				position++;
897 			} else {
898 				openMask = false;
899 				position++;
900 			}
901 		}
902 	}
903 
904 	if (c != '\0')
905 		ungetc(c, file);
906 	return referenceMasks;
907 }
908 
scanBinaryReferenceSequences(SequencesReader * seqReadInfo,IDnum referenceSequenceCount)909 Mask ** scanBinaryReferenceSequences(SequencesReader *seqReadInfo, IDnum referenceSequenceCount) {
910 	Mask ** referenceMasks = callocOrExit(referenceSequenceCount, Mask*);
911 	IDnum index;
912 	char line[MAXLINE];
913 	char c = '\0';
914 	FILE *file = fopen(seqReadInfo->m_namesFilename, "r");
915 	if (file == NULL) {
916 		exitErrorf(EXIT_FAILURE, true, "Couldn't read file %s", seqReadInfo->m_namesFilename);
917 	} else {
918 		velvetLog("Reading mapping info from %s\n", seqReadInfo->m_namesFilename);
919 	}
920 
921 	// Search sequences for masks
922 	for (index = 0; index < referenceSequenceCount; index++) {
923 		Mask * current = NULL;
924 		long start = 0;
925 		long finish = 0;
926 		long number;
927 		long cat;
928 
929 		// Read through header
930 		if ((c = getc(file)) != '>') {
931 			exitErrorf(EXIT_FAILURE, false, "names line did not start with >");
932 		}
933 		fgets(line, MAXLINE, file);
934 		sscanf(line, "%*[^\t]\t%li\t%li\n", &number, &cat);
935 		// ensure is is a ref cat
936 		if ((IDnum) number != index + 1) {
937 			exitErrorf(EXIT_FAILURE, false, "sequence %ld != expected %ld", number, (long) index);
938 		}
939 		if ((Category) cat != REFERENCE) {
940 			exitErrorf(EXIT_FAILURE, false, "unexpected category %ld", cat);
941 		}
942 
943 		// Read through the reference maps
944 		while ((c = getc(file))) {
945 			if (c == EOF || c == '>') {
946 				break;
947 			}
948 			ungetc(c, file);
949 			fgets(line, MAXLINE, file);
950 			sscanf(line, "%li\t%li\n", &start, &finish);
951 			if (referenceMasks[index] == NULL) {
952 				referenceMasks[index] = newMask(start);
953 				referenceMasks[index]->finish = finish;
954 				current = referenceMasks[index];
955 			} else {
956 				current->next = newMask(start);
957 				current->next->finish = finish;
958 				current = current->next;
959 			}
960 		}
961 		ungetc(c, file);
962 	}
963 
964 	fclose(file);
965 	return referenceMasks;
966 }
967 
inputSequenceArrayIntoSplayTableAndArchive(ReadSet * reads,SplayTable * table,char * filename,char * seqFilename)968 void inputSequenceArrayIntoSplayTableAndArchive(ReadSet * reads,
969 						SplayTable * table,
970 						char *filename, char* seqFilename)
971 {
972 	IDnum index;
973 	IDnum sequenceCount = reads->readCount;
974 	TightString *array;
975 	FILE *outfile = fopen(filename, "w");
976 	FILE *seqFile = NULL;
977 	IDnum kmerCount;
978 	IDnum referenceSequenceCount = 0;
979 	struct timeval start, end, diff;
980 	SequencesReader seqReadInfo;
981 	memset(&seqReadInfo, 0, sizeof(seqReadInfo));
982 	if (isCreateBinary()) {
983 		seqReadInfo.m_bIsBinary = true;
984 		seqReadInfo.m_pFile = openCnySeqForRead(seqFilename, &seqReadInfo.m_unifiedSeqFileHeader);
985 		if (!seqReadInfo.m_pFile) {
986 			exitErrorf(EXIT_FAILURE, true, "Could not open %s", seqFilename);
987 		}
988 		seqReadInfo.m_namesFilename = mallocOrExit(strlen(seqFilename) + sizeof(".names"), char);
989 		sprintf(seqReadInfo.m_namesFilename, "%s.names", seqFilename);
990 		seqReadInfo.m_numCategories = seqReadInfo.m_unifiedSeqFileHeader.m_numCategories;
991 		seqReadInfo.m_minSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_minSeqLen;
992 		seqReadInfo.m_maxSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_maxSeqLen;
993 		seqReadInfo.m_bIsRef = false;
994 		seqReadInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, uint8_t );
995 		seqReadInfo.m_pCurrentReadPtr = seqReadInfo.m_pReadBufEnd = 0;
996 
997 		resetCnySeqCurrentRead(&seqReadInfo);
998 	} else {
999 		seqReadInfo.m_bIsBinary = false;
1000 	}
1001 	IDnum ** mapReferenceIDs = NULL;
1002 	Coordinate ** mapCoords = NULL;
1003 	Coordinate * mapCount = NULL;
1004 
1005 	char line[MAXLINE];
1006 	char c;
1007 	IDnum seqID = 0;
1008 	long long_var;
1009 	long long longlong_var;
1010 	Coordinate maxCount = 20;
1011 	Coordinate counter = 0;
1012 	// DEBUG
1013 	Mask ** referenceMasks = NULL;
1014 
1015 	if (outfile == NULL)
1016 		exitErrorf(EXIT_FAILURE, true, "Couldn't write to file %s", filename);
1017 	else
1018 		velvetLog("Writing into roadmap file %s...\n", filename);
1019 
1020 	// Count reference sequences
1021 	for (index = 0; index < reads->readCount && reads->categories[index] == REFERENCE; index++)
1022 		referenceSequenceCount++;
1023 
1024 	velvetFprintf(outfile, "%ld\t%ld\t%i\t%hi\n", (long) sequenceCount, (long) referenceSequenceCount, table->WORDLENGTH, (short) table->double_strand);
1025 
1026 	if (reads->tSequences == NULL)
1027 		convertSequences(reads);
1028 
1029 	gettimeofday(&start, NULL);
1030 	array = reads->tSequences;
1031 
1032 #ifdef _OPENMP
1033 	if (omp_get_max_threads() == 1)
1034 	{
1035 		omp_set_num_threads(2);
1036 		omp_set_nested(0);
1037 	}
1038 	else
1039 		omp_set_nested(1);
1040 	initAnnotationBuffers();
1041 #else
1042 	annotationBuffer = newStringBuffer(BUFFER_SIZE);
1043 #endif
1044 
1045 	if (referenceSequenceCount && (kmerCount = countReferenceKmers(reads, table->WORDLENGTH)) > 0) {
1046 		table->kmerOccurenceTable = newKmerOccurenceTable(24 , table->WORDLENGTH);
1047 		allocateKmerOccurences(kmerCount, table->kmerOccurenceTable);
1048 		if (seqReadInfo.m_bIsBinary) {
1049 			referenceMasks = scanBinaryReferenceSequences(&seqReadInfo, referenceSequenceCount);
1050 			// binary seqs have no Ns so just advance past the references
1051 			for (index = 0; index < referenceSequenceCount; index++) {
1052 				TightString cmpString;
1053 				cmpString.length = seqReadInfo.m_currentReadLength;
1054 				cmpString.sequence = mallocOrExit((seqReadInfo.m_currentReadLength + 3) / 4, uint8_t );
1055 				getCnySeqNucl(&seqReadInfo, cmpString.sequence);
1056 				if (seqReadInfo.m_bIsRef) {
1057 					seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
1058 					// now the next ptr is advanced
1059 					seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
1060 					RefInfo refElem;
1061 					uint32_t refIdx;
1062 					for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
1063 						// not actually used so just read past refs
1064 						refElem.m_referenceID = readCnySeqUint32(&seqReadInfo);
1065 						refElem.m_pos = readCnySeqUint32(&seqReadInfo);
1066 					}
1067 				}
1068 				// optional test to ensure reference mapping seqIDs are in sync
1069 #if 0
1070 				TightString *tString;
1071 				tString = getTightStringInArray(array, index);
1072 				if (getLength(tString) != seqReadInfo.m_currentReadLength) {
1073 					velvetLog("Error: TightString len mismatch, %d != %ld\n", getLength(tString), seqReadInfo.m_currentReadLength);
1074 					exit(1);
1075 				}
1076 				char *str = readTightString(tString);
1077 				char *cmpStr = readTightString(&cmpString);
1078 				if (strcmp(str, cmpStr) != 0) {
1079 					printf("seq %s != cmp %s\n", str, cmpStr);
1080 					exit(1);
1081 				}
1082 				free(str);
1083 				free(cmpStr);
1084 #endif
1085 				advanceCnySeqCurrentRead(&seqReadInfo);
1086 				free(cmpString.sequence);
1087 			}
1088 		} else {
1089 			seqFile = fopen(seqFilename, "r");
1090 
1091 			if (seqFile == NULL)
1092 				exitErrorf(EXIT_FAILURE, true, "Couldn't write to file %s", seqFilename);
1093 			else
1094 				velvetLog("Reading mapping info from file %s\n", seqFilename);
1095 
1096 			seqReadInfo.m_pFile = seqFile;
1097 
1098 			// Skip through reference headers quickly
1099 			referenceMasks = scanReferenceSequences(seqFile, referenceSequenceCount);
1100 		}
1101 
1102 #ifdef _OPENMP
1103 		producing = 1;
1104 		#pragma omp parallel sections
1105 		{
1106 			#pragma omp section
1107 			{
1108 				bufferWritter(outfile);
1109 			}
1110 			#pragma omp section
1111 			{
1112 				#pragma omp parallel for
1113 #endif
1114 				for (index = 0; index < referenceSequenceCount; index++)
1115 					inputReferenceIntoSplayTable(getTightStringInArray(array, index),
1116 								     table, outfile, index + 1, referenceMasks[index]);
1117 
1118 #ifdef _OPENMP
1119 				for (index = omp_get_max_threads() - 1; index >= 0; index--)
1120 					pushBufferCommit(index);
1121 				producing = 0;
1122 				#pragma omp flush(producing)
1123 			}
1124 		}
1125 #endif
1126 
1127 		if (maskMemory)
1128 			destroyRecycleBin(maskMemory);
1129 		maskMemory = NULL;
1130 		sortKmerOccurenceTable(table->kmerOccurenceTable);
1131 	}
1132 
1133 	velvetLog("Inputting sequences...\n");
1134 
1135 	if (table->kmerOccurenceTable) {
1136 		mapReferenceIDs = callocOrExit(sequenceCount + 1, IDnum*);
1137 		mapCoords = callocOrExit(sequenceCount + 1, Coordinate *);
1138 		mapCount = callocOrExit(sequenceCount + 1, Coordinate);
1139 
1140 		RefInfo *refArray = NULL;
1141 		if (seqReadInfo.m_bIsBinary) {
1142 			TightString cmpString;
1143 			for (seqID = referenceSequenceCount + 1; seqID < sequenceCount + 1; seqID++) {
1144 				cmpString.length = seqReadInfo.m_currentReadLength;
1145 				cmpString.sequence = mallocOrExit((seqReadInfo.m_currentReadLength + 3) / 4, uint8_t );
1146 				getCnySeqNucl(&seqReadInfo, cmpString.sequence);
1147 				if (seqReadInfo.m_bIsRef) {
1148 					seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
1149 					// now the next ptr is advanced
1150 					seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
1151 					refArray = callocOrExit(seqReadInfo.m_refCnt, RefInfo);
1152 					uint32_t refIdx;
1153 					for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
1154 						refArray[refIdx].m_referenceID = readCnySeqUint32(&seqReadInfo);
1155 						refArray[refIdx].m_pos = readCnySeqUint32(&seqReadInfo);
1156 					}
1157 				}
1158 				// optional test to ensure reference mapping seqIDs are in sync
1159 #if 0
1160 				TightString *tString;
1161 				tString = getTightStringInArray(array, seqID - 1);
1162 				if (getLength(tString) != seqReadInfo.m_currentReadLength) {
1163 					velvetLog("Error: TightString len mismatch, %d != %ld\n", getLength(tString), seqReadInfo.m_currentReadLength);
1164 					exit(1);
1165 				}
1166 				char *str = readTightString(tString);
1167 				char *cmpStr = readTightString(&cmpString);
1168 				if (strcmp(str, cmpStr) != 0) {
1169 					printf("seq %s != cmp %s\n", str, cmpStr);
1170 					exit(1);
1171 				}
1172 				free(str);
1173 				free(cmpStr);
1174 #endif
1175 				free(cmpString.sequence);
1176 
1177 				// set prior count
1178 				mapCount[seqID - 1] = counter;
1179 				counter = 0;
1180 				maxCount = 20;
1181 				mapReferenceIDs[seqID] = callocOrExit(maxCount, IDnum);
1182 				mapCoords[seqID] = callocOrExit(maxCount, Coordinate);
1183 
1184 				if (seqReadInfo.m_bIsRef) {
1185 					while (counter < seqReadInfo.m_refCnt) {
1186 						mapReferenceIDs[seqID][counter] = (IDnum) refArray[counter].m_referenceID;
1187 						mapCoords[seqID][counter] = (Coordinate) refArray[counter].m_pos;
1188 
1189 						if (++counter == maxCount) {
1190 							maxCount *= 2;
1191 							mapReferenceIDs[seqID] = reallocOrExit(mapReferenceIDs[seqID], maxCount, IDnum);
1192 							mapCoords[seqID] = reallocOrExit(mapCoords[seqID], maxCount, Coordinate);
1193 						}
1194 					}
1195 					free(refArray);
1196 				}
1197 				advanceCnySeqCurrentRead(&seqReadInfo);
1198 			}
1199 		} else {
1200 			// Parse file for mapping info
1201 			while (seqFile && (c = getc(seqFile)) != EOF) {
1202 
1203 				if (c == '>') {
1204 					mapCount[seqID] = counter;
1205 					counter = 0;
1206 					maxCount = 20;
1207 					fgets(line, MAXLINE, seqFile);
1208 					sscanf(line,"%*[^\t]\t%li\t", &long_var);
1209 					seqID = (IDnum) long_var;
1210 					mapReferenceIDs[seqID] = callocOrExit(maxCount, IDnum);
1211 					mapCoords[seqID] = callocOrExit(maxCount, Coordinate);
1212 				} else if (c == 'M') {
1213 					fgets(line, MAXLINE, seqFile);
1214 					sscanf(line,"\t%li\t%lli\n", &long_var, &longlong_var);
1215 					mapReferenceIDs[seqID][counter] = (IDnum) long_var;
1216 					mapCoords[seqID][counter] = (Coordinate) longlong_var;
1217 
1218 					if (++counter == maxCount) {
1219 						maxCount *= 2;
1220 						mapReferenceIDs[seqID] = reallocOrExit(mapReferenceIDs[seqID], maxCount, IDnum);
1221 						mapCoords[seqID] = reallocOrExit(mapCoords[seqID], maxCount, Coordinate);
1222 					}
1223 				}
1224 			}
1225 		}
1226 	}
1227 	if (seqFile)
1228 		fclose(seqFile);
1229 
1230 	if (seqReadInfo.m_bIsBinary) {
1231 		if (seqReadInfo.m_pReadBuffer) {
1232 			free(seqReadInfo.m_pReadBuffer);
1233 		}
1234 		fclose(seqReadInfo.m_pFile);
1235 	}
1236 
1237 #ifdef _OPENMP
1238 	producing = 1;
1239 	#pragma omp parallel sections
1240 	{
1241 		#pragma omp section
1242 		{
1243 			bufferWritter(outfile);
1244 		}
1245 		#pragma omp section
1246 		{
1247 			#pragma omp parallel for
1248 #endif
1249 			for (index = referenceSequenceCount; index < sequenceCount; index++)
1250 			{
1251 				boolean second_in_pair;
1252 
1253 				// Progress report on screen
1254 				if (index % 1000000 == 0) {
1255 					velvetLog("Inputting sequence %li / %li\n",
1256 						  (long)index, (long)sequenceCount);
1257 					fflush(stdout);
1258 				}
1259 
1260 				// Test to make sure that all the reference reads are before all the other reads
1261 				if (reads->categories[index] == REFERENCE) {
1262 					velvetLog("Reference sequence placed after a non-reference read!\n");
1263 					velvetLog(">> Please re-order the filenames in your command line so as "
1264 						  "to have the reference sequence files before all the others\n");
1265 #ifdef DEBUG
1266 					abort();
1267 #endif
1268 					exit(0);
1269 				}
1270 				second_in_pair = reads->categories[index] % 2 && isSecondInPair(reads, index);
1271 
1272 				// Hashing the reads
1273 				if (table->kmerOccurenceTable)
1274 				    inputSequenceIntoSplayTable(array, table,
1275 								outfile,
1276 								second_in_pair, mapReferenceIDs[index + 1], mapCoords[index+1], mapCount[index+1], index + 1);
1277 				else
1278 				    inputSequenceIntoSplayTable(array, table,
1279 								outfile,
1280 								second_in_pair, NULL, NULL, 0, index + 1);
1281 			}
1282 #ifdef _OPENMP
1283 			for (index = omp_get_max_threads() - 1; index >= 0; index--)
1284 				pushBufferCommit(index);
1285 			producing = 0;
1286 			#pragma omp flush(producing)
1287 		}
1288 	}
1289 	destroyAnnotationBuffers();
1290 #else
1291 	destroyStringBuffer(annotationBuffer, 1);
1292 #endif
1293 
1294 	gettimeofday(&end, NULL);
1295 	timersub(&end, &start, &diff);
1296 	velvetLog(" === Sequences loaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
1297 
1298 	fclose(outfile);
1299 
1300 	if (mapReferenceIDs) {
1301 		free(mapReferenceIDs);
1302 		free(mapCoords);
1303 		free(mapCount);
1304 	}
1305 	if (referenceMasks) {
1306 		free(referenceMasks);
1307 	}
1308 	if (seqReadInfo.m_namesFilename) {
1309 		free(seqReadInfo.m_namesFilename);
1310 	}
1311 	//free(reads->tSequences);
1312 	//reads->tSequences = NULL;
1313 	//destroyReadSet(reads);
1314 	velvetLog("Done inputting sequences\n");
1315 }
1316