1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
4 This file is part of Velvet.
5
6 Velvet is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 Velvet is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Velvet; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <limits.h>
26 #include <sys/time.h>
27
28 #ifdef _OPENMP
29 #include <omp.h>
30 #endif
31
32 #include "globals.h"
33 #include "graph.h"
34 #include "passageMarker.h"
35 #include "readSet.h"
36 #include "tightString.h"
37 #include "recycleBin.h"
38 #include "utility.h"
39 #include "kmer.h"
40 #include "kmerOccurenceTable.h"
41 #include "roadMap.h"
42
43 #define ADENINE 0
44 #define CYTOSINE 1
45 #define GUANINE 2
46 #define THYMINE 3
47
48
49 //////////////////////////////////////////////////////////
50 // Node Locking
51 //////////////////////////////////////////////////////////
52
53 #ifdef _OPENMP
54
55 /* Array of per-node locks */
56
57 static omp_lock_t *nodeLocks = NULL;
58
59 static void
createNodeLocks(Graph * graph)60 createNodeLocks(Graph *graph)
61 {
62 IDnum nbNodes;
63 IDnum nodeIndex;
64
65 nbNodes = nodeCount(graph) + 1;
66 if (nodeLocks)
67 free (nodeLocks);
68 nodeLocks = mallocOrExit(nbNodes, omp_lock_t);
69
70 #pragma omp parallel for
71 for (nodeIndex = 0; nodeIndex < nbNodes; nodeIndex++)
72 omp_init_lock(nodeLocks + nodeIndex);
73 }
74
lockNode(Node * node)75 static inline void lockNode(Node *node)
76 {
77 IDnum nodeID = getNodeID(node);
78
79 if (nodeID < 0)
80 nodeID = -nodeID;
81 omp_set_lock (nodeLocks + nodeID);
82 }
83
84 /* Assumes node is already locked */
lockTwoNodes(Node * node,Node * node2)85 static inline void lockTwoNodes(Node *node, Node *node2)
86 {
87 IDnum nodeID = getNodeID(node);
88 IDnum node2ID = getNodeID(node2);
89
90 if (nodeID < 0)
91 nodeID = -nodeID;
92 if (node2ID < 0)
93 node2ID = -node2ID;
94
95 if (nodeID == node2ID)
96 return;
97
98 /* Lock lowest ID first to avoid deadlocks */
99 if (nodeID < node2ID)
100 {
101 omp_set_lock (nodeLocks + node2ID);
102 }
103 else if (!omp_test_lock (nodeLocks + node2ID))
104 {
105 omp_unset_lock (nodeLocks + nodeID);
106 omp_set_lock (nodeLocks + node2ID);
107 omp_set_lock (nodeLocks + nodeID);
108 }
109 }
110
unLockTwoNodes(Node * node,Node * node2)111 static inline void unLockTwoNodes(Node *node, Node *node2)
112 {
113 IDnum nodeID = getNodeID(node);
114 IDnum node2ID = getNodeID(node2);
115
116 if (nodeID < 0)
117 nodeID = -nodeID;
118 if (node2ID < 0)
119 node2ID = -node2ID;
120
121 omp_unset_lock (nodeLocks + nodeID);
122 if (nodeID != node2ID)
123 omp_unset_lock (nodeLocks + node2ID);
124 }
125
unLockNode(Node * node)126 static inline void unLockNode(Node *node)
127 {
128 IDnum nodeID = getNodeID(node);
129
130 if (nodeID < 0)
131 nodeID = -nodeID;
132 omp_unset_lock (nodeLocks + nodeID);
133 }
134
135 #endif
136
137 //////////////////////////////////////////////////////////
138 // Node Lists
139 //////////////////////////////////////////////////////////
140 typedef struct smallNodeList_st SmallNodeList;
141
142 struct smallNodeList_st {
143 Node *node;
144 SmallNodeList *next;
145 } ATTRIBUTE_PACKED;
146
147 static RecycleBin *smallNodeListMemory = NULL;
148
149 #define BLOCKSIZE 1000
150
151 #ifdef _OPENMP
initSmallNodeListMemory(void)152 static void initSmallNodeListMemory(void)
153 {
154 int n = omp_get_max_threads();
155
156 #pragma omp critical
157 {
158 if (smallNodeListMemory == NULL)
159 smallNodeListMemory = newRecycleBinArray(n, sizeof(SmallNodeList), BLOCKSIZE);
160 }
161 }
162 #endif
163
allocateSmallNodeList()164 static SmallNodeList *allocateSmallNodeList()
165 {
166 #ifdef _OPENMP
167 #ifdef DEBUG
168 if (smallNodeListMemory == NULL)
169 {
170 velvetLog("The memory for small nodes seems uninitialised, "
171 "this is probably a bug, aborting.\n");
172 abort();
173 }
174 #endif
175 return allocatePointer(getRecycleBinInArray(smallNodeListMemory,
176 omp_get_thread_num()));
177 #else
178 if (smallNodeListMemory == NULL)
179 smallNodeListMemory = newRecycleBin(sizeof(SmallNodeList), BLOCKSIZE);
180
181 return allocatePointer(smallNodeListMemory);
182 #endif
183 }
184
deallocateSmallNodeList(SmallNodeList * smallNodeList)185 static void deallocateSmallNodeList(SmallNodeList * smallNodeList)
186 {
187 #ifdef _OPENMP
188 deallocatePointer(getRecycleBinInArray(smallNodeListMemory,
189 omp_get_thread_num()),
190 smallNodeList);
191 #else
192 deallocatePointer(smallNodeListMemory, smallNodeList);
193 #endif
194 }
195
destroySmallNodeListMemmory(void)196 static void destroySmallNodeListMemmory(void)
197 {
198 if (smallNodeListMemory != NULL)
199 {
200 #ifdef _OPENMP
201 destroyRecycleBinArray(smallNodeListMemory);
202 #else
203 destroyRecycleBin(smallNodeListMemory);
204 #endif
205 smallNodeListMemory = NULL;
206 }
207 }
208
memorizeNode(Node * node,SmallNodeList ** nodePile)209 static inline void memorizeNode(Node * node, SmallNodeList ** nodePile)
210 {
211 SmallNodeList *list = allocateSmallNodeList();
212 list->node = node;
213 list->next = *nodePile;
214 *nodePile = list;
215 #ifndef _OPENMP
216 setSingleNodeStatus(node, true);
217 #endif
218 }
219
isNodeMemorized(Node * node,SmallNodeList * nodePile)220 static inline boolean isNodeMemorized(Node * node, SmallNodeList * nodePile)
221 {
222 #ifdef _OPENMP
223 /* SF TODO There must be a faster way to do this: bit mask, hash table, tree, ... ? */
224 SmallNodeList * list;
225
226 for (list = nodePile; list; list = list->next)
227 if (list->node == node)
228 return true;
229
230 return false;
231 #else
232 return getNodeStatus(node);
233 #endif
234 }
235
unMemorizeNodes(SmallNodeList ** nodePile)236 static void unMemorizeNodes(SmallNodeList ** nodePile)
237 {
238 SmallNodeList * list;
239
240 while (*nodePile) {
241 list = *nodePile;
242 *nodePile = list->next;
243 #ifndef _OPENMP
244 setSingleNodeStatus(list->node, false);
245 #endif
246 deallocateSmallNodeList(list);
247 }
248 }
249
250 ///////////////////////////////////////////////////////////
251 // Reference Mappings
252 ///////////////////////////////////////////////////////////
253 typedef struct referenceMapping_st ReferenceMapping;
254
255 struct referenceMapping_st {
256 IDnum referenceStart;
257 IDnum nodeStart;
258 IDnum length;
259 IDnum referenceID;
260 IDnum nodeID;
261 } ATTRIBUTE_PACKED;
262
countMappings(char * preGraphFilename)263 static IDnum countMappings(char * preGraphFilename) {
264 FILE *file = fopen(preGraphFilename, "r");
265 const int maxline = MAXLINE;
266 char line[MAXLINE];
267 IDnum count = 0;
268
269 // Go past NODE blocks
270 while(fgets(line, maxline, file))
271 if (line[0] == 'S')
272 break;
273
274 // Count relevant lines
275 while(fgets(line, maxline, file))
276 if (line[0] != 'S')
277 count++;
278
279 fclose(file);
280 return count;
281 }
282
recordReferenceMappings(char * preGraphFilename,IDnum arrayLength)283 static ReferenceMapping * recordReferenceMappings(char * preGraphFilename, IDnum arrayLength) {
284 ReferenceMapping * mappings = callocOrExit(arrayLength, ReferenceMapping);
285 FILE *file = fopen(preGraphFilename, "r");
286 const int maxline = MAXLINE;
287 char line[MAXLINE];
288 ReferenceMapping * current = mappings;
289 IDnum referenceID;
290 long long_var;
291 long long coord1, coord2, coord3;
292
293 // Go past NODE blocks
294 while(fgets(line, maxline, file))
295 if (line[0] == 'S')
296 break;
297
298 sscanf(line, "SEQ\t%li\n", &long_var);
299 referenceID = long_var;
300
301 // Go relevant lines
302 while(fgets(line, maxline, file)) {
303 if (line[0] != 'S') {
304 sscanf(line, "%li\t%lli\t%lli\t%lli\n", &long_var, &coord1, &coord2, &coord3);
305 current->referenceID = referenceID;
306 current->nodeID = long_var;
307 current->nodeStart = coord1;
308 current->referenceStart = coord2;
309 current->length = coord3;
310 current++;
311 } else {
312 sscanf(line, "SEQ\t%li\n", &long_var);
313 referenceID = long_var;
314 }
315 }
316
317 fclose(file);
318 return mappings;
319 }
320
compareRefMaps(const void * ptrA,const void * ptrB)321 static int compareRefMaps(const void * ptrA, const void * ptrB) {
322 ReferenceMapping * A = (ReferenceMapping *) ptrA;
323 ReferenceMapping * B = (ReferenceMapping *) ptrB;
324
325 if (A->referenceID > B->referenceID)
326 return 1;
327 else if (A->referenceID < B->referenceID)
328 return -1;
329 else {
330 if (A->referenceStart >= B->referenceStart + B->length)
331 return 1;
332 else if (A->referenceStart + A->length <= B->referenceStart)
333 return -1;
334 else
335 return 0;
336 }
337 }
338
computeReferenceMappings(char * preGraphFilename,ReadSet * reads,Coordinate * referenceMappingLength,IDnum * referenceCount)339 static ReferenceMapping * computeReferenceMappings(char * preGraphFilename, ReadSet * reads, Coordinate * referenceMappingLength, IDnum * referenceCount) {
340 IDnum index;
341 ReferenceMapping * referenceMappings;
342
343 for(index = 0; index < reads->readCount && reads->categories[index] == 2 * CATEGORIES + 2; index++)
344 (*referenceCount)++;
345
346 if (*referenceCount == 0) {
347 *referenceMappingLength = 0;
348 return NULL;
349 }
350
351 *referenceMappingLength = countMappings(preGraphFilename);
352
353 if (*referenceMappingLength == 0)
354 return NULL;
355
356 referenceMappings = recordReferenceMappings(preGraphFilename, *referenceMappingLength);
357 qsort(referenceMappings, *referenceMappingLength, sizeof(ReferenceMapping), compareRefMaps);
358
359 return referenceMappings;
360 }
361
findReferenceMapping(IDnum seqID,Coordinate refCoord,ReferenceMapping * referenceMappings,Coordinate referenceMappingCount)362 static ReferenceMapping * findReferenceMapping(IDnum seqID, Coordinate refCoord, ReferenceMapping * referenceMappings, Coordinate referenceMappingCount) {
363 IDnum positive_seqID;
364 Coordinate leftIndex = 0;
365 Coordinate rightIndex = referenceMappingCount - 1;
366 Coordinate middleIndex;
367 ReferenceMapping refMap;
368 int comparison;
369
370 if (seqID > 0)
371 positive_seqID = seqID;
372 else
373 positive_seqID = -seqID;
374
375 refMap.referenceID = positive_seqID;
376 refMap.referenceStart = refCoord;
377 refMap.length = 1;
378 refMap.nodeStart = 0;
379 refMap.nodeID = 0;
380
381 if (compareRefMaps(&(referenceMappings[leftIndex]), &refMap) == 0)
382 return &(referenceMappings[leftIndex]);
383 if (compareRefMaps(&(referenceMappings[rightIndex]), &refMap) == 0)
384 return &(referenceMappings[rightIndex]);
385
386 while (true) {
387 middleIndex = (rightIndex + leftIndex) / 2;
388 comparison = compareRefMaps(&(referenceMappings[middleIndex]), &refMap);
389
390 if (leftIndex >= rightIndex)
391 return NULL;
392 else if (comparison == 0)
393 return &(referenceMappings[middleIndex]);
394 else if (leftIndex == middleIndex)
395 return NULL;
396 else if (comparison > 0)
397 rightIndex = middleIndex;
398 else
399 leftIndex = middleIndex;
400 }
401 }
402
403 ///////////////////////////////////////////////////////////
404 // Node Mask
405 ///////////////////////////////////////////////////////////
406
407 typedef struct nodeMask_st NodeMask;
408
409 struct nodeMask_st {
410 IDnum nodeID;
411 IDnum start;
412 IDnum finish;
413 } ATTRIBUTE_PACKED;
414
compareNodeMasks(const void * ptrA,const void * ptrB)415 static int compareNodeMasks(const void * ptrA, const void * ptrB) {
416 NodeMask * A = (NodeMask *) ptrA;
417 NodeMask * B = (NodeMask *) ptrB;
418
419 if (A->nodeID < B->nodeID)
420 return -1;
421 else if (A->nodeID > B->nodeID)
422 return 1;
423 else {
424 if (A->start < B->start)
425 return -1;
426 else if (A->start > B->start)
427 return 1;
428 else
429 return 0;
430 }
431 }
432
computeNodeMasks(ReferenceMapping * referenceMappings,Coordinate arrayLength,Graph * graph)433 static NodeMask * computeNodeMasks(ReferenceMapping * referenceMappings, Coordinate arrayLength, Graph * graph) {
434 NodeMask * nodeMasks;
435 NodeMask * currentMask;
436 ReferenceMapping * currentMapping = referenceMappings;
437 Coordinate index;
438
439 if (referenceMappings == NULL)
440 return NULL;
441
442 nodeMasks = callocOrExit(arrayLength, NodeMask);
443 currentMask = nodeMasks;
444
445 for (index = 0; index < arrayLength; index++) {
446 if (currentMapping->nodeID > 0) {
447 currentMask->nodeID = currentMapping->nodeID;
448 } else {
449 currentMask->nodeID = -currentMapping->nodeID;
450 }
451 currentMask->start = currentMapping->nodeStart;
452 currentMask->finish = currentMapping->nodeStart + currentMapping->length;
453 currentMask++;
454 currentMapping++;
455 }
456
457 qsort(nodeMasks, arrayLength, sizeof(NodeMask), compareNodeMasks);
458
459 return nodeMasks;
460 }
461
462 ///////////////////////////////////////////////////////////
463 // Process
464 ///////////////////////////////////////////////////////////
465
referenceGraphKmers(char * preGraphFilename,short int accelerationBits,Graph * graph,boolean double_strand,NodeMask * nodeMasks,Coordinate nodeMaskCount)466 static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename,
467 short int accelerationBits, Graph * graph, boolean double_strand, NodeMask * nodeMasks, Coordinate nodeMaskCount)
468 {
469 FILE *file = fopen(preGraphFilename, "r");
470 const int maxline = MAXLINE;
471 char line[MAXLINE];
472 char c;
473 int wordLength;
474 Coordinate lineLength, kmerCount;
475 Kmer word;
476 Kmer antiWord;
477 KmerOccurenceTable *kmerTable;
478 IDnum index;
479 IDnum nodeID = 0;
480 Nucleotide nucleotide;
481 NodeMask * nodeMask = nodeMasks;
482 Coordinate nodeMaskIndex = 0;
483
484 if (file == NULL)
485 exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
486
487 // Count kmers
488 velvetLog("Scanning pre-graph file %s for k-mers\n",
489 preGraphFilename);
490
491 // First line
492 if (!fgets(line, maxline, file))
493 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
494 sscanf(line, "%*i\t%*i\t%i\n", &wordLength);
495
496 kmerTable = newKmerOccurenceTable(accelerationBits, wordLength);
497
498 // Read nodes
499 if (!fgets(line, maxline, file))
500 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
501 kmerCount = 0;
502 while (line[0] == 'N') {
503 lineLength = 0;
504 while ((c = getc(file)) != EOF && c != '\n')
505 lineLength++;
506 kmerCount += lineLength - wordLength + 1;
507 if (fgets(line, maxline, file) == NULL)
508 break;
509 }
510
511 velvetLog("%li kmers found\n", (long) kmerCount);
512
513 for(nodeMaskIndex = 0; nodeMaskIndex < nodeMaskCount; nodeMaskIndex++) {
514 kmerCount -= nodeMasks[nodeMaskIndex].finish -
515 nodeMasks[nodeMaskIndex].start;
516 }
517
518 nodeMaskIndex = 0;
519
520 fclose(file);
521
522 // Create table
523 allocateKmerOccurences(kmerCount, kmerTable);
524
525 // Fill table
526 file = fopen(preGraphFilename, "r");
527 if (file == NULL)
528 exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
529
530 if (!fgets(line, maxline, file))
531 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
532
533 // Read nodes
534 if (!fgets(line, maxline, file))
535 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
536 while (line[0] == 'N') {
537 nodeID++;
538
539 // Fill in the initial word :
540 clearKmer(&word);
541 clearKmer(&antiWord);
542
543 for (index = 0; index < wordLength - 1; index++) {
544 c = getc(file);
545 if (c == 'A')
546 nucleotide = ADENINE;
547 else if (c == 'C')
548 nucleotide = CYTOSINE;
549 else if (c == 'G')
550 nucleotide = GUANINE;
551 else if (c == 'T')
552 nucleotide = THYMINE;
553 else if (c == '\n')
554 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
555 else
556 nucleotide = ADENINE;
557
558
559 pushNucleotide(&word, nucleotide);
560 if (double_strand) {
561 #ifdef COLOR
562 reversePushNucleotide(&antiWord, nucleotide);
563 #else
564 reversePushNucleotide(&antiWord, 3 - nucleotide);
565 #endif
566 }
567 }
568
569 // Scan through node
570 index = 0;
571 while((c = getc(file)) != '\n' && c != EOF) {
572 if (c == 'A')
573 nucleotide = ADENINE;
574 else if (c == 'C')
575 nucleotide = CYTOSINE;
576 else if (c == 'G')
577 nucleotide = GUANINE;
578 else if (c == 'T')
579 nucleotide = THYMINE;
580 else
581 nucleotide = ADENINE;
582
583 pushNucleotide(&word, nucleotide);
584 if (double_strand) {
585 #ifdef COLOR
586 reversePushNucleotide(&antiWord, nucleotide);
587 #else
588 reversePushNucleotide(&antiWord, 3 - nucleotide);
589 #endif
590 }
591
592 // Update mask if necessary
593 if (nodeMask) {
594 if (nodeMask->nodeID < nodeID || (nodeMask->nodeID == nodeID && index >= nodeMask->finish)) {
595 if (++nodeMaskIndex == nodeMaskCount)
596 nodeMask = NULL;
597 else
598 nodeMask++;
599 }
600 }
601
602 // Check if not masked!
603 if (nodeMask) {
604 if (nodeMask->nodeID == nodeID && index >= nodeMask->start && index < nodeMask->finish) {
605 index++;
606 continue;
607 }
608 }
609
610 if (!double_strand || compareKmers(&word, &antiWord) <= 0)
611 recordKmerOccurence(&word, nodeID, index, kmerTable);
612 else
613 recordKmerOccurence(&antiWord, -nodeID, getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index, kmerTable);
614
615 index++;
616 }
617
618 if (fgets(line, maxline, file) == NULL)
619 break;
620 }
621
622 fclose(file);
623
624 // Sort table
625 sortKmerOccurenceTable(kmerTable);
626
627 return kmerTable;
628 }
629
ghostThreadSequenceThroughGraph(TightString * tString,KmerOccurenceTable * kmerTable,Graph * graph,IDnum seqID,Category category,boolean readTracking,boolean double_strand,ReferenceMapping * referenceMappings,Coordinate referenceMappingCount,IDnum refCount,Annotation * annotations,IDnum annotationCount,boolean second_in_pair)630 static void ghostThreadSequenceThroughGraph(TightString * tString,
631 KmerOccurenceTable *
632 kmerTable, Graph * graph,
633 IDnum seqID, Category category,
634 boolean readTracking,
635 boolean double_strand,
636 ReferenceMapping * referenceMappings,
637 Coordinate referenceMappingCount,
638 IDnum refCount,
639 Annotation * annotations,
640 IDnum annotationCount,
641 boolean second_in_pair)
642 {
643 Kmer word;
644 Kmer antiWord;
645 Coordinate readNucleotideIndex;
646 KmerOccurence *kmerOccurence;
647 int wordLength = getWordLength(graph);
648 Nucleotide nucleotide;
649 IDnum refID;
650 Coordinate refCoord;
651 ReferenceMapping * refMap = NULL;
652 Coordinate uniqueIndex = 0;
653 Coordinate annotIndex = 0;
654 IDnum annotCount = 0;
655 boolean reversed;
656 SmallNodeList * nodePile = NULL;
657 Annotation * annotation = annotations;
658
659 Node *node = NULL;
660 Node *previousNode = NULL;
661
662 // Neglect any read which will not be short paired
663 if ((!readTracking && category % 2 == 0)
664 || category / 2 >= CATEGORIES)
665 return;
666
667 // Neglect any string shorter than WORDLENGTH :
668 if (getLength(tString) < wordLength)
669 return;
670
671 // Verify that all short reads are reasonnably short
672 if (getLength(tString) > USHRT_MAX) {
673 velvetLog("Short read of length %lli, longer than limit %i\n",
674 (long long) getLength(tString), SHRT_MAX);
675 velvetLog("You should better declare this sequence as long, because it genuinely is!\n");
676 exit(1);
677 }
678
679 clearKmer(&word);
680 clearKmer(&antiWord);
681
682 // Fill in the initial word :
683 for (readNucleotideIndex = 0;
684 readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
685 nucleotide = getNucleotide(readNucleotideIndex, tString);
686 pushNucleotide(&word, nucleotide);
687 if (double_strand || second_in_pair) {
688 #ifdef COLOR
689 reversePushNucleotide(&antiWord, nucleotide);
690 #else
691 reversePushNucleotide(&antiWord, 3 - nucleotide);
692 #endif
693 }
694 }
695
696 // Go through sequence
697 while (readNucleotideIndex < getLength(tString)) {
698 // Shift word:
699 nucleotide = getNucleotide(readNucleotideIndex++, tString);
700 pushNucleotide(&word, nucleotide);
701 if (double_strand || second_in_pair) {
702 #ifdef COLOR
703 reversePushNucleotide(&antiWord, nucleotide);
704 #else
705 reversePushNucleotide(&antiWord, 3 - nucleotide);
706 #endif
707 }
708
709 // Update annotation if necessary
710 if (annotCount < annotationCount && annotIndex == getAnnotationLength(annotation)) {
711 annotation = getNextAnnotation(annotation);
712 annotCount++;
713 annotIndex = 0;
714 }
715
716 // Search for reference mapping
717 if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) {
718 refID = getAnnotSequenceID(annotation);
719 if (refID > 0)
720 refCoord = getStart(annotation) + annotIndex;
721 else
722 refCoord = getStart(annotation) - annotIndex;
723
724 refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount);
725 // If success
726 if (refMap) {
727 if (refID > 0)
728 node = getNodeInGraph(graph, refMap->nodeID);
729 else
730 node = getNodeInGraph(graph, -refMap->nodeID);
731 } else {
732 node = NULL;
733 if (previousNode)
734 break;
735 }
736 }
737 // if not.. look in table
738 else {
739 reversed = false;
740 if (double_strand) {
741 if (compareKmers(&word, &antiWord) <= 0) {
742 kmerOccurence =
743 findKmerInKmerOccurenceTable(&word,
744 kmerTable);
745 } else {
746 kmerOccurence =
747 findKmerInKmerOccurenceTable(&antiWord,
748 kmerTable);
749 reversed = true;
750 }
751 } else {
752 if (!second_in_pair) {
753 kmerOccurence =
754 findKmerInKmerOccurenceTable(&word,
755 kmerTable);
756 } else {
757 kmerOccurence =
758 findKmerInKmerOccurenceTable(&antiWord,
759 kmerTable);
760 reversed = true;
761 }
762 }
763
764 if (kmerOccurence) {
765 if (!reversed)
766 node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
767 else
768 node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
769 } else {
770 node = NULL;
771 if (previousNode)
772 break;
773 }
774
775 }
776
777 if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation))
778 annotIndex++;
779 else
780 uniqueIndex++;
781
782 previousNode = node;
783
784 // Fill in graph
785 if (node && !isNodeMemorized(node, nodePile))
786 {
787 #ifdef _OPENMP
788 lockNode(node);
789 #endif
790 incrementReadStartCount(node, graph);
791 #ifdef _OPENMP
792 unLockNode(node);
793 #endif
794 memorizeNode(node, &nodePile);
795 }
796 }
797
798 unMemorizeNodes(&nodePile);
799 }
800
threadSequenceThroughGraph(TightString * tString,KmerOccurenceTable * kmerTable,Graph * graph,IDnum seqID,Category category,boolean readTracking,boolean double_strand,ReferenceMapping * referenceMappings,Coordinate referenceMappingCount,IDnum refCount,Annotation * annotations,IDnum annotationCount,boolean second_in_pair)801 static void threadSequenceThroughGraph(TightString * tString,
802 KmerOccurenceTable * kmerTable,
803 Graph * graph,
804 IDnum seqID, Category category,
805 boolean readTracking,
806 boolean double_strand,
807 ReferenceMapping * referenceMappings,
808 Coordinate referenceMappingCount,
809 IDnum refCount,
810 Annotation * annotations,
811 IDnum annotationCount,
812 boolean second_in_pair)
813 {
814 Kmer word;
815 Kmer antiWord;
816 Coordinate readNucleotideIndex;
817 Coordinate kmerIndex;
818 KmerOccurence *kmerOccurence;
819 int wordLength = getWordLength(graph);
820
821 PassageMarkerI marker = NULL_IDX;
822 PassageMarkerI previousMarker = NULL_IDX;
823 Node *node = NULL;
824 Node *previousNode = NULL;
825 Coordinate coord = 0;
826 Coordinate previousCoord = 0;
827 Nucleotide nucleotide;
828 boolean reversed;
829
830 IDnum refID;
831 Coordinate refCoord = 0;
832 ReferenceMapping * refMap;
833 Annotation * annotation = annotations;
834 Coordinate index = 0;
835 Coordinate uniqueIndex = 0;
836 Coordinate annotIndex = 0;
837 IDnum annotCount = 0;
838 SmallNodeList * nodePile = NULL;
839
840 // Neglect any string shorter than WORDLENGTH :
841 if (getLength(tString) < wordLength)
842 return;
843
844 clearKmer(&word);
845 clearKmer(&antiWord);
846
847 // Fill in the initial word :
848 for (readNucleotideIndex = 0;
849 readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
850 nucleotide = getNucleotide(readNucleotideIndex, tString);
851 pushNucleotide(&word, nucleotide);
852 if (double_strand || second_in_pair) {
853 #ifdef COLOR
854 reversePushNucleotide(&antiWord, nucleotide);
855 #else
856 reversePushNucleotide(&antiWord, 3 - nucleotide);
857 #endif
858 }
859 }
860
861 // Go through sequence
862 // printf("len %d\n", getLength(tString));
863 while (readNucleotideIndex < getLength(tString)) {
864 nucleotide = getNucleotide(readNucleotideIndex++, tString);
865 pushNucleotide(&word, nucleotide);
866 if (double_strand || second_in_pair) {
867 #ifdef COLOR
868 reversePushNucleotide(&antiWord, nucleotide);
869 #else
870 reversePushNucleotide(&antiWord, 3 - nucleotide);
871 #endif
872 }
873
874 // Update annotation if necessary
875 if (annotCount < annotationCount && annotIndex == getAnnotationLength(annotation)) {
876 annotation = getNextAnnotation(annotation);
877 annotCount++;
878 annotIndex = 0;
879 }
880
881 // Search for reference mapping
882 if (category == REFERENCE) {
883 if (referenceMappings)
884 refMap = findReferenceMapping(seqID, index, referenceMappings, referenceMappingCount);
885 else
886 refMap = NULL;
887
888 if (refMap) {
889 node = getNodeInGraph(graph, refMap->nodeID);
890 if (refMap->nodeID > 0) {
891 coord = refMap->nodeStart + (index - refMap->referenceStart);
892 } else {
893 coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (index - refMap->referenceStart);
894 }
895 } else {
896 node = NULL;
897 }
898 }
899 // Search for reference-based mapping
900 else if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) {
901 refID = getAnnotSequenceID(annotation);
902 if (refID > 0)
903 refCoord = getStart(annotation) + annotIndex;
904 else
905 refCoord = getStart(annotation) - annotIndex;
906
907 refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount);
908 // If success
909 if (refMap) {
910 if (refID > 0) {
911 node = getNodeInGraph(graph, refMap->nodeID);
912 if (refMap->nodeID > 0) {
913 coord = refMap->nodeStart + (refCoord - refMap->referenceStart);
914 } else {
915 coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (refCoord - refMap->referenceStart);
916 }
917 } else {
918 node = getNodeInGraph(graph, -refMap->nodeID);
919 if (refMap->nodeID > 0) {
920 coord = getNodeLength(node) - refMap->nodeStart - (refCoord - refMap->referenceStart) - 1;
921 } else {
922 coord = refMap->nodeStart + refMap->length - (refCoord - refMap->referenceStart) - 1;
923 }
924 }
925 } else {
926 node = NULL;
927 if (previousNode)
928 break;
929 }
930 }
931 // Search in table
932 else {
933 reversed = false;
934 if (double_strand) {
935 if (compareKmers(&word, &antiWord) <= 0) {
936 kmerOccurence =
937 findKmerInKmerOccurenceTable(&word,
938 kmerTable);
939 } else {
940 kmerOccurence =
941 findKmerInKmerOccurenceTable(&antiWord,
942 kmerTable);
943 reversed = true;
944 }
945 } else {
946 if (!second_in_pair) {
947 kmerOccurence =
948 findKmerInKmerOccurenceTable(&word,
949 kmerTable);
950 } else {
951 kmerOccurence =
952 findKmerInKmerOccurenceTable(&antiWord,
953 kmerTable);
954 reversed = true;
955 }
956 }
957
958 if (kmerOccurence) {
959 if (!reversed) {
960 node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
961 coord = getKmerOccurencePosition(kmerOccurence);
962 } else {
963 node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
964 coord = getNodeLength(node) - getKmerOccurencePosition(kmerOccurence) - 1;
965 }
966 } else {
967 node = NULL;
968 if (previousNode)
969 break;
970 }
971 }
972
973 // Increment positions
974 if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation))
975 annotIndex++;
976 else
977 uniqueIndex++;
978
979 // Fill in graph
980 if (node)
981 {
982 #ifdef _OPENMP
983 lockNode(node);
984 #endif
985 kmerIndex = readNucleotideIndex - wordLength;
986
987 if (previousNode == node
988 && previousCoord == coord - 1) {
989 if (category / 2 >= CATEGORIES) {
990 setPassageMarkerFinish(marker,
991 kmerIndex +
992 1);
993 setFinishOffset(marker,
994 getNodeLength(node)
995 - coord - 1);
996 } else {
997 #ifndef SINGLE_COV_CAT
998 incrementVirtualCoverage(node, category / 2, 1);
999 incrementOriginalVirtualCoverage(node, category / 2, 1);
1000 #else
1001 incrementVirtualCoverage(node, 1);
1002 #endif
1003 }
1004 #ifdef _OPENMP
1005 unLockNode(node);
1006 #endif
1007 } else {
1008 if (category / 2 >= CATEGORIES) {
1009 marker =
1010 newPassageMarker(seqID,
1011 kmerIndex,
1012 kmerIndex + 1,
1013 coord,
1014 getNodeLength
1015 (node) -
1016 coord - 1);
1017 transposePassageMarker(marker,
1018 node);
1019 connectPassageMarkers
1020 (previousMarker, marker,
1021 graph);
1022 previousMarker = marker;
1023 } else {
1024 if (readTracking) {
1025 if (!isNodeMemorized(node, nodePile)) {
1026 addReadStart(node,
1027 seqID,
1028 coord,
1029 graph,
1030 kmerIndex);
1031 memorizeNode(node, &nodePile);
1032 } else {
1033 blurLastShortReadMarker
1034 (node, graph);
1035 }
1036 }
1037
1038 #ifndef SINGLE_COV_CAT
1039 incrementVirtualCoverage(node, category / 2, 1);
1040 incrementOriginalVirtualCoverage(node, category / 2, 1);
1041 #else
1042 incrementVirtualCoverage(node, 1);
1043 #endif
1044 }
1045 #ifdef _OPENMP
1046 lockTwoNodes(node, previousNode);
1047 #endif
1048 if (category != REFERENCE)
1049 createArc(previousNode, node, graph);
1050 #ifdef _OPENMP
1051 unLockTwoNodes(node, previousNode);
1052 #endif
1053 }
1054
1055 previousNode = node;
1056 previousCoord = coord;
1057 }
1058 index++;
1059 }
1060 // printKmer(&word);
1061
1062 if (readTracking && category / 2 < CATEGORIES)
1063 unMemorizeNodes(&nodePile);
1064 }
1065
fillUpGraph(ReadSet * reads,KmerOccurenceTable * kmerTable,Graph * graph,boolean readTracking,boolean double_strand,ReferenceMapping * referenceMappings,Coordinate referenceMappingCount,IDnum refCount,char * roadmapFilename)1066 static void fillUpGraph(ReadSet * reads,
1067 KmerOccurenceTable * kmerTable,
1068 Graph * graph,
1069 boolean readTracking,
1070 boolean double_strand,
1071 ReferenceMapping * referenceMappings,
1072 Coordinate referenceMappingCount,
1073 IDnum refCount,
1074 char * roadmapFilename)
1075 {
1076 IDnum readIndex;
1077 RoadMapArray *roadmap = NULL;
1078 Coordinate *annotationOffset = NULL;
1079 struct timeval start, end, diff;
1080
1081 if (referenceMappings)
1082 {
1083 roadmap = importRoadMapArray(roadmapFilename);
1084 annotationOffset = callocOrExit(reads->readCount, Coordinate);
1085 for (readIndex = 1; readIndex < reads->readCount; readIndex++)
1086 annotationOffset[readIndex] = annotationOffset[readIndex - 1]
1087 + getAnnotationCount(getRoadMapInArray(roadmap, readIndex - 1));
1088 }
1089
1090 resetNodeStatus(graph);
1091 // Allocate memory for the read pairs
1092 if (!readStartsAreActivated(graph))
1093 activateReadStarts(graph);
1094
1095 gettimeofday(&start, NULL);
1096 #ifdef _OPENMP
1097 initSmallNodeListMemory();
1098 createNodeLocks(graph);
1099 #pragma omp parallel for
1100 #endif
1101 for (readIndex = refCount; readIndex < reads->readCount; readIndex++)
1102 {
1103 Annotation * annotations = NULL;
1104 IDnum annotationCount = 0;
1105 Category category;
1106 boolean second_in_pair;
1107
1108 if (readIndex % 1000000 == 0)
1109 velvetLog("Ghost Threading through reads %ld / %ld\n",
1110 (long) readIndex, (long) reads->readCount);
1111
1112 category = reads->categories[readIndex];
1113 second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex);
1114
1115 if (referenceMappings)
1116 {
1117 annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
1118 annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
1119 }
1120
1121 ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
1122 kmerTable,
1123 graph, readIndex + 1,
1124 category,
1125 readTracking, double_strand,
1126 referenceMappings, referenceMappingCount,
1127 refCount, annotations, annotationCount,
1128 second_in_pair);
1129 }
1130 createNodeReadStartArrays(graph);
1131 gettimeofday(&end, NULL);
1132 timersub(&end, &start, &diff);
1133 velvetLog(" === Ghost-Threaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
1134
1135 gettimeofday(&start, NULL);
1136 #ifdef _OPENMP
1137 int threads = omp_get_max_threads();
1138 if (threads > 32)
1139 threads = 32;
1140
1141 #pragma omp parallel for num_threads(threads)
1142 #endif
1143 for (readIndex = 0; readIndex < reads->readCount; readIndex++)
1144 {
1145 Annotation * annotations = NULL;
1146 IDnum annotationCount = 0;
1147 Category category;
1148 boolean second_in_pair;
1149
1150 if (readIndex % 1000000 == 0)
1151 velvetLog("Threading through reads %li / %li\n",
1152 (long) readIndex, (long) reads->readCount);
1153
1154 category = reads->categories[readIndex];
1155 second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex);
1156
1157 if (referenceMappings)
1158 {
1159 annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
1160 annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
1161 }
1162
1163 threadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
1164 kmerTable,
1165 graph, readIndex + 1, category,
1166 readTracking, double_strand,
1167 referenceMappings, referenceMappingCount,
1168 refCount, annotations, annotationCount, second_in_pair);
1169 }
1170 gettimeofday(&end, NULL);
1171 timersub(&end, &start, &diff);
1172 velvetLog(" === Threaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
1173
1174 #ifdef _OPENMP
1175 free(nodeLocks);
1176 nodeLocks = NULL;
1177 #endif
1178
1179 if (referenceMappings)
1180 {
1181 destroyRoadMapArray(roadmap);
1182 free (annotationOffset);
1183 }
1184
1185 orderNodeReadStartArrays(graph);
1186
1187 destroySmallNodeListMemmory();
1188
1189 destroyKmerOccurenceTable(kmerTable);
1190 }
1191
importPreGraph(char * preGraphFilename,ReadSet * reads,char * roadmapFilename,boolean readTracking,short int accelerationBits)1192 Graph *importPreGraph(char *preGraphFilename, ReadSet * reads, char * roadmapFilename,
1193 boolean readTracking, short int accelerationBits)
1194 {
1195 boolean double_strand = false;
1196 Graph *graph = readPreGraphFile(preGraphFilename, &double_strand);
1197 Coordinate referenceMappingCount = 0;
1198 IDnum referenceCount = 0;
1199
1200 if (nodeCount(graph) == 0)
1201 return graph;
1202
1203 // If necessary compile reference -> node
1204 ReferenceMapping * referenceMappings = computeReferenceMappings(preGraphFilename, reads, &referenceMappingCount, &referenceCount);
1205 // Node -> reference maps
1206 NodeMask * nodeMasks = computeNodeMasks(referenceMappings, referenceMappingCount, graph);
1207
1208 // Map k-mers to nodes
1209 KmerOccurenceTable *kmerTable =
1210 referenceGraphKmers(preGraphFilename, accelerationBits, graph, double_strand, nodeMasks, referenceMappingCount);
1211
1212 free(nodeMasks);
1213
1214 // Map sequences -> kmers -> nodes
1215 fillUpGraph(reads, kmerTable, graph, readTracking, double_strand, referenceMappings, referenceMappingCount, referenceCount, roadmapFilename);
1216
1217 free(referenceMappings);
1218
1219 return graph;
1220 }
1221
addReadsToGraph(TightString * tString,KmerOccurenceTable * kmerTable,Graph * graph,IDnum seqID,Category category,boolean readTracking,boolean double_strand,boolean second_in_pair)1222 static void addReadsToGraph(TightString * tString,
1223 KmerOccurenceTable * kmerTable,
1224 Graph * graph,
1225 IDnum seqID, Category category,
1226 boolean readTracking,
1227 boolean double_strand,
1228 boolean second_in_pair)
1229 {
1230 Kmer word;
1231 Kmer antiWord;
1232 Coordinate readNucleotideIndex;
1233 Coordinate kmerIndex;
1234 KmerOccurence *kmerOccurence;
1235 int wordLength = getWordLength(graph);
1236
1237 Node *node = NULL;
1238 Node *previousNode = NULL;
1239 Coordinate coord = 0;
1240 Coordinate previousCoord = 0;
1241 Nucleotide nucleotide;
1242 boolean reversed;
1243
1244 Coordinate index = 0;
1245 SmallNodeList * nodePile = NULL;
1246
1247 // Neglect any read which will not be short paired
1248 if (category / 2 >= CATEGORIES)
1249 return;
1250
1251 // Neglect any string shorter than WORDLENGTH :
1252 if (getLength(tString) < wordLength)
1253 return;
1254
1255 clearKmer(&word);
1256 clearKmer(&antiWord);
1257
1258 // Fill in the initial word :
1259 for (readNucleotideIndex = 0;
1260 readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
1261 nucleotide = getNucleotide(readNucleotideIndex, tString);
1262 pushNucleotide(&word, nucleotide);
1263 if (double_strand || second_in_pair) {
1264 #ifdef COLOR
1265 reversePushNucleotide(&antiWord, nucleotide);
1266 #else
1267 reversePushNucleotide(&antiWord, 3 - nucleotide);
1268 #endif
1269 }
1270 }
1271
1272 // Go through sequence
1273 // printf("len %d\n", getLength(tString));
1274 while (readNucleotideIndex < getLength(tString)) {
1275 nucleotide = getNucleotide(readNucleotideIndex++, tString);
1276 pushNucleotide(&word, nucleotide);
1277 if (double_strand || second_in_pair) {
1278 #ifdef COLOR
1279 reversePushNucleotide(&antiWord, nucleotide);
1280 #else
1281 reversePushNucleotide(&antiWord, 3 - nucleotide);
1282 #endif
1283 }
1284
1285 // Search in table
1286 reversed = false;
1287 if (double_strand) {
1288 if (compareKmers(&word, &antiWord) <= 0) {
1289 kmerOccurence =
1290 findKmerInKmerOccurenceTable(&word,
1291 kmerTable);
1292 } else {
1293 kmerOccurence =
1294 findKmerInKmerOccurenceTable(&antiWord,
1295 kmerTable);
1296 reversed = true;
1297 }
1298 } else {
1299 if (!second_in_pair) {
1300 kmerOccurence =
1301 findKmerInKmerOccurenceTable(&word,
1302 kmerTable);
1303 } else {
1304 kmerOccurence =
1305 findKmerInKmerOccurenceTable(&antiWord,
1306 kmerTable);
1307 reversed = true;
1308 }
1309 }
1310
1311 if (kmerOccurence) {
1312 if (!reversed) {
1313 node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
1314 coord = getKmerOccurencePosition(kmerOccurence);
1315 } else {
1316 node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
1317 coord = getNodeLength(node) - getKmerOccurencePosition(kmerOccurence) - 1;
1318 }
1319 } else {
1320 node = NULL;
1321 if (previousNode)
1322 break;
1323 }
1324
1325 // Fill in graph
1326 if (node)
1327 {
1328 #ifdef _OPENMP
1329 lockNode(node);
1330 #endif
1331 kmerIndex = readNucleotideIndex - wordLength;
1332
1333 if (previousNode != node || previousCoord != coord -1) {
1334 if (!isNodeMemorized(node, nodePile)) {
1335 addReadStart(node,
1336 seqID,
1337 coord,
1338 graph,
1339 kmerIndex);
1340 memorizeNode(node, &nodePile);
1341 } else {
1342 blurLastShortReadMarker
1343 (node, graph);
1344 }
1345 }
1346 #ifdef _OPENMP
1347 unLockNode(node);
1348 #endif
1349 previousNode = node;
1350 previousCoord = coord;
1351 }
1352 index++;
1353 }
1354 // printKmer(&word);
1355
1356 if (category / 2 < CATEGORIES)
1357 unMemorizeNodes(&nodePile);
1358 }
1359
fillUpConnectedGraph(ReadSet * reads,KmerOccurenceTable * kmerTable,Graph * graph,boolean readTracking,boolean double_strand)1360 static void fillUpConnectedGraph(ReadSet * reads,
1361 KmerOccurenceTable * kmerTable,
1362 Graph * graph,
1363 boolean readTracking,
1364 boolean double_strand)
1365 {
1366 IDnum refCount = 0; // refs not present in connected graphs
1367 IDnum readIndex;
1368 struct timeval start, end, diff;
1369
1370 resetNodeStatus(graph);
1371 // Allocate memory for the read pairs
1372 if (!readStartsAreActivated(graph))
1373 activateReadStarts(graph);
1374
1375 gettimeofday(&start, NULL);
1376 #ifdef _OPENMP
1377 initSmallNodeListMemory();
1378 createNodeLocks(graph);
1379 #pragma omp parallel for
1380 #endif
1381 for (readIndex = refCount; readIndex < reads->readCount; readIndex++)
1382 {
1383 Category category;
1384 boolean second_in_pair;
1385
1386 if (readIndex % 1000000 == 0)
1387 velvetLog("Ghost Threading through reads %ld / %ld\n",
1388 (long) readIndex, (long) reads->readCount);
1389
1390 category = reads->categories[readIndex];
1391 second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex);
1392
1393 // referenceMappings = NULL, referenceMappingCount = 0
1394 // refCount = 0, annotations = NULL, annotationCount = 0
1395 ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
1396 kmerTable,
1397 graph, readIndex + 1,
1398 category,
1399 readTracking, double_strand,
1400 NULL, 0,
1401 0, NULL, 0,
1402 second_in_pair);
1403 }
1404 createNodeReadStartArrays(graph);
1405 gettimeofday(&end, NULL);
1406 timersub(&end, &start, &diff);
1407 velvetLog(" === Ghost-Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec);
1408
1409 gettimeofday(&start, NULL);
1410 #ifdef _OPENMP
1411 int threads = omp_get_max_threads();
1412 if (threads > 32)
1413 threads = 32;
1414
1415 #pragma omp parallel for num_threads(threads)
1416 #endif
1417 for (readIndex = 0; readIndex < reads->readCount; readIndex++)
1418 {
1419 Category category;
1420 boolean second_in_pair;
1421
1422 if (readIndex % 1000000 == 0)
1423 velvetLog("Adding reads %li / %li\n",
1424 (long) readIndex, (long) reads->readCount);
1425
1426 category = reads->categories[readIndex];
1427 second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex);
1428
1429 addReadsToGraph(getTightStringInArray(reads->tSequences, readIndex),
1430 kmerTable,
1431 graph, readIndex + 1, category,
1432 readTracking, double_strand, second_in_pair);
1433 }
1434 gettimeofday(&end, NULL);
1435 timersub(&end, &start, &diff);
1436 velvetLog(" === Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec);
1437
1438 #ifdef _OPENMP
1439 free(nodeLocks);
1440 nodeLocks = NULL;
1441 #endif
1442
1443 orderNodeReadStartArrays(graph);
1444
1445 destroySmallNodeListMemmory();
1446
1447 destroyKmerOccurenceTable(kmerTable);
1448 }
1449
importConnectedGraph(char * connectedGraphFilename,ReadSet * reads,char * roadmapFilename,boolean readTracking,short int accelerationBits)1450 Graph *importConnectedGraph(char *connectedGraphFilename, ReadSet * reads, char * roadmapFilename,
1451 boolean readTracking, short int accelerationBits)
1452 {
1453 boolean double_strand = false;
1454 Graph *graph = readConnectedGraphFile(connectedGraphFilename, &double_strand);
1455
1456 if (nodeCount(graph) == 0)
1457 return graph;
1458
1459 if (readTracking) {
1460 Coordinate referenceMappingCount = 0;
1461 NodeMask * nodeMasks = NULL;
1462
1463 // Map k-mers to nodes
1464 KmerOccurenceTable *kmerTable =
1465 referenceGraphKmers(connectedGraphFilename, accelerationBits, graph, doubleStrandedGraph(graph), nodeMasks, referenceMappingCount);
1466
1467 // Map sequences -> kmers -> nodes
1468 fillUpConnectedGraph(reads, kmerTable, graph, readTracking, double_strand);
1469 }
1470
1471 return graph;
1472 }
1473