1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
4 This file is part of Velvet.
5
6 Velvet is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 Velvet is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Velvet; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24
25 #include "globals.h"
26 #include "graph.h"
27 #include "recycleBin.h"
28 #include "tightString.h"
29 #include "passageMarker.h"
30 #include "utility.h"
31 #include "kmer.h"
32
33 #define ADENINE 0
34 #define CYTOSINE 1
35 #define GUANINE 2
36 #define THYMINE 3
37
38 struct arc_st {
39 Arc *twinArc; // 64
40 Arc *next; // 64
41 Arc *previous; // 64
42 Arc *nextInLookupTable; // 64
43 Node *destination; // 64
44 IDnum multiplicity; // 32
45 } ATTRIBUTE_PACKED; // 352 Total
46
47 struct node_st {
48 Node *twinNode; // 64
49 Arc *arc; // 64
50 Descriptor *descriptor; // 64
51 PassageMarkerI marker; // 32
52 IDnum length; // 32
53 #ifndef SINGLE_COV_CAT
54 IDnum virtualCoverage[CATEGORIES]; // 32 * 2
55 IDnum originalVirtualCoverage[CATEGORIES]; // 32 * 2
56 #else
57 IDnum virtualCoverage; // 32 * 2
58 #endif
59 IDnum ID; // 32
60 IDnum arcCount; // 32
61 boolean status; // 1
62 boolean uniqueness; // 1
63 } ATTRIBUTE_PACKED; // 418 Total
64
65 struct shortReadMarker_st {
66 IDnum position;
67 IDnum readID;
68 ShortLength offset;
69 } ATTRIBUTE_PACKED;
70
71 struct gapMarker_st {
72 GapMarker *next;
73 IDnum position;
74 IDnum length;
75 } ATTRIBUTE_PACKED;
76
77 struct graph_st {
78 Node **nodes;
79 Arc **arcLookupTable;
80 ShortReadMarker **nodeReads;
81 IDnum *nodeReadCounts;
82 GapMarker **gapMarkers;
83 Coordinate insertLengths[CATEGORIES + 1];
84 double insertLengths_var[CATEGORIES + 1];
85 IDnum sequenceCount;
86 IDnum nodeCount;
87 int wordLength;
88 boolean double_stranded;
89 };
90
91 static RecycleBin *arcMemory = NULL;
92 static RecycleBin *nodeMemory = NULL;
93 static RecycleBin *gapMarkerMemory = NULL;
94
95 #define BLOCKSIZE 50
96 #define GAPBLOCKSIZE 10000
97
allocateArc()98 Arc *allocateArc()
99 {
100 if (arcMemory == NULL)
101 arcMemory = newRecycleBin(sizeof(Arc), BLOCKSIZE);
102
103 return allocatePointer(arcMemory);
104 }
105
deallocateArc(Arc * arc)106 void deallocateArc(Arc * arc)
107 {
108 deallocatePointer(arcMemory, arc);
109 }
110
allocateNode()111 Node *allocateNode()
112 {
113 if (nodeMemory == NULL)
114 nodeMemory = newRecycleBin(sizeof(Node), BLOCKSIZE);
115
116 return (Node *) allocatePointer(nodeMemory);
117 }
118
deallocateNode(Node * node)119 void deallocateNode(Node * node)
120 {
121 deallocatePointer(nodeMemory, node);
122 }
123
124 // Returns the twin node of a given node
getTwinNode(Node * node)125 Node *getTwinNode(Node * node)
126 {
127 return node->twinNode;
128 }
129
130 // Inserts new passage marker in the marker list of destination node
insertPassageMarker(PassageMarkerI marker,Node * destination)131 void insertPassageMarker(PassageMarkerI marker, Node * destination)
132 {
133 setTopOfTheNode(marker);
134 setNextInNode(marker, destination->marker);
135 destination->marker = marker;
136 }
137
138 // Returns the length of the node's descriptor list
getNodeLength(Node * node)139 Coordinate getNodeLength(Node * node)
140 {
141 return node->length;
142 }
143
144 // Returns the number of nodes in the graph
nodeCount(Graph * graph)145 IDnum nodeCount(Graph * graph)
146 {
147 return graph->nodeCount;
148 }
149
150 // returns the number of sequences used to buid the graph
sequenceCount(Graph * graph)151 IDnum sequenceCount(Graph * graph)
152 {
153 return graph->sequenceCount;
154 }
155
156 // Creates an arc from node origin to node destination.
157 // If this arc already exists, increments its multiplicity by 1.
createArc(Node * originNode,Node * destinationNode,Graph * graph)158 Arc *createArc(Node * originNode, Node * destinationNode, Graph * graph)
159 {
160 Arc *arc, *twinArc;
161 Node *destinationTwin;
162 IDnum lookupIndex;
163
164 if (originNode == NULL || destinationNode == NULL)
165 return NULL;
166
167 // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
168
169 arc = getArcBetweenNodes(originNode, destinationNode, graph);
170
171 if (arc != NULL) {
172 arc->multiplicity++;
173 arc->twinArc->multiplicity++;
174 return arc;
175 }
176 // If not found
177 #ifdef _OPENMP
178 #pragma omp critical
179 #endif
180 arc = allocateArc();
181 arc->destination = destinationNode;
182 arc->multiplicity = 1;
183 arc->previous = NULL;
184 arc->next = originNode->arc;
185 if (originNode->arc != NULL)
186 originNode->arc->previous = arc;
187 originNode->arc = arc;
188 originNode->arcCount++;
189
190 destinationTwin = destinationNode->twinNode;
191
192 // Hairpin case
193 if (destinationTwin == originNode) {
194 arc->multiplicity++;
195 arc->twinArc = arc;
196 if (graph->arcLookupTable != NULL) {
197 lookupIndex =
198 2 * originNode->ID + destinationNode->ID +
199 3 * graph->nodeCount;
200 arc->nextInLookupTable =
201 graph->arcLookupTable[lookupIndex];
202 graph->arcLookupTable[lookupIndex] = arc;
203 }
204 return arc;
205 }
206
207 #ifdef _OPENMP
208 #pragma omp critical
209 #endif
210 twinArc = allocateArc();
211 twinArc->destination = originNode->twinNode;
212 twinArc->multiplicity = 1;
213 twinArc->previous = NULL;
214 twinArc->next = destinationTwin->arc;
215 if (destinationTwin->arc != NULL)
216 destinationTwin->arc->previous = twinArc;
217 destinationTwin->arc = twinArc;
218 destinationTwin->arcCount++;
219
220 arc->twinArc = twinArc;
221 twinArc->twinArc = arc;
222
223 if (graph->arcLookupTable != NULL) {
224 lookupIndex =
225 2 * originNode->ID + destinationNode->ID +
226 3 * graph->nodeCount;
227 arc->nextInLookupTable =
228 graph->arcLookupTable[lookupIndex];
229 graph->arcLookupTable[lookupIndex] = arc;
230
231 lookupIndex =
232 -2 * destinationNode->ID - originNode->ID +
233 3 * graph->nodeCount;
234 twinArc->nextInLookupTable =
235 graph->arcLookupTable[lookupIndex];
236 graph->arcLookupTable[lookupIndex] = twinArc;
237 }
238 return arc;
239 }
240
createAnalogousArc(Node * originNode,Node * destinationNode,Arc * refArc,Graph * graph)241 void createAnalogousArc(Node * originNode, Node * destinationNode,
242 Arc * refArc, Graph * graph)
243 {
244 Arc *arc, *twinArc;
245 Node *destinationTwin;
246 IDnum lookupIndex;
247
248 if (originNode == NULL || destinationNode == NULL)
249 return;
250
251 // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
252
253 arc = getArcBetweenNodes(originNode, destinationNode, graph);
254
255 if (arc != NULL) {
256 if (refArc->twinArc != refArc) {
257 arc->multiplicity += getMultiplicity(refArc);
258 arc->twinArc->multiplicity +=
259 getMultiplicity(refArc);
260 } else {
261 arc->multiplicity += getMultiplicity(refArc) / 2;
262 arc->twinArc->multiplicity +=
263 getMultiplicity(refArc) / 2;
264 }
265 return;
266 }
267 // If not found
268 arc = allocateArc();
269 arc->destination = destinationNode;
270 arc->multiplicity = getMultiplicity(refArc);
271 arc->previous = NULL;
272 arc->next = originNode->arc;
273 if (originNode->arc != NULL)
274 originNode->arc->previous = arc;
275 originNode->arc = arc;
276 originNode->arcCount++;
277
278 destinationTwin = destinationNode->twinNode;
279
280 // Hairpin case
281 if (destinationTwin == originNode) {
282 arc->twinArc = arc;
283 if (refArc->twinArc != refArc)
284 arc->multiplicity *= 2;
285
286 if (graph->arcLookupTable != NULL) {
287 lookupIndex =
288 2 * originNode->ID + destinationNode->ID
289 + 3 * graph->nodeCount;
290 arc->nextInLookupTable =
291 graph->arcLookupTable[lookupIndex];
292 graph->arcLookupTable[lookupIndex] = arc;
293 }
294 return;
295 }
296
297 twinArc = allocateArc();
298 twinArc->destination = originNode->twinNode;
299 twinArc->multiplicity = getMultiplicity(refArc);
300 twinArc->previous = NULL;
301 twinArc->next = destinationTwin->arc;
302 if (destinationTwin->arc != NULL)
303 destinationTwin->arc->previous = twinArc;
304 destinationTwin->arc = twinArc;
305 destinationTwin->arcCount++;
306
307 arc->twinArc = twinArc;
308 twinArc->twinArc = arc;
309
310 if (graph->arcLookupTable != NULL) {
311 lookupIndex =
312 2 * originNode->ID + destinationNode->ID +
313 3 * graph->nodeCount;
314 arc->nextInLookupTable =
315 graph->arcLookupTable[lookupIndex];
316 graph->arcLookupTable[lookupIndex] = arc;
317
318 lookupIndex =
319 -2 * destinationNode->ID - originNode->ID +
320 3 * graph->nodeCount;
321 twinArc->nextInLookupTable =
322 graph->arcLookupTable[lookupIndex];
323 graph->arcLookupTable[lookupIndex] = twinArc;
324 }
325 }
326
getArcBetweenNodes(Node * originNode,Node * destinationNode,Graph * graph)327 Arc *getArcBetweenNodes(Node * originNode, Node * destinationNode,
328 Graph * graph)
329 {
330 Arc *arc;
331 Node *twinDestination, *twinOrigin;
332
333 if (originNode == NULL || destinationNode == NULL)
334 return NULL;
335
336 if (graph->arcLookupTable != NULL) {
337 for (arc =
338 graph->arcLookupTable[2 * originNode->ID +
339 destinationNode->ID +
340 3 * graph->nodeCount];
341 arc != NULL; arc = arc->nextInLookupTable) {
342 if (arc->destination == destinationNode) {
343 return arc;
344 }
345 }
346 return NULL;
347 }
348
349 twinDestination = destinationNode->twinNode;
350 if (originNode->arcCount <= twinDestination->arcCount) {
351 for (arc = originNode->arc; arc != NULL; arc = arc->next)
352 if (arc->destination == destinationNode)
353 return arc;
354 return NULL;
355 }
356
357 twinOrigin = originNode->twinNode;
358 for (arc = twinDestination->arc; arc != NULL; arc = arc->next)
359 if (arc->destination == twinOrigin)
360 return arc->twinArc;
361 return NULL;
362 }
363
destroyArc(Arc * arc,Graph * graph)364 void destroyArc(Arc * arc, Graph * graph)
365 {
366 Node *origin, *destination;
367 Arc *twinArc;
368 Arc *currentArc;
369 IDnum lookupIndex;
370
371 if (arc == NULL)
372 return;
373
374 twinArc = arc->twinArc;
375 origin = twinArc->destination->twinNode;
376 destination = arc->destination->twinNode;
377
378 //velvetLog("Destroying arc %p\n", arc);
379
380 // Removing arc from list
381 if (origin->arc == arc) {
382 origin->arc = arc->next;
383 if (origin->arc != NULL)
384 origin->arc->previous = NULL;
385 } else {
386 arc->previous->next = arc->next;
387 if (arc->next != NULL)
388 arc->next->previous = arc->previous;
389 }
390
391 origin->arcCount--;
392
393 if (destination == origin) {
394 if (graph->arcLookupTable != NULL) {
395 lookupIndex =
396 2 * origin->ID - destination->ID +
397 3 * graph->nodeCount;
398 currentArc = graph->arcLookupTable[lookupIndex];
399 if (currentArc == arc)
400 graph->arcLookupTable[lookupIndex] =
401 arc->nextInLookupTable;
402 else {
403 while (currentArc->nextInLookupTable !=
404 arc)
405 currentArc =
406 currentArc->nextInLookupTable;
407
408 currentArc->nextInLookupTable =
409 twinArc->nextInLookupTable;
410 }
411 }
412
413 deallocateArc(arc);
414 return;
415 }
416 // Removing arc's twin from list
417 if (destination->arc == twinArc) {
418 destination->arc = twinArc->next;
419 if (destination->arc != NULL)
420 destination->arc->previous = NULL;
421 } else {
422 twinArc->previous->next = twinArc->next;
423 if (twinArc->next != NULL)
424 twinArc->next->previous = twinArc->previous;
425 }
426
427 destination->arcCount--;
428
429 if (graph->arcLookupTable != NULL) {
430 lookupIndex =
431 2 * origin->ID - destination->ID +
432 3 * graph->nodeCount;
433 currentArc = graph->arcLookupTable[lookupIndex];
434 if (currentArc == arc)
435 graph->arcLookupTable[lookupIndex] =
436 arc->nextInLookupTable;
437 else {
438 while (currentArc->nextInLookupTable != arc)
439 currentArc = currentArc->nextInLookupTable;
440
441 currentArc->nextInLookupTable =
442 arc->nextInLookupTable;
443 }
444
445 lookupIndex =
446 2 * destination->ID - origin->ID +
447 3 * graph->nodeCount;
448 currentArc = graph->arcLookupTable[lookupIndex];
449 if (currentArc == twinArc)
450 graph->arcLookupTable[lookupIndex] =
451 twinArc->nextInLookupTable;
452 else {
453 while (currentArc->nextInLookupTable != twinArc)
454 currentArc = currentArc->nextInLookupTable;
455
456 currentArc->nextInLookupTable =
457 twinArc->nextInLookupTable;
458 }
459 }
460 // Freeing memory
461 deallocateArc(arc);
462 deallocateArc(twinArc);
463 }
464
destroyNode(Node * node,Graph * graph)465 void destroyNode(Node * node, Graph * graph)
466 {
467 Node *twin = node->twinNode;
468 IDnum ID = node->ID;
469 IDnum index;
470
471 //velvetLog("Destroying %d\n and twin %d\n", getNodeID(node), getNodeID(twin));
472
473 if (ID < 0)
474 ID = -ID;
475
476 // Node arcs:
477 while (node->arc != NULL)
478 destroyArc(node->arc, graph);
479 while (twin->arc != NULL)
480 destroyArc(twin->arc, graph);
481
482 // Descriptors
483 free(node->descriptor);
484 free(twin->descriptor);
485
486 // Passage markers
487 while (node->marker != NULL_IDX)
488 destroyPassageMarker(node->marker);
489
490 // Reads starts
491 if (graph->nodeReads != NULL) {
492 index = ID + graph->nodeCount;
493 free(graph->nodeReads[index]);
494 graph->nodeReads[index] = NULL;
495 graph->nodeReadCounts[index] = 0;
496
497 index = -ID + graph->nodeCount;
498 free(graph->nodeReads[index]);
499 graph->nodeReads[index] = NULL;
500 graph->nodeReadCounts[index] = 0;
501 }
502
503 graph->nodes[ID] = NULL;
504 deallocateNode(node);
505 deallocateNode(twin);
506 }
507
outDegree(Node * node)508 int outDegree(Node * node)
509 {
510 int result = 0;
511 Arc *arc = node->arc;
512 while (arc != NULL) {
513 result += arc->multiplicity;
514 arc = arc->next;
515 }
516
517 return result;
518 }
519
simpleArcCount(Node * node)520 int simpleArcCount(Node * node)
521 {
522 return node->arcCount;
523 }
524
arcCount(Node * node)525 int arcCount(Node * node)
526 {
527 int result = 0;
528 Arc *arc;
529
530 if (node == NULL)
531 return result;
532
533 arc = node->arc;
534 while (arc != NULL) {
535 result++;
536 if (arc->destination == node->twinNode)
537 result++;
538 arc = arc->next;
539 }
540
541 return result;
542
543 }
544
getNucleotideInDescriptor(Descriptor * descriptor,Coordinate i)545 static Nucleotide getNucleotideInDescriptor(Descriptor * descriptor,
546 Coordinate i)
547 {
548 Descriptor *fourMer = descriptor + i / 4;
549
550 switch (i % 4) {
551 case 0:
552 return (*fourMer & 3);
553 case 1:
554 return (*fourMer & 12) >> 2;
555 case 2:
556 return (*fourMer & 48) >> 4;
557 case 3:
558 return (*fourMer & 192) >> 6;
559 }
560 return 0;
561 }
562
getNucleotideInNode(Node * node,Coordinate index)563 Nucleotide getNucleotideInNode(Node * node, Coordinate index) {
564 return getNucleotideInDescriptor(node->descriptor, index);
565 }
566
getMarker(Node * node)567 PassageMarkerI getMarker(Node * node)
568 {
569 return node->marker;
570 }
571
setMarker(Node * node,PassageMarkerI marker)572 void setMarker(Node * node, PassageMarkerI marker)
573 {
574 if (node == NULL)
575 return;
576
577 if (marker == NULL_IDX) {
578 node->marker = NULL_IDX;
579 node->twinNode->marker = NULL_IDX;
580 return;
581 }
582
583 node->marker = marker;
584 setTopOfTheNode(marker);
585 node->twinNode->marker = getTwinMarker(marker);
586 setTopOfTheNode(getTwinMarker(marker));
587 }
588
setNodeStatus(Node * node,boolean status)589 void setNodeStatus(Node * node, boolean status)
590 {
591 node->status = status;
592 node->twinNode->status = status;
593 }
594
setSingleNodeStatus(Node * node,boolean status)595 void setSingleNodeStatus(Node * node, boolean status)
596 {
597 node->status = status;
598 }
599
getNodeStatus(Node * node)600 boolean getNodeStatus(Node * node)
601 {
602 if (node == NULL)
603 return false;
604 return node->status;
605 }
606
getNodeID(Node * node)607 IDnum getNodeID(Node * node)
608 {
609 if (node == NULL)
610 return 0;
611
612 return node->ID;
613 }
614
resetNodeStatus(Graph * graph)615 void resetNodeStatus(Graph * graph)
616 {
617 IDnum nodeIndex;
618 Node *node;
619
620 for (nodeIndex = 1; nodeIndex <= graph->nodeCount; nodeIndex++) {
621 node = graph->nodes[nodeIndex];
622 if (node == NULL)
623 continue;
624
625 node->status = false;
626 node->twinNode->status = false;
627 }
628 }
629
getNodeInGraph(Graph * graph,IDnum nodeID)630 Node *getNodeInGraph(Graph * graph, IDnum nodeID)
631 {
632 if (nodeID == 0)
633 return NULL;
634 else if (nodeID > 0)
635 return graph->nodes[nodeID];
636 else if (graph->nodes[-nodeID] == NULL)
637 return NULL;
638 else
639 return graph->nodes[-nodeID]->twinNode;
640 }
641
getArc(Node * node)642 Arc *getArc(Node * node)
643 {
644 return node->arc;
645 }
646
getNextArc(Arc * arc)647 Arc *getNextArc(Arc * arc)
648 {
649 return arc->next;
650 }
651
getMultiplicity(Arc * arc)652 IDnum getMultiplicity(Arc * arc)
653 {
654 if (arc == NULL)
655 return 0;
656
657 return arc->multiplicity;
658 }
659
getOrigin(Arc * arc)660 Node *getOrigin(Arc * arc)
661 {
662 if (arc == NULL)
663 return NULL;
664
665 return arc->twinArc->destination->twinNode;
666 }
667
getDestination(Arc * arc)668 Node *getDestination(Arc * arc)
669 {
670 if (arc == NULL)
671 return NULL;
672
673 return arc->destination;
674 }
675
markerCount(Node * node)676 IDnum markerCount(Node * node)
677 {
678 IDnum count = 0;
679 PassageMarkerI marker;
680
681 for (marker = getMarker(node); marker != NULL_IDX;
682 marker = getNextInNode(marker))
683 count++;
684
685 return count;
686 }
687
appendNodeSequence(Node * node,TightString * sequence,Coordinate writeIndex)688 void appendNodeSequence(Node * node, TightString * sequence,
689 Coordinate writeIndex)
690 {
691 Coordinate i;
692 Nucleotide nucleotide;
693
694 //velvetLog("Getting sequence from node %d of length %d (%d)\n", getNodeID(node), getNodeLength(node), getLength(nodeLabel));
695
696 for (i = 0; i < getNodeLength(node); i++) {
697 nucleotide =
698 getNucleotideInDescriptor(node->descriptor, i);
699 writeNucleotideAtPosition(nucleotide, i + writeIndex,
700 sequence);
701 }
702 }
703
writeNucleotideInDescriptor(Nucleotide nucleotide,Descriptor * descriptor,Coordinate i)704 static void writeNucleotideInDescriptor(Nucleotide nucleotide,
705 Descriptor * descriptor,
706 Coordinate i)
707 {
708 Descriptor *fourMer = descriptor + i / 4;
709 switch (i % 4) {
710 case 3:
711 *fourMer &= 63;
712 *fourMer += nucleotide << 6;
713 return;
714 case 2:
715 *fourMer &= 207;
716 *fourMer += nucleotide << 4;
717 return;
718 case 1:
719 *fourMer &= 243;
720 *fourMer += nucleotide << 2;
721 return;
722 case 0:
723 *fourMer &= 252;
724 *fourMer += nucleotide;
725 }
726 }
727
mergeDescriptors(Descriptor * descr,Coordinate destinationLength,Descriptor * copy,Coordinate sourceLength,size_t arrayLength)728 static inline Descriptor *mergeDescriptors(Descriptor * descr,
729 Coordinate destinationLength,
730 Descriptor * copy,
731 Coordinate sourceLength,
732 size_t arrayLength)
733 {
734 Descriptor *readPtr, *writePtr;
735 Descriptor readCopy;
736 int readOffset, writeOffset;
737 Descriptor *new = callocOrExit(arrayLength, Descriptor);
738 Coordinate index;
739
740 readPtr = descr;
741 readCopy = *readPtr;
742 writePtr = new;
743 writeOffset = 0;
744 for (index = 0; index < destinationLength; index++) {
745 (*writePtr) >>= 2;
746 (*writePtr) += (readCopy & 3) << 6;
747 readCopy >>= 2;
748
749 writeOffset++;
750 if (writeOffset == 4) {
751 writePtr++;
752 readPtr++;
753 if (index < destinationLength - 1)
754 readCopy = *readPtr;
755 writeOffset = 0;
756 }
757 }
758
759 readPtr = copy;
760 readCopy = *readPtr;
761 readOffset = 0;
762 for (index = 0; index < sourceLength; index++) {
763 (*writePtr) >>= 2;
764 (*writePtr) += (readCopy & 3) << 6;
765 readCopy >>= 2;
766
767 writeOffset++;
768 if (writeOffset == 4) {
769 writePtr++;
770 writeOffset = 0;
771 }
772
773 readOffset++;
774 if (readOffset == 4) {
775 readPtr++;
776 if (index < sourceLength - 1)
777 readCopy = *readPtr;
778 readOffset = 0;
779 }
780 }
781
782 if (writeOffset != 0) {
783 while (writeOffset != 4) {
784 (*writePtr) >>= 2;
785 writeOffset++;
786 }
787 }
788
789 return new;
790 }
791
addBufferToDescriptor(Node * node,Coordinate length)792 static void addBufferToDescriptor(Node * node, Coordinate length)
793 {
794 Descriptor *descr;
795 Coordinate newLength;
796 size_t arrayLength;
797 Node *twinNode;
798 Coordinate index;
799 Descriptor *old_descriptor;
800
801 if (node == NULL)
802 return;
803
804 twinNode = node->twinNode;
805 descr = node->descriptor;
806
807 // Amendments for empty descriptors
808 if (descr == NULL) {
809 arrayLength = length / 4;
810 if (length % 4 != 0)
811 arrayLength++;
812
813 node->descriptor = callocOrExit(arrayLength, Descriptor);
814 node->length = length;
815 twinNode->descriptor =
816 callocOrExit(arrayLength, Descriptor);
817 twinNode->length = length;
818 return;
819 }
820
821 newLength = node->length + length;
822 arrayLength = newLength / 4;
823 if (newLength % 4 != 0)
824 arrayLength++;
825
826 // Merging forward descriptors
827 node->descriptor =
828 reallocOrExit(node->descriptor, arrayLength, Descriptor);
829
830 for (index = node->length; index < newLength; index++)
831 writeNucleotideInDescriptor(ADENINE, node->descriptor,
832 index);
833 node->length = newLength;
834
835 // Merging reverse descriptors
836 old_descriptor = twinNode->descriptor;
837 twinNode->descriptor = callocOrExit(arrayLength, Descriptor);
838 for (index = 0; index < twinNode->length; index++)
839 writeNucleotideInDescriptor(getNucleotideInDescriptor
840 (old_descriptor, index),
841 twinNode->descriptor,
842 index + length);
843 for (index = 0; index < length; index++)
844 writeNucleotideInDescriptor(THYMINE, twinNode->descriptor,
845 index);
846 free(old_descriptor);
847 twinNode->length = newLength;
848 }
849
appendDescriptors(Node * destination,Node * source)850 void appendDescriptors(Node * destination, Node * source)
851 {
852 Descriptor *copy;
853 Descriptor *twinCopy;
854 Descriptor *descr;
855 Descriptor *twinDescr;
856 Coordinate newLength, destinationLength, sourceLength;
857 size_t arrayLength;
858 Descriptor *new;
859 Node *twinDestination;
860
861 if (source == NULL || destination == NULL)
862 return;
863
864 twinDestination = destination->twinNode;
865 descr = destination->descriptor;
866 twinDescr = twinDestination->descriptor;
867 copy = source->descriptor;
868 twinCopy = source->twinNode->descriptor;
869
870 // Amendments for empty descriptors
871 if (getNodeLength(source) == 0)
872 return;
873 if (getNodeLength(destination) == 0) {
874 destination->descriptor = copy;
875 twinDestination->descriptor = twinCopy;
876 source->descriptor = NULL;
877 source->twinNode->descriptor = NULL;
878 destination->length = source->length;
879 destination->twinNode->length = source->length;
880 source->length = 0;
881 source->twinNode->length = 0;
882 return;
883 }
884
885 destinationLength = destination->length;
886 sourceLength = source->length;
887 newLength = destinationLength + sourceLength;
888 arrayLength = newLength / 4;
889 if (newLength % 4 != 0)
890 arrayLength++;
891
892 // Merging forward descriptors
893 new =
894 mergeDescriptors(descr, destinationLength, copy, sourceLength,
895 arrayLength);
896 free(descr);
897 destination->descriptor = new;
898 destination->length = newLength;
899
900 // Merging reverse descriptors
901 new =
902 mergeDescriptors(twinCopy, sourceLength, twinDescr,
903 destinationLength, arrayLength);
904 free(twinDescr);
905 twinDestination->descriptor = new;
906 twinDestination->length = newLength;
907 }
908
catDescriptors(Descriptor * descr,Coordinate destinationLength,Descriptor * copy,Coordinate sourceLength)909 static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
910 {
911 Coordinate index;
912 Nucleotide nucleotide;
913
914 for (index = 0; index < sourceLength; index++) {
915 nucleotide = getNucleotideInDescriptor(copy, index);
916 writeNucleotideInDescriptor(nucleotide, descr, index + destinationLength);
917 }
918 }
919
reverseCatDescriptors(Descriptor * descr,Coordinate destinationLength,Descriptor * copy,Coordinate sourceLength,Coordinate totalLength)920 static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
921 {
922 Coordinate shift = totalLength - destinationLength - sourceLength;
923 Coordinate index;
924 Nucleotide nucleotide;
925
926 for (index = 0; index < sourceLength; index++) {
927 nucleotide = getNucleotideInDescriptor(copy, index);
928 writeNucleotideInDescriptor(nucleotide, descr, index + shift);
929 }
930 }
931
directlyAppendDescriptors(Node * destination,Node * source,Coordinate totalLength)932 void directlyAppendDescriptors(Node * destination, Node * source, Coordinate totalLength)
933 {
934 Descriptor *copy;
935 Descriptor *twinCopy;
936 Descriptor *descr;
937 Descriptor *twinDescr;
938 Coordinate destinationLength, sourceLength;
939
940 if (source == NULL || destination == NULL)
941 return;
942
943 descr = destination->descriptor;
944 twinDescr = destination->twinNode->descriptor;
945 copy = source->descriptor;
946 twinCopy = source->twinNode->descriptor;
947
948 // Amendments for empty descriptors
949 if (getNodeLength(source) == 0)
950 return;
951
952 destinationLength = destination->length;
953 sourceLength = source->length;
954
955 // Merging forward descriptors
956 catDescriptors(descr, destinationLength, copy, sourceLength);
957
958 // Merging reverse descriptors
959 reverseCatDescriptors(twinDescr, destinationLength, twinCopy, sourceLength, totalLength);
960
961 destination->length += source->length;
962 destination->twinNode->length += source->length;
963 }
964
copyDownDescriptor(Descriptor ** writePtr,int * writeOffset,Descriptor * source,Coordinate length)965 static void copyDownDescriptor(Descriptor ** writePtr, int *writeOffset,
966 Descriptor * source, Coordinate length)
967 {
968 Descriptor *readPtr = source;
969 Descriptor readCopy = *readPtr;
970 int readOffset = 0;
971 Coordinate index;
972
973 for (index = 0; index < length; index++) {
974 (**writePtr) >>= 2;
975 (**writePtr) += (readCopy & 3) << 6;
976 readCopy >>= 2;
977
978 (*writeOffset)++;
979 if (*writeOffset == 4) {
980 (*writePtr)++;
981 *writeOffset = 0;
982 }
983
984 readOffset++;
985 if (readOffset == 4) {
986 readPtr++;
987 if (index < length - 1)
988 readCopy = *readPtr;
989 readOffset = 0;
990 }
991 }
992 }
993
copyDownSequence(Descriptor ** writePtr,int * writeOffset,TightString * sequence,Coordinate start,Coordinate finish,int WORDLENGTH)994 static void copyDownSequence(Descriptor ** writePtr, int *writeOffset,
995 TightString * sequence, Coordinate start,
996 Coordinate finish, int WORDLENGTH)
997 {
998 boolean forward = (start < finish);
999 Coordinate sourceLength = finish - start;
1000 Coordinate index;
1001 Nucleotide nucleotide;
1002
1003 if (!forward)
1004 sourceLength *= -1;
1005
1006 for (index = 0; index < sourceLength; index++) {
1007 if (forward)
1008 nucleotide =
1009 getNucleotide(start + WORDLENGTH - 1 + index,
1010 sequence);
1011 else
1012 nucleotide =
1013 #ifndef COLOR
1014 3 - getNucleotide(start - index - 1, sequence);
1015 #else
1016 getNucleotide(start - index - 1, sequence);
1017 #endif
1018
1019 (**writePtr) >>= 2;
1020 (**writePtr) += nucleotide << 6;
1021
1022 (*writeOffset)++;
1023 if (*writeOffset == 4) {
1024 (*writePtr)++;
1025 *writeOffset = 0;
1026 }
1027 }
1028 }
1029
appendSequenceToDescriptor(Descriptor * descr,Coordinate nodeLength,PassageMarkerI marker,TightString * sequences,int WORDLENGTH,size_t arrayLength,boolean downStream)1030 static Descriptor *appendSequenceToDescriptor(Descriptor * descr,
1031 Coordinate nodeLength,
1032 PassageMarkerI marker,
1033 TightString *sequences,
1034 int WORDLENGTH,
1035 size_t arrayLength,
1036 boolean downStream)
1037 {
1038 int writeOffset = 0;
1039 Descriptor *new = callocOrExit(arrayLength, Descriptor);
1040 Descriptor *writePtr = new;
1041 TightString *sequence;
1042 IDnum sequenceID = getPassageMarkerSequenceID(marker);
1043 Coordinate start = getPassageMarkerStart(marker);
1044 Coordinate finish = getPassageMarkerFinish(marker);
1045
1046 if (sequenceID > 0)
1047 sequence = getTightStringInArray(sequences, sequenceID - 1);
1048 else
1049 sequence = getTightStringInArray(sequences, -sequenceID - 1);
1050
1051 if (downStream)
1052 copyDownDescriptor(&writePtr, &writeOffset, descr,
1053 nodeLength);
1054
1055 copyDownSequence(&writePtr, &writeOffset, sequence, start, finish,
1056 WORDLENGTH);
1057
1058 if (!downStream)
1059 copyDownDescriptor(&writePtr, &writeOffset, descr,
1060 nodeLength);
1061
1062 if (writeOffset != 0) {
1063 while (writeOffset != 4) {
1064 (*writePtr) >>= 2;
1065 writeOffset++;
1066 }
1067 }
1068
1069 return new;
1070 }
1071
appendSequence(Node * node,TightString * reads,PassageMarkerI guide,Graph * graph)1072 void appendSequence(Node * node, TightString * reads,
1073 PassageMarkerI guide, Graph * graph)
1074 {
1075 Descriptor *descr;
1076 Descriptor *twinDescr;
1077 Coordinate newLength, nodeLength, sourceLength;
1078 size_t arrayLength;
1079 Descriptor *new;
1080 Node *twinNode;
1081
1082 if (node == NULL)
1083 return;
1084
1085 twinNode = node->twinNode;
1086 descr = node->descriptor;
1087 twinDescr = twinNode->descriptor;
1088 nodeLength = node->length;
1089 sourceLength = getPassageMarkerLength(guide);
1090
1091 // Amendments for empty descriptors
1092 if (sourceLength == 0)
1093 return;
1094
1095 newLength = nodeLength + sourceLength;
1096 arrayLength = newLength / 4;
1097 if (newLength % 4 != 0)
1098 arrayLength++;
1099
1100 // Merging forward descriptors
1101 new =
1102 appendSequenceToDescriptor(descr, nodeLength, guide, reads,
1103 getWordLength(graph), arrayLength,
1104 true);
1105 free(descr);
1106 node->descriptor = new;
1107 node->length = newLength;
1108
1109 // Merging reverse descriptors
1110 new =
1111 appendSequenceToDescriptor(twinDescr, nodeLength,
1112 getTwinMarker(guide), reads,
1113 getWordLength(graph), arrayLength,
1114 false);
1115 free(twinDescr);
1116 twinNode->descriptor = new;
1117 twinNode->length = newLength;
1118 }
1119
setMultiplicity(Arc * arc,IDnum mult)1120 void setMultiplicity(Arc * arc, IDnum mult)
1121 {
1122 arc->multiplicity = mult;
1123 arc->twinArc->multiplicity = mult;
1124 }
1125
1126 // Reshuffles the graph->nodes array to remove NULL pointers
1127 // Beware that node IDs are accordingly reshuffled (all pointers remain valid though)
renumberNodes(Graph * graph)1128 void renumberNodes(Graph * graph)
1129 {
1130 IDnum nodeIndex;
1131 Node *currentNode;
1132 IDnum counter = 0;
1133 IDnum nodes = graph->nodeCount;
1134 IDnum newIndex;
1135
1136 velvetLog("Renumbering nodes\n");
1137 velvetLog("Initial node count %li\n", (long) graph->nodeCount);
1138
1139 for (nodeIndex = 1; nodeIndex <= nodes; nodeIndex++) {
1140 currentNode = getNodeInGraph(graph, nodeIndex);
1141
1142 if (currentNode == NULL)
1143 counter++;
1144 else if (counter != 0) {
1145 newIndex = nodeIndex - counter;
1146 currentNode->ID = newIndex;
1147 currentNode->twinNode->ID = -newIndex;
1148 graph->nodes[newIndex] = currentNode;
1149
1150 if (graph->nodeReads != NULL) {
1151 graph->nodeReads[newIndex + nodes] =
1152 graph->nodeReads[nodeIndex + nodes];
1153 graph->nodeReadCounts[newIndex + nodes] =
1154 graph->nodeReadCounts[nodeIndex +
1155 nodes];
1156
1157 graph->nodeReads[nodeIndex + nodes] = NULL;
1158 graph->nodeReadCounts[nodeIndex + nodes] =
1159 0;
1160
1161 graph->nodeReads[-newIndex + nodes] =
1162 graph->nodeReads[-nodeIndex + nodes];
1163 graph->nodeReadCounts[-newIndex + nodes] =
1164 graph->nodeReadCounts[-nodeIndex +
1165 nodes];
1166
1167 graph->nodeReads[-nodeIndex + nodes] =
1168 NULL;
1169 graph->nodeReadCounts[-nodeIndex + nodes] =
1170 0;
1171 }
1172
1173 if (graph->gapMarkers != NULL) {
1174 graph->gapMarkers[newIndex] =
1175 graph->gapMarkers[nodeIndex];
1176 graph->gapMarkers[nodeIndex] = NULL;
1177 }
1178 }
1179 }
1180
1181 // Shitfting array to the left
1182 if (graph->nodeReads != NULL && counter != 0) {
1183 for (nodeIndex = counter; nodeIndex <= 2 * nodes - counter;
1184 nodeIndex++) {
1185 graph->nodeReads[nodeIndex - counter] =
1186 graph->nodeReads[nodeIndex];
1187 graph->nodeReadCounts[nodeIndex - counter] =
1188 graph->nodeReadCounts[nodeIndex];
1189 }
1190 }
1191
1192 // Rellocating node space
1193 graph->nodeCount -= counter;
1194 graph->nodes =
1195 reallocOrExit(graph->nodes, graph->nodeCount + 1, Node *);
1196
1197 // Reallocating short read marker arrays
1198 if (graph->nodeReads != NULL) {
1199 graph->nodeReads =
1200 reallocOrExit(graph->nodeReads,
1201 2 * graph->nodeCount +
1202 1, ShortReadMarker *);
1203 graph->nodeReadCounts =
1204 reallocOrExit(graph->nodeReadCounts,
1205 2 * graph->nodeCount + 1, IDnum);
1206 }
1207
1208 // Reallocating gap marker table
1209 if (graph->gapMarkers != NULL)
1210 graph->gapMarkers = reallocOrExit(graph->gapMarkers,
1211 graph->nodeCount +
1212 1, GapMarker *);
1213
1214 velvetLog("Removed %li null nodes\n", (long) counter);
1215 }
1216
splitNodeDescriptor(Node * source,Node * target,Coordinate offset)1217 void splitNodeDescriptor(Node * source, Node * target, Coordinate offset)
1218 {
1219 Coordinate originalLength = source->length;
1220 Coordinate backLength = originalLength - offset;
1221 Coordinate index;
1222 Descriptor *descriptor, *new;
1223 size_t arrayLength;
1224 Nucleotide nucleotide;
1225
1226 source->length = offset;
1227 source->twinNode->length = offset;
1228
1229 if (target != NULL) {
1230 target->length = backLength;
1231 target->twinNode->length = backLength;
1232 free(target->descriptor);
1233 free(target->twinNode->descriptor);
1234 target->descriptor = NULL;
1235 target->twinNode->descriptor = NULL;
1236 }
1237
1238 if (backLength == 0)
1239 return;
1240
1241 descriptor = source->descriptor;
1242
1243 arrayLength = backLength / 4;
1244 if (backLength % 4 > 0)
1245 arrayLength++;
1246
1247 if (target != NULL) {
1248 // Target node .. forwards
1249 new = mallocOrExit(arrayLength, Descriptor);
1250 target->descriptor = new;
1251 for (index = 0; index < backLength; index++) {
1252 nucleotide =
1253 getNucleotideInDescriptor(descriptor, index);
1254 writeNucleotideInDescriptor(nucleotide, new,
1255 index);
1256 }
1257 }
1258 // Source node
1259 for (index = backLength; index < originalLength; index++) {
1260 nucleotide = getNucleotideInDescriptor(descriptor, index);
1261 writeNucleotideInDescriptor(nucleotide, descriptor,
1262 index - backLength);
1263 }
1264
1265 if (target == NULL)
1266 return;
1267
1268 // target node other way
1269 descriptor = source->twinNode->descriptor;
1270 new = mallocOrExit(arrayLength, Descriptor);
1271 target->twinNode->descriptor = new;
1272
1273 for (index = offset; index < originalLength; index++) {
1274 nucleotide = getNucleotideInDescriptor(descriptor, index);
1275 writeNucleotideInDescriptor(nucleotide, new,
1276 index - offset);
1277 }
1278 }
1279
reduceNode(Node * node)1280 void reduceNode(Node * node)
1281 {
1282 free(node->descriptor);
1283 node->descriptor = NULL;
1284 node->length = 0;
1285
1286 free(node->twinNode->descriptor);
1287 node->twinNode->descriptor = NULL;
1288 node->twinNode->length = 0;
1289 }
1290
1291 // Allocate memory for an empty graph created with sequenceCount different sequences
emptyGraph(IDnum sequenceCount,int wordLength)1292 Graph *emptyGraph(IDnum sequenceCount, int wordLength)
1293 {
1294 Graph *newGraph = mallocOrExit(1, Graph);
1295 newGraph->sequenceCount = sequenceCount;
1296 newGraph->arcLookupTable = NULL;
1297 newGraph->nodeReads = NULL;
1298 newGraph->nodeReadCounts = NULL;
1299 newGraph->wordLength = wordLength;
1300 newGraph->gapMarkers = NULL;
1301 return newGraph;
1302 }
1303
newPositiveDescriptor(IDnum sequenceID,Coordinate start,Coordinate finish,TightString * sequences,int WORDLENGTH)1304 static Descriptor *newPositiveDescriptor(IDnum sequenceID,
1305 Coordinate start,
1306 Coordinate finish,
1307 TightString *sequences,
1308 int WORDLENGTH)
1309 {
1310 Coordinate index;
1311 Nucleotide nucleotide;
1312 TightString *tString = getTightStringInArray (sequences, sequenceID - 1);
1313 Coordinate length = finish - start;
1314 Descriptor *res;
1315 size_t arrayLength = length / 4;
1316
1317 if (length % 4 > 0)
1318 arrayLength++;
1319
1320 res = mallocOrExit(arrayLength, Descriptor);
1321
1322 for (index = 0; index < length; index++) {
1323 nucleotide =
1324 getNucleotide(start + index + WORDLENGTH - 1, tString);
1325 writeNucleotideInDescriptor(nucleotide, res, index);
1326 }
1327
1328 return res;
1329
1330 }
1331
newNegativeDescriptor(IDnum sequenceID,Coordinate start,Coordinate finish,TightString * sequences,int WORDLENGTH)1332 static Descriptor *newNegativeDescriptor(IDnum sequenceID,
1333 Coordinate start,
1334 Coordinate finish,
1335 TightString *sequences,
1336 int WORDLENGTH)
1337 {
1338 Coordinate index;
1339 Nucleotide nucleotide;
1340 TightString *tString = getTightStringInArray (sequences, -sequenceID - 1);
1341 Coordinate length = start - finish;
1342 Descriptor *res;
1343 size_t arrayLength = length / 4;
1344
1345 if (length % 4 > 0)
1346 arrayLength++;
1347
1348 res = mallocOrExit(arrayLength, Descriptor);
1349
1350 for (index = 0; index < length; index++) {
1351 nucleotide = getNucleotide(start - index, tString);
1352 #ifndef COLOR
1353 writeNucleotideInDescriptor(3 - nucleotide, res, index);
1354 #else
1355 writeNucleotideInDescriptor(nucleotide, res, index);
1356 #endif
1357 }
1358
1359 return res;
1360
1361 }
1362
newDescriptor(IDnum sequenceID,Coordinate start,Coordinate finish,TightString * sequences,int WORDLENGTH)1363 static Descriptor *newDescriptor(IDnum sequenceID, Coordinate start,
1364 Coordinate finish,
1365 TightString * sequences, int WORDLENGTH)
1366 {
1367 if (sequenceID > 0)
1368 return newPositiveDescriptor(sequenceID, start, finish,
1369 sequences, WORDLENGTH);
1370 else
1371 return newNegativeDescriptor(sequenceID, start, finish,
1372 sequences, WORDLENGTH);
1373 }
1374
1375 // Constructor
1376 // Memory allocated
newNode(IDnum sequenceID,Coordinate start,Coordinate finish,Coordinate offset,IDnum ID,TightString * sequences,int WORDLENGTH)1377 Node *newNode(IDnum sequenceID, Coordinate start, Coordinate finish,
1378 Coordinate offset, IDnum ID, TightString * sequences,
1379 int WORDLENGTH)
1380 {
1381 Node *newnd = allocateNode();
1382 Node *antiNode = allocateNode();
1383
1384 newnd->ID = ID;
1385 newnd->descriptor =
1386 newDescriptor(sequenceID, start + offset, finish + offset,
1387 sequences, WORDLENGTH);
1388 newnd->arc = NULL;
1389 newnd->arcCount = 0;
1390 newnd->marker = NULL_IDX;
1391 newnd->status = false;
1392
1393 #ifndef SINGLE_COV_CAT
1394 Category cat;
1395 for (cat = 0; cat < CATEGORIES; cat++) {
1396 newnd->virtualCoverage[cat] = 0;
1397 newnd->originalVirtualCoverage[cat] = 0;
1398 }
1399 #else
1400 newnd->virtualCoverage = 0;
1401 #endif
1402
1403 antiNode->ID = -ID;
1404 antiNode->descriptor =
1405 newDescriptor(-sequenceID, finish + offset - 1,
1406 start + offset - 1, sequences, WORDLENGTH);
1407 antiNode->arc = NULL;
1408 antiNode->arcCount = 0;
1409 antiNode->marker = NULL_IDX;
1410 antiNode->status = false;
1411
1412 #ifndef SINGLE_COV_CAT
1413 for (cat = 0; cat < CATEGORIES; cat++) {
1414 antiNode->virtualCoverage[cat] = 0;
1415 antiNode->originalVirtualCoverage[cat] = 0;
1416 }
1417 #else
1418 antiNode->virtualCoverage = 0;
1419 #endif
1420
1421 newnd->twinNode = antiNode;
1422 antiNode->twinNode = newnd;
1423
1424 if (sequenceID > 0) {
1425 newnd->length = finish - start;
1426 antiNode->length = finish - start;
1427 } else {
1428 newnd->length = start - finish;
1429 antiNode->length = start - finish;
1430 }
1431
1432 return newnd;
1433 }
1434
allocateNodeSpace(Graph * graph,IDnum nodeCount)1435 void allocateNodeSpace(Graph * graph, IDnum nodeCount)
1436 {
1437 graph->nodes = callocOrExit(nodeCount + 1, Node *);
1438 graph->nodeCount = nodeCount;
1439 }
1440
getUniqueness(Node * node)1441 boolean getUniqueness(Node * node)
1442 {
1443 return node->uniqueness;
1444 }
1445
setUniqueness(Node * node,boolean value)1446 void setUniqueness(Node * node, boolean value)
1447 {
1448 node->uniqueness = value;
1449 node->twinNode->uniqueness = value;
1450 }
1451
emptyNode()1452 Node *emptyNode()
1453 {
1454 Node *newnd = allocateNode();
1455 Node *antiNode = allocateNode();
1456
1457 newnd->ID = 0;
1458 newnd->descriptor = NULL;
1459 newnd->arc = NULL;
1460 newnd->arcCount = 0;
1461 newnd->marker = NULL_IDX;
1462 newnd->length = 0;
1463 newnd->uniqueness = false;
1464
1465 #ifndef SINGLE_COV_CAT
1466 Category cat;
1467 for (cat = 0; cat < CATEGORIES; cat++) {
1468 newnd->virtualCoverage[cat] = 0;
1469 newnd->originalVirtualCoverage[cat] = 0;
1470 }
1471 #else
1472 newnd->virtualCoverage = 0;
1473 #endif
1474
1475 antiNode->ID = 0;
1476 antiNode->descriptor = NULL;
1477 antiNode->arc = NULL;
1478 antiNode->arcCount = 0;
1479 antiNode->marker = NULL_IDX;
1480 antiNode->length = 0;
1481 antiNode->uniqueness = false;
1482
1483 #ifndef SINGLE_COV_CAT
1484 for (cat = 0; cat < CATEGORIES; cat++) {
1485 antiNode->virtualCoverage[cat] = 0;
1486 antiNode->originalVirtualCoverage[cat] = 0;
1487 }
1488 #else
1489 antiNode->virtualCoverage = 0;
1490 #endif
1491
1492 newnd->twinNode = antiNode;
1493 antiNode->twinNode = newnd;
1494
1495 return newnd;
1496
1497 }
1498
addEmptyNodeToGraph(Graph * graph,IDnum ID)1499 Node *addEmptyNodeToGraph(Graph * graph, IDnum ID)
1500 {
1501 Node *newnd = emptyNode();
1502
1503 newnd->ID = ID;
1504 newnd->twinNode->ID = -ID;
1505
1506 graph->nodes[ID] = newnd;
1507
1508 return newnd;
1509
1510 }
1511
1512 #ifndef SINGLE_COV_CAT
1513
setVirtualCoverage(Node * node,Category category,Coordinate coverage)1514 void setVirtualCoverage(Node * node, Category category,
1515 Coordinate coverage)
1516 {
1517 node->virtualCoverage[category] = coverage;
1518 node->twinNode->virtualCoverage[category] =
1519 node->virtualCoverage[category];
1520 }
1521
incrementVirtualCoverage(Node * node,Category category,Coordinate coverage)1522 void incrementVirtualCoverage(Node * node, Category category,
1523 Coordinate coverage)
1524 {
1525 node->virtualCoverage[category] += coverage;
1526 node->twinNode->virtualCoverage[category] =
1527 node->virtualCoverage[category];
1528 }
1529
getVirtualCoverage(Node * node,Category category)1530 Coordinate getVirtualCoverage(Node * node, Category category)
1531 {
1532 return node->virtualCoverage[category];
1533 }
1534
getTotalCoverage(Node * node)1535 Coordinate getTotalCoverage(Node * node)
1536 {
1537 Category cat;
1538 Coordinate coverage = 0;
1539
1540 for (cat = 0; cat < CATEGORIES; cat++)
1541 coverage += node->virtualCoverage[cat];
1542
1543 return coverage;
1544 }
1545
setOriginalVirtualCoverage(Node * node,Category category,Coordinate coverage)1546 void setOriginalVirtualCoverage(Node * node, Category category,
1547 Coordinate coverage)
1548 {
1549 node->originalVirtualCoverage[category] = coverage;
1550 node->twinNode->originalVirtualCoverage[category] =
1551 node->originalVirtualCoverage[category];
1552 }
1553
incrementOriginalVirtualCoverage(Node * node,Category category,Coordinate coverage)1554 void incrementOriginalVirtualCoverage(Node * node, Category category,
1555 Coordinate coverage)
1556 {
1557 node->originalVirtualCoverage[category] += coverage;
1558 node->twinNode->originalVirtualCoverage[category] =
1559 node->originalVirtualCoverage[category];
1560 }
1561
getOriginalVirtualCoverage(Node * node,Category category)1562 Coordinate getOriginalVirtualCoverage(Node * node, Category category)
1563 {
1564 return node->originalVirtualCoverage[category];
1565 }
1566
1567 #else
1568
setVirtualCoverage(Node * node,Coordinate coverage)1569 void setVirtualCoverage(Node * node,
1570 Coordinate coverage)
1571 {
1572 node->virtualCoverage = coverage;
1573 node->twinNode->virtualCoverage = coverage;
1574 }
1575
incrementVirtualCoverage(Node * node,Coordinate coverage)1576 void incrementVirtualCoverage(Node * node,
1577 Coordinate coverage)
1578 {
1579 node->virtualCoverage += coverage;
1580 node->twinNode->virtualCoverage += coverage;
1581 }
1582
getVirtualCoverage(Node * node)1583 Coordinate getVirtualCoverage(Node * node)
1584 {
1585 return node->virtualCoverage;
1586 }
1587
getTotalCoverage(Node * node)1588 Coordinate getTotalCoverage(Node * node)
1589 {
1590 return node->virtualCoverage;
1591 }
1592
1593 #endif
1594
hasSingleArc(Node * node)1595 boolean hasSingleArc(Node * node)
1596 {
1597 return node->arcCount == 1;
1598 }
1599
activateArcLookupTable(Graph * graph)1600 void activateArcLookupTable(Graph * graph)
1601 {
1602 IDnum index;
1603 Node *node;
1604 Arc *arc;
1605 IDnum nodes = graph->nodeCount;
1606 IDnum twinOriginID, destinationID, hash;
1607 Arc **table;
1608
1609 velvetLog("Activating arc lookup table\n");
1610
1611 graph->arcLookupTable = callocOrExit(6 * nodes + 1, Arc *);
1612
1613 table = graph->arcLookupTable;
1614
1615 for (index = -nodes; index <= nodes; index++) {
1616 if (index == 0)
1617 continue;
1618
1619 node = getNodeInGraph(graph, index);
1620 if (node == 0)
1621 continue;
1622
1623 for (arc = getArc(node); arc != NULL;
1624 arc = getNextArc(arc)) {
1625 twinOriginID = arc->twinArc->destination->ID;
1626 destinationID = arc->destination->ID;
1627 hash =
1628 3 * nodes - 2 * twinOriginID + destinationID;
1629 arc->nextInLookupTable = table[hash];
1630 table[hash] = arc;
1631 }
1632 }
1633
1634 velvetLog("Done activating arc lookup table\n");
1635 }
1636
deactivateArcLookupTable(Graph * graph)1637 void deactivateArcLookupTable(Graph * graph)
1638 {
1639 free(graph->arcLookupTable);
1640 graph->arcLookupTable = NULL;
1641 }
1642
exportNode(FILE * outfile,Node * node,void * withSequence)1643 static void exportNode(FILE * outfile, Node * node, void *withSequence)
1644 {
1645 Coordinate index;
1646 Nucleotide nucleotide;
1647
1648 if (node == NULL)
1649 return;
1650
1651 velvetFprintf(outfile, "NODE\t%ld\t%lld", (long) node->ID, (long long) node->length);
1652
1653 #ifndef SINGLE_COV_CAT
1654 Category cat;
1655 for (cat = 0; cat < CATEGORIES; cat++)
1656 velvetFprintf(outfile, "\t%lld\t%lld", (long long) node->virtualCoverage[cat],
1657 (long long) node->originalVirtualCoverage[cat]);
1658 velvetFprintf(outfile, "\n");
1659 #else
1660 velvetFprintf(outfile, "\t%lld\n", (long long) node->virtualCoverage);
1661 #endif
1662
1663 if (withSequence == NULL)
1664 return;
1665
1666 for (index = 0; index < node->length; index++) {
1667 nucleotide =
1668 getNucleotideInDescriptor(node->descriptor, index);
1669 switch (nucleotide) {
1670 case ADENINE:
1671 velvetFprintf(outfile, "A");
1672 break;
1673 case CYTOSINE:
1674 velvetFprintf(outfile, "C");
1675 break;
1676 case GUANINE:
1677 velvetFprintf(outfile, "G");
1678 break;
1679 case THYMINE:
1680 velvetFprintf(outfile, "T");
1681 break;
1682 }
1683 }
1684 velvetFprintf(outfile, "\n");
1685
1686 for (index = 0; index < node->length; index++) {
1687 nucleotide =
1688 getNucleotideInDescriptor(node->twinNode->descriptor,
1689 index);
1690 switch (nucleotide) {
1691 case ADENINE:
1692 velvetFprintf(outfile, "A");
1693 break;
1694 case CYTOSINE:
1695 velvetFprintf(outfile, "C");
1696 break;
1697 case GUANINE:
1698 velvetFprintf(outfile, "G");
1699 break;
1700 case THYMINE:
1701 velvetFprintf(outfile, "T");
1702 break;
1703 }
1704 }
1705 velvetFprintf(outfile, "\n");
1706 }
1707
exportArc(FILE * outfile,Arc * arc)1708 static void exportArc(FILE * outfile, Arc * arc)
1709 {
1710 IDnum originID, destinationID;
1711 IDnum absOriginID, absDestinationID;
1712
1713 if (arc == NULL)
1714 return;
1715
1716 absOriginID = originID = -arc->twinArc->destination->ID;
1717 absDestinationID = destinationID = arc->destination->ID;
1718
1719 if (absOriginID < 0)
1720 absOriginID = -absOriginID;
1721 if (absDestinationID < 0)
1722 absDestinationID = -absDestinationID;
1723
1724 if (absDestinationID < absOriginID)
1725 return;
1726
1727 if (originID == destinationID && originID < 0)
1728 return;
1729
1730 velvetFprintf(outfile, "ARC\t%li\t%li\t%li\n", (long) originID, (long) destinationID,
1731 (long) arc->multiplicity);
1732 }
1733
1734 // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
mergeArcLists(Arc * left,Arc * right)1735 static Arc *mergeArcLists(Arc * left, Arc * right)
1736 {
1737 Arc *mergedList = NULL;
1738 Arc *tail = NULL;
1739
1740 // Choose first element:
1741 if (left->destination->ID <= right->destination->ID) {
1742 mergedList = left;
1743 tail = left;
1744 left = left->next;
1745 } else {
1746 mergedList = right;
1747 tail = right;
1748 right = right->next;
1749 }
1750
1751 // Iterate while both lists are still non empty
1752 while (left != NULL && right != NULL) {
1753 if (left->destination->ID <= right->destination->ID) {
1754 tail->next = left;
1755 left->previous = tail;
1756 left = left->next;
1757 } else {
1758 tail->next = right;
1759 right->previous = tail;
1760 right = right->next;
1761 }
1762
1763 tail = tail->next;
1764 }
1765
1766 // Concatenate the remaining list at the end of the merged list
1767 if (left != NULL) {
1768 tail->next = left;
1769 left->previous = tail;
1770 }
1771
1772 if (right != NULL) {
1773 tail->next = right;
1774 right->previous = tail;
1775 }
1776
1777 return mergedList;
1778 }
1779
arcMergeSort(Arc ** arcPtr,IDnum count)1780 static void arcMergeSort(Arc ** arcPtr, IDnum count)
1781 {
1782
1783 IDnum half = count / 2;
1784 Arc *left = *arcPtr;
1785 Arc *ptr = left;
1786 Arc *right;
1787 IDnum index;
1788
1789 if (count == 0 || count == 1)
1790 return;
1791
1792 if (count == 2) {
1793 if ((*arcPtr)->destination->ID >
1794 (*arcPtr)->next->destination->ID) {
1795 (*arcPtr)->next->next = *arcPtr;
1796 (*arcPtr)->previous = (*arcPtr)->next;
1797 *arcPtr = (*arcPtr)->next;
1798 (*arcPtr)->next->next = NULL;
1799 (*arcPtr)->previous = NULL;
1800 }
1801 return;
1802 }
1803
1804 for (index = 0; index < half - 1; index++) {
1805 ptr = ptr->next;
1806 if (ptr == NULL)
1807 return;
1808 }
1809
1810 right = ptr->next;
1811 ptr->next = NULL;
1812 right->previous = NULL;
1813
1814 arcMergeSort(&left, half);
1815 arcMergeSort(&right, count - half);
1816 *arcPtr = mergeArcLists(left, right);
1817 }
1818
sortNodeArcs(Node * node)1819 static void sortNodeArcs(Node * node)
1820 {
1821 Arc *arc;
1822 IDnum count = 0;
1823
1824 for (arc = getArc(node); arc != NULL; arc = getNextArc(arc))
1825 count++;
1826
1827 if (count == 0)
1828 return;
1829
1830 arc = getArc(node);
1831 arcMergeSort(&arc, count);
1832
1833 node->arc = arc;
1834 }
1835
1836 // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
mergeGapMarkerLists(GapMarker * left,GapMarker * right)1837 static GapMarker *mergeGapMarkerLists(GapMarker * left, GapMarker * right)
1838 {
1839 GapMarker *mergedList = NULL;
1840 GapMarker *tail = NULL;
1841
1842 // Choose first element:
1843 if (left->position <= right->position) {
1844 mergedList = left;
1845 tail = left;
1846 left = left->next;
1847 } else {
1848 mergedList = right;
1849 tail = right;
1850 right = right->next;
1851 }
1852
1853 // Iterate while both lists are still non empty
1854 while (left != NULL && right != NULL) {
1855 if (left->position <= right->position) {
1856 tail->next = left;
1857 left = left->next;
1858 } else {
1859 tail->next = right;
1860 right = right->next;
1861 }
1862
1863 tail = tail->next;
1864 }
1865
1866 // Concatenate the remaining list at the end of the merged list
1867 if (left != NULL)
1868 tail->next = left;
1869
1870 if (right != NULL)
1871 tail->next = right;
1872
1873 return mergedList;
1874 }
1875
gapMergeSort(GapMarker ** gapPtr,IDnum count)1876 static void gapMergeSort(GapMarker ** gapPtr, IDnum count)
1877 {
1878
1879 IDnum half = count / 2;
1880 GapMarker *left = *gapPtr;
1881 GapMarker *ptr = left;
1882 GapMarker *right;
1883 IDnum index;
1884
1885 if (count == 0 || count == 1)
1886 return;
1887
1888 if (count == 2) {
1889 if ((*gapPtr)->position > (*gapPtr)->next->position) {
1890 (*gapPtr)->next->next = *gapPtr;
1891 *gapPtr = (*gapPtr)->next;
1892 (*gapPtr)->next->next = NULL;
1893 }
1894 return;
1895 }
1896
1897 for (index = 0; index < half - 1; index++) {
1898 ptr = ptr->next;
1899 if (ptr == NULL)
1900 return;
1901 }
1902
1903 right = ptr->next;
1904 ptr->next = NULL;
1905
1906 gapMergeSort(&left, half);
1907 gapMergeSort(&right, count - half);
1908 *gapPtr = mergeGapMarkerLists(left, right);
1909 }
1910
sortNodeGapMarkers(Node * node,Graph * graph)1911 static void sortNodeGapMarkers(Node * node, Graph * graph)
1912 {
1913 GapMarker *gap;
1914 IDnum count = 0;
1915 IDnum ID = getNodeID(node);
1916
1917 if (ID < 0)
1918 ID = -ID;
1919
1920 for (gap = graph->gapMarkers[ID]; gap != NULL; gap = gap->next)
1921 count++;
1922
1923 if (count == 0)
1924 return;
1925
1926 gap = graph->gapMarkers[ID];
1927 gapMergeSort(&gap, count);
1928
1929 graph->gapMarkers[ID] = gap;
1930 }
1931
sortGapMarkers(Graph * graph)1932 void sortGapMarkers(Graph * graph)
1933 {
1934 IDnum index;
1935 Node *node;
1936
1937 if (graph->gapMarkers == NULL)
1938 return;
1939
1940 for (index = 1; index <= nodeCount(graph); index++) {
1941 node = getNodeInGraph(graph, index);
1942 if (node)
1943 sortNodeGapMarkers(node, graph);
1944 }
1945 }
1946
exportGraph(char * filename,Graph * graph,TightString * sequences)1947 void exportGraph(char *filename, Graph * graph, TightString * sequences)
1948 {
1949 IDnum index;
1950 FILE *outfile;
1951 Node *node;
1952 Arc *arc;
1953 PassageMarkerI marker;
1954 ShortReadMarker *reads;
1955 IDnum readCount, readIndex;
1956
1957 if (graph == NULL) {
1958 return;
1959 }
1960
1961 outfile = fopen(filename, "w");
1962 if (outfile == NULL) {
1963 velvetLog("Couldn't open file, sorry\n");
1964 return;
1965 } else
1966 velvetLog("Writing into graph file %s...\n", filename);
1967
1968 // General data
1969 velvetFprintf(outfile, "%li\t%li\t%i\t%i\n", (long) graph->nodeCount,
1970 (long) graph->sequenceCount, graph->wordLength, (int) graph->double_stranded);
1971
1972 // Node info
1973 for (index = 1; index <= graph->nodeCount; index++) {
1974 node = getNodeInGraph(graph, index);
1975 exportNode(outfile, node, (void *) sequences);
1976 }
1977
1978 // Arc info
1979 for (index = 1; index <= graph->nodeCount; index++) {
1980 node = getNodeInGraph(graph, index);
1981 if (node == NULL)
1982 continue;
1983
1984 sortNodeArcs(node);
1985 sortNodeArcs(getTwinNode(node));
1986
1987 for (arc = node->arc; arc != NULL; arc = arc->next)
1988 exportArc(outfile, arc);
1989 for (arc = node->twinNode->arc; arc != NULL;
1990 arc = arc->next)
1991 exportArc(outfile, arc);
1992 }
1993
1994 // Sequence info
1995 for (index = 1; index <= graph->nodeCount; index++) {
1996 node = getNodeInGraph(graph, index);
1997 if (node == NULL)
1998 continue;
1999 for (marker = node->marker; marker != NULL_IDX;
2000 marker = getNextInNode(marker))
2001 exportMarker(outfile, marker, sequences,
2002 graph->wordLength);
2003 }
2004
2005 // Node reads
2006 if (readStartsAreActivated(graph)) {
2007 for (index = 0; index <= graph->nodeCount * 2; index++) {
2008 readCount = graph->nodeReadCounts[index];
2009 if (readCount == 0)
2010 continue;
2011
2012 velvetFprintf(outfile, "NR\t%li\t%li\n",
2013 (long) (index - graph->nodeCount), (long) readCount);
2014
2015 reads = graph->nodeReads[index];
2016 for (readIndex = 0; readIndex < readCount;
2017 readIndex++)
2018 velvetFprintf(outfile, "%ld\t%lld\t%d\n",
2019 (long) reads[readIndex].readID,
2020 (long long) reads[readIndex].position,
2021 (int) reads[readIndex].offset);
2022 }
2023 }
2024
2025 fclose(outfile);
2026 }
2027
importGraph(char * filename)2028 Graph *importGraph(char *filename)
2029 {
2030 FILE *file = fopen(filename, "r");
2031 const int maxline = MAXLINE;
2032 char line[MAXLINE];
2033 Graph *graph;
2034 Coordinate coverage;
2035 IDnum nodeCounter, sequenceCount;
2036 Node *node, *twin;
2037 Arc *arc;
2038 IDnum originID, destinationID, multiplicity;
2039 PassageMarkerI newMarker, marker;
2040 IDnum nodeID, seqID;
2041 Coordinate index;
2042 Coordinate start, finish;
2043 Coordinate startOffset, finishOffset;
2044 boolean finished = false;
2045 size_t arrayLength;
2046 IDnum readCount;
2047 ShortReadMarker *array;
2048 int wordLength, sCount;
2049 ShortLength length;
2050 long long_var, long_var2, long_var3;
2051 long long longlong_var, longlong_var2, longlong_var3, longlong_var4;
2052 short short_var;
2053 char c;
2054
2055 if (file == NULL)
2056 exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
2057
2058 velvetLog("Reading graph file %s\n", filename);
2059
2060 // First line
2061 if (!fgets(line, maxline, file))
2062 exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2063 sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2064 &wordLength, &short_var);
2065 nodeCounter = (IDnum) long_var;
2066 sequenceCount = (IDnum) long_var2;
2067 graph = emptyGraph(sequenceCount, wordLength);
2068 graph->double_stranded = (boolean) short_var;
2069 resetWordFilter(wordLength);
2070 allocateNodeSpace(graph, nodeCounter);
2071
2072 velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2073 (long) sequenceCount);
2074
2075 if (nodeCounter == 0)
2076 return graph;
2077
2078 // Read nodes
2079 if (!fgets(line, maxline, file))
2080 exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2081 while (!finished && strncmp(line, "NODE", 4) == 0) {
2082 strtok(line, "\t\n");
2083 sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2084 nodeID = (IDnum) long_var;
2085 node = addEmptyNodeToGraph(graph, nodeID);
2086 sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2087 node->length = (Coordinate) longlong_var;
2088
2089 #ifndef SINGLE_COV_CAT
2090 Category cat;
2091 Coordinate originalCoverage;
2092 for (cat = 0; cat < CATEGORIES; cat++) {
2093 sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2094 coverage = (Coordinate) longlong_var;
2095 setVirtualCoverage(node, cat, coverage);
2096 sscanf(strtok(NULL, "\t\n"), "%lld",
2097 &longlong_var);
2098 originalCoverage = (Coordinate) longlong_var;
2099 setOriginalVirtualCoverage(node, cat,
2100 originalCoverage);
2101 }
2102 #else
2103 sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2104 coverage = (Coordinate) longlong_var;
2105 setVirtualCoverage(node, coverage);
2106 #endif
2107
2108 arrayLength = node->length / 4;
2109 if (node->length % 4 > 0)
2110 arrayLength++;
2111 node->descriptor =
2112 callocOrExit(arrayLength, Descriptor);
2113
2114 index = 0;
2115 while ((c = fgetc(file)) != '\n' && c != EOF) {
2116 if (c == 'A')
2117 writeNucleotideInDescriptor(ADENINE,
2118 node->
2119 descriptor,
2120 index++);
2121 else if (c == 'C')
2122 writeNucleotideInDescriptor(CYTOSINE,
2123 node->
2124 descriptor,
2125 index++);
2126 else if (c == 'G')
2127 writeNucleotideInDescriptor(GUANINE,
2128 node->
2129 descriptor,
2130 index++);
2131 else if (c == 'T')
2132 writeNucleotideInDescriptor(THYMINE,
2133 node->
2134 descriptor,
2135 index++);
2136 }
2137
2138 twin = node->twinNode;
2139 twin->length = node->length;
2140 twin->descriptor =
2141 callocOrExit(arrayLength, Descriptor);
2142 index = 0;
2143 while ((c = fgetc(file)) != '\n' && c != EOF) {
2144 if (c == 'A')
2145 writeNucleotideInDescriptor(ADENINE,
2146 twin->
2147 descriptor,
2148 index++);
2149 else if (c == 'C')
2150 writeNucleotideInDescriptor(CYTOSINE,
2151 twin->
2152 descriptor,
2153 index++);
2154 else if (c == 'G')
2155 writeNucleotideInDescriptor(GUANINE,
2156 twin->
2157 descriptor,
2158 index++);
2159 else if (c == 'T')
2160 writeNucleotideInDescriptor(THYMINE,
2161 twin->
2162 descriptor,
2163 index++);
2164 }
2165
2166 if (fgets(line, maxline, file) == NULL)
2167 finished = true;
2168 }
2169
2170 // Read arcs
2171 while (!finished && line[0] == 'A') {
2172 sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2173 &long_var2, &long_var3);
2174 originID = (IDnum) long_var;
2175 destinationID = (IDnum) long_var2;
2176 multiplicity = (IDnum) long_var3;
2177 arc =
2178 createArc(getNodeInGraph(graph, originID),
2179 getNodeInGraph(graph, destinationID), graph);
2180 setMultiplicity(arc, multiplicity);
2181 if (fgets(line, maxline, file) == NULL)
2182 finished = true;
2183 }
2184
2185 // Read sequences
2186 while (!finished && line[0] != 'N') {
2187 sscanf(line, "SEQ\t%ld\n", &long_var);
2188 seqID = (IDnum) long_var;
2189 marker = NULL_IDX;
2190 if (!fgets(line, maxline, file))
2191 exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2192
2193 while (!finished && line[0] != 'N' && line[0] != 'S') {
2194 sCount =
2195 sscanf(line, "%ld\t%lld\t%lld\t%lld\t%lld\n",
2196 &long_var, &longlong_var, &longlong_var2, &longlong_var3,
2197 &longlong_var4);
2198 nodeID = (IDnum) long_var;
2199 startOffset = (Coordinate) longlong_var;
2200 start = (Coordinate) longlong_var2;
2201 finish = (Coordinate) longlong_var3;
2202 finishOffset = (Coordinate) longlong_var4;
2203 if (sCount != 5) {
2204 velvetLog
2205 ("ERROR: reading in graph - only %d items read for line '%s'",
2206 sCount, line);
2207 #ifdef DEBUG
2208 abort();
2209 #endif
2210 exit(1);
2211 }
2212 newMarker =
2213 newPassageMarker(seqID, start, finish,
2214 startOffset, finishOffset);
2215 transposePassageMarker(newMarker,
2216 getNodeInGraph(graph,
2217 nodeID));
2218 connectPassageMarkers(marker, newMarker, graph);
2219 marker = newMarker;
2220 if (fgets(line, maxline, file) == NULL)
2221 finished = true;
2222 }
2223 }
2224
2225 // Node reads
2226 while (!finished) {
2227 sscanf(line, "NR\t%ld\t%ld\n", &long_var, &long_var2);
2228 nodeID = (IDnum) long_var;
2229 readCount = (IDnum) long_var2;
2230 if (!readStartsAreActivated(graph))
2231 activateReadStarts(graph);
2232
2233 graph->nodeReadCounts[nodeID + graph->nodeCount] =
2234 readCount;
2235 array = mallocOrExit(readCount, ShortReadMarker);
2236 graph->nodeReads[nodeID + graph->nodeCount] = array;
2237
2238 readCount = 0;
2239 if (!fgets(line, maxline, file))
2240 exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2241 while (!finished && line[0] != 'N') {
2242 sscanf(line, "%ld\t%lld\t%hd\n", &long_var,
2243 &longlong_var, &short_var);
2244 seqID = (IDnum) long_var;
2245 startOffset = (Coordinate) longlong_var;
2246 length = (ShortLength) short_var;
2247 array[readCount].readID = seqID;
2248 array[readCount].position = startOffset;
2249 array[readCount].offset = length;
2250 readCount++;
2251 if (fgets(line, maxline, file) == NULL)
2252 finished = true;
2253 }
2254 }
2255
2256 //velvetLog("New graph has %d nodes\n", graph->nodeCount);
2257
2258 fclose(file);
2259 //velvetLog("Done, exiting\n");
2260 return graph;
2261 }
2262
readPreGraphFile(char * preGraphFilename,boolean * double_strand)2263 Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
2264 {
2265 FILE *file = fopen(preGraphFilename, "r");
2266 const int maxline = MAXLINE;
2267 char line[MAXLINE];
2268
2269 Graph *graph;
2270 IDnum nodeCounter, sequenceCount;
2271
2272 Node *node, *twin;
2273 IDnum nodeID = 0;
2274 Coordinate index, nodeLength;
2275 char c;
2276 int wordLength, wordShift;
2277 size_t arrayLength;
2278 short short_var;
2279 long long_var, long_var2;
2280 long long longlong_var;
2281
2282 if (file == NULL)
2283 exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
2284
2285 velvetLog("Reading pre-graph file %s\n", preGraphFilename);
2286
2287 // First line
2288 if (!fgets(line, maxline, file))
2289 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2290 sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2291 &wordLength, &short_var);
2292 nodeCounter = (IDnum) long_var;
2293 sequenceCount = (IDnum) long_var2;
2294 *double_strand = (boolean) short_var;
2295 wordShift = wordLength - 1;
2296 graph = emptyGraph(sequenceCount, wordLength);
2297 graph->double_stranded = *double_strand;
2298 resetWordFilter(wordLength);
2299 allocateNodeSpace(graph, nodeCounter);
2300 velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2301 (long) sequenceCount);
2302
2303 // Read nodes
2304 if (nodeCounter == 0)
2305 return graph;
2306
2307 if (!fgets(line, maxline, file))
2308 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2309 while (line[0] == 'N') {
2310 nodeID++;
2311 node = addEmptyNodeToGraph(graph, nodeID);
2312
2313 sscanf(line, "%*s\t%*i\t%lli\n", &longlong_var);
2314 node->length = (Coordinate) longlong_var;
2315 nodeLength = node->length;
2316 arrayLength = node->length / 4;
2317 if (node->length % 4 > 0)
2318 arrayLength++;
2319 node->descriptor =
2320 callocOrExit(arrayLength, Descriptor);
2321
2322 twin = node->twinNode;
2323 twin->length = nodeLength;
2324 twin->descriptor =
2325 callocOrExit(arrayLength, Descriptor);
2326
2327
2328 index = 0;
2329 while ((c = getc(file)) != '\n') {
2330 if (c == 'A') {
2331 if (index - wordShift >= 0)
2332 writeNucleotideInDescriptor(ADENINE,
2333 node->
2334 descriptor,
2335 index - wordShift);
2336 if (nodeLength - index - 1 >= 0) {
2337 #ifndef COLOR
2338 writeNucleotideInDescriptor(THYMINE,
2339 twin->
2340 descriptor,
2341 nodeLength - index - 1);
2342 #else
2343 writeNucleotideInDescriptor(ADENINE,
2344 twin->
2345 descriptor,
2346 nodeLength - index - 1);
2347 #endif
2348 }
2349 } else if (c == 'C') {
2350 if (index - wordShift >= 0)
2351 writeNucleotideInDescriptor(CYTOSINE,
2352 node->
2353 descriptor,
2354 index - wordShift);
2355 if (nodeLength - index - 1 >= 0) {
2356 #ifndef COLOR
2357 writeNucleotideInDescriptor(GUANINE,
2358 twin->
2359 descriptor,
2360 nodeLength - index - 1);
2361 #else
2362 writeNucleotideInDescriptor(CYTOSINE,
2363 twin->
2364 descriptor,
2365 nodeLength - index - 1);
2366 #endif
2367 }
2368 } else if (c == 'G') {
2369 if (index - wordShift >= 0)
2370 writeNucleotideInDescriptor(GUANINE,
2371 node->
2372 descriptor,
2373 index - wordShift);
2374 if (nodeLength - index - 1 >= 0) {
2375 #ifndef COLOR
2376 writeNucleotideInDescriptor(CYTOSINE,
2377 twin->
2378 descriptor,
2379 nodeLength - index - 1);
2380 #else
2381 writeNucleotideInDescriptor(GUANINE,
2382 twin->
2383 descriptor,
2384 nodeLength - index - 1);
2385 #endif
2386 }
2387 } else if (c == 'T') {
2388 if (index - wordShift >= 0)
2389 writeNucleotideInDescriptor(THYMINE,
2390 node->
2391 descriptor,
2392 index - wordShift);
2393 if (nodeLength - index - 1 >= 0) {
2394 #ifndef COLOR
2395 writeNucleotideInDescriptor(ADENINE,
2396 twin->
2397 descriptor,
2398 nodeLength - index - 1);
2399 #else
2400 writeNucleotideInDescriptor(THYMINE,
2401 twin->
2402 descriptor,
2403 nodeLength - index - 1);
2404 #endif
2405 }
2406 }
2407
2408 index++;
2409 }
2410
2411 if (fgets(line, maxline, file) == NULL) {
2412 fclose(file);
2413 return graph;
2414 }
2415 }
2416
2417 fclose(file);
2418 return graph;
2419 }
2420
readConnectedGraphFile(char * connectedGraphFilename,boolean * double_strand)2421 Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_strand)
2422 {
2423 FILE *file = fopen(connectedGraphFilename, "r");
2424 const int maxline = MAXLINE;
2425 char line[MAXLINE];
2426 Coordinate coverage;
2427 Arc *arc;
2428 IDnum originID, destinationID, multiplicity;
2429 boolean finished = false;
2430 long long_var3;
2431
2432 Graph *graph;
2433 IDnum nodeCounter, sequenceCount;
2434
2435 Node *node, *twin;
2436 IDnum nodeID = 0;
2437 Coordinate index, nodeLength;
2438 char c;
2439 int wordLength, wordShift;
2440 size_t arrayLength;
2441 short short_var;
2442 long long_var, long_var2;
2443 long long longlong_var;
2444
2445 if (file == NULL)
2446 exitErrorf(EXIT_FAILURE, true, "Could not open %s", connectedGraphFilename);
2447
2448 velvetLog("Reading connected graph file %s\n", connectedGraphFilename);
2449
2450 // First line
2451 if (!fgets(line, maxline, file))
2452 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2453 sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2454 &wordLength, &short_var);
2455 nodeCounter = (IDnum) long_var;
2456 sequenceCount = (IDnum) long_var2;
2457 *double_strand = (boolean) short_var;
2458 wordShift = wordLength - 1;
2459 graph = emptyGraph(sequenceCount, wordLength);
2460 graph->double_stranded = *double_strand;
2461 resetWordFilter(wordLength);
2462 allocateNodeSpace(graph, nodeCounter);
2463 velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2464 (long) sequenceCount);
2465
2466 // Read nodes
2467 if (nodeCounter == 0)
2468 return graph;
2469
2470 if (!fgets(line, maxline, file))
2471 exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2472 while (!finished && strncmp(line, "NODE", 4) == 0) {
2473 strtok(line, "\t\n");
2474 sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2475 nodeID = (IDnum) long_var;
2476 node = addEmptyNodeToGraph(graph, nodeID);
2477 sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2478 node->length = (Coordinate) longlong_var;
2479 nodeLength = node->length;
2480
2481 #ifndef SINGLE_COV_CAT
2482 Category cat;
2483 Coordinate originalCoverage;
2484 for (cat = 0; cat < CATEGORIES; cat++) {
2485 sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2486 coverage = (Coordinate) longlong_var;
2487 setVirtualCoverage(node, cat, coverage);
2488 sscanf(strtok(NULL, "\t\n"), "%lld",
2489 &longlong_var);
2490 originalCoverage = (Coordinate) longlong_var;
2491 setOriginalVirtualCoverage(node, cat,
2492 originalCoverage);
2493 }
2494 #else
2495 sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2496 coverage = (Coordinate) longlong_var;
2497 setVirtualCoverage(node, coverage);
2498 #endif
2499
2500 arrayLength = node->length / 4;
2501 if (node->length % 4 > 0)
2502 arrayLength++;
2503 node->descriptor =
2504 callocOrExit(arrayLength, Descriptor);
2505
2506 twin = node->twinNode;
2507 twin->length = node->length;
2508 twin->descriptor =
2509 callocOrExit(arrayLength, Descriptor);
2510
2511 index = 0;
2512 while ((c = getc(file)) != '\n') {
2513 if (c == 'A') {
2514 if (index - wordShift >= 0)
2515 writeNucleotideInDescriptor(ADENINE,
2516 node->
2517 descriptor,
2518 index - wordShift);
2519 if (nodeLength - index - 1 >= 0) {
2520 #ifndef COLOR
2521 writeNucleotideInDescriptor(THYMINE,
2522 twin->
2523 descriptor,
2524 nodeLength - index - 1);
2525 #else
2526 writeNucleotideInDescriptor(ADENINE,
2527 twin->
2528 descriptor,
2529 nodeLength - index - 1);
2530 #endif
2531 }
2532 } else if (c == 'C') {
2533 if (index - wordShift >= 0)
2534 writeNucleotideInDescriptor(CYTOSINE,
2535 node->
2536 descriptor,
2537 index - wordShift);
2538 if (nodeLength - index - 1 >= 0) {
2539 #ifndef COLOR
2540 writeNucleotideInDescriptor(GUANINE,
2541 twin->
2542 descriptor,
2543 nodeLength - index - 1);
2544 #else
2545 writeNucleotideInDescriptor(CYTOSINE,
2546 twin->
2547 descriptor,
2548 nodeLength - index - 1);
2549 #endif
2550 }
2551 } else if (c == 'G') {
2552 if (index - wordShift >= 0)
2553 writeNucleotideInDescriptor(GUANINE,
2554 node->
2555 descriptor,
2556 index - wordShift);
2557 if (nodeLength - index - 1 >= 0) {
2558 #ifndef COLOR
2559 writeNucleotideInDescriptor(CYTOSINE,
2560 twin->
2561 descriptor,
2562 nodeLength - index - 1);
2563 #else
2564 writeNucleotideInDescriptor(GUANINE,
2565 twin->
2566 descriptor,
2567 nodeLength - index - 1);
2568 #endif
2569 }
2570 } else if (c == 'T') {
2571 if (index - wordShift >= 0)
2572 writeNucleotideInDescriptor(THYMINE,
2573 node->
2574 descriptor,
2575 index - wordShift);
2576 if (nodeLength - index - 1 >= 0) {
2577 #ifndef COLOR
2578 writeNucleotideInDescriptor(ADENINE,
2579 twin->
2580 descriptor,
2581 nodeLength - index - 1);
2582 #else
2583 writeNucleotideInDescriptor(THYMINE,
2584 twin->
2585 descriptor,
2586 nodeLength - index - 1);
2587 #endif
2588 }
2589 }
2590
2591 index++;
2592 }
2593
2594 if (fgets(line, maxline, file) == NULL) {
2595 finished = true;
2596 }
2597 }
2598
2599 // Read arcs
2600 while (!finished && line[0] == 'A') {
2601 sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2602 &long_var2, &long_var3);
2603 originID = (IDnum) long_var;
2604 destinationID = (IDnum) long_var2;
2605 multiplicity = (IDnum) long_var3;
2606 arc =
2607 createArc(getNodeInGraph(graph, originID),
2608 getNodeInGraph(graph, destinationID), graph);
2609 setMultiplicity(arc, multiplicity);
2610 if (fgets(line, maxline, file) == NULL)
2611 finished = true;
2612 }
2613
2614 fclose(file);
2615 return graph;
2616 }
2617
2618 // Prints out the information relative to the topology of a node into a new file
2619 // Internal to exportDOTGraph()
DOTNode(Node * node,FILE * outfile)2620 void DOTNode(Node * node, FILE * outfile)
2621 {
2622 IDnum ID;
2623 Arc *arc;
2624 Node *otherNode;
2625
2626 ID = node->ID;
2627 if (ID < 0)
2628 return;
2629
2630 velvetFprintf(outfile, "\t%li [label=\"<left>|%li|<right>\"]\n", (long) ID, (long) ID);
2631
2632 for (arc = node->arc; arc != NULL; arc = arc->next) {
2633 otherNode = arc->destination;
2634 if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2635 continue;
2636 }
2637
2638 if (otherNode->ID > 0)
2639 velvetFprintf(outfile, "\t%li:right -> %li:left\n", (long) ID,
2640 (long) otherNode->ID);
2641 else
2642 velvetFprintf(outfile, "\t%li:right -> %li:right\n", (long) ID,
2643 (long) -otherNode->ID);
2644 }
2645
2646 for (arc = node->twinNode->arc; arc != NULL; arc = arc->next) {
2647 otherNode = arc->destination;
2648 if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2649 continue;
2650 }
2651
2652 if (otherNode->ID > 0)
2653 velvetFprintf(outfile, "\t%li:left -> %li:left\n", (long) ID,
2654 (long) otherNode->ID);
2655 else
2656 velvetFprintf(outfile, "\t%li:left -> %li:right\n", (long) ID,
2657 (long) -otherNode->ID);
2658 }
2659 }
2660
expandNode(Node * node,int WORDLENGTH)2661 TightString *expandNode(Node * node, int WORDLENGTH)
2662 {
2663 Nucleotide nucleotide;
2664 Coordinate index;
2665 TightString *tString =
2666 newTightString(node->length + WORDLENGTH - 1);
2667 Node *twin = node->twinNode;
2668 Coordinate length = node->length;
2669
2670 for (index = 0; index < WORDLENGTH; index++) {
2671 nucleotide =
2672 getNucleotideInDescriptor(twin->descriptor,
2673 length - index - 1);
2674 #ifndef COLOR
2675 writeNucleotideAtPosition(3 - nucleotide, index, tString);
2676 #else
2677 writeNucleotideAtPosition(nucleotide, index, tString);
2678 #endif
2679 }
2680
2681 for (index = 1; index < node->length; index++) {
2682 nucleotide =
2683 getNucleotideInDescriptor(node->descriptor, index);
2684 writeNucleotideAtPosition(nucleotide,
2685 index + WORDLENGTH - 1, tString);
2686 }
2687
2688 return tString;
2689 }
2690
expandNodeFragment(Node * node,Coordinate contigStart,Coordinate contigFinish,int wordLength)2691 char *expandNodeFragment(Node * node, Coordinate contigStart,
2692 Coordinate contigFinish, int wordLength)
2693 {
2694 Nucleotide nucleotide;
2695 Coordinate index;
2696 Node *twin = node->twinNode;
2697 Coordinate length = contigFinish - contigStart;
2698 int wordShift = wordLength - 1;
2699 char *string;
2700
2701 if (length >= wordShift) {
2702 string = callocOrExit(length + wordLength, char);
2703
2704 for (index = 0; index < wordShift; index++) {
2705 nucleotide =
2706 getNucleotideInDescriptor(twin->descriptor,
2707 twin->length - contigStart -
2708 index - 1);
2709 #ifndef COLOR
2710 nucleotide = 3 - nucleotide;
2711 #endif
2712
2713 switch (nucleotide) {
2714 case ADENINE:
2715 string[index] = 'A';
2716 break;
2717 case CYTOSINE:
2718 string[index] = 'C';
2719 break;
2720 case GUANINE:
2721 string[index] = 'G';
2722 break;
2723 case THYMINE:
2724 string[index] = 'T';
2725 break;
2726 }
2727
2728 }
2729
2730 for (index = 0; index < length; index++) {
2731 nucleotide =
2732 getNucleotideInDescriptor(node->descriptor,
2733 contigStart + index);
2734 switch (nucleotide) {
2735 case ADENINE:
2736 string[index + wordShift] = 'A';
2737 break;
2738 case CYTOSINE:
2739 string[index + wordShift] = 'C';
2740 break;
2741 case GUANINE:
2742 string[index + wordShift] = 'G';
2743 break;
2744 case THYMINE:
2745 string[index + wordShift] = 'T';
2746 break;
2747 }
2748 }
2749
2750 string[length + wordShift] = '\0';
2751 } else {
2752 string = callocOrExit(length + 1, char);
2753
2754 for (index = 0; index < length; index++) {
2755 nucleotide =
2756 getNucleotideInDescriptor(node->descriptor, contigStart + index);
2757 switch (nucleotide) {
2758 case ADENINE:
2759 string[index] = 'A';
2760 break;
2761 case CYTOSINE:
2762 string[index] = 'C';
2763 break;
2764 case GUANINE:
2765 string[index] = 'G';
2766 break;
2767 case THYMINE:
2768 string[index] = 'T';
2769 break;
2770 }
2771 }
2772
2773 string[length] = '\0';
2774 }
2775
2776 return string;
2777 }
2778
readStartsAreActivated(Graph * graph)2779 boolean readStartsAreActivated(Graph * graph)
2780 {
2781 return graph->nodeReads != NULL;
2782 }
2783
activateReadStarts(Graph * graph)2784 void activateReadStarts(Graph * graph)
2785 {
2786 graph->nodeReads =
2787 callocOrExit(2 * graph->nodeCount + 1, ShortReadMarker *);
2788 graph->nodeReadCounts =
2789 callocOrExit(2 * graph->nodeCount + 1, IDnum);
2790 }
2791
deactivateReadStarts(Graph * graph)2792 void deactivateReadStarts(Graph * graph)
2793 {
2794 free(graph->nodeReads);
2795 free(graph->nodeReadCounts);
2796
2797 graph->nodeReads = NULL;
2798 graph->nodeReadCounts = NULL;
2799 }
2800
findIDnumInArray(IDnum query,IDnum * array,IDnum arrayLength)2801 boolean findIDnumInArray(IDnum query, IDnum * array, IDnum arrayLength)
2802 {
2803 IDnum leftIndex = 0;
2804 IDnum rightIndex = arrayLength;
2805 IDnum middleIndex;
2806
2807 if (arrayLength == 0)
2808 return false;
2809
2810 while (true) {
2811 middleIndex = leftIndex + (rightIndex - leftIndex) / 2;
2812
2813 if (array[middleIndex] == query)
2814 return true;
2815 else if (leftIndex >= rightIndex)
2816 return false;
2817 else if (array[middleIndex] > query)
2818 rightIndex = middleIndex;
2819 else if (leftIndex == middleIndex)
2820 leftIndex++;
2821 else
2822 leftIndex = middleIndex;
2823 }
2824 }
2825
compareShortReadMarkers(const void * A,const void * B)2826 static inline int compareShortReadMarkers(const void *A, const void *B)
2827 {
2828 IDnum a = ((ShortReadMarker *) A)->readID;
2829 IDnum b = ((ShortReadMarker *) B)->readID;
2830
2831 if (a > b)
2832 return 1;
2833 if (a == b)
2834 return 0;
2835 return -1;
2836 }
2837
compareIDnums(const void * A,const void * B)2838 static inline int compareIDnums(const void *A, const void *B)
2839 {
2840 IDnum a = *((IDnum *) A);
2841 IDnum b = *((IDnum *) B);
2842
2843 if (a > b)
2844 return 1;
2845 if (a == b)
2846 return 0;
2847 return -1;
2848 }
2849
incrementReadStartCount(Node * node,Graph * graph)2850 void incrementReadStartCount(Node * node, Graph * graph)
2851 {
2852 graph->nodeReadCounts[node->ID + graph->nodeCount]++;
2853 }
2854
createNodeReadStartArrays(Graph * graph)2855 void createNodeReadStartArrays(Graph * graph)
2856 {
2857 IDnum index;
2858
2859 if (graph->nodeReads == NULL)
2860 return;
2861
2862 for (index = 0; index <= 2 * (graph->nodeCount); index++) {
2863 if (graph->nodeReadCounts[index] != 0) {
2864 graph->nodeReads[index] =
2865 mallocOrExit(graph->nodeReadCounts[index],
2866 ShortReadMarker);
2867 graph->nodeReadCounts[index] = 0;
2868 } else {
2869 graph->nodeReads[index] = NULL;
2870 }
2871 }
2872 }
2873
orderNodeReadStartArrays(Graph * graph)2874 void orderNodeReadStartArrays(Graph * graph)
2875 {
2876 IDnum index;
2877
2878 if (graph->nodeReads == NULL)
2879 return;
2880
2881 for (index = 0; index <= 2 * (graph->nodeCount); index++)
2882 if (graph->nodeReadCounts[index] != 0)
2883 qsort(graph->nodeReads[index],
2884 graph->nodeReadCounts[index],
2885 sizeof(ShortReadMarker),
2886 compareShortReadMarkers);
2887 }
2888
addReadStart(Node * node,IDnum seqID,Coordinate position,Graph * graph,Coordinate offset)2889 void addReadStart(Node * node, IDnum seqID, Coordinate position,
2890 Graph * graph, Coordinate offset)
2891 {
2892 IDnum nodeIndex = getNodeID(node) + graph->nodeCount;
2893
2894 ShortReadMarker *array = graph->nodeReads[nodeIndex];
2895 IDnum arrayLength = graph->nodeReadCounts[nodeIndex];
2896
2897 array[arrayLength].readID = seqID;
2898 array[arrayLength].position = position;
2899 array[arrayLength].offset = (ShortLength) offset;
2900 // printf("node %d, seq %d, pos %ld, offset %ld\n", getNodeID(node), seqID, position, offset);
2901 graph->nodeReadCounts[nodeIndex]++;
2902 }
2903
blurLastShortReadMarker(Node * node,Graph * graph)2904 void blurLastShortReadMarker(Node * node, Graph * graph)
2905 {
2906 IDnum nodeIndex = getNodeID(node) + nodeCount(graph);
2907 IDnum index = graph->nodeReadCounts[nodeIndex] - 1;
2908 ShortReadMarker *marker;
2909
2910 if (index >= 0)
2911 marker = &(graph->nodeReads[nodeIndex][index]);
2912 else
2913 abort();
2914
2915 setShortReadMarkerPosition(marker, -1);
2916 }
2917
commonNodeReads(Node * nodeA,Node * nodeB,Graph * graph,IDnum * length)2918 ShortReadMarker *commonNodeReads(Node * nodeA, Node * nodeB, Graph * graph,
2919 IDnum * length)
2920 {
2921 IDnum targetID, targetLength, targetIndex, targetVal;
2922 IDnum sourceID, sourceLength, sourceIndex, sourceVal;
2923 IDnum mergeLength;
2924 ShortReadMarker *mergeArray, *targetArray, *sourceArray;
2925
2926 if (graph->nodeReads == NULL) {
2927 *length = 0;
2928 return NULL;
2929 }
2930
2931 if (nodeA == NULL || nodeB == NULL) {
2932 *length = 0;
2933 return NULL;
2934 }
2935
2936 targetID = getNodeID(nodeA) + graph->nodeCount;
2937 targetArray = graph->nodeReads[targetID];
2938 targetLength = graph->nodeReadCounts[targetID];
2939
2940 sourceID = getNodeID(nodeB) + graph->nodeCount;
2941 sourceArray = graph->nodeReads[sourceID];
2942 sourceLength = graph->nodeReadCounts[sourceID];
2943
2944 if (sourceArray == NULL || targetArray == NULL) {
2945 *length = 0;
2946 return NULL;
2947 }
2948
2949 mergeArray =
2950 mallocOrExit(sourceLength +
2951 targetLength, ShortReadMarker);
2952
2953 mergeLength = 0;
2954 sourceIndex = 0;
2955 targetIndex = 0;
2956 sourceVal = sourceArray[0].readID;
2957 targetVal = targetArray[0].readID;
2958
2959 while (sourceIndex < sourceLength && targetIndex < targetLength) {
2960 switch (compareIDnums(&sourceVal, &targetVal)) {
2961 case -1:
2962 mergeArray[mergeLength].readID = sourceVal;
2963 mergeArray[mergeLength].position = -1;
2964 mergeArray[mergeLength].offset = -1;
2965 mergeLength++;
2966 sourceIndex++;
2967 if (sourceIndex < sourceLength)
2968 sourceVal =
2969 sourceArray[sourceIndex].readID;
2970 break;
2971 case 0:
2972 mergeArray[mergeLength].readID = sourceVal;
2973 mergeArray[mergeLength].position = -1;
2974 mergeArray[mergeLength].offset = -1;
2975 mergeLength++;
2976 sourceIndex++;
2977 if (sourceIndex < sourceLength)
2978 sourceVal =
2979 sourceArray[sourceIndex].readID;
2980 targetIndex++;
2981 if (targetIndex < targetLength)
2982 targetVal =
2983 targetArray[targetIndex].readID;
2984 break;
2985 case 1:
2986 mergeArray[mergeLength].readID = targetVal;
2987 mergeArray[mergeLength].position = -1;
2988 mergeArray[mergeLength].offset = -1;
2989 mergeLength++;
2990 targetIndex++;
2991 if (targetIndex < targetLength)
2992 targetVal =
2993 targetArray[targetIndex].readID;
2994 }
2995 }
2996
2997 while (sourceIndex < sourceLength) {
2998 mergeArray[mergeLength].readID =
2999 sourceArray[sourceIndex].readID;
3000 mergeArray[mergeLength].position = -1;
3001 mergeArray[mergeLength].offset = -1;
3002 mergeLength++;
3003 sourceIndex++;
3004 }
3005
3006 while (targetIndex < targetLength) {
3007 mergeArray[mergeLength].readID =
3008 targetArray[targetIndex].readID;
3009 mergeArray[mergeLength].position = -1;
3010 mergeArray[mergeLength].offset = -1;
3011 mergeLength++;
3012 targetIndex++;
3013 }
3014
3015 *length = mergeLength;
3016 return mergeArray;
3017 }
3018
extractFrontOfNodeReads(Node * node,Coordinate breakpoint,Graph * graph,IDnum * length,PassageMarkerI sourceMarker,ShortLength * lengths)3019 ShortReadMarker *extractFrontOfNodeReads(Node * node,
3020 Coordinate breakpoint,
3021 Graph * graph, IDnum * length,
3022 PassageMarkerI sourceMarker,
3023 ShortLength * lengths)
3024 {
3025 IDnum sourceID;
3026 IDnum mergeLength, newLength, sourceLength;
3027 IDnum sourceIndex;
3028 ShortReadMarker *mergeArray, *sourceArray, *newArray;
3029 ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3030 Coordinate finish;
3031 Coordinate revBreakpoint;
3032
3033 if (graph->nodeReads == NULL) {
3034 *length = 0;
3035 return NULL;
3036 }
3037
3038 if (node == NULL) {
3039 *length = 0;
3040 return NULL;
3041 }
3042
3043 if (breakpoint == 0) {
3044 return commonNodeReads(node,
3045 getTwinNode(getNode
3046 (getPreviousInSequence
3047 (sourceMarker))),
3048 graph, length);
3049 }
3050
3051 sourceID = getNodeID(node) + graph->nodeCount;
3052 sourceArray = graph->nodeReads[sourceID];
3053 sourceLength = graph->nodeReadCounts[sourceID];
3054
3055 if (sourceArray == NULL) {
3056 *length = 0;
3057 return NULL;
3058 }
3059
3060 revBreakpoint = node->length - breakpoint;
3061
3062 mergeLength = 0;
3063 newLength = 0;
3064 sourcePtr = sourceArray;
3065 for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3066 if (sourcePtr->position == -1) {
3067 newLength++;
3068 mergeLength++;
3069 } else {
3070 finish =
3071 sourcePtr->position - sourcePtr->offset +
3072 lengths[sourcePtr->readID - 1];
3073 if (sourcePtr->position < revBreakpoint)
3074 newLength++;
3075 if (finish > revBreakpoint)
3076 mergeLength++;
3077 }
3078 sourcePtr++;
3079 }
3080
3081 newArray = mallocOrExit(newLength, ShortReadMarker);
3082 mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3083
3084 mergePtr = mergeArray;
3085 newPtr = newArray;
3086 sourcePtr = sourceArray;
3087 mergeLength = 0;
3088 newLength = 0;
3089 for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3090 if (sourcePtr->position == -1) {
3091 mergePtr->readID = sourcePtr->readID;
3092 setShortReadMarkerPosition(mergePtr, -1);
3093 setShortReadMarkerOffset(mergePtr, -1);
3094 mergePtr++;
3095 mergeLength++;
3096 newPtr->readID = sourcePtr->readID;
3097 setShortReadMarkerPosition(newPtr, -1);
3098 setShortReadMarkerOffset(newPtr, -1);
3099 newPtr++;
3100 newLength++;
3101 } else {
3102 finish =
3103 sourcePtr->position - sourcePtr->offset +
3104 lengths[sourcePtr->readID - 1];
3105 if (sourcePtr->position < revBreakpoint) {
3106 newPtr->readID = sourcePtr->readID;
3107 setShortReadMarkerPosition(newPtr,
3108 sourcePtr->
3109 position);
3110 setShortReadMarkerOffset(newPtr,
3111 sourcePtr->
3112 offset);
3113 newPtr++;
3114 newLength++;
3115
3116 // Saddle back reads:
3117 if (finish > revBreakpoint) {
3118 mergePtr->readID =
3119 sourcePtr->readID;
3120 setShortReadMarkerPosition
3121 (mergePtr, 0);
3122 setShortReadMarkerOffset(mergePtr,
3123 sourcePtr->
3124 offset +
3125 revBreakpoint
3126 -
3127 sourcePtr->
3128 position);
3129 mergePtr++;
3130 }
3131 } else if (finish > revBreakpoint) {
3132 mergePtr->readID = sourcePtr->readID;
3133 setShortReadMarkerPosition(mergePtr,
3134 sourcePtr->
3135 position - revBreakpoint);
3136 setShortReadMarkerOffset(mergePtr,
3137 sourcePtr->
3138 offset);
3139 mergePtr++;
3140 mergeLength++;
3141 }
3142 }
3143
3144 sourcePtr++;
3145 }
3146
3147 free(sourceArray);
3148 graph->nodeReads[sourceID] = newArray;
3149 graph->nodeReadCounts[sourceID] = newLength;
3150
3151 *length = mergeLength;
3152 return mergeArray;
3153 }
3154
extractBackOfNodeReads(Node * node,Coordinate breakpoint,Graph * graph,IDnum * length,PassageMarkerI sourceMarker,ShortLength * lengths)3155 ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
3156 Graph * graph, IDnum * length,
3157 PassageMarkerI sourceMarker,
3158 ShortLength * lengths)
3159 {
3160 IDnum sourceID;
3161 IDnum mergeLength, newLength, sourceLength;
3162 IDnum sourceIndex;
3163 ShortReadMarker *mergeArray, *sourceArray, *newArray;
3164 ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3165 Coordinate finish;
3166
3167 if (graph->nodeReads == NULL) {
3168 *length = 0;
3169 return NULL;
3170 }
3171
3172 if (node == NULL) {
3173 *length = 0;
3174 return NULL;
3175 }
3176
3177 if (breakpoint == 0) {
3178 return
3179 commonNodeReads(getNode
3180 (getPreviousInSequence(sourceMarker)),
3181 node, graph, length);
3182 }
3183
3184 sourceID = getNodeID(node) + graph->nodeCount;
3185 sourceArray = graph->nodeReads[sourceID];
3186 sourceLength = graph->nodeReadCounts[sourceID];
3187
3188 if (sourceArray == NULL) {
3189 *length = 0;
3190 return NULL;
3191 }
3192
3193 mergeLength = 0;
3194 newLength = 0;
3195 sourcePtr = sourceArray;
3196 for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3197 if (sourcePtr->position == -1) {
3198 mergeLength++;
3199 newLength++;
3200 } else {
3201 finish =
3202 sourcePtr->position - sourcePtr->offset +
3203 lengths[sourcePtr->readID - 1];
3204 if (sourcePtr->position < breakpoint)
3205 mergeLength++;
3206 if (finish > breakpoint)
3207 newLength++;
3208 }
3209 sourcePtr++;
3210 }
3211
3212 newArray = mallocOrExit(newLength, ShortReadMarker);
3213 mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3214
3215 mergePtr = mergeArray;
3216 newPtr = newArray;
3217 sourcePtr = sourceArray;
3218 for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3219 if (sourcePtr->position == -1) {
3220 mergePtr->readID = sourcePtr->readID;
3221 setShortReadMarkerPosition(mergePtr, -1);
3222 setShortReadMarkerOffset(mergePtr, -1);
3223 mergePtr++;
3224
3225 newPtr->readID = sourcePtr->readID;
3226 setShortReadMarkerPosition(newPtr, -1);
3227 setShortReadMarkerOffset(newPtr, -1);
3228 newPtr++;
3229
3230 sourcePtr++;
3231 continue;
3232 } else {
3233 finish =
3234 sourcePtr->position - sourcePtr->offset +
3235 lengths[sourcePtr->readID - 1];
3236
3237 if (sourcePtr->position < breakpoint) {
3238 mergePtr->readID = sourcePtr->readID;
3239 setShortReadMarkerPosition(mergePtr,
3240 sourcePtr->
3241 position);
3242 setShortReadMarkerOffset(mergePtr,
3243 sourcePtr->
3244 offset);
3245 mergePtr++;
3246
3247 // Saddle back reads:
3248 if (finish > breakpoint) {
3249 newPtr->readID = sourcePtr->readID;
3250 setShortReadMarkerPosition(newPtr,
3251 0);
3252 setShortReadMarkerOffset(newPtr,
3253 sourcePtr->
3254 offset +
3255 breakpoint
3256 -
3257 sourcePtr->
3258 position);
3259 newPtr++;
3260 }
3261 } else if (finish > breakpoint) {
3262 newPtr->readID = sourcePtr->readID;
3263 setShortReadMarkerPosition(newPtr,
3264 sourcePtr->
3265 position -
3266 breakpoint);
3267 setShortReadMarkerOffset(newPtr,
3268 sourcePtr->
3269 offset);
3270 newPtr++;
3271 }
3272 }
3273
3274 sourcePtr++;
3275 }
3276
3277 free(sourceArray);
3278 graph->nodeReads[sourceID] = newArray;
3279 graph->nodeReadCounts[sourceID] = newLength;
3280
3281 *length = mergeLength;
3282 return mergeArray;
3283 }
3284
spreadReadIDs(ShortReadMarker * reads,IDnum readCount,Node * node,Graph * graph)3285 void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
3286 Graph * graph)
3287 {
3288 IDnum targetID, targetLength, targetIndex, targetVal;
3289 IDnum sourceLength, sourceIndex, sourceVal;
3290 IDnum mergeLength;
3291 ShortReadMarker *sourceArray, *targetArray, *mergeArray;
3292 ShortReadMarker *sourcePtr, *targetPtr, *mergePtr;
3293 Coordinate targetPosition;
3294 //ShortLength nodeLength = (ShortLength) getNodeLength(node);
3295 ShortLength targetOffset;
3296
3297 if (graph->nodeReads == NULL || reads == NULL || node == NULL)
3298 return;
3299
3300 targetID = getNodeID(node) + graph->nodeCount;
3301 targetArray = graph->nodeReads[targetID];
3302 targetLength = graph->nodeReadCounts[targetID];
3303 targetPtr = targetArray;
3304
3305 sourceArray = reads;
3306 sourceLength = readCount;
3307 sourcePtr = sourceArray;
3308
3309 if (targetArray == NULL) {
3310 mergeArray =
3311 mallocOrExit(sourceLength, ShortReadMarker);
3312 mergePtr = mergeArray;
3313
3314 sourceIndex = 0;
3315 while (sourceIndex < sourceLength) {
3316 mergePtr->readID = sourcePtr->readID;
3317 setShortReadMarkerPosition(mergePtr, -1);
3318 setShortReadMarkerOffset(mergePtr, -1);
3319 mergePtr++;
3320 sourcePtr++;
3321 sourceIndex++;
3322 }
3323
3324 graph->nodeReads[targetID] = mergeArray;
3325 graph->nodeReadCounts[targetID] = sourceLength;
3326 return;
3327 }
3328
3329 mergeArray =
3330 mallocOrExit(sourceLength +
3331 targetLength, ShortReadMarker);
3332 mergePtr = mergeArray;
3333
3334 mergeLength = 0;
3335 sourceIndex = 0;
3336 targetIndex = 0;
3337 sourceVal = sourcePtr->readID;
3338 targetVal = targetPtr->readID;
3339 targetPosition = targetPtr->position;
3340 targetOffset = targetPtr->offset;
3341
3342 while (sourceIndex < sourceLength && targetIndex < targetLength) {
3343 if (sourceVal < targetVal) {
3344 mergePtr->readID = sourceVal;
3345 setShortReadMarkerPosition(mergePtr, -1);
3346 setShortReadMarkerOffset(mergePtr, -1);
3347 sourceIndex++;
3348 sourcePtr++;
3349 if (sourceIndex < sourceLength)
3350 sourceVal = sourcePtr->readID;
3351 } else if (sourceVal == targetVal) {
3352 mergePtr->readID = sourceVal;
3353 setShortReadMarkerPosition(mergePtr, -1);
3354 setShortReadMarkerOffset(mergePtr, -1);
3355 sourceIndex++;
3356 sourcePtr++;
3357 if (sourceIndex < sourceLength)
3358 sourceVal = sourcePtr->readID;
3359 targetIndex++;
3360 targetPtr++;
3361 if (targetIndex < targetLength) {
3362 targetVal = targetPtr->readID;
3363 targetPosition = targetPtr->position;
3364 targetOffset = targetPtr->offset;
3365 }
3366 } else {
3367 mergePtr->readID = targetVal;
3368 setShortReadMarkerPosition(mergePtr,
3369 targetPosition);
3370 setShortReadMarkerOffset(mergePtr, targetOffset);
3371 targetIndex++;
3372 targetPtr++;
3373 if (targetIndex < targetLength) {
3374 targetVal = targetPtr->readID;
3375 targetPosition = targetPtr->position;
3376 targetOffset = targetPtr->offset;
3377 }
3378 }
3379
3380 mergeLength++;
3381 mergePtr++;
3382 }
3383
3384 while (sourceIndex < sourceLength) {
3385 mergePtr->readID = sourcePtr->readID;
3386 setShortReadMarkerPosition(mergePtr, -1);
3387 setShortReadMarkerOffset(mergePtr, -1);
3388 mergeLength++;
3389 mergePtr++;
3390 sourceIndex++;
3391 sourcePtr++;
3392 }
3393
3394 while (targetIndex < targetLength) {
3395 mergePtr->readID = targetPtr->readID;
3396 setShortReadMarkerPosition(mergePtr, targetPtr->position);
3397 setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3398 mergeLength++;
3399 mergePtr++;
3400 targetIndex++;
3401 targetPtr++;
3402 }
3403
3404 free(targetArray);
3405 graph->nodeReads[targetID] = mergeArray;
3406 graph->nodeReadCounts[targetID] = mergeLength;
3407 }
3408
min(Coordinate A,Coordinate B)3409 static inline Coordinate min(Coordinate A, Coordinate B)
3410 {
3411 return A < B ? A : B;
3412 }
3413
min_short(ShortLength A,ShortLength B)3414 static inline ShortLength min_short(ShortLength A, ShortLength B)
3415 {
3416 return A < B ? A : B;
3417 }
3418
injectShortReads(ShortReadMarker * sourceArray,IDnum sourceLength,Node * target,Graph * graph)3419 void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
3420 Node * target, Graph * graph)
3421 {
3422 IDnum targetID = getNodeID(target) + graph->nodeCount;
3423 ShortReadMarker *targetArray = graph->nodeReads[targetID];
3424 IDnum targetLength = graph->nodeReadCounts[targetID];
3425 ShortReadMarker *targetPtr = targetArray;
3426 ShortReadMarker *sourcePtr = sourceArray;
3427 ShortReadMarker *mergeArray, *mergePtr;
3428 IDnum mergeLength;
3429 Coordinate targetPosition, sourcePosition;
3430 ShortLength targetOffset, sourceOffset;
3431 IDnum targetIndex, targetVal, sourceIndex, sourceVal;
3432
3433 if (sourceLength == 0) {
3434 free(sourceArray);
3435 return;
3436 }
3437
3438 if (targetLength == 0) {
3439 free(targetArray);
3440 graph->nodeReads[targetID] = sourceArray;
3441 graph->nodeReadCounts[targetID] = sourceLength;
3442 return;
3443 }
3444
3445 mergeArray =
3446 mallocOrExit(sourceLength +
3447 targetLength, ShortReadMarker);
3448 mergePtr = mergeArray;
3449
3450 mergeLength = 0;
3451 sourceIndex = 0;
3452 targetIndex = 0;
3453 targetVal = targetPtr->readID;
3454 targetPosition = targetPtr->position;
3455 targetOffset = targetPtr->offset;
3456 sourceVal = sourcePtr->readID;
3457 sourcePosition = sourcePtr->position;
3458 sourceOffset = sourcePtr->offset;
3459
3460 while (sourceIndex < sourceLength && targetIndex < targetLength) {
3461 if (sourceVal < targetVal) {
3462 mergePtr->readID = sourceVal;
3463 setShortReadMarkerPosition(mergePtr,
3464 sourcePosition);
3465 setShortReadMarkerOffset(mergePtr, sourceOffset);
3466 sourceIndex++;
3467 if (sourceIndex < sourceLength) {
3468 sourcePtr++;
3469 sourceVal = sourcePtr->readID;
3470 sourcePosition = sourcePtr->position;
3471 sourceOffset = sourcePtr->offset;
3472 }
3473 } else if (sourceVal == targetVal) {
3474 mergePtr->readID = sourceVal;
3475 if (sourcePosition == -1 && targetPosition == -1) {
3476 setShortReadMarkerPosition(mergePtr, -1);
3477 setShortReadMarkerOffset(mergePtr, -1);
3478 } else if (sourcePosition == -1) {
3479 setShortReadMarkerPosition(mergePtr,
3480 targetPosition);
3481 setShortReadMarkerOffset(mergePtr,
3482 targetOffset);
3483 } else if (targetPosition == -1) {
3484 setShortReadMarkerPosition(mergePtr,
3485 sourcePosition);
3486 setShortReadMarkerOffset(mergePtr,
3487 sourceOffset);
3488 } else {
3489 setShortReadMarkerPosition(mergePtr,
3490 min
3491 (sourcePosition,
3492 targetPosition));
3493 setShortReadMarkerOffset(mergePtr,
3494 min_short
3495 (sourceOffset,
3496 targetOffset));
3497 }
3498 sourceIndex++;
3499 if (sourceIndex < sourceLength) {
3500 sourcePtr++;
3501 sourceVal = sourcePtr->readID;
3502 sourcePosition = sourcePtr->position;
3503 sourceOffset = sourcePtr->offset;
3504 }
3505 targetIndex++;
3506 if (targetIndex < targetLength) {
3507 targetPtr++;
3508 targetVal = targetPtr->readID;
3509 targetPosition = targetPtr->position;
3510 targetOffset = targetPtr->offset;
3511 }
3512 } else {
3513 mergePtr->readID = targetVal;
3514 setShortReadMarkerPosition(mergePtr,
3515 targetPosition);
3516 setShortReadMarkerOffset(mergePtr, targetOffset);
3517 targetIndex++;
3518 if (targetIndex < targetLength) {
3519 targetPtr++;
3520 targetVal = targetPtr->readID;
3521 targetPosition = targetPtr->position;
3522 targetOffset = targetPtr->offset;
3523 }
3524 }
3525
3526 mergeLength++;
3527 mergePtr++;
3528 }
3529
3530 while (sourceIndex < sourceLength) {
3531 mergePtr->readID = sourcePtr->readID;
3532 setShortReadMarkerPosition(mergePtr, sourcePtr->position);
3533 setShortReadMarkerOffset(mergePtr, sourcePtr->offset);
3534 mergeLength++;
3535 mergePtr++;
3536 sourceIndex++;
3537 sourcePtr++;
3538 }
3539
3540 while (targetIndex < targetLength) {
3541 mergePtr->readID = targetPtr->readID;
3542 setShortReadMarkerPosition(mergePtr, targetPtr->position);
3543 setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3544 mergeLength++;
3545 mergePtr++;
3546 targetIndex++;
3547 targetPtr++;
3548 }
3549
3550 free(targetArray);
3551 graph->nodeReads[targetID] = mergeArray;
3552 graph->nodeReadCounts[targetID] = mergeLength;
3553
3554 free(sourceArray);
3555 }
3556
mergeNodeReads(Node * target,Node * source,Graph * graph)3557 void mergeNodeReads(Node * target, Node * source, Graph * graph)
3558 {
3559 IDnum sourceID, sourceLength;
3560 ShortReadMarker *sourceArray;
3561
3562 if (graph->nodeReads == NULL || source == NULL || target == NULL)
3563 return;
3564
3565 sourceID = getNodeID(source) + graph->nodeCount;
3566 sourceArray = graph->nodeReads[sourceID];
3567 sourceLength = graph->nodeReadCounts[sourceID];
3568
3569 if (sourceArray == NULL)
3570 return;
3571
3572 graph->nodeReads[sourceID] = NULL;
3573 graph->nodeReadCounts[sourceID] = 0;
3574
3575 injectShortReads(sourceArray, sourceLength, target, graph);
3576 }
3577
foldSymmetricalNodeReads(Node * node,Graph * graph)3578 void foldSymmetricalNodeReads(Node * node, Graph * graph)
3579 {
3580 IDnum targetID, targetLength, targetIndex;
3581 IDnum sourceID, sourceLength, sourceIndex;
3582 IDnum targetVal = 0;
3583 IDnum sourceVal = 0;
3584 IDnum mergeLength;
3585 ShortReadMarker *sourceArray, *targetArray, *mergeArray,
3586 *mergeArray2;
3587 ShortReadMarker *sourcePtr, *targetPtr, *mergePtr, *mergePtr2;
3588
3589 if (graph->nodeReads == NULL || node == NULL)
3590 return;
3591
3592 sourceID = getNodeID(node) + graph->nodeCount;
3593 sourceArray = graph->nodeReads[sourceID];
3594 sourceLength = graph->nodeReadCounts[sourceID];
3595 sourcePtr = sourceArray;
3596
3597 targetID = -getNodeID(node) + graph->nodeCount;
3598 targetArray = graph->nodeReads[targetID];
3599 targetLength = graph->nodeReadCounts[targetID];
3600 targetPtr = targetArray;
3601
3602 if (sourceArray == NULL && targetArray == NULL)
3603 return;
3604
3605 mergeArray =
3606 mallocOrExit(sourceLength +
3607 targetLength, ShortReadMarker);
3608 mergeArray2 =
3609 mallocOrExit(sourceLength +
3610 targetLength, ShortReadMarker);
3611 mergePtr = mergeArray;
3612 mergePtr2 = mergeArray2;
3613
3614 mergeLength = 0;
3615 sourceIndex = 0;
3616 targetIndex = 0;
3617 if (targetIndex < targetLength)
3618 targetVal = targetPtr->readID;
3619 if (sourceIndex < sourceLength)
3620 sourceVal = sourcePtr->readID;
3621
3622 while (sourceIndex < sourceLength && targetIndex < targetLength) {
3623 if (sourceVal < targetVal) {
3624 mergePtr->readID = sourceVal;
3625 setShortReadMarkerPosition(mergePtr, -1);
3626 setShortReadMarkerOffset(mergePtr, -1);
3627 mergePtr2->readID = sourceVal;
3628 setShortReadMarkerPosition(mergePtr2, -1);
3629 setShortReadMarkerOffset(mergePtr2, -1);
3630 sourceIndex++;
3631 sourcePtr++;
3632 if (sourceIndex < sourceLength)
3633 sourceVal = sourcePtr->readID;
3634 } else if (sourceVal == targetVal) {
3635 mergePtr->readID = sourceVal;
3636 setShortReadMarkerPosition(mergePtr, -1);
3637 setShortReadMarkerOffset(mergePtr, -1);
3638 mergePtr2->readID = sourceVal;
3639 setShortReadMarkerPosition(mergePtr2, -1);
3640 setShortReadMarkerOffset(mergePtr2, -1);
3641 sourceIndex++;
3642 sourcePtr++;
3643 if (sourceIndex < sourceLength)
3644 sourceVal = sourcePtr->readID;
3645 targetIndex++;
3646 targetPtr++;
3647 if (targetIndex < targetLength)
3648 targetVal = targetPtr->readID;
3649 } else {
3650 mergePtr->readID = targetVal;
3651 setShortReadMarkerPosition(mergePtr, -1);
3652 setShortReadMarkerOffset(mergePtr, -1);
3653 mergePtr2->readID = targetVal;
3654 setShortReadMarkerPosition(mergePtr2, -1);
3655 setShortReadMarkerOffset(mergePtr2, -1);
3656 targetIndex++;
3657 targetPtr++;
3658 if (targetIndex < targetLength)
3659 targetVal = targetPtr->readID;
3660 }
3661
3662 mergeLength++;
3663 mergePtr++;
3664 mergePtr2++;
3665 }
3666
3667 while (sourceIndex < sourceLength) {
3668 mergePtr->readID = sourcePtr->readID;
3669 setShortReadMarkerPosition(mergePtr, -1);
3670 setShortReadMarkerOffset(mergePtr, -1);
3671 mergePtr2->readID = sourcePtr->readID;
3672 setShortReadMarkerPosition(mergePtr2, -1);
3673 setShortReadMarkerOffset(mergePtr2, -1);
3674 mergeLength++;
3675 mergePtr++;
3676 mergePtr2++;
3677 sourceIndex++;
3678 sourcePtr++;
3679 }
3680
3681 while (targetIndex < targetLength) {
3682 mergePtr->readID = targetPtr->readID;
3683 setShortReadMarkerPosition(mergePtr, -1);
3684 setShortReadMarkerOffset(mergePtr, -1);
3685 mergePtr2->readID = targetPtr->readID;
3686 setShortReadMarkerPosition(mergePtr2, -1);
3687 setShortReadMarkerOffset(mergePtr2, -1);
3688 mergeLength++;
3689 mergePtr++;
3690 mergePtr2++;
3691 targetIndex++;
3692 targetPtr++;
3693 }
3694
3695 free(targetArray);
3696 graph->nodeReads[targetID] = mergeArray;
3697 graph->nodeReadCounts[targetID] = mergeLength;
3698
3699 free(sourceArray);
3700 graph->nodeReads[sourceID] = mergeArray2;
3701 graph->nodeReadCounts[sourceID] = mergeLength;
3702 }
3703
shareReadStarts(Node * target,Node * source,Graph * graph)3704 void shareReadStarts(Node * target, Node * source, Graph * graph)
3705 {
3706 ShortReadMarker *sourceArray;
3707 IDnum sourceLength, sourceID;
3708
3709 if (graph->nodeReads == NULL)
3710 return;
3711
3712 if (target == NULL || source == NULL)
3713 return;
3714
3715 sourceID = source->ID + graph->nodeCount;
3716 sourceArray = graph->nodeReads[sourceID];
3717 sourceLength = graph->nodeReadCounts[sourceID];
3718
3719 if (sourceArray == NULL)
3720 return;
3721
3722 spreadReadIDs(sourceArray, sourceLength, target, graph);
3723 }
3724
getNodeToReadMappings(Graph * graph)3725 ShortReadMarker **getNodeToReadMappings(Graph * graph)
3726 {
3727 return graph->nodeReads;
3728 }
3729
getShortReadMarkerID(ShortReadMarker * marker)3730 IDnum getShortReadMarkerID(ShortReadMarker * marker)
3731 {
3732 return marker->readID;
3733 }
3734
getShortReadMarkerOffset(ShortReadMarker * marker)3735 inline ShortLength getShortReadMarkerOffset(ShortReadMarker * marker)
3736 {
3737 return marker->offset;
3738 }
3739
setShortReadMarkerOffset(ShortReadMarker * marker,ShortLength offset)3740 inline void setShortReadMarkerOffset(ShortReadMarker * marker,
3741 ShortLength offset)
3742 {
3743 marker->offset = offset;
3744 }
3745
getNodeReadCounts(Graph * graph)3746 IDnum *getNodeReadCounts(Graph * graph)
3747 {
3748 return graph->nodeReadCounts;
3749 }
3750
getWordLength(Graph * graph)3751 int getWordLength(Graph * graph)
3752 {
3753 return graph->wordLength;
3754 }
3755
getNodeReads(Node * node,Graph * graph)3756 ShortReadMarker *getNodeReads(Node * node, Graph * graph)
3757 {
3758 IDnum id = node->ID + graph->nodeCount;
3759 return graph->nodeReads[id];
3760 }
3761
getNodeReadCount(Node * node,Graph * graph)3762 IDnum getNodeReadCount(Node * node, Graph * graph)
3763 {
3764 IDnum id = node->ID + graph->nodeCount;
3765 return graph->nodeReadCounts[id];
3766 }
3767
getShortReadMarkerPosition(ShortReadMarker * marker)3768 inline Coordinate getShortReadMarkerPosition(ShortReadMarker * marker)
3769 {
3770 return marker->position;
3771 }
3772
setShortReadMarkerPosition(ShortReadMarker * marker,Coordinate position)3773 inline void setShortReadMarkerPosition(ShortReadMarker * marker,
3774 Coordinate position)
3775 {
3776 if (position < -100)
3777 return;
3778
3779 marker->position = position;
3780 }
3781
getShortReadMarkerAtIndex(ShortReadMarker * array,IDnum index)3782 ShortReadMarker *getShortReadMarkerAtIndex(ShortReadMarker * array,
3783 IDnum index)
3784 {
3785 return &(array[index]);
3786 }
3787
destroyGraph(Graph * graph)3788 void destroyGraph(Graph * graph)
3789 {
3790 IDnum index;
3791 Node *node;
3792 for (index = 1; index <= graph->nodeCount; index++) {
3793 node = getNodeInGraph(graph, index);
3794 if (node != NULL)
3795 destroyNode(node, graph);
3796 }
3797
3798 if (graph->gapMarkers)
3799 deactivateGapMarkers(graph);
3800
3801 free(graph->nodes);
3802 destroyRecycleBin(nodeMemory);
3803 destroyRecycleBin(arcMemory);
3804 destroyAllPassageMarkers();
3805 free(graph->arcLookupTable);
3806 free(graph->nodeReads);
3807 free(graph->nodeReadCounts);
3808 free(graph);
3809 }
3810
setInsertLengths(Graph * graph,Category cat,Coordinate insertLength,Coordinate insertLength_std_dev)3811 void setInsertLengths(Graph * graph, Category cat, Coordinate insertLength,
3812 Coordinate insertLength_std_dev)
3813 {
3814 graph->insertLengths[cat] = insertLength;
3815 graph->insertLengths_var[cat] =
3816 insertLength_std_dev * insertLength_std_dev;
3817 }
3818
getInsertLength(Graph * graph,Category cat)3819 Coordinate getInsertLength(Graph * graph, Category cat)
3820 {
3821 return graph->insertLengths[cat / 2];
3822 }
3823
getInsertLength_var(Graph * graph,Category cat)3824 double getInsertLength_var(Graph * graph, Category cat)
3825 {
3826 return graph->insertLengths_var[cat / 2];
3827 }
3828
activateGapMarkers(Graph * graph)3829 void activateGapMarkers(Graph * graph)
3830 {
3831 graph->gapMarkers =
3832 callocOrExit(graph->nodeCount + 1, GapMarker *);
3833 gapMarkerMemory = newRecycleBin(sizeof(GapMarker), GAPBLOCKSIZE);
3834 }
3835
deactivateGapMarkers(Graph * graph)3836 void deactivateGapMarkers(Graph * graph)
3837 {
3838 free(graph->gapMarkers);
3839 graph->gapMarkers = NULL;
3840 destroyRecycleBin(gapMarkerMemory);
3841 gapMarkerMemory = NULL;
3842 }
3843
allocateGapMarker()3844 static GapMarker *allocateGapMarker()
3845 {
3846 return (GapMarker *) allocatePointer(gapMarkerMemory);
3847 }
3848
appendGap(Node * node,Coordinate length,Graph * graph)3849 void appendGap(Node * node, Coordinate length, Graph * graph)
3850 {
3851 IDnum nodeID = getNodeID(node);
3852 GapMarker *marker = allocateGapMarker();
3853 GapMarker *tmp;
3854
3855 marker->length = length;
3856
3857 if (nodeID > 0) {
3858 marker->position = node->length;
3859 marker->next = graph->gapMarkers[nodeID];
3860 graph->gapMarkers[nodeID] = marker;
3861 } else {
3862 for (tmp = graph->gapMarkers[-nodeID]; tmp != NULL;
3863 tmp = tmp->next)
3864 tmp->position += length;
3865
3866 marker->position = 0;
3867 marker->next = graph->gapMarkers[-nodeID];
3868 graph->gapMarkers[-nodeID] = marker;
3869 }
3870
3871 addBufferToDescriptor(node, length);
3872 }
3873
appendNodeGaps(Node * destination,Node * source,Graph * graph)3874 void appendNodeGaps(Node * destination, Node * source, Graph * graph)
3875 {
3876 IDnum destinationID = getNodeID(destination);
3877 IDnum sourceID = getNodeID(source);
3878 GapMarker *marker;
3879
3880 if (graph->gapMarkers == NULL)
3881 return;
3882
3883 if (destinationID > 0 && sourceID > 0) {
3884 for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3885 marker = marker->next)
3886 marker->position += destination->length;
3887 } else if (destinationID > 0 && sourceID < 0) {
3888 sourceID = -sourceID;
3889 for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3890 marker = marker->next)
3891 marker->position =
3892 source->length + destination->length -
3893 marker->position - marker->length;
3894 } else if (destinationID < 0 && sourceID > 0) {
3895 destinationID = -destinationID;
3896 for (marker = graph->gapMarkers[destinationID];
3897 marker != NULL; marker = marker->next)
3898 marker->position += source->length;
3899
3900 for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3901 marker = marker->next)
3902 marker->position =
3903 source->length - marker->position -
3904 marker->length;
3905 } else {
3906 destinationID = -destinationID;
3907 sourceID = -sourceID;
3908 for (marker = graph->gapMarkers[destinationID];
3909 marker != NULL; marker = marker->next)
3910 marker->position += source->length;
3911 }
3912
3913 if (graph->gapMarkers[destinationID] == NULL)
3914 graph->gapMarkers[destinationID] =
3915 graph->gapMarkers[sourceID];
3916 else {
3917 marker = graph->gapMarkers[destinationID];
3918 while (marker->next != NULL)
3919 marker = marker->next;
3920 marker->next = graph->gapMarkers[sourceID];
3921 }
3922
3923 graph->gapMarkers[sourceID] = NULL;
3924 }
3925
getGap(Node * node,Graph * graph)3926 GapMarker *getGap(Node * node, Graph * graph)
3927 {
3928 IDnum nodeID = getNodeID(node);
3929
3930 if (graph->gapMarkers == NULL)
3931 return NULL;
3932
3933 if (nodeID < 0)
3934 nodeID = -nodeID;
3935
3936 return graph->gapMarkers[nodeID];
3937 }
3938
getNextGap(GapMarker * marker)3939 GapMarker *getNextGap(GapMarker * marker)
3940 {
3941 return marker->next;
3942 }
3943
getGapStart(GapMarker * marker)3944 Coordinate getGapStart(GapMarker * marker)
3945 {
3946 return marker->position;
3947 }
3948
getGapFinish(GapMarker * marker)3949 Coordinate getGapFinish(GapMarker * marker)
3950 {
3951 return marker->position + marker->length;
3952 }
3953
reallocateNodeDescriptor(Node * node,Coordinate length)3954 void reallocateNodeDescriptor(Node * node, Coordinate length) {
3955 Coordinate arrayLength, index, shift;
3956 Node * twin = node->twinNode;
3957 Descriptor * array;
3958 Nucleotide nucleotide;
3959
3960 if (length < node->length)
3961 exitErrorf(EXIT_FAILURE, true, "Sum of node lengths smaller than first!");
3962
3963 shift = length - node->length;
3964
3965 arrayLength = length / 4;
3966 if (length % 4)
3967 arrayLength++;
3968
3969 node->descriptor = reallocOrExit(node->descriptor, arrayLength, Descriptor);
3970
3971 array = callocOrExit(arrayLength, Descriptor);
3972 for (index = node->length - 1; index >= 0; index--) {
3973 nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
3974 writeNucleotideInDescriptor(nucleotide, array, index + shift);
3975 }
3976
3977 free(twin->descriptor);
3978 twin->descriptor = array;
3979 }
3980
doubleStrandedGraph(Graph * graph)3981 boolean doubleStrandedGraph(Graph * graph) {
3982 return graph->double_stranded;
3983 }
3984