1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
4 This file is part of Velvet.
5
6 Velvet is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 Velvet is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Velvet; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <time.h>
24
25 #include "globals.h"
26 #include "graph.h"
27 #include "tightString.h"
28 #include "dfibHeap.h"
29 #include "fibHeap.h"
30 #include "recycleBin.h"
31 #include "passageMarker.h"
32 #include "concatenatedGraph.h"
33 #include "graphStats.h"
34 #include "utility.h"
35
36 #define TICKET_BLOCK_SIZE 10000
37
38 static const Time INDEL = 0;
39 static const Time SIM[4][4] = {
40 {1, 0, 0, 0},
41 {0, 1, 0, 0},
42 {0, 0, 1, 0},
43 {0, 0, 0, 1}
44 };
45
46 typedef struct tkt_st Ticket;
47
48 struct tkt_st {
49 Ticket *next;
50 IDnum id_a;
51 IDnum id_b;
52 };
53
54 //Global variables used throughout this procedure(internal use only !)
55 static int MAXREADLENGTH = 100;
56 static int MAXNODELENGTH = 200;
57 static double MAXDIVERGENCE = 0.2;
58 static int MAXGAPS = 3;
59
60 static Time *times;
61 static Node **previous;
62
63 static DFibHeapNode **dheapNodes;
64 static DFibHeap *dheap;
65
66 static TightString *fastSequence;
67 static TightString *slowSequence;
68
69 static Node *activeNode;
70 static Node *startingNode;
71 static int WORDLENGTH;
72 static Graph *graph;
73 static IDnum dbgCounter;
74
75 static PassageMarker *fastPath;
76 static PassageMarker *slowPath;
77
78 static IDnum *eligibleStartingPoints;
79
80 static double **Fmatrix;
81 static Coordinate *slowToFastMapping;
82 static Coordinate *fastToSlowMapping;
83
84 static RecycleBin *ticketMemory;
85 static Ticket *ticketQueue;
86
87 static Ticket **todoLists;
88 static Ticket **todo;
89 static Ticket *done;
90 static boolean *progressStatus;
91
92 static Coordinate *sequenceLengths;
93
94 //End of global variables;
95
setNodeTime(Node * node,Time time)96 static void setNodeTime(Node * node, Time time)
97 {
98 times[getNodeID(node) + nodeCount(graph)] = time;
99 }
100
getNodeTime(Node * node)101 static Time getNodeTime(Node * node)
102 {
103 return times[getNodeID(node) + nodeCount(graph)];
104 }
105
setNodePrevious(Node * previousNode,Node * node)106 static void setNodePrevious(Node * previousNode, Node * node)
107 {
108 previous[getNodeID(node) + nodeCount(graph)] = previousNode;
109 }
110
getNodePrevious(Node * node)111 static Node *getNodePrevious(Node * node)
112 {
113 return previous[getNodeID(node) + nodeCount(graph)];
114 }
115
setNodeDHeapNode(Node * node,DFibHeapNode * dheapNode)116 static void setNodeDHeapNode(Node * node, DFibHeapNode * dheapNode)
117 {
118 dheapNodes[getNodeID(node) + nodeCount(graph)] = dheapNode;
119 }
120
getNodeDHeapNode(Node * node)121 static DFibHeapNode *getNodeDHeapNode(Node * node)
122 {
123 return dheapNodes[getNodeID(node) + nodeCount(graph)];
124 }
125
newTicket()126 static Ticket *newTicket()
127 {
128 if (ticketMemory == NULL)
129 ticketMemory =
130 newRecycleBin(sizeof(Ticket), TICKET_BLOCK_SIZE);
131
132 return allocatePointer(ticketMemory);
133 }
134
newQueueTicket(IDnum id_a,IDnum id_b)135 static void newQueueTicket(IDnum id_a, IDnum id_b)
136 {
137 Ticket *tkt = newTicket();
138 tkt->id_a = id_a;
139 tkt->id_b = id_b;
140 tkt->next = ticketQueue;
141 ticketQueue = tkt;
142 }
143
isPreviousToNode(Node * previous,Node * target)144 static boolean isPreviousToNode(Node * previous, Node * target)
145 {
146 Node *currentNode = target;
147 Node *previousNode = NULL;
148 Time targetTime = getNodeTime(target);
149
150 //printf("Testing if %ld is previous to %ld\n", getNodeID(previous), getNodeID(target));
151
152 while (true) {
153 if (currentNode == previous)
154 return true;
155
156 if (currentNode == previousNode)
157 return false;
158
159 if (getNodeTime(currentNode) != targetTime)
160 return false;
161
162 previousNode = currentNode;
163 currentNode = getNodePrevious(currentNode);
164 }
165 }
166
concatenateCommonTodoLists(Node * nodeA,Node * nodeB)167 static void concatenateCommonTodoLists(Node * nodeA, Node * nodeB)
168 {
169 Ticket **listA = &todoLists[getNodeID(nodeA) + nodeCount(graph)];
170 Ticket **listB = &todoLists[getNodeID(nodeB) + nodeCount(graph)];
171 Ticket *head = NULL;
172 Ticket *tail = NULL;
173 Ticket *tmp;
174 IDnum idA, idB;
175 IDnum targetID = getNodeID(nodeA);
176 IDnum indexA, indexB;
177 IDnum nodes = nodeCount(graph);
178
179 //printf("Merging todo list %ld into %ld\n", getNodeID(nodeB),
180 // getNodeID(nodeA));
181
182 if (*listB == NULL)
183 return;
184
185 if (*listA == NULL) {
186 *listA = *listB;
187 *listB = NULL;
188 return;
189 }
190
191 while (*listA != NULL && *listB != NULL) {
192 idA = (*listA)->id_a;
193 idB = (*listB)->id_a;
194 indexA = idA + nodes;
195 indexB = idB + nodes;
196
197 if (previous[indexA] == nodeA) {
198 tmp = *listA;
199 *listA = (*listA)->next;
200 deallocatePointer(ticketMemory, tmp);
201 continue;
202 }
203
204 if (idB == targetID || previous[indexB] == nodeA) {
205 tmp = *listB;
206 *listB = (*listB)->next;
207 deallocatePointer(ticketMemory, tmp);
208 continue;
209 }
210
211 if (idA > idB) {
212 tmp = *listB;
213 *listB = (*listB)->next;
214 } else if (idA < idB) {
215 tmp = *listA;
216 *listA = (*listA)->next;
217 } else {
218 tmp = *listB;
219 *listB = (*listB)->next;
220 deallocatePointer(ticketMemory, tmp);
221
222 tmp = *listA;
223 *listA = (*listA)->next;
224 }
225
226 if (tail == NULL) {
227 tail = tmp;
228 head = tail;
229 } else {
230 tail->next = tmp;
231 tail = tail->next;
232 }
233 }
234
235 while (*listA != NULL) {
236 idA = (*listA)->id_a;
237 indexA = idA + nodes;
238
239 if (previous[indexA] == nodeA) {
240 tmp = *listA;
241 *listA = (*listA)->next;
242 deallocatePointer(ticketMemory, tmp);
243 } else if (tail != NULL) {
244 tail->next = *listA;
245 *listA = (*listA)->next;
246 tail = tail->next;
247 } else {
248 head = *listA;
249 *listA = (*listA)->next;
250 tail = head;
251 }
252 }
253
254 while (*listB != NULL) {
255 idB = (*listB)->id_a;
256 indexB = idB + nodes;
257
258 if (idB == targetID || previous[indexB] == nodeA) {
259 tmp = *listB;
260 *listB = (*listB)->next;
261 deallocatePointer(ticketMemory, tmp);
262 } else if (tail != NULL) {
263 tail->next = *listB;
264 *listB = (*listB)->next;
265 tail = tail->next;
266 } else {
267 head = *listB;
268 *listB = (*listB)->next;
269 tail = head;
270
271 }
272 }
273
274 if (tail != NULL)
275 tail->next = NULL;
276
277 *listA = head;
278 *listB = NULL;
279 }
280
concatenateTodoListIntoActive(Node * nodeB)281 static void concatenateTodoListIntoActive(Node * nodeB)
282 {
283 Ticket **listB = &todoLists[getNodeID(nodeB) + nodeCount(graph)];
284 Ticket *head = NULL;
285 Ticket *tail = NULL;
286 Ticket *tmp;
287 IDnum nodes = nodeCount(graph);
288 IDnum idA, idB;
289 IDnum activeID = getNodeID(activeNode);
290 IDnum indexB, indexA;
291
292 //printf("Merging todo list %ld into active node %ld\n",
293 // getNodeID(nodeB), getNodeID(activeNode));
294
295 if (*listB == NULL)
296 return;
297
298 if (*todo == NULL) {
299 *todo = *listB;
300 *listB = NULL;
301 return;
302 }
303
304 while (*todo != NULL && *listB != NULL) {
305 idA = (*todo)->id_a;
306 idB = (*listB)->id_a;
307 indexA = idA + nodes;
308 indexB = idB + nodes;
309
310 if (previous[indexA] == activeNode
311 || progressStatus[indexA]) {
312 tmp = *todo;
313 *todo = (*todo)->next;
314 deallocatePointer(ticketMemory, tmp);
315 continue;
316 }
317
318 if (idB == activeID || previous[indexB] == activeNode
319 || progressStatus[indexB]) {
320 tmp = *listB;
321 *listB = (*listB)->next;
322 deallocatePointer(ticketMemory, tmp);
323 continue;
324 }
325
326 if (idA > idB) {
327 tmp = *listB;
328 *listB = (*listB)->next;
329 } else if (idA < idB) {
330 tmp = *todo;
331 *todo = (*todo)->next;
332 } else {
333 tmp = *listB;
334 *listB = (*listB)->next;
335 deallocatePointer(ticketMemory, tmp);
336
337 tmp = *todo;
338 *todo = (*todo)->next;
339 }
340
341 if (tail == NULL) {
342 tail = tmp;
343 head = tail;
344 } else {
345 tail->next = tmp;
346 tail = tmp;
347 }
348 }
349
350 while (*todo != NULL) {
351 idA = (*todo)->id_a;
352 indexA = idA + nodes;
353
354 if (previous[indexA] == activeNode
355 || progressStatus[indexA]) {
356 tmp = *todo;
357 *todo = (*todo)->next;
358 deallocatePointer(ticketMemory, tmp);
359 } else if (tail != NULL) {
360 tail->next = *todo;
361 *todo = (*todo)->next;
362 tail = tail->next;
363 } else {
364 head = *todo;
365 *todo = (*todo)->next;
366 tail = head;
367 }
368 }
369
370 while (*listB != NULL) {
371 idB = (*listB)->id_a;
372 indexB = idB + nodes;
373
374 if (idB == activeID || previous[indexB] == activeNode
375 || progressStatus[indexB]) {
376 tmp = *listB;
377 *listB = (*listB)->next;
378 deallocatePointer(ticketMemory, tmp);
379 } else if (tail != NULL) {
380 tail->next = *listB;
381 *listB = (*listB)->next;
382 tail = tail->next;
383 } else {
384 head = *listB;
385 *listB = (*listB)->next;
386 tail = head;
387
388 }
389 }
390
391 if (tail != NULL)
392 tail->next = NULL;
393 *todo = head;
394 *listB = NULL;
395 }
396
concatenateTodoLists(Node * nodeA,Node * nodeB)397 static void concatenateTodoLists(Node * nodeA, Node * nodeB)
398 {
399 if (nodeA == activeNode)
400 concatenateTodoListIntoActive(nodeB);
401 else
402 concatenateCommonTodoLists(nodeA, nodeB);
403 }
404
nextTodoTicket()405 static IDnum nextTodoTicket()
406 {
407 Ticket *tkt;
408 IDnum index;
409
410 while (*todo != NULL) {
411 tkt = *todo;
412 *todo = tkt->next;
413
414 index = tkt->id_a + nodeCount(graph);
415
416 if (previous[index] == activeNode) {
417 deallocatePointer(ticketMemory, tkt);
418 continue;
419 }
420
421 progressStatus[index] = true;
422
423 tkt->next = done;
424 done = tkt;
425
426 return tkt->id_a;
427 }
428
429 return 0;
430 }
431
freeDoneTickets()432 static void freeDoneTickets()
433 {
434 Ticket *tkt;
435 IDnum nodes = nodeCount(graph);
436
437 while (done != NULL) {
438 tkt = done;
439 done = tkt->next;
440 progressStatus[tkt->id_a + nodes] = false;
441 deallocatePointer(ticketMemory, tkt);
442 }
443 }
444
updateNodeStatus(Node * node)445 static void updateNodeStatus(Node * node)
446 {
447 FibHeap *heap = newFibHeap();
448 Arc *arc;
449 Node *currentNode = node;
450 Node *destination;
451
452 setNodeStatus(currentNode, true);
453
454 while (currentNode != NULL) {
455 for (arc = getArc(currentNode); arc != NULL;
456 arc = getNextArc(arc)) {
457 destination = getDestination(arc);
458 if (getNodeStatus(destination) > 1) {
459 setNodeStatus(destination, true);
460 insertNodeIntoHeap(heap,
461 getNodeID(destination),
462 destination);
463 }
464 }
465
466 currentNode = removeNextNodeFromHeap(heap);
467 }
468
469 destroyHeap(heap);
470 }
471
determineEligibleStartingPoints()472 static void determineEligibleStartingPoints()
473 {
474 IDnum nodeIndex;
475 IDnum maxmult;
476 Node *node;
477 Arc *arc;
478 IDnum counter = 0;
479 FibHeap *heap = newFibHeap();
480
481 puts("Determining eligible starting points");
482
483 for (nodeIndex = 1; nodeIndex <= nodeCount(graph); nodeIndex++) {
484 node = getNodeInGraph(graph, nodeIndex);
485 if (node == NULL)
486 continue;
487
488 maxmult = 0;
489 for (arc = getArc(node); arc != NULL;
490 arc = getNextArc(arc))
491 if (getMultiplicity(arc) > maxmult)
492 maxmult = getMultiplicity(arc);
493
494 insertNodeIntoHeap(heap, -maxmult, node);
495
496 // Same for twin node
497 node = getNodeInGraph(graph, -nodeIndex);
498 maxmult = 0;
499 for (arc = getArc(node); arc != NULL;
500 arc = getNextArc(arc))
501 if (getMultiplicity(arc) > maxmult)
502 maxmult = getMultiplicity(arc);
503
504 insertNodeIntoHeap(heap, -maxmult, node);
505 }
506
507 while ((node = removeNextNodeFromHeap(heap)) != NULL)
508 eligibleStartingPoints[counter++] = getNodeID(node);
509
510 destroyHeap(heap);
511 puts("Done listing starting nodes");
512 }
513
nextStartingPoint()514 static Node *nextStartingPoint()
515 {
516 static IDnum counter = 0;
517 Node *result = NULL;
518
519 while (result == NULL || getNodeStatus(result) > 0) {
520 if (counter >= nodeCount(graph) * 2)
521 return NULL;
522
523 result =
524 getNodeInGraph(graph,
525 eligibleStartingPoints[counter++]);
526 }
527
528 return result;
529 }
530
531 static boolean
extractSequence(PassageMarker * path,TightString * sequence)532 extractSequence(PassageMarker * path, TightString * sequence)
533 {
534 PassageMarker *marker;
535 Coordinate seqLength = 0;
536 Coordinate writeIndex = 0;
537
538 //printf("Extracting sequence %ld ... ", pathLength);
539
540 //Measure length
541 for (marker = getNextInSequence(path); !isTerminal(marker);
542 marker = getNextInSequence(marker))
543 seqLength += getNodeLength(getNode(marker));
544
545 if (seqLength > MAXREADLENGTH)
546 return false;
547 else
548 setTightStringLength(sequence, seqLength);
549
550 //Copy sequences
551 for (marker = getNextInSequence(path); !isTerminal(marker);
552 marker = getNextInSequence(marker)) {
553 appendNodeSequence(getNode(marker), sequence, writeIndex);
554 writeIndex += getNodeLength(getNode(marker));
555 }
556
557 return true;
558 }
559
max(Time A,Time B,Time C)560 static Time max(Time A, Time B, Time C)
561 {
562 if (A >= B && A >= C)
563 return A;
564 else if (B >= C)
565 return B;
566 else
567 return C;
568 }
569
570 static boolean
compareSequences(TightString * sequence1,TightString * sequence2)571 compareSequences(TightString * sequence1, TightString * sequence2)
572 {
573 Coordinate i, j;
574 Coordinate length1 = getLength(sequence1);
575 Coordinate length2 = getLength(sequence2);
576 Coordinate maxLength;
577 Time Choice1, Choice2, Choice3;
578 Time maxScore;
579
580 if (length1 == 0 || length2 == 0)
581 return false;
582
583 maxLength = (length1 > length2 ? length1 : length2);
584
585 if (length1 < WORDLENGTH || length2 < WORDLENGTH) {
586 if (maxLength - length1 > MAXGAPS
587 || maxLength - length2 > MAXGAPS)
588 return false;
589 if (WORDLENGTH - length1 > MAXGAPS
590 || WORDLENGTH - length2 > MAXGAPS)
591 return false;
592 }
593
594 for (i = 0; i <= length1; i++)
595 Fmatrix[i][0] = 0;
596 for (j = 0; j <= length2; j++)
597 Fmatrix[0][j] = 0;
598
599 for (i = 1; i <= length1; i++) {
600 for (j = 1; j <= length2; j++) {
601 Choice1 =
602 Fmatrix[i - 1][j - 1] +
603 SIM[(int) getNucleotide(i - 1, sequence1)]
604 [(int) getNucleotide(j - 1, sequence2)];
605 Choice2 = Fmatrix[i - 1][j] + INDEL;
606 Choice3 = Fmatrix[i][j - 1] + INDEL;
607 Fmatrix[i][j] = max(Choice1, Choice2, Choice3);
608 }
609 }
610
611 maxScore = Fmatrix[length1][length2];
612
613 if (maxScore < maxLength - MAXGAPS)
614 return false;
615
616 if ((1 - maxScore / maxLength) > MAXDIVERGENCE)
617 return false;
618
619 return true;
620 }
621
mapSlowOntoFast()622 static void mapSlowOntoFast()
623 {
624 Coordinate slowIndex = getLength(slowSequence);
625 Coordinate fastIndex = getLength(fastSequence);
626 int fastn, slown;
627
628 if (slowIndex == 0) {
629 slowToFastMapping[0] = fastIndex;
630
631 while (fastIndex >= 0)
632 fastToSlowMapping[fastIndex--] = 0;
633
634 return;
635 }
636
637 if (fastIndex == 0) {
638 while (slowIndex >= 0)
639 slowToFastMapping[slowIndex--] = 0;
640
641 fastToSlowMapping[0] = slowIndex;
642
643 return;
644 }
645
646 while (slowIndex > 0 && fastIndex > 0) {
647 fastn = (int) getNucleotide(fastIndex - 1, fastSequence);
648 slown = (int) getNucleotide(slowIndex - 1, slowSequence);
649
650 if (Fmatrix[fastIndex][slowIndex] ==
651 Fmatrix[fastIndex - 1][slowIndex - 1] +
652 SIM[fastn][slown]) {
653 fastToSlowMapping[--fastIndex] = --slowIndex;
654 slowToFastMapping[slowIndex] = fastIndex;
655 } else if (Fmatrix[fastIndex][slowIndex] ==
656 Fmatrix[fastIndex - 1][slowIndex] + INDEL)
657 fastToSlowMapping[--fastIndex] = slowIndex - 1;
658
659 else if (Fmatrix[fastIndex][slowIndex] ==
660 Fmatrix[fastIndex][slowIndex - 1] + INDEL)
661 slowToFastMapping[--slowIndex] = fastIndex - 1;
662
663 else {
664 puts("Error");
665 fflush(stdout);
666 abort();
667 }
668 }
669
670 while (slowIndex > 0)
671 slowToFastMapping[--slowIndex] = -1;
672 while (fastIndex > 0)
673 fastToSlowMapping[--fastIndex] = -1;
674
675 slowToFastMapping[getLength(slowSequence)] =
676 getLength(fastSequence);
677 fastToSlowMapping[getLength(fastSequence)] =
678 getLength(slowSequence);
679 }
680
createAnalogousArcAndVaccinate(Node * nodeA,Node * nodeB,Arc * arc)681 static void createAnalogousArcAndVaccinate(Node * nodeA, Node * nodeB,
682 Arc * arc)
683 {
684 boolean aNull = (getNodeLength(nodeA) == 0);
685 boolean bNull = (getNodeLength(nodeB) == 0);
686
687 createAnalogousArc(nodeA, nodeB, arc, graph);
688
689 if (aNull && bNull)
690 newQueueTicket(getNodeID(nodeA), getNodeID(nodeB));
691 }
692
remapNodeArcsOntoTarget(Node * source,Node * target)693 static void remapNodeArcsOntoTarget(Node * source, Node * target)
694 {
695 Arc *arc;
696
697 if (source == activeNode) {
698 activeNode = target;
699 todo =
700 &todoLists[getNodeID(activeNode) + nodeCount(graph)];
701 }
702 concatenateTodoLists(target, source);
703
704 arc = getArc(source);
705 while (arc != NULL) {
706 createAnalogousArcAndVaccinate(target, getDestination(arc),
707 arc);
708 destroyArc(arc, graph);
709 arc = getArc(source);
710 }
711 }
712
remapNodeArcsOntoNeighbour(Node * source,Node * target)713 static void remapNodeArcsOntoNeighbour(Node * source, Node * target)
714 {
715 remapNodeArcsOntoTarget(source, target);
716 remapNodeArcsOntoTarget(getTwinNode(source), getTwinNode(target));
717 }
718
remapNodeMarkersOntoNeighbour(Node * source,PassageMarker * sourceMarker,Node * target,PassageMarker * targetMarker)719 static void remapNodeMarkersOntoNeighbour(Node * source,
720 PassageMarker * sourceMarker,
721 Node * target,
722 PassageMarker * targetMarker)
723 {
724 PassageMarker *marker;
725 Coordinate offset;
726 IDnum sourceLength, index;
727 ShortReadMarker *sourceArray, *shortMarker;
728 Coordinate position;
729 Category cat;
730
731 Coordinate targetStart = getPassageMarkerStart(targetMarker);
732 Coordinate targetFinish = getPassageMarkerFinish(targetMarker);
733 Coordinate sourceStart = getPassageMarkerStart(sourceMarker);
734 Coordinate sourceFinish = getPassageMarkerFinish(sourceMarker);
735
736 Coordinate alignedTargetLength = targetFinish - targetStart;
737 Coordinate alignedSourceLength = sourceFinish - sourceStart;
738
739 Coordinate realTargetLength = getNodeLength(target);
740 Coordinate realSourceLength = getNodeLength(source);
741
742 while (getMarker(source) != NULL) {
743 marker = getMarker(source);
744 extractPassageMarker(marker);
745 transposePassageMarker(marker, target);
746
747 if (realSourceLength != 0 && alignedTargetLength != 0) {
748 if (isInitial(marker)) {
749 offset = getStartOffset(marker);
750 offset *= alignedSourceLength;
751 offset /= realSourceLength;
752 offset += sourceStart;
753 offset = slowToFastMapping[offset];
754 offset -= targetStart;
755 offset *= realTargetLength;
756 offset /= alignedTargetLength;
757
758 if (offset < 0)
759 offset = 0;
760 if (offset > realTargetLength)
761 offset = realTargetLength;
762 } else
763 offset = 0;
764
765 setStartOffset(marker, offset);
766
767 if (isTerminal(marker)) {
768 offset = getFinishOffset(marker);
769 offset *= alignedSourceLength;
770 offset /= realSourceLength;
771 offset = sourceFinish - offset;
772 offset = slowToFastMapping[offset];
773 offset = targetFinish - offset;
774 offset *= realTargetLength;
775 offset /= alignedTargetLength;
776
777 if (offset < 0)
778 offset = 0;
779 if (offset > realTargetLength)
780 offset = realTargetLength;
781 } else
782 offset = 0;
783
784 setFinishOffset(marker, offset);
785 } else {
786 setStartOffset(marker, 0);
787 setFinishOffset(marker, 0);
788 }
789 }
790
791 // Short read markers
792 if (readStartsAreActivated(graph)) {
793 // Update Coordinates
794 sourceArray = getNodeReads(source, graph);
795 sourceLength = getNodeReadCount(source, graph);
796
797 for (index = 0; index < sourceLength; index++) {
798 shortMarker =
799 getShortReadMarkerAtIndex(sourceArray, index);
800 position = getShortReadMarkerPosition(shortMarker);
801
802 if (position > -1) {
803 if (realSourceLength > 0
804 && alignedTargetLength > 0) {
805 position *= alignedSourceLength;
806 position /= realSourceLength;
807 position += sourceStart;
808 position =
809 slowToFastMapping[position];
810 position -= targetStart;
811 position *= realTargetLength;
812 position /= alignedTargetLength;
813
814 if (position < 0)
815 position = 0;
816 if (position > realTargetLength)
817 position =
818 realTargetLength;
819 } else
820 position = 0;
821 }
822
823 setShortReadMarkerPosition(shortMarker, position);
824 }
825 mergeNodeReads(target, source, graph);
826
827 // Same but for symmetrical reads
828 sourceArray = getNodeReads(getTwinNode(source), graph);
829 sourceLength =
830 getNodeReadCount(getTwinNode(source), graph);
831
832 for (index = 0; index < sourceLength; index++) {
833 shortMarker =
834 getShortReadMarkerAtIndex(sourceArray, index);
835 position = getShortReadMarkerPosition(shortMarker);
836
837 if (position > -1) {
838 if (realSourceLength > 0
839 && alignedTargetLength > 0) {
840 position =
841 realSourceLength - position;
842 position *= alignedSourceLength;
843 position /= realSourceLength;
844 position += sourceStart;
845 position =
846 slowToFastMapping[position];
847 position -= targetStart;
848 position *= realTargetLength;
849 position /= alignedTargetLength;
850 position =
851 realTargetLength - position;
852
853 if (position < 0)
854 position = 0;
855 if (position > realTargetLength)
856 position =
857 realTargetLength;
858 } else
859 position = 0;
860 }
861
862 setShortReadMarkerPosition(shortMarker, position);
863 }
864 mergeNodeReads(getTwinNode(target), getTwinNode(source),
865 graph);
866 }
867 // Virtual reads
868 for (cat = 0; cat < CATEGORIES; cat++)
869 incrementVirtualCoverage(target, cat,
870 getVirtualCoverage(source, cat));
871 }
872
remapBackOfNodeArcsOntoNeighbour(Node * source,Node * target)873 static void remapBackOfNodeArcsOntoNeighbour(Node * source, Node * target)
874 {
875 Arc *arc;
876
877 remapNodeArcsOntoTarget(getTwinNode(source), getTwinNode(target));
878 for (arc = getArc(source); arc != NULL; arc = getNextArc(arc))
879 createAnalogousArcAndVaccinate(target, source, arc);
880
881 }
882
883 static Coordinate
remapBackOfNodeMarkersOntoNeighbour(Node * source,PassageMarker * sourceMarker,Node * target,PassageMarker * targetMarker,boolean slowToFast)884 remapBackOfNodeMarkersOntoNeighbour(Node * source,
885 PassageMarker * sourceMarker,
886 Node * target,
887 PassageMarker * targetMarker,
888 boolean slowToFast)
889 {
890 PassageMarker *marker, *newMarker, *previousMarker, *nextMarker;
891 Coordinate halfwayPoint, halfwayPointOffset, breakpoint,
892 newStartOffset, newFinishOffset;
893 Category cat;
894 Coordinate coverage;
895 Coordinate *targetToSourceMapping, *sourceToTargetMapping;
896 ShortReadMarker *selectedShortReads, *shortRead;
897 IDnum selectedShortReadCount, shortReadIndex;
898 Coordinate position;
899
900 Coordinate targetStart = getPassageMarkerStart(targetMarker);
901 Coordinate targetFinish = getPassageMarkerFinish(targetMarker);
902 Coordinate sourceStart = getPassageMarkerStart(sourceMarker);
903 Coordinate sourceFinish = getPassageMarkerFinish(sourceMarker);
904
905 Coordinate alignedTargetLength = targetFinish - targetStart;
906 Coordinate alignedSourceLength = sourceFinish - sourceStart;
907
908 Coordinate realTargetLength = getNodeLength(target);
909 Coordinate realSourceLength = getNodeLength(source);
910
911 if (slowToFast) {
912 sourceToTargetMapping = slowToFastMapping;
913 targetToSourceMapping = fastToSlowMapping;
914 } else {
915 sourceToTargetMapping = fastToSlowMapping;
916 targetToSourceMapping = slowToFastMapping;
917 }
918
919 // Calculating source node breakpoint:
920 if (alignedSourceLength > 0 && targetFinish > 0) {
921 halfwayPoint =
922 targetToSourceMapping[targetFinish - 1] - sourceStart +
923 1;
924 halfwayPoint *= realSourceLength;
925 halfwayPoint /= alignedSourceLength;
926 } else
927 halfwayPoint = 0;
928
929 if (halfwayPoint < 0)
930 halfwayPoint = 0;
931 if (halfwayPoint > realSourceLength)
932 halfwayPoint = realSourceLength;
933 halfwayPointOffset = realSourceLength - halfwayPoint;
934
935 // Complete markers
936 for (marker = getMarker(source); marker != NULL;
937 marker = nextMarker) {
938 nextMarker = getNextInNode(marker);
939
940 // To avoid making loops...
941 if (getNode(getPreviousInSequence(marker)) == target)
942 continue;
943
944 // Markers which are downstream of the breakpoint
945 if (isInitial(marker)
946 && getStartOffset(marker) > halfwayPoint) {
947 newStartOffset =
948 getStartOffset(marker) - halfwayPoint;
949 setStartOffset(marker, newStartOffset);
950 continue;
951 }
952 // Markers which are upstream of the breakpoint
953 if (isTerminal(marker)
954 && getFinishOffset(marker) > halfwayPointOffset) {
955 if (slowToFast) {
956 if (realSourceLength > 0
957 && alignedTargetLength > 0) {
958 newFinishOffset =
959 getFinishOffset(marker) -
960 halfwayPointOffset;
961 newFinishOffset *=
962 alignedSourceLength;
963 newFinishOffset /=
964 realSourceLength;
965 newFinishOffset *=
966 realTargetLength;
967 newFinishOffset /=
968 alignedTargetLength;
969 if (newFinishOffset < 0)
970 newFinishOffset = 0;
971 else if (newFinishOffset >
972 realTargetLength)
973 newFinishOffset =
974 realTargetLength;
975 } else
976 newFinishOffset = 0;
977 } else {
978 newFinishOffset =
979 getFinishOffset(marker) -
980 halfwayPointOffset;
981 }
982 setFinishOffset(marker, newFinishOffset);
983 extractPassageMarker(marker);
984 transposePassageMarker(marker, target);
985 continue;
986 }
987 // Markers on both sides of the divide
988 newMarker =
989 addPassageMarker(getPassageMarkerSequenceID(marker),
990 getPassageMarkerStart(marker),
991 target);
992
993 setPassageMarkerStart(newMarker,
994 getPassageMarkerStart(marker));
995 setPassageMarkerStatus(newMarker,
996 getPassageMarkerStatus(marker));
997
998 if (realSourceLength - getStartOffset(marker) -
999 getFinishOffset(marker) > 0) {
1000 breakpoint = halfwayPoint - getStartOffset(marker);
1001 breakpoint *= getPassageMarkerLength(marker);
1002 breakpoint /= realSourceLength -
1003 getStartOffset(marker) -
1004 getFinishOffset(marker);
1005 breakpoint *= passageMarkerDirection(marker);
1006 breakpoint += getPassageMarkerStart(marker);
1007 } else {
1008 breakpoint = getPassageMarkerStart(marker);
1009 }
1010
1011 setPassageMarkerFinish(newMarker, breakpoint);
1012 setPassageMarkerStart(marker, breakpoint);
1013
1014 if (slowToFast) {
1015 if (realSourceLength != 0
1016 && alignedTargetLength != 0) {
1017 newStartOffset = getStartOffset(marker);
1018 newStartOffset *= alignedSourceLength;
1019 newStartOffset /= realSourceLength;
1020 newStartOffset *= realTargetLength;
1021 newStartOffset /= alignedTargetLength;
1022 if (newStartOffset < 0)
1023 newStartOffset = 0;
1024 else if (newStartOffset > realTargetLength)
1025 newStartOffset = realTargetLength;
1026 } else {
1027 newStartOffset = 0;
1028 }
1029 } else {
1030 newStartOffset = getStartOffset(marker);
1031 }
1032
1033 setStartOffset(newMarker, newStartOffset);
1034 setFinishOffset(newMarker, 0);
1035 setStartOffset(marker, 0);
1036
1037 previousMarker = getPreviousInSequence(marker);
1038 setNextInSequence(previousMarker, newMarker);
1039 setPreviousInSequence(previousMarker, newMarker);
1040
1041 setPreviousInSequence(newMarker, marker);
1042 setNextInSequence(newMarker, marker);
1043 }
1044
1045 // Read starts
1046 if (readStartsAreActivated(graph)) {
1047 selectedShortReads =
1048 extractBackOfNodeReads(source, halfwayPoint, graph,
1049 &selectedShortReadCount,
1050 sourceMarker, sequenceLengths);
1051 if (slowToFast) {
1052 if (realSourceLength > 0
1053 && alignedTargetLength > 0) {
1054 for (shortReadIndex = 0;
1055 shortReadIndex <
1056 selectedShortReadCount;
1057 shortReadIndex++) {
1058 shortRead =
1059 getShortReadMarkerAtIndex
1060 (selectedShortReads,
1061 shortReadIndex);
1062 position =
1063 getShortReadMarkerPosition
1064 (shortRead);
1065 if (position > -1) {
1066 position *=
1067 alignedSourceLength;
1068 position /=
1069 realSourceLength;
1070 position += sourceStart;
1071 position =
1072 sourceToTargetMapping
1073 [position];
1074 position -= targetStart;
1075 position *=
1076 realTargetLength;
1077 position /=
1078 alignedTargetLength;
1079 if (position < 0)
1080 position = 0;
1081 if (position >
1082 realTargetLength)
1083 position =
1084 realTargetLength;
1085 }
1086 setShortReadMarkerPosition
1087 (shortRead, position);
1088 }
1089 } else {
1090 for (shortReadIndex = 0;
1091 shortReadIndex <
1092 selectedShortReadCount;
1093 shortReadIndex++) {
1094 shortRead =
1095 getShortReadMarkerAtIndex
1096 (selectedShortReads,
1097 shortReadIndex);
1098 position =
1099 getShortReadMarkerPosition
1100 (shortRead);
1101 if (position > -1)
1102 setShortReadMarkerPosition
1103 (shortRead, 0);
1104 }
1105
1106 }
1107 }
1108 injectShortReads(selectedShortReads,
1109 selectedShortReadCount, target, graph);
1110
1111 selectedShortReads =
1112 extractFrontOfNodeReads(getTwinNode(source),
1113 halfwayPoint, graph,
1114 &selectedShortReadCount,
1115 sourceMarker, sequenceLengths);
1116 if (slowToFast) {
1117 if (realSourceLength > 0
1118 && alignedTargetLength > 0) {
1119 for (shortReadIndex = 0;
1120 shortReadIndex <
1121 selectedShortReadCount;
1122 shortReadIndex++) {
1123 shortRead =
1124 getShortReadMarkerAtIndex
1125 (selectedShortReads,
1126 shortReadIndex);
1127 position =
1128 getShortReadMarkerPosition
1129 (shortRead);
1130 if (position > -1) {
1131 position =
1132 getShortReadMarkerPosition
1133 (shortRead);
1134 position =
1135 realSourceLength -
1136 position;
1137 position *=
1138 alignedSourceLength;
1139 position /=
1140 realSourceLength;
1141 position += sourceStart;
1142 position =
1143 sourceToTargetMapping
1144 [position];
1145 position -= targetStart;
1146 position *=
1147 realTargetLength;
1148 position /=
1149 alignedTargetLength;
1150 position =
1151 realTargetLength -
1152 position;
1153 if (position < 0)
1154 position = 0;
1155 if (position >
1156 realTargetLength)
1157 position =
1158 realTargetLength;
1159 }
1160 setShortReadMarkerPosition
1161 (shortRead, position);
1162 }
1163 } else {
1164 for (shortReadIndex = 0;
1165 shortReadIndex <
1166 selectedShortReadCount;
1167 shortReadIndex++) {
1168 shortRead =
1169 getShortReadMarkerAtIndex
1170 (selectedShortReads,
1171 shortReadIndex);
1172 position =
1173 getShortReadMarkerPosition
1174 (shortRead);
1175 if (position > -1)
1176 setShortReadMarkerPosition
1177 (shortRead, 0);
1178 }
1179
1180 }
1181 }
1182 injectShortReads(selectedShortReads,
1183 selectedShortReadCount,
1184 getTwinNode(target), graph);
1185 }
1186 // Virtual coverage
1187 if (alignedSourceLength != 0) {
1188 for (cat = 0; cat < CATEGORIES; cat++) {
1189 coverage = getVirtualCoverage(source, cat);
1190 coverage *= halfwayPoint;
1191 coverage /= alignedSourceLength;
1192 incrementVirtualCoverage(target, cat, coverage);
1193 incrementVirtualCoverage(source, cat, -coverage);
1194
1195 coverage = getOriginalVirtualCoverage(source, cat);
1196 coverage *= halfwayPoint;
1197 coverage /= alignedSourceLength;
1198 incrementOriginalVirtualCoverage(source, cat,
1199 -coverage);
1200 }
1201 }
1202
1203 return halfwayPointOffset;
1204 }
1205
remapNodeInwardReferencesOntoNode(Node * source,Node * target)1206 static void remapNodeInwardReferencesOntoNode(Node * source, Node * target)
1207 {
1208 Arc *arc;
1209 Node *destination;
1210
1211 for (arc = getArc(source); arc != NULL; arc = getNextArc(arc)) {
1212 destination = getDestination(arc);
1213 if (destination == target || destination == source)
1214 continue;
1215 if (getNodePrevious(destination) == source)
1216 setNodePrevious(target, destination);
1217 }
1218 }
1219
remapNodeTimesOntoTargetNode(Node * source,Node * target)1220 static void remapNodeTimesOntoTargetNode(Node * source, Node * target)
1221 {
1222 Time nodeTime = getNodeTime(source);
1223 Node *previous = getNodePrevious(source);
1224 Time targetTime = getNodeTime(target);
1225
1226 if (nodeTime == -1)
1227 return;
1228
1229 if (previous == source) {
1230 setNodeTime(target, nodeTime);
1231 setNodePrevious(target, target);
1232 } else if (targetTime == -1
1233 || targetTime > nodeTime
1234 || (targetTime == nodeTime
1235 && !isPreviousToNode(target, previous))) {
1236 setNodeTime(target, nodeTime);
1237 if (previous != getTwinNode(source))
1238 setNodePrevious(previous, target);
1239 else
1240 setNodePrevious(getTwinNode(target), target);
1241 }
1242
1243 remapNodeInwardReferencesOntoNode(source, target);
1244
1245 setNodePrevious(NULL, source);
1246 }
1247
foldSymmetricalNode(Node * node)1248 static void foldSymmetricalNode(Node * node)
1249 {
1250 Node *twinNode = getTwinNode(node);
1251 Node *tmp, *destination;
1252 Arc *arc;
1253 PassageMarker *oldMarker = getMarker(node);
1254 PassageMarker *currentMarker, *newMarker, *previousMarker;
1255 Coordinate halfwayPoint;
1256 IDnum totalMult;
1257
1258 // Reduce time complexity of damn thing
1259 if (simpleArcCount(node) < simpleArcCount(twinNode)) {
1260 tmp = twinNode;
1261 twinNode = node;
1262 node = tmp;
1263 }
1264 // Destroy link to old markers
1265 setMarker(node, NULL);
1266
1267 // Reinsert markers properly
1268 while (oldMarker != NULL) {
1269 currentMarker = oldMarker;
1270 oldMarker = getNextInNode(currentMarker);
1271 previousMarker = getPreviousInSequence(currentMarker);
1272
1273 if (getNode(previousMarker) != twinNode) {
1274 newMarker =
1275 addUncertainPassageMarker
1276 (getPassageMarkerSequenceID(currentMarker),
1277 twinNode);
1278 setPassageMarkerStatus(newMarker,
1279 getPassageMarkerStatus
1280 (currentMarker));
1281
1282 setPassageMarkerStart(newMarker,
1283 getPassageMarkerStart
1284 (currentMarker));
1285
1286 // For security issues:
1287 if (currentMarker == slowPath)
1288 slowPath = newMarker;
1289 else if (currentMarker == fastPath)
1290 fastPath = newMarker;
1291
1292 halfwayPoint =
1293 (getPassageMarkerStart(currentMarker) +
1294 getPassageMarkerFinish(currentMarker))
1295 / 2;
1296 setPassageMarkerFinish(newMarker, halfwayPoint);
1297 setPassageMarkerStart(currentMarker, halfwayPoint);
1298
1299 setStartOffset(newMarker,
1300 getStartOffset(currentMarker));
1301 setFinishOffset(newMarker, 0);
1302 setStartOffset(currentMarker, 0);
1303
1304 setPreviousInSequence(previousMarker, newMarker);
1305 setNextInSequence(previousMarker, newMarker);
1306
1307 setPreviousInSequence(newMarker, currentMarker);
1308 setNextInSequence(newMarker, currentMarker);
1309 }
1310
1311 transposePassageMarker(currentMarker, node);
1312 }
1313
1314 // Read start info
1315 foldSymmetricalNodeReads(node, graph);
1316
1317 // Coverage => already balanced!
1318
1319 // References
1320 if (getNodeTime(node) == -1 && getNodeTime(twinNode) == -1);
1321 else if (getNodeTime(node) == -1) {
1322 setNodeTime(node, getNodeTime(twinNode));
1323 } else if (getNodeTime(twinNode) == -1) {
1324 setNodeTime(twinNode, getNodeTime(node));
1325 setNodePrevious(getNodePrevious(node), twinNode);
1326 } else if (getNodePrevious(node) == node) {
1327 setNodeTime(twinNode, getNodeTime(node));
1328 setNodePrevious(twinNode, twinNode);
1329 } else if (getNodeTime(node) < getNodeTime(twinNode)) {
1330 setNodeTime(twinNode, getNodeTime(node));
1331 setNodePrevious(getNodePrevious(node), twinNode);
1332 } else if (getNodeTime(node) == getNodeTime(twinNode)
1333 && isPreviousToNode(node, twinNode)) {
1334 setNodePrevious(getNodePrevious(node), twinNode);
1335 } else {
1336 setNodeTime(node, getNodeTime(twinNode));
1337 }
1338
1339 setNodePrevious(twinNode, node);
1340 remapNodeInwardReferencesOntoNode(twinNode, node);
1341
1342 // Active node
1343 if (twinNode == activeNode) {
1344 activeNode = node;
1345 todo =
1346 &todoLists[getNodeID(activeNode) + nodeCount(graph)];
1347 }
1348 concatenateTodoLists(node, twinNode);
1349
1350 // Remap arcs properly
1351 arc = getArc(twinNode);
1352 totalMult = 0;
1353 while (arc != NULL) {
1354 destination = getDestination(arc);
1355 if (destination != node)
1356 createAnalogousArc(node, destination, arc, graph);
1357 totalMult += getMultiplicity(arc);
1358 destroyArc(arc, graph);
1359 arc = getArc(twinNode);
1360 }
1361
1362 arc = createArc(twinNode, node, graph);
1363 setMultiplicity(arc, totalMult);
1364
1365 // Uniqueness
1366 setUniqueness(node, false);
1367
1368 // Starting node
1369 if (startingNode == node)
1370 startingNode = twinNode;
1371 }
1372
remapNodeTimesOntoNeighbour(Node * source,Node * target)1373 static void remapNodeTimesOntoNeighbour(Node * source, Node * target)
1374 {
1375 remapNodeTimesOntoTargetNode(source, target);
1376 remapNodeTimesOntoTargetNode(getTwinNode(source),
1377 getTwinNode(target));
1378 }
1379
remapNodeTimesOntoForwardMiddlePath(Node * source,PassageMarker * path)1380 static void remapNodeTimesOntoForwardMiddlePath(Node * source,
1381 PassageMarker * path)
1382 {
1383 PassageMarker *marker;
1384 Node *target;
1385 Time nodeTime = getNodeTime(source);
1386 Node *previousNode = getNodePrevious(source);
1387 Time targetTime;
1388
1389 //printf("Remapping times from %ld to %ld\n", getNodeID(previousNode), getNodeID(source));
1390
1391 for (marker = path; getNode(marker) != source;
1392 marker = getNextInSequence(marker)) {
1393 target = getNode(marker);
1394 targetTime = getNodeTime(target);
1395
1396 //printf("Through %ld\n", getNodeID(target));
1397
1398 if (targetTime == -1
1399 || targetTime > nodeTime
1400 || (targetTime == nodeTime
1401 && !isPreviousToNode(target, previousNode))) {
1402 setNodeTime(target, nodeTime);
1403 setNodePrevious(previousNode, target);
1404 }
1405
1406 previousNode = target;
1407 }
1408
1409 setNodePrevious(previousNode, source);
1410
1411 }
1412
remapNodeTimesOntoTwinMiddlePath(Node * source,PassageMarker * path)1413 static void remapNodeTimesOntoTwinMiddlePath(Node * source,
1414 PassageMarker * path)
1415 {
1416 PassageMarker *marker;
1417 Node *target;
1418 Node *previousNode = getTwinNode(source);
1419 Time targetTime;
1420 PassageMarker *limit = getTwinMarker(getPreviousInSequence(path));
1421 Time nodeTime = getNodeTime(getNode(limit));
1422
1423 //printf("Remapping times from twins %ld to %ld\n", getNodeID(previousNode), getNodeID(getNode(limit)));
1424
1425 // Revving up
1426 marker = path;
1427 while (getNode(marker) != source)
1428 marker = getNextInSequence(marker);
1429 marker = getTwinMarker(marker);
1430
1431 // Going down the path
1432 while (marker != limit) {
1433 marker = getNextInSequence(marker);
1434 target = getNode(marker);
1435 targetTime = getNodeTime(target);
1436
1437 //printf("Through %ld\n", getNodeID(target));
1438
1439 if (targetTime == -1
1440 || targetTime > nodeTime
1441 || (targetTime == nodeTime
1442 && !isPreviousToNode(target, previousNode))) {
1443 setNodeTime(target, nodeTime);
1444 getNodeTime(target);
1445 setNodePrevious(previousNode, target);
1446 }
1447
1448 previousNode = target;
1449 }
1450 }
1451
1452 static void
remapNodeFibHeapReferencesOntoNode(Node * source,Node * target)1453 remapNodeFibHeapReferencesOntoNode(Node * source, Node * target)
1454 {
1455 DFibHeapNode *sourceDHeapNode = getNodeDHeapNode(source);
1456 DFibHeapNode *targetDHeapNode = getNodeDHeapNode(target);
1457
1458 if (sourceDHeapNode == NULL)
1459 return;
1460
1461 if (targetDHeapNode == NULL) {
1462 setNodeDHeapNode(target, sourceDHeapNode);
1463 replaceValueInDHeap(sourceDHeapNode, target);
1464 } else if (getKey(targetDHeapNode) > getKey(sourceDHeapNode)) {
1465 setNodeDHeapNode(target, sourceDHeapNode);
1466 replaceValueInDHeap(sourceDHeapNode, target);
1467 destroyNodeInDHeap(targetDHeapNode, dheap);
1468 } else
1469 destroyNodeInDHeap(sourceDHeapNode, dheap);
1470
1471 setNodeDHeapNode(source, NULL);
1472 }
1473
remapNodeOntoNeighbour(Node * source,PassageMarker * sourceMarker,Node * target,PassageMarker * targetMarker)1474 static void remapNodeOntoNeighbour(Node * source,
1475 PassageMarker * sourceMarker,
1476 Node * target,
1477 PassageMarker * targetMarker)
1478 {
1479 //printf("Remapping node %ld onto middle path %ld\n", getNodeID(source), getNodeID(target));
1480 remapNodeMarkersOntoNeighbour(source, sourceMarker, target,
1481 targetMarker);
1482
1483 remapNodeTimesOntoNeighbour(source, target);
1484 remapNodeArcsOntoNeighbour(source, target);
1485
1486 remapNodeFibHeapReferencesOntoNode(getTwinNode(source),
1487 getTwinNode(target));
1488 remapNodeFibHeapReferencesOntoNode(source, target);
1489
1490 if (startingNode == source)
1491 startingNode = target;
1492 if (startingNode == getTwinNode(source))
1493 startingNode = getTwinNode(target);
1494
1495 destroyNode(source, graph);
1496 }
1497
remapBackOfNodeDescriptorOntoNeighbour(Node * source,PassageMarker * sourceMarker,Node * target,PassageMarker * targetMarker,boolean slowToFast,Coordinate offset)1498 static void remapBackOfNodeDescriptorOntoNeighbour(Node * source,
1499 PassageMarker *
1500 sourceMarker,
1501 Node * target,
1502 PassageMarker *
1503 targetMarker,
1504 boolean slowToFast,
1505 Coordinate offset)
1506 {
1507 //printf("Splitting node descriptor %ld // %ld\n", getNodeLength(source), offset);
1508
1509 if (slowToFast)
1510 splitNodeDescriptor(source, NULL, offset);
1511 else
1512 splitNodeDescriptor(source, target, offset);
1513 }
1514
remapBackOfNodeTimesOntoNeighbour(Node * source,Node * target)1515 static void remapBackOfNodeTimesOntoNeighbour(Node * source, Node * target)
1516 {
1517 Time targetTime = getNodeTime(target);
1518 Time nodeTime = getNodeTime(source);
1519 Node *twinTarget = getTwinNode(target);
1520 Node *twinSource = getTwinNode(source);
1521 Node *previous;
1522
1523 if (nodeTime != -1) {
1524 previous = getNodePrevious(source);
1525
1526 if (previous == source) {
1527 setNodeTime(target, nodeTime);
1528 setNodePrevious(target, target);
1529 } else if (targetTime == -1
1530 || targetTime > nodeTime
1531 || (targetTime == nodeTime
1532 && !isPreviousToNode(target, previous))) {
1533 setNodeTime(target, nodeTime);
1534 if (previous != getTwinNode(source))
1535 setNodePrevious(previous, target);
1536 else
1537 setNodePrevious(getTwinNode(target),
1538 target);
1539 }
1540
1541 setNodePrevious(target, source);
1542 }
1543
1544 targetTime = getNodeTime(twinTarget);
1545 nodeTime = getNodeTime(twinSource);
1546
1547 if (nodeTime != -1) {
1548 if (targetTime == -1
1549 || targetTime > nodeTime
1550 || (targetTime == nodeTime
1551 && !isPreviousToNode(twinTarget, twinSource))) {
1552 setNodeTime(twinTarget, nodeTime);
1553 setNodePrevious(twinSource, twinTarget);
1554 }
1555 }
1556
1557 remapNodeInwardReferencesOntoNode(twinSource, twinTarget);
1558 }
1559
1560 static void
remapBackOfNodeOntoNeighbour(Node * source,PassageMarker * sourceMarker,Node * target,PassageMarker * targetMarker,boolean slowToFast)1561 remapBackOfNodeOntoNeighbour(Node * source, PassageMarker * sourceMarker,
1562 Node * target, PassageMarker * targetMarker,
1563 boolean slowToFast)
1564 {
1565 Coordinate offset;
1566 //printf("Remapping node %ld onto middle path\n", getNodeID(node));
1567
1568 offset =
1569 remapBackOfNodeMarkersOntoNeighbour(source, sourceMarker,
1570 target, targetMarker,
1571 slowToFast);
1572 remapBackOfNodeDescriptorOntoNeighbour(source, sourceMarker,
1573 target, targetMarker,
1574 slowToFast, offset);
1575 remapBackOfNodeTimesOntoNeighbour(source, target);
1576 remapBackOfNodeArcsOntoNeighbour(source, target);
1577
1578 remapNodeFibHeapReferencesOntoNode(getTwinNode(source),
1579 getTwinNode(target));
1580
1581 if (getTwinNode(source) == startingNode)
1582 startingNode = getTwinNode(target);
1583 }
1584
remapEmptyPathArcsOntoMiddlePathSimple(PassageMarker * emptyPath,PassageMarker * targetPath)1585 static void remapEmptyPathArcsOntoMiddlePathSimple(PassageMarker *
1586 emptyPath,
1587 PassageMarker *
1588 targetPath)
1589 {
1590 PassageMarker *pathMarker;
1591 Node *start = getNode(getPreviousInSequence(emptyPath));
1592 Node *finish = getNode(emptyPath);
1593 Node *previousNode = start;
1594 Node *currentNode;
1595 Arc *originalArc = getArcBetweenNodes(start, finish, graph);
1596
1597 for (pathMarker = targetPath; getNode(pathMarker) != finish;
1598 pathMarker = getNextInSequence(pathMarker)) {
1599 currentNode = getNode(pathMarker);
1600 createAnalogousArcAndVaccinate(previousNode, currentNode,
1601 originalArc);
1602 previousNode = currentNode;
1603 }
1604
1605 createAnalogousArcAndVaccinate(previousNode, finish, originalArc);
1606
1607 destroyArc(originalArc, graph);
1608 }
1609
remapEmptyPathMarkersOntoMiddlePathSimple(PassageMarker * emptyPath,PassageMarker * targetPath)1610 static void remapEmptyPathMarkersOntoMiddlePathSimple(PassageMarker *
1611 emptyPath,
1612 PassageMarker *
1613 targetPath)
1614 {
1615 PassageMarker *marker, *newMarker, *previousMarker, *pathMarker;
1616 Node *start = getNode(getPreviousInSequence(emptyPath));
1617 Node *finish = getNode(emptyPath);
1618 PassageMarker *oldMarker = getMarker(finish);
1619 Coordinate markerStart;
1620 IDnum intersectionLength, twinIntersectionLength;
1621 ShortReadMarker *intersectionReads =
1622 commonNodeReads(start, finish, graph, &intersectionLength);
1623 ShortReadMarker *twinIntersectionReads =
1624 commonNodeReads(getTwinNode(start), getTwinNode(finish), graph,
1625 &twinIntersectionLength);
1626
1627 //printf("SIMPLE %ld\t%ld\t%i\t%i\n", markerCount(finish),
1628 // getNodeID(finish), arcCount(finish),
1629 // arcCount(getTwinNode(finish)));
1630
1631 // Destroy link to old nodes
1632 setMarker(finish, NULL);
1633
1634 while (oldMarker != NULL) {
1635 marker = oldMarker;
1636 oldMarker = getNextInNode(marker);
1637 newMarker = getPreviousInSequence(marker);
1638
1639 if (getNode(newMarker) != start) {
1640 transposePassageMarker(marker, finish);
1641 continue;
1642 }
1643
1644 markerStart = getPassageMarkerStart(marker);
1645 for (pathMarker = targetPath;
1646 getNode(pathMarker) != finish;
1647 pathMarker = getNextInSequence(pathMarker)) {
1648 previousMarker = newMarker;
1649
1650 newMarker =
1651 addUncertainPassageMarker
1652 (getPassageMarkerSequenceID(marker),
1653 getNode(pathMarker));
1654 setPassageMarkerStatus(newMarker,
1655 getPassageMarkerStatus
1656 (marker));
1657 setPassageMarkerStart(newMarker, markerStart);
1658 setPassageMarkerFinish(newMarker, markerStart);
1659
1660 setNextInSequence(previousMarker, newMarker);
1661 setPreviousInSequence(previousMarker, newMarker);
1662
1663 setStartOffset(newMarker, 0);
1664 setFinishOffset(newMarker, 0);
1665
1666 }
1667
1668 setNextInSequence(newMarker, marker);
1669 setPreviousInSequence(newMarker, marker);
1670 transposePassageMarker(marker, finish);
1671 }
1672
1673 if (readStartsAreActivated(graph)) {
1674 for (pathMarker = targetPath;
1675 getNode(pathMarker) != finish;
1676 pathMarker = getNextInSequence(pathMarker)) {
1677 // Read starts
1678 spreadReadIDs(intersectionReads,
1679 intersectionLength,
1680 getNode(pathMarker), graph);
1681 spreadReadIDs(twinIntersectionReads,
1682 twinIntersectionLength,
1683 getTwinNode(getNode(pathMarker)),
1684 graph);
1685 }
1686 }
1687
1688 free(intersectionReads);
1689 free(twinIntersectionReads);
1690 }
1691
markerFollowsPath(PassageMarker * marker,PassageMarker * start,PassageMarker * finish,Node * stopNode)1692 static boolean markerFollowsPath(PassageMarker * marker,
1693 PassageMarker * start,
1694 PassageMarker * finish, Node * stopNode)
1695 {
1696 PassageMarker *current, *path;
1697
1698 path = start;
1699 current = marker;
1700
1701 while (true) {
1702 if (current == NULL || path == finish || path == NULL)
1703 return true;
1704
1705 if (getNode(current) != getNode(path))
1706 return false;
1707
1708 current = getNextInSequence(current);
1709 path = getNextInSequence(path);
1710 }
1711 }
1712
getAnchors(PassageMarker * marker,Node * nodeA,Node * nodeB)1713 static PassageMarkerList *getAnchors(PassageMarker * marker, Node * nodeA,
1714 Node * nodeB)
1715 {
1716 PassageMarker *current, *next;
1717 Node *twinA = getTwinNode(nodeA);
1718 Node *twinB = getTwinNode(nodeB);
1719 PassageMarkerList *result = NULL;
1720
1721 current = marker;
1722 while (current != NULL) {
1723 next = getNextInSequence(current);
1724 if (getNode(current) == nodeA && getNode(next) == nodeB) {
1725 result = newPassageMarkerList(next, result);
1726 }
1727 if (getNode(current) == twinB && getNode(next) == twinA) {
1728 result =
1729 newPassageMarkerList(getTwinMarker(current),
1730 result);
1731 }
1732 current = next;
1733 }
1734
1735 return result;
1736 }
1737
destroyPassageMarkerList(PassageMarkerList ** list)1738 static void destroyPassageMarkerList(PassageMarkerList ** list)
1739 {
1740 PassageMarkerList *ptr;
1741
1742 while (*list != NULL) {
1743 ptr = *list;
1744 *list = ptr->next;
1745 deallocatePassageMarkerList(ptr);
1746 }
1747 }
1748
remapEmptyPathMarkersOntoMiddlePathDevious(PassageMarker * emptyPath,PassageMarker * targetPath)1749 static void remapEmptyPathMarkersOntoMiddlePathDevious(PassageMarker *
1750 emptyPath,
1751 PassageMarker *
1752 targetPath)
1753 {
1754 PassageMarker *marker, *newMarker, *previousMarker, *pathMarker;
1755 Node *start = getNode(getPreviousInSequence(emptyPath));
1756 Node *finish = getNode(emptyPath);
1757 PassageMarkerList *anchors = getAnchors(targetPath, start, finish);
1758 PassageMarkerList *currentAnchor;
1759 boolean untouchable = false;
1760 Coordinate markerStart;
1761
1762 printf("DEVIOUS %d\t%d\t%i\t%i\n", markerCount(finish),
1763 getNodeID(finish), arcCount(finish),
1764 arcCount(getTwinNode(finish)));
1765
1766 for (marker = getMarker(finish); marker != NULL;
1767 marker = getNextInNode(marker)) {
1768 newMarker = getPreviousInSequence(marker);
1769
1770 if (getNode(newMarker) != start)
1771 continue;
1772
1773
1774 for (currentAnchor = anchors; currentAnchor != NULL;
1775 currentAnchor = currentAnchor->next)
1776 if (markerFollowsPath
1777 (marker, currentAnchor->marker, targetPath,
1778 finish)) {
1779 untouchable = true;
1780 break;
1781 }
1782
1783 if (untouchable)
1784 continue;
1785
1786 markerStart = getPassageMarkerStart(marker);
1787 for (pathMarker = targetPath;
1788 getNode(pathMarker) != finish;
1789 pathMarker = getNextInSequence(pathMarker)) {
1790 previousMarker = newMarker;
1791 newMarker =
1792 addUncertainPassageMarker
1793 (getPassageMarkerSequenceID(marker),
1794 getNode(pathMarker));
1795 setPassageMarkerStatus(newMarker,
1796 getPassageMarkerStatus
1797 (marker));
1798 setPassageMarkerStart(newMarker, markerStart);
1799 setPassageMarkerFinish(newMarker, markerStart);
1800
1801 setNextInSequence(previousMarker, newMarker);
1802 setPreviousInSequence(previousMarker, newMarker);
1803
1804 setStartOffset(newMarker, 0);
1805 setFinishOffset(newMarker, 0);
1806 }
1807
1808 setNextInSequence(newMarker, marker);
1809 setPreviousInSequence(newMarker, marker);
1810 }
1811
1812 destroyPassageMarkerList(&anchors);
1813 }
1814
markerLeadsToArc(PassageMarker * marker,Node * nodeA,Node * nodeB)1815 static boolean markerLeadsToArc(PassageMarker * marker, Node * nodeA,
1816 Node * nodeB)
1817 {
1818 PassageMarker *current, *next;
1819 Node *twinA = getTwinNode(nodeA);
1820 Node *twinB = getTwinNode(nodeB);
1821
1822 current = marker;
1823 while (current != NULL) {
1824 next = getNextInSequence(current);
1825 if (getNode(current) == nodeA && getNode(next) == nodeB)
1826 return true;
1827 if (getNode(current) == twinB && getNode(next) == twinA)
1828 return true;
1829 current = next;
1830 }
1831
1832 return false;
1833 }
1834
1835 static void
remapEmptyPathOntoMiddlePath(PassageMarker * emptyPath,PassageMarker * targetPath)1836 remapEmptyPathOntoMiddlePath(PassageMarker * emptyPath,
1837 PassageMarker * targetPath)
1838 {
1839 Node *start = getNode(getPreviousInSequence(emptyPath));
1840 Node *finish = getNode(emptyPath);
1841
1842 // Remapping markers
1843 if (!markerLeadsToArc(targetPath, start, finish)) {
1844 remapEmptyPathArcsOntoMiddlePathSimple(emptyPath,
1845 targetPath);
1846 remapEmptyPathMarkersOntoMiddlePathSimple(emptyPath,
1847 targetPath);
1848 } else {
1849 remapEmptyPathMarkersOntoMiddlePathDevious(emptyPath,
1850 targetPath);
1851 }
1852
1853 //Remap times and previous(if necessary)
1854 if (getNodePrevious(finish) == start)
1855 remapNodeTimesOntoForwardMiddlePath(finish, targetPath);
1856
1857 if (getNodePrevious(getTwinNode(start)) == getTwinNode(finish))
1858 remapNodeTimesOntoTwinMiddlePath(finish, targetPath);
1859 }
1860
reduceSlowNodes(PassageMarker * slowMarker,Node * finish)1861 static void reduceSlowNodes(PassageMarker * slowMarker, Node * finish)
1862 {
1863 PassageMarker *marker;
1864
1865 for (marker = slowMarker; getNode(marker) != finish;
1866 marker = getNextInSequence(marker)) {
1867 reduceNode(getNode(marker));
1868 }
1869 }
1870
destroyPaths()1871 static void destroyPaths()
1872 {
1873 PassageMarker *marker;
1874
1875 while (slowPath != NULL) {
1876 marker = slowPath;
1877 getNodeTime(getNode(marker));
1878 getNodeTime(getTwinNode(getNode(marker)));
1879
1880 slowPath = getNextInSequence(marker);
1881 destroyPassageMarker(marker);
1882 }
1883
1884 while (fastPath != NULL) {
1885 marker = fastPath;
1886 getNodeTime(getNode(marker));
1887 getNodeTime(getTwinNode(getNode(marker)));
1888 fastPath = getNextInSequence(marker);
1889 destroyPassageMarker(marker);
1890 }
1891 }
1892
mapDistancesOntoPaths()1893 static Coordinate mapDistancesOntoPaths()
1894 {
1895 PassageMarker *marker;
1896 Coordinate totalDistance = 0;
1897
1898 marker = slowPath;
1899 while (!isTerminal(marker)) {
1900 marker = getNextInSequence(marker);
1901 setPassageMarkerStart(marker, totalDistance);
1902 totalDistance += getNodeLength(getNode(marker));
1903 setPassageMarkerFinish(marker, totalDistance);
1904 }
1905
1906 totalDistance = 0;
1907 marker = fastPath;
1908 while (!isTerminal(marker)) {
1909 marker = getNextInSequence(marker);
1910 setPassageMarkerStart(marker, totalDistance);
1911 totalDistance += getNodeLength(getNode(marker));
1912 setPassageMarkerFinish(marker, totalDistance);
1913 }
1914
1915 return totalDistance;
1916 }
1917
markerLeadsToNode(PassageMarker * marker,Node * node)1918 static boolean markerLeadsToNode(PassageMarker * marker, Node * node)
1919 {
1920 PassageMarker *currentMarker;
1921
1922 for (currentMarker = marker; currentMarker != NULL;
1923 currentMarker = getNextInSequence(currentMarker))
1924 if (getNode(currentMarker) == node)
1925 return true;
1926
1927 return false;
1928 }
1929
transferNodeData(Node * source,Node * target)1930 static void transferNodeData(Node * source, Node * target)
1931 {
1932 Arc *arc;
1933 Node *twinSource = getTwinNode(source);
1934 Node *twinTarget = getTwinNode(target);
1935 Node *destination;
1936
1937 // Time & Outward references
1938 if (getNodePrevious(source) == source) {
1939 setNodeTime(target, getNodeTime(source));
1940 setNodePrevious(target, target);
1941 }
1942
1943 if (getNodeTime(twinSource) == -1);
1944 else if (getNodePrevious(twinSource) == twinSource) {
1945 setNodeTime(twinTarget, getNodeTime(twinSource));
1946 setNodePrevious(twinTarget, twinTarget);
1947 } else if (getNodeTime(twinTarget) == -1
1948 || getNodeTime(twinSource) < getNodeTime(twinTarget)
1949 || (getNodeTime(twinSource) == getNodeTime(twinTarget)
1950 && !isPreviousToNode(twinTarget, twinSource))) {
1951 setNodeTime(twinTarget, getNodeTime(twinSource));
1952 setNodePrevious(getNodePrevious(twinSource), twinTarget);
1953 }
1954
1955 if (getNodePrevious(twinTarget) == source)
1956 setNodePrevious(target, twinTarget);
1957
1958 // Inward references:
1959 for (arc = getArc(source); arc != NULL; arc = getNextArc(arc)) {
1960 destination = getDestination(arc);
1961 if (getNodePrevious(destination) == source)
1962 setNodePrevious(target, destination);
1963 }
1964
1965 // Fib Heap refs
1966 remapNodeFibHeapReferencesOntoNode(source, target);
1967 remapNodeFibHeapReferencesOntoNode(twinSource, twinTarget);
1968
1969 // Starting point
1970 if (startingNode == source)
1971 startingNode = target;
1972 else if (startingNode == twinSource)
1973 startingNode = twinTarget;
1974
1975 if (getNode(slowPath) == twinSource)
1976 slowPath = getNextInSequence(slowPath);
1977 if (getNode(fastPath) == twinSource)
1978 fastPath = getNextInSequence(fastPath);
1979
1980 // Next node
1981 if (source == activeNode) {
1982 activeNode = target;
1983 todo =
1984 &todoLists[getNodeID(activeNode) + nodeCount(graph)];
1985 }
1986 concatenateTodoLists(target, source);
1987
1988 if (twinSource == activeNode) {
1989 activeNode = twinTarget;
1990 todo =
1991 &todoLists[getNodeID(activeNode) + nodeCount(graph)];
1992 }
1993 }
1994
1995 // Replaces two consecutive nodes into a single equivalent node
1996 // The extra memory is freed
concatenateNodesAndVaccinate(Node * nodeA,Node * nodeB,Graph * graph)1997 static void concatenateNodesAndVaccinate(Node * nodeA, Node * nodeB,
1998 Graph * graph)
1999 {
2000 PassageMarker *marker, *tmpMarker;
2001 Node *twinA = getTwinNode(nodeA);
2002 Node *twinB = getTwinNode(nodeB);
2003 Arc *arc;
2004 Category cat;
2005
2006 //printf("Concatenating nodes %ld and %ld\n", getNodeID(nodeA), getNodeID(nodeB));
2007 // Arc management:
2008 // Freeing useless arcs
2009 while (getArc(nodeA) != NULL)
2010 destroyArc(getArc(nodeA), graph);
2011
2012 // Correct arcs
2013 for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) {
2014 if (getDestination(arc) != twinB)
2015 createAnalogousArcAndVaccinate(nodeA,
2016 getDestination(arc),
2017 arc);
2018 else
2019 createAnalogousArcAndVaccinate(nodeA, twinA, arc);
2020 }
2021
2022 // Passage marker management in node A:
2023 for (marker = getMarker(nodeA); marker != NULL;
2024 marker = getNextInNode(marker))
2025 if (isTerminal(marker))
2026 incrementFinishOffset(marker,
2027 getNodeLength(nodeB));
2028
2029 // Swapping new born passageMarkers from B to A
2030 for (marker = getMarker(nodeB); marker != NULL; marker = tmpMarker) {
2031 tmpMarker = getNextInNode(marker);
2032
2033 if (isInitial(marker)) {
2034 extractPassageMarker(marker);
2035 transposePassageMarker(marker, nodeA);
2036 incrementStartOffset(marker, getNodeLength(nodeA));
2037 } else
2038 disconnectNextPassageMarker(getPreviousInSequence
2039 (marker), graph);
2040 }
2041
2042 // Read starts
2043 concatenateReadStarts(nodeA, nodeB, graph);
2044
2045 // Descriptor management
2046 appendDescriptors(nodeA, nodeB);
2047
2048 // Update uniqueness:
2049 setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(nodeB));
2050
2051 // Update virtual coverage
2052 for (cat = 0; cat < CATEGORIES; cat++)
2053 incrementVirtualCoverage(nodeA, cat,
2054 getVirtualCoverage(nodeB, cat));
2055
2056 // Update virtual coverage
2057 for (cat = 0; cat < CATEGORIES; cat++)
2058 incrementOriginalVirtualCoverage(nodeA, cat,
2059 getOriginalVirtualCoverage
2060 (nodeB, cat));
2061
2062 // Freeing gobbled node
2063 destroyNode(nodeB, graph);
2064 }
2065
simplifyNode(Node * node)2066 static void simplifyNode(Node * node)
2067 {
2068 Node *twin = getTwinNode(node);
2069 Node *destination, *twinDestination;
2070
2071 if (!hasSingleArc(node))
2072 return;
2073
2074 destination = getDestination(getArc(node));
2075 twinDestination = getTwinNode(destination);
2076
2077 while (hasSingleArc(node)
2078 && hasSingleArc(twinDestination)
2079 && destination != twin && destination != node) {
2080 transferNodeData(destination, node);
2081 concatenateNodesAndVaccinate(node, destination, graph);
2082
2083 if (!hasSingleArc(node))
2084 return;
2085 destination = getDestination(getArc(node));
2086 twinDestination = getTwinNode(destination);
2087 }
2088
2089 }
2090
concatenatePathNodes(PassageMarker * pathStart)2091 static void concatenatePathNodes(PassageMarker * pathStart)
2092 {
2093 PassageMarker *pathMarker;
2094
2095 //puts("Removing null loops");
2096 for (pathMarker = pathStart; pathMarker != NULL;
2097 pathMarker = getNextInSequence(pathMarker)) {
2098 simplifyNode(getNode(pathMarker));
2099 }
2100 }
2101
2102 #define SLOW_TO_FAST true
2103 #define FAST_TO_SLOW false
2104
cleanUpRedundancy()2105 static void cleanUpRedundancy()
2106 {
2107 PassageMarker *slowMarker = getNextInSequence(slowPath);
2108 PassageMarker *fastMarker = getNextInSequence(fastPath);
2109 Coordinate slowLength, fastLength;
2110 Coordinate fastConstraint = 0;
2111 Coordinate slowConstraint = 0;
2112 Coordinate finalLength;
2113 Node *slowNode, *fastNode;
2114
2115 //puts("Correcting new redundancy");
2116 mapSlowOntoFast();
2117 finalLength = mapDistancesOntoPaths();
2118
2119 while (slowMarker != NULL && fastMarker != NULL) {
2120 if (isTerminal(slowMarker))
2121 slowLength = finalLength;
2122 else {
2123 slowLength =
2124 slowToFastMapping[getPassageMarkerFinish
2125 (slowMarker) - 1];
2126 if (slowLength < slowConstraint)
2127 slowLength = slowConstraint;
2128 }
2129
2130 fastLength = getPassageMarkerFinish(fastMarker) - 1;
2131 if (fastLength < fastConstraint)
2132 fastLength = fastConstraint;
2133
2134 slowNode = getNode(slowMarker);
2135 fastNode = getNode(fastMarker);
2136
2137 if (slowNode == fastNode) {
2138 if (fastLength > slowLength)
2139 slowConstraint = fastLength;
2140 else if (fastLength < slowLength);
2141 fastConstraint = slowLength;
2142
2143 slowMarker = getNextInSequence(slowMarker);
2144 fastMarker = getNextInSequence(fastMarker);
2145 } else if (slowNode == getTwinNode(fastNode)) {
2146 if (fastLength > slowLength)
2147 slowConstraint = fastLength;
2148 else if (fastLength < slowLength);
2149 fastConstraint = slowLength;
2150
2151 slowMarker = getNextInSequence(slowMarker);
2152 fastMarker = getNextInSequence(fastMarker);
2153 foldSymmetricalNode(fastNode);
2154 } else if (markerLeadsToNode(slowMarker, fastNode)) {
2155 reduceSlowNodes(slowMarker, fastNode);
2156 remapEmptyPathOntoMiddlePath(fastMarker,
2157 slowMarker);
2158 while (getNode(slowMarker) != fastNode)
2159 slowMarker = getNextInSequence(slowMarker);
2160 } else if (markerLeadsToNode(fastMarker, slowNode)) {
2161 remapEmptyPathOntoMiddlePath(slowMarker,
2162 fastMarker);
2163 while (getNode(fastMarker) != slowNode)
2164 fastMarker = getNextInSequence(fastMarker);
2165 } else if (slowLength == fastLength) {
2166 remapNodeOntoNeighbour(slowNode, slowMarker,
2167 fastNode, fastMarker);
2168 slowMarker = getNextInSequence(slowMarker);
2169 fastMarker = getNextInSequence(fastMarker);
2170 } else if (slowLength < fastLength) {
2171 remapBackOfNodeOntoNeighbour(fastNode, fastMarker,
2172 slowNode, slowMarker,
2173 FAST_TO_SLOW);
2174 slowMarker = getNextInSequence(slowMarker);
2175 } else {
2176 remapBackOfNodeOntoNeighbour(slowNode, slowMarker,
2177 fastNode, fastMarker,
2178 SLOW_TO_FAST);
2179 fastMarker = getNextInSequence(fastMarker);
2180 }
2181
2182 fflush(stdout);
2183 }
2184
2185 //puts("Done with path");
2186
2187 while (!isInitial(slowPath))
2188 slowPath = getPreviousInSequence(slowPath);
2189 while (!isInitial(fastPath))
2190 fastPath = getPreviousInSequence(fastPath);
2191
2192 //puts("Concatenation");
2193
2194 // Freeing up memory
2195 if (slowMarker != NULL)
2196 concatenatePathNodes(slowPath);
2197 else
2198 concatenatePathNodes(fastPath);
2199
2200 //puts("Vaccinatting");
2201
2202 destroyPaths();
2203
2204 // Cleaning up silly structures
2205 //vaccinatePath(&returnValue);
2206
2207 //puts("Clean up done");
2208 //fflush(stdout);
2209 }
2210
comparePaths(Node * destination,Node * origin)2211 static void comparePaths(Node * destination, Node * origin)
2212 {
2213 IDnum slowLength, fastLength;
2214 Node *fastNode, *slowNode;
2215 IDnum i;
2216 PassageMarker *marker;
2217
2218 //Measure lengths
2219 slowLength = fastLength = 0;
2220 fastNode = destination;
2221 slowNode = origin;
2222
2223 while (fastNode != slowNode) {
2224 if (getNodeTime(fastNode) > getNodeTime(slowNode)) {
2225 fastLength++;
2226 fastNode = getNodePrevious(fastNode);
2227 } else if (getNodeTime(fastNode) < getNodeTime(slowNode)) {
2228 slowLength++;
2229 slowNode = getNodePrevious(slowNode);
2230 } else if (isPreviousToNode(slowNode, fastNode)) {
2231 while (fastNode != slowNode) {
2232 fastLength++;
2233 fastNode = getNodePrevious(fastNode);
2234 }
2235 } else if (isPreviousToNode(fastNode, slowNode)) {
2236 while (slowNode != fastNode) {
2237 slowLength++;
2238 slowNode = getNodePrevious(slowNode);
2239 }
2240 } else {
2241 fastLength++;
2242 fastNode = getNodePrevious(fastNode);
2243 slowLength++;
2244 slowNode = getNodePrevious(slowNode);
2245 }
2246
2247 if (slowLength > MAXNODELENGTH
2248 || fastLength > MAXNODELENGTH)
2249 return;
2250 }
2251
2252 if (fastLength == 0)
2253 return;
2254
2255 //Backtracking to record actual paths
2256 fastPath = addUncertainPassageMarker(1, destination);
2257 setPassageMarkerStatus(fastPath, true);
2258
2259 for (i = 0; i < fastLength; i++) {
2260 marker =
2261 addUncertainPassageMarker(1,
2262 getNodePrevious(getNode
2263 (fastPath)));
2264 setPassageMarkerStatus(marker, true);
2265 connectPassageMarkers(marker, fastPath, graph);
2266 fastPath = marker;
2267 }
2268
2269 slowPath = addUncertainPassageMarker(2, destination);
2270 setPassageMarkerStatus(slowPath, true);
2271
2272 marker = addUncertainPassageMarker(2, origin);
2273 setPassageMarkerStatus(marker, true);
2274 connectPassageMarkers(marker, slowPath, graph);
2275 slowPath = marker;
2276
2277 for (i = 0; i < slowLength; i++) {
2278 marker =
2279 addUncertainPassageMarker(2,
2280 getNodePrevious(getNode
2281 (slowPath)));
2282 setPassageMarkerStatus(marker, true);
2283 connectPassageMarkers(marker, slowPath, graph);
2284 slowPath = marker;
2285 }
2286
2287 //Extract sequences
2288 if (!extractSequence(fastPath, fastSequence)
2289 || !extractSequence(slowPath, slowSequence)) {
2290 destroyPaths();
2291 return;
2292 }
2293 //Compare sequences
2294 if (compareSequences(fastSequence, slowSequence)) {
2295 cleanUpRedundancy();
2296 return;
2297 }
2298 //puts("\tFinished comparing paths, changes made");
2299 destroyPaths();
2300 }
2301
tourBusArc(Node * origin,Arc * arc,Time originTime)2302 static void tourBusArc(Node * origin, Arc * arc, Time originTime)
2303 {
2304 Node *destination = getDestination(arc);
2305 Time arcTime, totalTime, destinationTime;
2306 IDnum nodeIndex = getNodeID(destination) + nodeCount(graph);
2307 Node *oldPrevious = previous[nodeIndex];
2308
2309 if (oldPrevious == origin || getNodeStatus(destination) == 1)
2310 return;
2311
2312 arcTime =
2313 ((Time) getNodeLength(origin)) / ((Time) getMultiplicity(arc));
2314 totalTime = originTime + arcTime;
2315
2316 destinationTime = times[nodeIndex];
2317
2318 if (destinationTime == -1) {
2319 setNodeTime(destination, totalTime);
2320 dheapNodes[nodeIndex] =
2321 insertNodeIntoDHeap(dheap, totalTime, destination);
2322 previous[nodeIndex] = origin;
2323 return;
2324 } else if (destinationTime > totalTime) {
2325 if (dheapNodes[nodeIndex] == NULL) {
2326 //puts("Already expanded though");
2327 return;
2328 }
2329
2330 setNodeTime(destination, totalTime);
2331 replaceKeyInDHeap(dheap, dheapNodes[nodeIndex], totalTime);
2332 previous[nodeIndex] = origin;
2333
2334 comparePaths(destination, oldPrevious);
2335 return;
2336 } else {
2337 if (destinationTime == getNodeTime(origin)
2338 && isPreviousToNode(destination, origin)) {
2339 return;
2340 }
2341
2342 comparePaths(destination, origin);
2343 }
2344 }
2345
initializeTodoLists()2346 static void initializeTodoLists()
2347 {
2348 IDnum index;
2349 Node *node;
2350 Arc *arc;
2351 Ticket *tkt;
2352 IDnum nodes = nodeCount(graph);
2353 Ticket **currentList;
2354 Ticket *currentTicket, *tmp;
2355 Node *destination;
2356
2357 puts("Initializing todo lists");
2358
2359 for (index = -nodes; index <= nodes; index++) {
2360 node = getNodeInGraph(graph, index);
2361 if (node == NULL)
2362 continue;
2363
2364 currentList = &todoLists[index + nodes];
2365 *currentList = NULL;
2366
2367 for (arc = getArc(node); arc != NULL;
2368 arc = getNextArc(arc)) {
2369 destination = getDestination(arc);
2370
2371 if (destination == node)
2372 continue;
2373
2374 tkt = newTicket();
2375 tkt->id_a = getNodeID(destination);
2376
2377 currentTicket = *currentList;
2378 if (currentTicket == NULL
2379 || currentTicket->id_a > tkt->id_a) {
2380 tkt->next = currentTicket;
2381 *currentList = tkt;
2382 continue;
2383 }
2384
2385 while (currentTicket->next != NULL
2386 && currentTicket->next->id_a < tkt->id_a)
2387 currentTicket = currentTicket->next;
2388
2389 tmp = currentTicket->next;
2390 currentTicket->next = tkt;
2391 tkt->next = tmp;
2392 }
2393 }
2394
2395 puts("Done with initilization");
2396 }
2397
tourBusNode(Node * node)2398 static void tourBusNode(Node * node)
2399 {
2400 Arc *arc;
2401 Node *destination;
2402 Time nodeTime = getNodeTime(node);
2403 IDnum id;
2404
2405 dbgCounter++;
2406 if (dbgCounter % 1000 == 0) {
2407 printf("%d nodes visited\n", dbgCounter);
2408 fflush(stdout);
2409 }
2410
2411 setSingleNodeStatus(node, 2);
2412 activeNode = node;
2413 todo = &todoLists[getNodeID(activeNode) + nodeCount(graph)];
2414 done = NULL;
2415
2416 while ((id = nextTodoTicket()) != 0) {
2417 destination = getNodeInGraph(graph, id);
2418
2419 // Node doesn't exist anymore
2420 if (destination == NULL)
2421 continue;
2422
2423 arc = getArcBetweenNodes(activeNode, destination, graph);
2424
2425 // Arc does not exist for some reason (?)
2426 if (arc == NULL)
2427 continue;
2428
2429 tourBusArc(activeNode, arc, nodeTime);
2430 }
2431
2432 freeDoneTickets();
2433 }
2434
getTipLength(Node * node)2435 static Coordinate getTipLength(Node * node)
2436 {
2437 Node *current = getTwinNode(node);
2438 Coordinate length = 0;
2439
2440 if (simpleArcCount(current) > 1)
2441 return getNodeLength(node);
2442
2443 while (current != NULL && simpleArcCount(getTwinNode(current)) < 2
2444 && simpleArcCount(current) < 2) {
2445 length += getNodeLength(current);
2446 current = getDestination(getArc(current));
2447 }
2448
2449 return length;
2450 }
2451
isMinorityChoice(Node * node)2452 static boolean isMinorityChoice(Node * node)
2453 {
2454 Node *current = getTwinNode(node);
2455 Arc *arc;
2456 Arc *activeArc = NULL;
2457 IDnum mult = 0;
2458
2459 // Finding first tangle
2460 while (current != NULL && simpleArcCount(getTwinNode(current)) < 2
2461 && simpleArcCount(current) < 2) {
2462 activeArc = getArc(current);
2463 current = getDestination(activeArc);
2464 }
2465
2466 // If isolated snippet:
2467 if (current == NULL)
2468 return true;
2469
2470 // Joined tips
2471 if (simpleArcCount(getTwinNode(current)) < 2)
2472 return false;
2473
2474 // If unique event
2475 if (getMultiplicity(activeArc) == 1)
2476 return true;
2477
2478 // Computing max arc
2479 for (arc = getArc(getTwinNode(current)); arc != NULL;
2480 arc = getNextArc(arc))
2481 if (getMultiplicity(arc) > mult)
2482 mult = getMultiplicity(arc);
2483
2484 // Testing for minority
2485 return mult >= getMultiplicity(activeArc);
2486 }
2487
clipTips(Graph * graph)2488 void clipTips(Graph * graph)
2489 {
2490 IDnum index;
2491 Node *current, *twin;
2492 boolean modified = true;
2493 int Wordlength = getWordLength(graph);
2494 PassageMarker *marker;
2495
2496 puts("Clipping short tips off graph");
2497
2498 while (modified) {
2499 modified = false;
2500 for (index = 1; index <= nodeCount(graph); index++) {
2501 current = getNodeInGraph(graph, index);
2502
2503 if (current == NULL)
2504 continue;
2505
2506 twin = getTwinNode(current);
2507
2508 if (getArc(current) == NULL
2509 && getTipLength(current) < 2 * Wordlength
2510 && isMinorityChoice(current)) {
2511 while ((marker = getMarker(current))) {
2512 if (!isInitial(marker)
2513 && !isTerminal(marker))
2514 disconnectNextPassageMarker
2515 (getPreviousInSequence
2516 (marker), graph);
2517 destroyPassageMarker(marker);
2518 }
2519 destroyNode(current, graph);
2520 modified = true;
2521 } else if (getArc(twin) == NULL
2522 && getTipLength(twin) < 2 * Wordlength
2523 && isMinorityChoice(twin)) {
2524 while ((marker = getMarker(current))) {
2525 if (!isInitial(marker)
2526 && !isTerminal(marker))
2527 disconnectNextPassageMarker
2528 (getPreviousInSequence
2529 (marker), graph);
2530 destroyPassageMarker(marker);
2531 }
2532 destroyNode(twin, graph);
2533 modified = true;
2534 }
2535 }
2536 }
2537
2538 concatenateGraph(graph);
2539 printf("%d nodes left\n", nodeCount(graph));
2540 }
2541
clipTipsHard(Graph * graph)2542 void clipTipsHard(Graph * graph)
2543 {
2544 IDnum index;
2545 Node *current, *twin;
2546 boolean modified = true;
2547 int Wordlength = getWordLength(graph);
2548 PassageMarker *marker;
2549
2550 puts("Clipping short tips off graph, drastic");
2551
2552 while (modified) {
2553 modified = false;
2554 for (index = 1; index <= nodeCount(graph); index++) {
2555 current = getNodeInGraph(graph, index);
2556
2557 if (current == NULL)
2558 continue;
2559
2560 twin = getTwinNode(current);
2561
2562 if (getArc(current) == NULL
2563 && getTipLength(current) < 2 * Wordlength) {
2564 while ((marker = getMarker(current))) {
2565 if (!isInitial(marker)
2566 && !isTerminal(marker))
2567 disconnectNextPassageMarker
2568 (getPreviousInSequence
2569 (marker), graph);
2570 destroyPassageMarker(marker);
2571 }
2572 destroyNode(current, graph);
2573 modified = true;
2574 } else if (getArc(twin) == NULL
2575 && getTipLength(twin) <
2576 2 * Wordlength) {
2577 while ((marker = getMarker(current))) {
2578 if (!isInitial(marker)
2579 && !isTerminal(marker))
2580 disconnectNextPassageMarker
2581 (getPreviousInSequence
2582 (marker), graph);
2583 destroyPassageMarker(marker);
2584 }
2585 destroyNode(twin, graph);
2586 modified = true;
2587 }
2588 }
2589 }
2590
2591 concatenateGraph(graph);
2592 printf("%d nodes left\n", nodeCount(graph));
2593 }
2594
tourBus(Node * startingPoint)2595 static void tourBus(Node * startingPoint)
2596 {
2597 Node *currentNode = startingPoint;
2598 IDnum nodeID = getNodeID(startingPoint) + nodeCount(graph);
2599
2600 //printf("Tour bus from node %ld...\n", getNodeID(startingPoint));
2601
2602 times[nodeID] = 0;
2603 previous[nodeID] = currentNode;
2604
2605 while (currentNode != NULL) {
2606 dheapNodes[getNodeID(currentNode) + nodeCount(graph)] =
2607 NULL;
2608 tourBusNode(currentNode);
2609 currentNode = removeNextNodeFromDHeap(dheap);
2610 }
2611 }
2612
correctGraph(Graph * argGraph,Coordinate * argSequenceLengths)2613 void correctGraph(Graph * argGraph, Coordinate * argSequenceLengths)
2614 {
2615 IDnum nodes;
2616 IDnum index;
2617
2618 //Setting global params
2619 graph = argGraph;
2620 WORDLENGTH = getWordLength(graph);
2621 sequenceLengths = argSequenceLengths;
2622 dbgCounter = 0;
2623 // Done with global params
2624
2625 printf("Correcting graph with cutoff %f\n", MAXDIVERGENCE);
2626
2627 //clipTips(graph);
2628 nodes = nodeCount(graph);
2629
2630 // Allocating memory
2631 times = mallocOrExit(2 * nodes + 1, Time);
2632 previous = mallocOrExit(2 * nodes + 1, Node *);
2633 dheapNodes = mallocOrExit(2 * nodes + 1, DFibHeapNode *);
2634
2635 for (index = 0; index < (2 * nodeCount(graph) + 1); index++) {
2636 times[index] = -1;
2637 previous[index] = NULL;
2638 dheapNodes[index] = NULL;
2639 }
2640
2641 dheap = newDFibHeap();
2642
2643 fastSequence = newTightString(MAXREADLENGTH);
2644 slowSequence = newTightString(MAXREADLENGTH);
2645 fastToSlowMapping = callocOrExit(MAXREADLENGTH + 1, Coordinate);
2646 slowToFastMapping = callocOrExit(MAXREADLENGTH + 1, Coordinate);
2647 Fmatrix = callocOrExit(MAXREADLENGTH + 1, double *);
2648 for (index = 0; index < MAXREADLENGTH + 1; index++)
2649 Fmatrix[index] = callocOrExit(MAXREADLENGTH + 1, double);
2650
2651 eligibleStartingPoints = mallocOrExit(2 * nodes + 1, IDnum);
2652 progressStatus = callocOrExit(2 * nodes + 1, boolean);
2653 todoLists = callocOrExit(2 * nodes + 1, Ticket *);
2654 //Done with memory
2655
2656 resetNodeStatus(graph);
2657 determineEligibleStartingPoints();
2658 initializeTodoLists();
2659 activateArcLookupTable(graph);
2660
2661 while ((startingNode = nextStartingPoint()) != NULL) {
2662 //puts("Going through the cycle...");
2663 tourBus(startingNode);
2664 updateNodeStatus(startingNode);
2665 }
2666
2667 deactivateArcLookupTable(graph);
2668 concatenateGraph(graph);
2669
2670 clipTipsHard(graph);
2671
2672 //Deallocating globals
2673 free(times);
2674 free(previous);
2675 free(dheapNodes);
2676 destroyDHeap(dheap);
2677
2678 destroyTightString(fastSequence);
2679 destroyTightString(slowSequence);
2680 free(fastToSlowMapping);
2681 free(slowToFastMapping);
2682 for (index = 0; index < MAXREADLENGTH + 1; index++)
2683 free(Fmatrix[index]);
2684 free(Fmatrix);
2685
2686 free(eligibleStartingPoints);
2687 free(progressStatus);
2688 free(todoLists);
2689
2690 if (ticketMemory != NULL)
2691 destroyRecycleBin(ticketMemory);
2692
2693 free(sequenceLengths);
2694 //Done deallocating
2695 }
2696
setMaxReadLength(int value)2697 void setMaxReadLength(int value)
2698 {
2699 if (value < 0) {
2700 printf("Negative branch length %i!\n", value);
2701 puts("Exiting...");
2702 exit(1);
2703 }
2704 MAXREADLENGTH = value;
2705 MAXNODELENGTH = 2 * value;
2706 }
2707
setMaxGaps(int value)2708 void setMaxGaps(int value)
2709 {
2710 if (value < 0) {
2711 printf("Negative max gap count %i!\n", value);
2712 puts("Exiting...");
2713 exit(1);
2714 }
2715 MAXGAPS = value;
2716 }
2717
setMaxDivergence(double value)2718 void setMaxDivergence(double value)
2719 {
2720 if (value < 0 || value > 1) {
2721 printf("Divergence rate %lf out of bounds [0,1]!\n",
2722 value);
2723 puts("Exiting...");
2724 exit(1);
2725 }
2726 MAXDIVERGENCE = value;
2727 }
2728