1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3 
4     This file is part of Velvet.
5 
6     Velvet is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     Velvet is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with Velvet; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 #include "globals.h"
26 #include "graph.h"
27 #include "recycleBin.h"
28 #include "tightString.h"
29 #include "passageMarker.h"
30 #include "utility.h"
31 #include "kmer.h"
32 
33 #define ADENINE 0
34 #define CYTOSINE 1
35 #define GUANINE 2
36 #define THYMINE 3
37 
38 struct arc_st {
39 	Arc *twinArc;		// 64
40 	Arc *next;		// 64
41 	Arc *previous;		// 64
42 	Arc *nextInLookupTable;	// 64
43 	Node *destination;	// 64
44 	IDnum multiplicity;	// 32
45 }  ATTRIBUTE_PACKED;	// 352 Total
46 
47 struct node_st {
48 	Node *twinNode;		// 64
49 	Arc *arc;		// 64
50 	Descriptor *descriptor;	// 64
51 	PassageMarkerI marker;	// 32
52 	IDnum length;	// 32
53 #ifndef SINGLE_COV_CAT
54 	IDnum virtualCoverage[CATEGORIES];	// 32 * 2
55 	IDnum originalVirtualCoverage[CATEGORIES];	// 32 * 2
56 #else
57 	IDnum virtualCoverage;	// 32 * 2
58 #endif
59 	IDnum ID;		// 32
60 	IDnum arcCount;		// 32
61 	boolean status;		// 1
62 	boolean uniqueness;	// 1
63 } ATTRIBUTE_PACKED;	// 418 Total
64 
65 struct shortReadMarker_st {
66 	IDnum position;
67 	IDnum readID;
68 	ShortLength offset;
69 } ATTRIBUTE_PACKED;
70 
71 struct gapMarker_st {
72 	GapMarker *next;
73 	IDnum position;
74 	IDnum length;
75 } ATTRIBUTE_PACKED;
76 
77 struct graph_st {
78 	Node **nodes;
79 	Arc **arcLookupTable;
80 	ShortReadMarker **nodeReads;
81 	IDnum *nodeReadCounts;
82 	GapMarker **gapMarkers;
83 	Coordinate insertLengths[CATEGORIES + 1];
84 	double insertLengths_var[CATEGORIES + 1];
85 	IDnum sequenceCount;
86 	IDnum nodeCount;
87 	int wordLength;
88 	boolean double_stranded;
89 };
90 
91 static RecycleBin *arcMemory = NULL;
92 static RecycleBin *nodeMemory = NULL;
93 static RecycleBin *gapMarkerMemory = NULL;
94 
95 #define BLOCKSIZE 50
96 #define GAPBLOCKSIZE 10000
97 
allocateArc()98 Arc *allocateArc()
99 {
100 	if (arcMemory == NULL)
101 		arcMemory = newRecycleBin(sizeof(Arc), BLOCKSIZE);
102 
103 	return allocatePointer(arcMemory);
104 }
105 
deallocateArc(Arc * arc)106 void deallocateArc(Arc * arc)
107 {
108 	deallocatePointer(arcMemory, arc);
109 }
110 
allocateNode()111 Node *allocateNode()
112 {
113 	if (nodeMemory == NULL)
114 		nodeMemory = newRecycleBin(sizeof(Node), BLOCKSIZE);
115 
116 	return (Node *) allocatePointer(nodeMemory);
117 }
118 
deallocateNode(Node * node)119 void deallocateNode(Node * node)
120 {
121 	deallocatePointer(nodeMemory, node);
122 }
123 
124 // Returns the twin node of a given node
getTwinNode(Node * node)125 Node *getTwinNode(Node * node)
126 {
127 	return node->twinNode;
128 }
129 
130 // Inserts new passage marker in the marker list of destination node
insertPassageMarker(PassageMarkerI marker,Node * destination)131 void insertPassageMarker(PassageMarkerI marker, Node * destination)
132 {
133 	setTopOfTheNode(marker);
134 	setNextInNode(marker, destination->marker);
135 	destination->marker = marker;
136 }
137 
138 // Returns the length of the node's descriptor list
getNodeLength(Node * node)139 Coordinate getNodeLength(Node * node)
140 {
141 	return node->length;
142 }
143 
144 // Returns the number of nodes in the graph
nodeCount(Graph * graph)145 IDnum nodeCount(Graph * graph)
146 {
147 	return graph->nodeCount;
148 }
149 
150 // returns the number of sequences used to buid the graph
sequenceCount(Graph * graph)151 IDnum sequenceCount(Graph * graph)
152 {
153 	return graph->sequenceCount;
154 }
155 
156 // Creates an arc from node origin to node destination.
157 // If this arc already exists, increments its multiplicity by 1.
createArc(Node * originNode,Node * destinationNode,Graph * graph)158 Arc *createArc(Node * originNode, Node * destinationNode, Graph * graph)
159 {
160 	Arc *arc, *twinArc;
161 	Node *destinationTwin;
162 	IDnum lookupIndex;
163 
164 	if (originNode == NULL || destinationNode == NULL)
165 		return NULL;
166 
167 //      velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
168 
169 	arc = getArcBetweenNodes(originNode, destinationNode, graph);
170 
171 	if (arc != NULL) {
172 		arc->multiplicity++;
173 		arc->twinArc->multiplicity++;
174 		return arc;
175 	}
176 	// If not found
177 #ifdef _OPENMP
178 	#pragma omp critical
179 #endif
180 	arc = allocateArc();
181 	arc->destination = destinationNode;
182 	arc->multiplicity = 1;
183 	arc->previous = NULL;
184 	arc->next = originNode->arc;
185 	if (originNode->arc != NULL)
186 		originNode->arc->previous = arc;
187 	originNode->arc = arc;
188 	originNode->arcCount++;
189 
190 	destinationTwin = destinationNode->twinNode;
191 
192 	// Hairpin case
193 	if (destinationTwin == originNode) {
194 		arc->multiplicity++;
195 		arc->twinArc = arc;
196 		if (graph->arcLookupTable != NULL) {
197 			lookupIndex =
198 			    2 * originNode->ID + destinationNode->ID +
199 			    3 * graph->nodeCount;
200 			arc->nextInLookupTable =
201 			    graph->arcLookupTable[lookupIndex];
202 			graph->arcLookupTable[lookupIndex] = arc;
203 		}
204 		return arc;
205 	}
206 
207 #ifdef _OPENMP
208 	#pragma omp critical
209 #endif
210 	twinArc = allocateArc();
211 	twinArc->destination = originNode->twinNode;
212 	twinArc->multiplicity = 1;
213 	twinArc->previous = NULL;
214 	twinArc->next = destinationTwin->arc;
215 	if (destinationTwin->arc != NULL)
216 		destinationTwin->arc->previous = twinArc;
217 	destinationTwin->arc = twinArc;
218 	destinationTwin->arcCount++;
219 
220 	arc->twinArc = twinArc;
221 	twinArc->twinArc = arc;
222 
223 	if (graph->arcLookupTable != NULL) {
224 		lookupIndex =
225 		    2 * originNode->ID + destinationNode->ID +
226 		    3 * graph->nodeCount;
227 		arc->nextInLookupTable =
228 		    graph->arcLookupTable[lookupIndex];
229 		graph->arcLookupTable[lookupIndex] = arc;
230 
231 		lookupIndex =
232 		    -2 * destinationNode->ID - originNode->ID +
233 		    3 * graph->nodeCount;
234 		twinArc->nextInLookupTable =
235 		    graph->arcLookupTable[lookupIndex];
236 		graph->arcLookupTable[lookupIndex] = twinArc;
237 	}
238 	return arc;
239 }
240 
createAnalogousArc(Node * originNode,Node * destinationNode,Arc * refArc,Graph * graph)241 void createAnalogousArc(Node * originNode, Node * destinationNode,
242 			Arc * refArc, Graph * graph)
243 {
244 	Arc *arc, *twinArc;
245 	Node *destinationTwin;
246 	IDnum lookupIndex;
247 
248 	if (originNode == NULL || destinationNode == NULL)
249 		return;
250 
251 //      velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
252 
253 	arc = getArcBetweenNodes(originNode, destinationNode, graph);
254 
255 	if (arc != NULL) {
256 		if (refArc->twinArc != refArc) {
257 			arc->multiplicity += getMultiplicity(refArc);
258 			arc->twinArc->multiplicity +=
259 			    getMultiplicity(refArc);
260 		} else {
261 			arc->multiplicity += getMultiplicity(refArc) / 2;
262 			arc->twinArc->multiplicity +=
263 			    getMultiplicity(refArc) / 2;
264 		}
265 		return;
266 	}
267 	// If not found
268 	arc = allocateArc();
269 	arc->destination = destinationNode;
270 	arc->multiplicity = getMultiplicity(refArc);
271 	arc->previous = NULL;
272 	arc->next = originNode->arc;
273 	if (originNode->arc != NULL)
274 		originNode->arc->previous = arc;
275 	originNode->arc = arc;
276 	originNode->arcCount++;
277 
278 	destinationTwin = destinationNode->twinNode;
279 
280 	// Hairpin case
281 	if (destinationTwin == originNode) {
282 		arc->twinArc = arc;
283 		if (refArc->twinArc != refArc)
284 			arc->multiplicity *= 2;
285 
286 		if (graph->arcLookupTable != NULL) {
287 			lookupIndex =
288 			    2 * originNode->ID + destinationNode->ID
289 			    + 3 * graph->nodeCount;
290 			arc->nextInLookupTable =
291 			    graph->arcLookupTable[lookupIndex];
292 			graph->arcLookupTable[lookupIndex] = arc;
293 		}
294 		return;
295 	}
296 
297 	twinArc = allocateArc();
298 	twinArc->destination = originNode->twinNode;
299 	twinArc->multiplicity = getMultiplicity(refArc);
300 	twinArc->previous = NULL;
301 	twinArc->next = destinationTwin->arc;
302 	if (destinationTwin->arc != NULL)
303 		destinationTwin->arc->previous = twinArc;
304 	destinationTwin->arc = twinArc;
305 	destinationTwin->arcCount++;
306 
307 	arc->twinArc = twinArc;
308 	twinArc->twinArc = arc;
309 
310 	if (graph->arcLookupTable != NULL) {
311 		lookupIndex =
312 		    2 * originNode->ID + destinationNode->ID +
313 		    3 * graph->nodeCount;
314 		arc->nextInLookupTable =
315 		    graph->arcLookupTable[lookupIndex];
316 		graph->arcLookupTable[lookupIndex] = arc;
317 
318 		lookupIndex =
319 		    -2 * destinationNode->ID - originNode->ID +
320 		    3 * graph->nodeCount;
321 		twinArc->nextInLookupTable =
322 		    graph->arcLookupTable[lookupIndex];
323 		graph->arcLookupTable[lookupIndex] = twinArc;
324 	}
325 }
326 
getArcBetweenNodes(Node * originNode,Node * destinationNode,Graph * graph)327 Arc *getArcBetweenNodes(Node * originNode, Node * destinationNode,
328 			Graph * graph)
329 {
330 	Arc *arc;
331 	Node *twinDestination, *twinOrigin;
332 
333 	if (originNode == NULL || destinationNode == NULL)
334 		return NULL;
335 
336 	if (graph->arcLookupTable != NULL) {
337 		for (arc =
338 		     graph->arcLookupTable[2 * originNode->ID +
339 					   destinationNode->ID +
340 					   3 * graph->nodeCount];
341 		     arc != NULL; arc = arc->nextInLookupTable) {
342 			if (arc->destination == destinationNode) {
343 				return arc;
344 			}
345 		}
346 		return NULL;
347 	}
348 
349 	twinDestination = destinationNode->twinNode;
350 	if (originNode->arcCount <= twinDestination->arcCount) {
351 		for (arc = originNode->arc; arc != NULL; arc = arc->next)
352 			if (arc->destination == destinationNode)
353 				return arc;
354 		return NULL;
355 	}
356 
357 	twinOrigin = originNode->twinNode;
358 	for (arc = twinDestination->arc; arc != NULL; arc = arc->next)
359 		if (arc->destination == twinOrigin)
360 			return arc->twinArc;
361 	return NULL;
362 }
363 
destroyArc(Arc * arc,Graph * graph)364 void destroyArc(Arc * arc, Graph * graph)
365 {
366 	Node *origin, *destination;
367 	Arc *twinArc;
368 	Arc *currentArc;
369 	IDnum lookupIndex;
370 
371 	if (arc == NULL)
372 		return;
373 
374 	twinArc = arc->twinArc;
375 	origin = twinArc->destination->twinNode;
376 	destination = arc->destination->twinNode;
377 
378 	//velvetLog("Destroying arc %p\n", arc);
379 
380 	// Removing arc from list
381 	if (origin->arc == arc) {
382 		origin->arc = arc->next;
383 		if (origin->arc != NULL)
384 			origin->arc->previous = NULL;
385 	} else {
386 		arc->previous->next = arc->next;
387 		if (arc->next != NULL)
388 			arc->next->previous = arc->previous;
389 	}
390 
391 	origin->arcCount--;
392 
393 	if (destination == origin) {
394 		if (graph->arcLookupTable != NULL) {
395 			lookupIndex =
396 			    2 * origin->ID - destination->ID +
397 			    3 * graph->nodeCount;
398 			currentArc = graph->arcLookupTable[lookupIndex];
399 			if (currentArc == arc)
400 				graph->arcLookupTable[lookupIndex] =
401 				    arc->nextInLookupTable;
402 			else {
403 				while (currentArc->nextInLookupTable !=
404 				       arc)
405 					currentArc =
406 					    currentArc->nextInLookupTable;
407 
408 				currentArc->nextInLookupTable =
409 				    twinArc->nextInLookupTable;
410 			}
411 		}
412 
413 		deallocateArc(arc);
414 		return;
415 	}
416 	// Removing arc's twin from list
417 	if (destination->arc == twinArc) {
418 		destination->arc = twinArc->next;
419 		if (destination->arc != NULL)
420 			destination->arc->previous = NULL;
421 	} else {
422 		twinArc->previous->next = twinArc->next;
423 		if (twinArc->next != NULL)
424 			twinArc->next->previous = twinArc->previous;
425 	}
426 
427 	destination->arcCount--;
428 
429 	if (graph->arcLookupTable != NULL) {
430 		lookupIndex =
431 		    2 * origin->ID - destination->ID +
432 		    3 * graph->nodeCount;
433 		currentArc = graph->arcLookupTable[lookupIndex];
434 		if (currentArc == arc)
435 			graph->arcLookupTable[lookupIndex] =
436 			    arc->nextInLookupTable;
437 		else {
438 			while (currentArc->nextInLookupTable != arc)
439 				currentArc = currentArc->nextInLookupTable;
440 
441 			currentArc->nextInLookupTable =
442 			    arc->nextInLookupTable;
443 		}
444 
445 		lookupIndex =
446 		    2 * destination->ID - origin->ID +
447 		    3 * graph->nodeCount;
448 		currentArc = graph->arcLookupTable[lookupIndex];
449 		if (currentArc == twinArc)
450 			graph->arcLookupTable[lookupIndex] =
451 			    twinArc->nextInLookupTable;
452 		else {
453 			while (currentArc->nextInLookupTable != twinArc)
454 				currentArc = currentArc->nextInLookupTable;
455 
456 			currentArc->nextInLookupTable =
457 			    twinArc->nextInLookupTable;
458 		}
459 	}
460 	// Freeing memory
461 	deallocateArc(arc);
462 	deallocateArc(twinArc);
463 }
464 
destroyNode(Node * node,Graph * graph)465 void destroyNode(Node * node, Graph * graph)
466 {
467 	Node *twin = node->twinNode;
468 	IDnum ID = node->ID;
469 	IDnum index;
470 
471 	//velvetLog("Destroying %d\n and twin %d\n", getNodeID(node), getNodeID(twin));
472 
473 	if (ID < 0)
474 		ID = -ID;
475 
476 	// Node arcs:
477 	while (node->arc != NULL)
478 		destroyArc(node->arc, graph);
479 	while (twin->arc != NULL)
480 		destroyArc(twin->arc, graph);
481 
482 	// Descriptors
483 	free(node->descriptor);
484 	free(twin->descriptor);
485 
486 	// Passage markers
487 	while (node->marker != NULL_IDX)
488 		destroyPassageMarker(node->marker);
489 
490 	// Reads starts
491 	if (graph->nodeReads != NULL) {
492 		index = ID + graph->nodeCount;
493 		free(graph->nodeReads[index]);
494 		graph->nodeReads[index] = NULL;
495 		graph->nodeReadCounts[index] = 0;
496 
497 		index = -ID + graph->nodeCount;
498 		free(graph->nodeReads[index]);
499 		graph->nodeReads[index] = NULL;
500 		graph->nodeReadCounts[index] = 0;
501 	}
502 
503 	graph->nodes[ID] = NULL;
504 	deallocateNode(node);
505 	deallocateNode(twin);
506 }
507 
outDegree(Node * node)508 int outDegree(Node * node)
509 {
510 	int result = 0;
511 	Arc *arc = node->arc;
512 	while (arc != NULL) {
513 		result += arc->multiplicity;
514 		arc = arc->next;
515 	}
516 
517 	return result;
518 }
519 
simpleArcCount(Node * node)520 int simpleArcCount(Node * node)
521 {
522 	return node->arcCount;
523 }
524 
arcCount(Node * node)525 int arcCount(Node * node)
526 {
527 	int result = 0;
528 	Arc *arc;
529 
530 	if (node == NULL)
531 		return result;
532 
533 	arc = node->arc;
534 	while (arc != NULL) {
535 		result++;
536 		if (arc->destination == node->twinNode)
537 			result++;
538 		arc = arc->next;
539 	}
540 
541 	return result;
542 
543 }
544 
getNucleotideInDescriptor(Descriptor * descriptor,Coordinate i)545 static Nucleotide getNucleotideInDescriptor(Descriptor * descriptor,
546 					    Coordinate i)
547 {
548 	Descriptor *fourMer = descriptor + i / 4;
549 
550 	switch (i % 4) {
551 	case 0:
552 		return (*fourMer & 3);
553 	case 1:
554 		return (*fourMer & 12) >> 2;
555 	case 2:
556 		return (*fourMer & 48) >> 4;
557 	case 3:
558 		return (*fourMer & 192) >> 6;
559 	}
560 	return 0;
561 }
562 
getNucleotideInNode(Node * node,Coordinate index)563 Nucleotide getNucleotideInNode(Node * node, Coordinate index) {
564         return getNucleotideInDescriptor(node->descriptor, index);
565 }
566 
getMarker(Node * node)567 PassageMarkerI getMarker(Node * node)
568 {
569 	return node->marker;
570 }
571 
setMarker(Node * node,PassageMarkerI marker)572 void setMarker(Node * node, PassageMarkerI marker)
573 {
574 	if (node == NULL)
575 		return;
576 
577 	if (marker == NULL_IDX) {
578 		node->marker = NULL_IDX;
579 		node->twinNode->marker = NULL_IDX;
580 		return;
581 	}
582 
583 	node->marker = marker;
584 	setTopOfTheNode(marker);
585 	node->twinNode->marker = getTwinMarker(marker);
586 	setTopOfTheNode(getTwinMarker(marker));
587 }
588 
setNodeStatus(Node * node,boolean status)589 void setNodeStatus(Node * node, boolean status)
590 {
591 	node->status = status;
592 	node->twinNode->status = status;
593 }
594 
setSingleNodeStatus(Node * node,boolean status)595 void setSingleNodeStatus(Node * node, boolean status)
596 {
597 	node->status = status;
598 }
599 
getNodeStatus(Node * node)600 boolean getNodeStatus(Node * node)
601 {
602 	if (node == NULL)
603 		return false;
604 	return node->status;
605 }
606 
getNodeID(Node * node)607 IDnum getNodeID(Node * node)
608 {
609 	if (node == NULL)
610 		return 0;
611 
612 	return node->ID;
613 }
614 
resetNodeStatus(Graph * graph)615 void resetNodeStatus(Graph * graph)
616 {
617 	IDnum nodeIndex;
618 	Node *node;
619 
620 	for (nodeIndex = 1; nodeIndex <= graph->nodeCount; nodeIndex++) {
621 		node = graph->nodes[nodeIndex];
622 		if (node == NULL)
623 			continue;
624 
625 		node->status = false;
626 		node->twinNode->status = false;
627 	}
628 }
629 
getNodeInGraph(Graph * graph,IDnum nodeID)630 Node *getNodeInGraph(Graph * graph, IDnum nodeID)
631 {
632 	if (nodeID == 0)
633 		return NULL;
634 	else if (nodeID > 0)
635 		return graph->nodes[nodeID];
636 	else if (graph->nodes[-nodeID] == NULL)
637 		return NULL;
638 	else
639 		return graph->nodes[-nodeID]->twinNode;
640 }
641 
getArc(Node * node)642 Arc *getArc(Node * node)
643 {
644 	return node->arc;
645 }
646 
getNextArc(Arc * arc)647 Arc *getNextArc(Arc * arc)
648 {
649 	return arc->next;
650 }
651 
getMultiplicity(Arc * arc)652 IDnum getMultiplicity(Arc * arc)
653 {
654 	if (arc == NULL)
655 		return 0;
656 
657 	return arc->multiplicity;
658 }
659 
getOrigin(Arc * arc)660 Node *getOrigin(Arc * arc)
661 {
662 	if (arc == NULL)
663 		return NULL;
664 
665 	return arc->twinArc->destination->twinNode;
666 }
667 
getDestination(Arc * arc)668 Node *getDestination(Arc * arc)
669 {
670 	if (arc == NULL)
671 		return NULL;
672 
673 	return arc->destination;
674 }
675 
markerCount(Node * node)676 IDnum markerCount(Node * node)
677 {
678 	IDnum count = 0;
679 	PassageMarkerI marker;
680 
681 	for (marker = getMarker(node); marker != NULL_IDX;
682 	     marker = getNextInNode(marker))
683 		count++;
684 
685 	return count;
686 }
687 
appendNodeSequence(Node * node,TightString * sequence,Coordinate writeIndex)688 void appendNodeSequence(Node * node, TightString * sequence,
689 			Coordinate writeIndex)
690 {
691 	Coordinate i;
692 	Nucleotide nucleotide;
693 
694 	//velvetLog("Getting sequence from node %d of length %d (%d)\n", getNodeID(node), getNodeLength(node), getLength(nodeLabel));
695 
696 	for (i = 0; i < getNodeLength(node); i++) {
697 		nucleotide =
698 		    getNucleotideInDescriptor(node->descriptor, i);
699 		writeNucleotideAtPosition(nucleotide, i + writeIndex,
700 					  sequence);
701 	}
702 }
703 
writeNucleotideInDescriptor(Nucleotide nucleotide,Descriptor * descriptor,Coordinate i)704 static void writeNucleotideInDescriptor(Nucleotide nucleotide,
705 					Descriptor * descriptor,
706 					Coordinate i)
707 {
708 	Descriptor *fourMer = descriptor + i / 4;
709 	switch (i % 4) {
710 	case 3:
711 		*fourMer &= 63;
712 		*fourMer += nucleotide << 6;
713 		return;
714 	case 2:
715 		*fourMer &= 207;
716 		*fourMer += nucleotide << 4;
717 		return;
718 	case 1:
719 		*fourMer &= 243;
720 		*fourMer += nucleotide << 2;
721 		return;
722 	case 0:
723 		*fourMer &= 252;
724 		*fourMer += nucleotide;
725 	}
726 }
727 
mergeDescriptors(Descriptor * descr,Coordinate destinationLength,Descriptor * copy,Coordinate sourceLength,size_t arrayLength)728 static inline Descriptor *mergeDescriptors(Descriptor * descr,
729 					   Coordinate destinationLength,
730 					   Descriptor * copy,
731 					   Coordinate sourceLength,
732 					   size_t arrayLength)
733 {
734 	Descriptor *readPtr, *writePtr;
735 	Descriptor readCopy;
736 	int readOffset, writeOffset;
737 	Descriptor *new = callocOrExit(arrayLength, Descriptor);
738 	Coordinate index;
739 
740 	readPtr = descr;
741 	readCopy = *readPtr;
742 	writePtr = new;
743 	writeOffset = 0;
744 	for (index = 0; index < destinationLength; index++) {
745 		(*writePtr) >>= 2;
746 		(*writePtr) += (readCopy & 3) << 6;
747 		readCopy >>= 2;
748 
749 		writeOffset++;
750 		if (writeOffset == 4) {
751 			writePtr++;
752 			readPtr++;
753 			if (index < destinationLength - 1)
754 				readCopy = *readPtr;
755 			writeOffset = 0;
756 		}
757 	}
758 
759 	readPtr = copy;
760 	readCopy = *readPtr;
761 	readOffset = 0;
762 	for (index = 0; index < sourceLength; index++) {
763 		(*writePtr) >>= 2;
764 		(*writePtr) += (readCopy & 3) << 6;
765 		readCopy >>= 2;
766 
767 		writeOffset++;
768 		if (writeOffset == 4) {
769 			writePtr++;
770 			writeOffset = 0;
771 		}
772 
773 		readOffset++;
774 		if (readOffset == 4) {
775 			readPtr++;
776 			if (index < sourceLength - 1)
777 				readCopy = *readPtr;
778 			readOffset = 0;
779 		}
780 	}
781 
782 	if (writeOffset != 0) {
783 		while (writeOffset != 4) {
784 			(*writePtr) >>= 2;
785 			writeOffset++;
786 		}
787 	}
788 
789 	return new;
790 }
791 
addBufferToDescriptor(Node * node,Coordinate length)792 static void addBufferToDescriptor(Node * node, Coordinate length)
793 {
794 	Descriptor *descr;
795 	Coordinate newLength;
796 	size_t arrayLength;
797 	Node *twinNode;
798 	Coordinate index;
799 	Descriptor *old_descriptor;
800 
801 	if (node == NULL)
802 		return;
803 
804 	twinNode = node->twinNode;
805 	descr = node->descriptor;
806 
807 	// Amendments for empty descriptors
808 	if (descr == NULL) {
809 		arrayLength = length / 4;
810 		if (length % 4 != 0)
811 			arrayLength++;
812 
813 		node->descriptor = callocOrExit(arrayLength, Descriptor);
814 		node->length = length;
815 		twinNode->descriptor =
816 		    callocOrExit(arrayLength, Descriptor);
817 		twinNode->length = length;
818 		return;
819 	}
820 
821 	newLength = node->length + length;
822 	arrayLength = newLength / 4;
823 	if (newLength % 4 != 0)
824 		arrayLength++;
825 
826 	// Merging forward descriptors
827 	node->descriptor =
828 	    reallocOrExit(node->descriptor, arrayLength, Descriptor);
829 
830 	for (index = node->length; index < newLength; index++)
831 		writeNucleotideInDescriptor(ADENINE, node->descriptor,
832 					    index);
833 	node->length = newLength;
834 
835 	// Merging reverse descriptors
836 	old_descriptor = twinNode->descriptor;
837 	twinNode->descriptor = callocOrExit(arrayLength, Descriptor);
838 	for (index = 0; index < twinNode->length; index++)
839 		writeNucleotideInDescriptor(getNucleotideInDescriptor
840 					    (old_descriptor, index),
841 					    twinNode->descriptor,
842 					    index + length);
843 	for (index = 0; index < length; index++)
844 		writeNucleotideInDescriptor(THYMINE, twinNode->descriptor,
845 					    index);
846 	free(old_descriptor);
847 	twinNode->length = newLength;
848 }
849 
appendDescriptors(Node * destination,Node * source)850 void appendDescriptors(Node * destination, Node * source)
851 {
852 	Descriptor *copy;
853 	Descriptor *twinCopy;
854 	Descriptor *descr;
855 	Descriptor *twinDescr;
856 	Coordinate newLength, destinationLength, sourceLength;
857 	size_t arrayLength;
858 	Descriptor *new;
859 	Node *twinDestination;
860 
861 	if (source == NULL || destination == NULL)
862 		return;
863 
864 	twinDestination = destination->twinNode;
865 	descr = destination->descriptor;
866 	twinDescr = twinDestination->descriptor;
867 	copy = source->descriptor;
868 	twinCopy = source->twinNode->descriptor;
869 
870 	// Amendments for empty descriptors
871 	if (getNodeLength(source) == 0)
872 		return;
873 	if (getNodeLength(destination) == 0) {
874 		destination->descriptor = copy;
875 		twinDestination->descriptor = twinCopy;
876 		source->descriptor = NULL;
877 		source->twinNode->descriptor = NULL;
878 		destination->length = source->length;
879 		destination->twinNode->length = source->length;
880 		source->length = 0;
881 		source->twinNode->length = 0;
882 		return;
883 	}
884 
885 	destinationLength = destination->length;
886 	sourceLength = source->length;
887 	newLength = destinationLength + sourceLength;
888 	arrayLength = newLength / 4;
889 	if (newLength % 4 != 0)
890 		arrayLength++;
891 
892 	// Merging forward descriptors
893 	new =
894 	    mergeDescriptors(descr, destinationLength, copy, sourceLength,
895 			     arrayLength);
896 	free(descr);
897 	destination->descriptor = new;
898 	destination->length = newLength;
899 
900 	// Merging reverse descriptors
901 	new =
902 	    mergeDescriptors(twinCopy, sourceLength, twinDescr,
903 			     destinationLength, arrayLength);
904 	free(twinDescr);
905 	twinDestination->descriptor = new;
906 	twinDestination->length = newLength;
907 }
908 
catDescriptors(Descriptor * descr,Coordinate destinationLength,Descriptor * copy,Coordinate sourceLength)909 static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
910 {
911 	Coordinate index;
912 	Nucleotide nucleotide;
913 
914 	for (index = 0; index < sourceLength; index++) {
915 		nucleotide = getNucleotideInDescriptor(copy, index);
916 		writeNucleotideInDescriptor(nucleotide, descr, index + destinationLength);
917 	}
918 }
919 
reverseCatDescriptors(Descriptor * descr,Coordinate destinationLength,Descriptor * copy,Coordinate sourceLength,Coordinate totalLength)920 static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
921 {
922 	Coordinate shift = totalLength - destinationLength - sourceLength;
923 	Coordinate index;
924 	Nucleotide nucleotide;
925 
926 	for (index = 0; index < sourceLength; index++) {
927 		nucleotide = getNucleotideInDescriptor(copy, index);
928 		writeNucleotideInDescriptor(nucleotide, descr, index + shift);
929 	}
930 }
931 
directlyAppendDescriptors(Node * destination,Node * source,Coordinate totalLength)932 void directlyAppendDescriptors(Node * destination, Node * source, Coordinate totalLength)
933 {
934 	Descriptor *copy;
935 	Descriptor *twinCopy;
936 	Descriptor *descr;
937 	Descriptor *twinDescr;
938 	Coordinate destinationLength, sourceLength;
939 
940 	if (source == NULL || destination == NULL)
941 		return;
942 
943 	descr = destination->descriptor;
944 	twinDescr = destination->twinNode->descriptor;
945 	copy = source->descriptor;
946 	twinCopy = source->twinNode->descriptor;
947 
948 	// Amendments for empty descriptors
949 	if (getNodeLength(source) == 0)
950 		return;
951 
952 	destinationLength = destination->length;
953 	sourceLength = source->length;
954 
955 	// Merging forward descriptors
956 	catDescriptors(descr, destinationLength, copy, sourceLength);
957 
958 	// Merging reverse descriptors
959 	reverseCatDescriptors(twinDescr, destinationLength, twinCopy, sourceLength, totalLength);
960 
961 	destination->length += source->length;
962 	destination->twinNode->length += source->length;
963 }
964 
copyDownDescriptor(Descriptor ** writePtr,int * writeOffset,Descriptor * source,Coordinate length)965 static void copyDownDescriptor(Descriptor ** writePtr, int *writeOffset,
966 			       Descriptor * source, Coordinate length)
967 {
968 	Descriptor *readPtr = source;
969 	Descriptor readCopy = *readPtr;
970 	int readOffset = 0;
971 	Coordinate index;
972 
973 	for (index = 0; index < length; index++) {
974 		(**writePtr) >>= 2;
975 		(**writePtr) += (readCopy & 3) << 6;
976 		readCopy >>= 2;
977 
978 		(*writeOffset)++;
979 		if (*writeOffset == 4) {
980 			(*writePtr)++;
981 			*writeOffset = 0;
982 		}
983 
984 		readOffset++;
985 		if (readOffset == 4) {
986 			readPtr++;
987 			if (index < length - 1)
988 				readCopy = *readPtr;
989 			readOffset = 0;
990 		}
991 	}
992 }
993 
copyDownSequence(Descriptor ** writePtr,int * writeOffset,TightString * sequence,Coordinate start,Coordinate finish,int WORDLENGTH)994 static void copyDownSequence(Descriptor ** writePtr, int *writeOffset,
995 			     TightString * sequence, Coordinate start,
996 			     Coordinate finish, int WORDLENGTH)
997 {
998 	boolean forward = (start < finish);
999 	Coordinate sourceLength = finish - start;
1000 	Coordinate index;
1001 	Nucleotide nucleotide;
1002 
1003 	if (!forward)
1004 		sourceLength *= -1;
1005 
1006 	for (index = 0; index < sourceLength; index++) {
1007 		if (forward)
1008 			nucleotide =
1009 			    getNucleotide(start + WORDLENGTH - 1 + index,
1010 					  sequence);
1011 		else
1012 			nucleotide =
1013 #ifndef COLOR
1014 			    3 - getNucleotide(start - index - 1, sequence);
1015 #else
1016 			    getNucleotide(start - index - 1, sequence);
1017 #endif
1018 
1019 		(**writePtr) >>= 2;
1020 		(**writePtr) += nucleotide << 6;
1021 
1022 		(*writeOffset)++;
1023 		if (*writeOffset == 4) {
1024 			(*writePtr)++;
1025 			*writeOffset = 0;
1026 		}
1027 	}
1028 }
1029 
appendSequenceToDescriptor(Descriptor * descr,Coordinate nodeLength,PassageMarkerI marker,TightString * sequences,int WORDLENGTH,size_t arrayLength,boolean downStream)1030 static Descriptor *appendSequenceToDescriptor(Descriptor * descr,
1031 					      Coordinate nodeLength,
1032 					      PassageMarkerI marker,
1033 					      TightString *sequences,
1034 					      int WORDLENGTH,
1035 					      size_t arrayLength,
1036 					      boolean downStream)
1037 {
1038 	int writeOffset = 0;
1039 	Descriptor *new = callocOrExit(arrayLength, Descriptor);
1040 	Descriptor *writePtr = new;
1041 	TightString *sequence;
1042 	IDnum sequenceID = getPassageMarkerSequenceID(marker);
1043 	Coordinate start = getPassageMarkerStart(marker);
1044 	Coordinate finish = getPassageMarkerFinish(marker);
1045 
1046 	if (sequenceID > 0)
1047 		sequence = getTightStringInArray(sequences, sequenceID - 1);
1048 	else
1049 		sequence = getTightStringInArray(sequences, -sequenceID - 1);
1050 
1051 	if (downStream)
1052 		copyDownDescriptor(&writePtr, &writeOffset, descr,
1053 				   nodeLength);
1054 
1055 	copyDownSequence(&writePtr, &writeOffset, sequence, start, finish,
1056 			 WORDLENGTH);
1057 
1058 	if (!downStream)
1059 		copyDownDescriptor(&writePtr, &writeOffset, descr,
1060 				   nodeLength);
1061 
1062 	if (writeOffset != 0) {
1063 		while (writeOffset != 4) {
1064 			(*writePtr) >>= 2;
1065 			writeOffset++;
1066 		}
1067 	}
1068 
1069 	return new;
1070 }
1071 
appendSequence(Node * node,TightString * reads,PassageMarkerI guide,Graph * graph)1072 void appendSequence(Node * node, TightString * reads,
1073 		    PassageMarkerI guide, Graph * graph)
1074 {
1075 	Descriptor *descr;
1076 	Descriptor *twinDescr;
1077 	Coordinate newLength, nodeLength, sourceLength;
1078 	size_t arrayLength;
1079 	Descriptor *new;
1080 	Node *twinNode;
1081 
1082 	if (node == NULL)
1083 		return;
1084 
1085 	twinNode = node->twinNode;
1086 	descr = node->descriptor;
1087 	twinDescr = twinNode->descriptor;
1088 	nodeLength = node->length;
1089 	sourceLength = getPassageMarkerLength(guide);
1090 
1091 	// Amendments for empty descriptors
1092 	if (sourceLength == 0)
1093 		return;
1094 
1095 	newLength = nodeLength + sourceLength;
1096 	arrayLength = newLength / 4;
1097 	if (newLength % 4 != 0)
1098 		arrayLength++;
1099 
1100 	// Merging forward descriptors
1101 	new =
1102 	    appendSequenceToDescriptor(descr, nodeLength, guide, reads,
1103 				       getWordLength(graph), arrayLength,
1104 				       true);
1105 	free(descr);
1106 	node->descriptor = new;
1107 	node->length = newLength;
1108 
1109 	// Merging reverse descriptors
1110 	new =
1111 	    appendSequenceToDescriptor(twinDescr, nodeLength,
1112 				       getTwinMarker(guide), reads,
1113 				       getWordLength(graph), arrayLength,
1114 				       false);
1115 	free(twinDescr);
1116 	twinNode->descriptor = new;
1117 	twinNode->length = newLength;
1118 }
1119 
setMultiplicity(Arc * arc,IDnum mult)1120 void setMultiplicity(Arc * arc, IDnum mult)
1121 {
1122 	arc->multiplicity = mult;
1123 	arc->twinArc->multiplicity = mult;
1124 }
1125 
1126 // Reshuffles the graph->nodes array to remove NULL pointers
1127 // Beware that node IDs are accordingly reshuffled (all pointers remain valid though)
renumberNodes(Graph * graph)1128 void renumberNodes(Graph * graph)
1129 {
1130 	IDnum nodeIndex;
1131 	Node *currentNode;
1132 	IDnum counter = 0;
1133 	IDnum nodes = graph->nodeCount;
1134 	IDnum newIndex;
1135 
1136 	velvetLog("Renumbering nodes\n");
1137 	velvetLog("Initial node count %li\n", (long) graph->nodeCount);
1138 
1139 	for (nodeIndex = 1; nodeIndex <= nodes; nodeIndex++) {
1140 		currentNode = getNodeInGraph(graph, nodeIndex);
1141 
1142 		if (currentNode == NULL)
1143 			counter++;
1144 		else if (counter != 0) {
1145 			newIndex = nodeIndex - counter;
1146 			currentNode->ID = newIndex;
1147 			currentNode->twinNode->ID = -newIndex;
1148 			graph->nodes[newIndex] = currentNode;
1149 
1150 			if (graph->nodeReads != NULL) {
1151 				graph->nodeReads[newIndex + nodes] =
1152 				    graph->nodeReads[nodeIndex + nodes];
1153 				graph->nodeReadCounts[newIndex + nodes] =
1154 				    graph->nodeReadCounts[nodeIndex +
1155 							  nodes];
1156 
1157 				graph->nodeReads[nodeIndex + nodes] = NULL;
1158 				graph->nodeReadCounts[nodeIndex + nodes] =
1159 				    0;
1160 
1161 				graph->nodeReads[-newIndex + nodes] =
1162 				    graph->nodeReads[-nodeIndex + nodes];
1163 				graph->nodeReadCounts[-newIndex + nodes] =
1164 				    graph->nodeReadCounts[-nodeIndex +
1165 							  nodes];
1166 
1167 				graph->nodeReads[-nodeIndex + nodes] =
1168 				    NULL;
1169 				graph->nodeReadCounts[-nodeIndex + nodes] =
1170 				    0;
1171 			}
1172 
1173 			if (graph->gapMarkers != NULL) {
1174 				graph->gapMarkers[newIndex] =
1175 				    graph->gapMarkers[nodeIndex];
1176 				graph->gapMarkers[nodeIndex] = NULL;
1177 			}
1178 		}
1179 	}
1180 
1181 	// Shitfting array to the left
1182 	if (graph->nodeReads != NULL && counter != 0) {
1183 		for (nodeIndex = counter; nodeIndex <= 2 * nodes - counter;
1184 		     nodeIndex++) {
1185 			graph->nodeReads[nodeIndex - counter] =
1186 			    graph->nodeReads[nodeIndex];
1187 			graph->nodeReadCounts[nodeIndex - counter] =
1188 			    graph->nodeReadCounts[nodeIndex];
1189 		}
1190 	}
1191 
1192 	// Rellocating node space
1193 	graph->nodeCount -= counter;
1194 	graph->nodes =
1195 	    reallocOrExit(graph->nodes, graph->nodeCount + 1, Node *);
1196 
1197 	// Reallocating short read marker arrays
1198 	if (graph->nodeReads != NULL) {
1199 		graph->nodeReads =
1200 		    reallocOrExit(graph->nodeReads,
1201 			    2 * graph->nodeCount +
1202 			     1, ShortReadMarker *);
1203 		graph->nodeReadCounts =
1204 		    reallocOrExit(graph->nodeReadCounts,
1205 			    2 * graph->nodeCount + 1, IDnum);
1206 	}
1207 
1208 	// Reallocating gap marker table
1209 	if (graph->gapMarkers != NULL)
1210 		graph->gapMarkers = reallocOrExit(graph->gapMarkers,
1211 					    graph->nodeCount +
1212 					     1, GapMarker *);
1213 
1214 	velvetLog("Removed %li null nodes\n", (long) counter);
1215 }
1216 
splitNodeDescriptor(Node * source,Node * target,Coordinate offset)1217 void splitNodeDescriptor(Node * source, Node * target, Coordinate offset)
1218 {
1219 	Coordinate originalLength = source->length;
1220 	Coordinate backLength = originalLength - offset;
1221 	Coordinate index;
1222 	Descriptor *descriptor, *new;
1223 	size_t arrayLength;
1224 	Nucleotide nucleotide;
1225 
1226 	source->length = offset;
1227 	source->twinNode->length = offset;
1228 
1229 	if (target != NULL) {
1230 		target->length = backLength;
1231 		target->twinNode->length = backLength;
1232 		free(target->descriptor);
1233 		free(target->twinNode->descriptor);
1234 		target->descriptor = NULL;
1235 		target->twinNode->descriptor = NULL;
1236 	}
1237 
1238 	if (backLength == 0)
1239 		return;
1240 
1241 	descriptor = source->descriptor;
1242 
1243 	arrayLength = backLength / 4;
1244 	if (backLength % 4 > 0)
1245 		arrayLength++;
1246 
1247 	if (target != NULL) {
1248 		// Target node .. forwards
1249 		new = mallocOrExit(arrayLength, Descriptor);
1250 		target->descriptor = new;
1251 		for (index = 0; index < backLength; index++) {
1252 			nucleotide =
1253 			    getNucleotideInDescriptor(descriptor, index);
1254 			writeNucleotideInDescriptor(nucleotide, new,
1255 						    index);
1256 		}
1257 	}
1258 	// Source node
1259 	for (index = backLength; index < originalLength; index++) {
1260 		nucleotide = getNucleotideInDescriptor(descriptor, index);
1261 		writeNucleotideInDescriptor(nucleotide, descriptor,
1262 					    index - backLength);
1263 	}
1264 
1265 	if (target == NULL)
1266 		return;
1267 
1268 	// target node other way
1269 	descriptor = source->twinNode->descriptor;
1270 	new = mallocOrExit(arrayLength, Descriptor);
1271 	target->twinNode->descriptor = new;
1272 
1273 	for (index = offset; index < originalLength; index++) {
1274 		nucleotide = getNucleotideInDescriptor(descriptor, index);
1275 		writeNucleotideInDescriptor(nucleotide, new,
1276 					    index - offset);
1277 	}
1278 }
1279 
reduceNode(Node * node)1280 void reduceNode(Node * node)
1281 {
1282 	free(node->descriptor);
1283 	node->descriptor = NULL;
1284 	node->length = 0;
1285 
1286 	free(node->twinNode->descriptor);
1287 	node->twinNode->descriptor = NULL;
1288 	node->twinNode->length = 0;
1289 }
1290 
1291 // Allocate memory for an empty graph created with sequenceCount different sequences
emptyGraph(IDnum sequenceCount,int wordLength)1292 Graph *emptyGraph(IDnum sequenceCount, int wordLength)
1293 {
1294 	Graph *newGraph = mallocOrExit(1, Graph);
1295 	newGraph->sequenceCount = sequenceCount;
1296 	newGraph->arcLookupTable = NULL;
1297 	newGraph->nodeReads = NULL;
1298 	newGraph->nodeReadCounts = NULL;
1299 	newGraph->wordLength = wordLength;
1300 	newGraph->gapMarkers = NULL;
1301 	return newGraph;
1302 }
1303 
newPositiveDescriptor(IDnum sequenceID,Coordinate start,Coordinate finish,TightString * sequences,int WORDLENGTH)1304 static Descriptor *newPositiveDescriptor(IDnum sequenceID,
1305 					 Coordinate start,
1306 					 Coordinate finish,
1307 					 TightString *sequences,
1308 					 int WORDLENGTH)
1309 {
1310 	Coordinate index;
1311 	Nucleotide nucleotide;
1312 	TightString *tString = getTightStringInArray (sequences, sequenceID - 1);
1313 	Coordinate length = finish - start;
1314 	Descriptor *res;
1315 	size_t arrayLength = length / 4;
1316 
1317 	if (length % 4 > 0)
1318 		arrayLength++;
1319 
1320 	res = mallocOrExit(arrayLength, Descriptor);
1321 
1322 	for (index = 0; index < length; index++) {
1323 		nucleotide =
1324 		    getNucleotide(start + index + WORDLENGTH - 1, tString);
1325 		writeNucleotideInDescriptor(nucleotide, res, index);
1326 	}
1327 
1328 	return res;
1329 
1330 }
1331 
newNegativeDescriptor(IDnum sequenceID,Coordinate start,Coordinate finish,TightString * sequences,int WORDLENGTH)1332 static Descriptor *newNegativeDescriptor(IDnum sequenceID,
1333 					 Coordinate start,
1334 					 Coordinate finish,
1335 					 TightString *sequences,
1336 					 int WORDLENGTH)
1337 {
1338 	Coordinate index;
1339 	Nucleotide nucleotide;
1340 	TightString *tString = getTightStringInArray (sequences, -sequenceID - 1);
1341 	Coordinate length = start - finish;
1342 	Descriptor *res;
1343 	size_t arrayLength = length / 4;
1344 
1345 	if (length % 4 > 0)
1346 		arrayLength++;
1347 
1348 	res = mallocOrExit(arrayLength, Descriptor);
1349 
1350 	for (index = 0; index < length; index++) {
1351 		nucleotide = getNucleotide(start - index, tString);
1352 #ifndef COLOR
1353 		writeNucleotideInDescriptor(3 - nucleotide, res, index);
1354 #else
1355 		writeNucleotideInDescriptor(nucleotide, res, index);
1356 #endif
1357 	}
1358 
1359 	return res;
1360 
1361 }
1362 
newDescriptor(IDnum sequenceID,Coordinate start,Coordinate finish,TightString * sequences,int WORDLENGTH)1363 static Descriptor *newDescriptor(IDnum sequenceID, Coordinate start,
1364 				 Coordinate finish,
1365 				 TightString * sequences, int WORDLENGTH)
1366 {
1367 	if (sequenceID > 0)
1368 		return newPositiveDescriptor(sequenceID, start, finish,
1369 					     sequences, WORDLENGTH);
1370 	else
1371 		return newNegativeDescriptor(sequenceID, start, finish,
1372 					     sequences, WORDLENGTH);
1373 }
1374 
1375 // Constructor
1376 // Memory allocated
newNode(IDnum sequenceID,Coordinate start,Coordinate finish,Coordinate offset,IDnum ID,TightString * sequences,int WORDLENGTH)1377 Node *newNode(IDnum sequenceID, Coordinate start, Coordinate finish,
1378 	      Coordinate offset, IDnum ID, TightString * sequences,
1379 	      int WORDLENGTH)
1380 {
1381 	Node *newnd = allocateNode();
1382 	Node *antiNode = allocateNode();
1383 
1384 	newnd->ID = ID;
1385 	newnd->descriptor =
1386 	    newDescriptor(sequenceID, start + offset, finish + offset,
1387 			  sequences, WORDLENGTH);
1388 	newnd->arc = NULL;
1389 	newnd->arcCount = 0;
1390 	newnd->marker = NULL_IDX;
1391 	newnd->status = false;
1392 
1393 #ifndef SINGLE_COV_CAT
1394 	Category cat;
1395 	for (cat = 0; cat < CATEGORIES; cat++) {
1396 		newnd->virtualCoverage[cat] = 0;
1397 		newnd->originalVirtualCoverage[cat] = 0;
1398 	}
1399 #else
1400 	newnd->virtualCoverage = 0;
1401 #endif
1402 
1403 	antiNode->ID = -ID;
1404 	antiNode->descriptor =
1405 	    newDescriptor(-sequenceID, finish + offset - 1,
1406 			  start + offset - 1, sequences, WORDLENGTH);
1407 	antiNode->arc = NULL;
1408 	antiNode->arcCount = 0;
1409 	antiNode->marker = NULL_IDX;
1410 	antiNode->status = false;
1411 
1412 #ifndef SINGLE_COV_CAT
1413 	for (cat = 0; cat < CATEGORIES; cat++) {
1414 		antiNode->virtualCoverage[cat] = 0;
1415 		antiNode->originalVirtualCoverage[cat] = 0;
1416 	}
1417 #else
1418 	antiNode->virtualCoverage = 0;
1419 #endif
1420 
1421 	newnd->twinNode = antiNode;
1422 	antiNode->twinNode = newnd;
1423 
1424 	if (sequenceID > 0) {
1425 		newnd->length = finish - start;
1426 		antiNode->length = finish - start;
1427 	} else {
1428 		newnd->length = start - finish;
1429 		antiNode->length = start - finish;
1430 	}
1431 
1432 	return newnd;
1433 }
1434 
allocateNodeSpace(Graph * graph,IDnum nodeCount)1435 void allocateNodeSpace(Graph * graph, IDnum nodeCount)
1436 {
1437 	graph->nodes = callocOrExit(nodeCount + 1, Node *);
1438 	graph->nodeCount = nodeCount;
1439 }
1440 
getUniqueness(Node * node)1441 boolean getUniqueness(Node * node)
1442 {
1443 	return node->uniqueness;
1444 }
1445 
setUniqueness(Node * node,boolean value)1446 void setUniqueness(Node * node, boolean value)
1447 {
1448 	node->uniqueness = value;
1449 	node->twinNode->uniqueness = value;
1450 }
1451 
emptyNode()1452 Node *emptyNode()
1453 {
1454 	Node *newnd = allocateNode();
1455 	Node *antiNode = allocateNode();
1456 
1457 	newnd->ID = 0;
1458 	newnd->descriptor = NULL;
1459 	newnd->arc = NULL;
1460 	newnd->arcCount = 0;
1461 	newnd->marker = NULL_IDX;
1462 	newnd->length = 0;
1463 	newnd->uniqueness = false;
1464 
1465 #ifndef SINGLE_COV_CAT
1466 	Category cat;
1467 	for (cat = 0; cat < CATEGORIES; cat++) {
1468 		newnd->virtualCoverage[cat] = 0;
1469 		newnd->originalVirtualCoverage[cat] = 0;
1470 	}
1471 #else
1472 	newnd->virtualCoverage = 0;
1473 #endif
1474 
1475 	antiNode->ID = 0;
1476 	antiNode->descriptor = NULL;
1477 	antiNode->arc = NULL;
1478 	antiNode->arcCount = 0;
1479 	antiNode->marker = NULL_IDX;
1480 	antiNode->length = 0;
1481 	antiNode->uniqueness = false;
1482 
1483 #ifndef SINGLE_COV_CAT
1484 	for (cat = 0; cat < CATEGORIES; cat++) {
1485 		antiNode->virtualCoverage[cat] = 0;
1486 		antiNode->originalVirtualCoverage[cat] = 0;
1487 	}
1488 #else
1489 	antiNode->virtualCoverage = 0;
1490 #endif
1491 
1492 	newnd->twinNode = antiNode;
1493 	antiNode->twinNode = newnd;
1494 
1495 	return newnd;
1496 
1497 }
1498 
addEmptyNodeToGraph(Graph * graph,IDnum ID)1499 Node *addEmptyNodeToGraph(Graph * graph, IDnum ID)
1500 {
1501 	Node *newnd = emptyNode();
1502 
1503 	newnd->ID = ID;
1504 	newnd->twinNode->ID = -ID;
1505 
1506 	graph->nodes[ID] = newnd;
1507 
1508 	return newnd;
1509 
1510 }
1511 
1512 #ifndef SINGLE_COV_CAT
1513 
setVirtualCoverage(Node * node,Category category,Coordinate coverage)1514 void setVirtualCoverage(Node * node, Category category,
1515 			Coordinate coverage)
1516 {
1517 	node->virtualCoverage[category] = coverage;
1518 	node->twinNode->virtualCoverage[category] =
1519 	    node->virtualCoverage[category];
1520 }
1521 
incrementVirtualCoverage(Node * node,Category category,Coordinate coverage)1522 void incrementVirtualCoverage(Node * node, Category category,
1523 			      Coordinate coverage)
1524 {
1525 	node->virtualCoverage[category] += coverage;
1526 	node->twinNode->virtualCoverage[category] =
1527 	    node->virtualCoverage[category];
1528 }
1529 
getVirtualCoverage(Node * node,Category category)1530 Coordinate getVirtualCoverage(Node * node, Category category)
1531 {
1532 	return node->virtualCoverage[category];
1533 }
1534 
getTotalCoverage(Node * node)1535 Coordinate getTotalCoverage(Node * node)
1536 {
1537 	Category cat;
1538 	Coordinate coverage = 0;
1539 
1540 	for (cat = 0; cat < CATEGORIES; cat++)
1541 		coverage += node->virtualCoverage[cat];
1542 
1543 	return coverage;
1544 }
1545 
setOriginalVirtualCoverage(Node * node,Category category,Coordinate coverage)1546 void setOriginalVirtualCoverage(Node * node, Category category,
1547 				Coordinate coverage)
1548 {
1549 	node->originalVirtualCoverage[category] = coverage;
1550 	node->twinNode->originalVirtualCoverage[category] =
1551 	    node->originalVirtualCoverage[category];
1552 }
1553 
incrementOriginalVirtualCoverage(Node * node,Category category,Coordinate coverage)1554 void incrementOriginalVirtualCoverage(Node * node, Category category,
1555 				      Coordinate coverage)
1556 {
1557 	node->originalVirtualCoverage[category] += coverage;
1558 	node->twinNode->originalVirtualCoverage[category] =
1559 	    node->originalVirtualCoverage[category];
1560 }
1561 
getOriginalVirtualCoverage(Node * node,Category category)1562 Coordinate getOriginalVirtualCoverage(Node * node, Category category)
1563 {
1564 	return node->originalVirtualCoverage[category];
1565 }
1566 
1567 #else
1568 
setVirtualCoverage(Node * node,Coordinate coverage)1569 void setVirtualCoverage(Node * node,
1570 			Coordinate coverage)
1571 {
1572 	node->virtualCoverage = coverage;
1573 	node->twinNode->virtualCoverage = coverage;
1574 }
1575 
incrementVirtualCoverage(Node * node,Coordinate coverage)1576 void incrementVirtualCoverage(Node * node,
1577 			      Coordinate coverage)
1578 {
1579 	node->virtualCoverage += coverage;
1580 	node->twinNode->virtualCoverage += coverage;
1581 }
1582 
getVirtualCoverage(Node * node)1583 Coordinate getVirtualCoverage(Node * node)
1584 {
1585 	return node->virtualCoverage;
1586 }
1587 
getTotalCoverage(Node * node)1588 Coordinate getTotalCoverage(Node * node)
1589 {
1590 	return node->virtualCoverage;
1591 }
1592 
1593 #endif
1594 
hasSingleArc(Node * node)1595 boolean hasSingleArc(Node * node)
1596 {
1597 	return node->arcCount == 1;
1598 }
1599 
activateArcLookupTable(Graph * graph)1600 void activateArcLookupTable(Graph * graph)
1601 {
1602 	IDnum index;
1603 	Node *node;
1604 	Arc *arc;
1605 	IDnum nodes = graph->nodeCount;
1606 	IDnum twinOriginID, destinationID, hash;
1607 	Arc **table;
1608 
1609 	velvetLog("Activating arc lookup table\n");
1610 
1611 	graph->arcLookupTable = callocOrExit(6 * nodes + 1, Arc *);
1612 
1613 	table = graph->arcLookupTable;
1614 
1615 	for (index = -nodes; index <= nodes; index++) {
1616 		if (index == 0)
1617 			continue;
1618 
1619 		node = getNodeInGraph(graph, index);
1620 		if (node == 0)
1621 			continue;
1622 
1623 		for (arc = getArc(node); arc != NULL;
1624 		     arc = getNextArc(arc)) {
1625 			twinOriginID = arc->twinArc->destination->ID;
1626 			destinationID = arc->destination->ID;
1627 			hash =
1628 			    3 * nodes - 2 * twinOriginID + destinationID;
1629 			arc->nextInLookupTable = table[hash];
1630 			table[hash] = arc;
1631 		}
1632 	}
1633 
1634 	velvetLog("Done activating arc lookup table\n");
1635 }
1636 
deactivateArcLookupTable(Graph * graph)1637 void deactivateArcLookupTable(Graph * graph)
1638 {
1639 	free(graph->arcLookupTable);
1640 	graph->arcLookupTable = NULL;
1641 }
1642 
exportNode(FILE * outfile,Node * node,void * withSequence)1643 static void exportNode(FILE * outfile, Node * node, void *withSequence)
1644 {
1645 	Coordinate index;
1646 	Nucleotide nucleotide;
1647 
1648 	if (node == NULL)
1649 		return;
1650 
1651 	velvetFprintf(outfile, "NODE\t%ld\t%lld", (long) node->ID, (long long) node->length);
1652 
1653 #ifndef SINGLE_COV_CAT
1654 	Category cat;
1655 	for (cat = 0; cat < CATEGORIES; cat++)
1656 		velvetFprintf(outfile, "\t%lld\t%lld", (long long) node->virtualCoverage[cat],
1657 			(long long) node->originalVirtualCoverage[cat]);
1658 	velvetFprintf(outfile, "\n");
1659 #else
1660 	velvetFprintf(outfile, "\t%lld\n", (long long) node->virtualCoverage);
1661 #endif
1662 
1663 	if (withSequence == NULL)
1664 		return;
1665 
1666 	for (index = 0; index < node->length; index++) {
1667 		nucleotide =
1668 		    getNucleotideInDescriptor(node->descriptor, index);
1669 		switch (nucleotide) {
1670 		case ADENINE:
1671 			velvetFprintf(outfile, "A");
1672 			break;
1673 		case CYTOSINE:
1674 			velvetFprintf(outfile, "C");
1675 			break;
1676 		case GUANINE:
1677 			velvetFprintf(outfile, "G");
1678 			break;
1679 		case THYMINE:
1680 			velvetFprintf(outfile, "T");
1681 			break;
1682 		}
1683 	}
1684 	velvetFprintf(outfile, "\n");
1685 
1686 	for (index = 0; index < node->length; index++) {
1687 		nucleotide =
1688 		    getNucleotideInDescriptor(node->twinNode->descriptor,
1689 					      index);
1690 		switch (nucleotide) {
1691 		case ADENINE:
1692 			velvetFprintf(outfile, "A");
1693 			break;
1694 		case CYTOSINE:
1695 			velvetFprintf(outfile, "C");
1696 			break;
1697 		case GUANINE:
1698 			velvetFprintf(outfile, "G");
1699 			break;
1700 		case THYMINE:
1701 			velvetFprintf(outfile, "T");
1702 			break;
1703 		}
1704 	}
1705 	velvetFprintf(outfile, "\n");
1706 }
1707 
exportArc(FILE * outfile,Arc * arc)1708 static void exportArc(FILE * outfile, Arc * arc)
1709 {
1710 	IDnum originID, destinationID;
1711 	IDnum absOriginID, absDestinationID;
1712 
1713 	if (arc == NULL)
1714 		return;
1715 
1716 	absOriginID = originID = -arc->twinArc->destination->ID;
1717 	absDestinationID = destinationID = arc->destination->ID;
1718 
1719 	if (absOriginID < 0)
1720 		absOriginID = -absOriginID;
1721 	if (absDestinationID < 0)
1722 		absDestinationID = -absDestinationID;
1723 
1724 	if (absDestinationID < absOriginID)
1725 		return;
1726 
1727 	if (originID == destinationID && originID < 0)
1728 		return;
1729 
1730 	velvetFprintf(outfile, "ARC\t%li\t%li\t%li\n", (long) originID, (long) destinationID,
1731 		(long) arc->multiplicity);
1732 }
1733 
1734 // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
mergeArcLists(Arc * left,Arc * right)1735 static Arc *mergeArcLists(Arc * left, Arc * right)
1736 {
1737 	Arc *mergedList = NULL;
1738 	Arc *tail = NULL;
1739 
1740 	// Choose first element:
1741 	if (left->destination->ID <= right->destination->ID) {
1742 		mergedList = left;
1743 		tail = left;
1744 		left = left->next;
1745 	} else {
1746 		mergedList = right;
1747 		tail = right;
1748 		right = right->next;
1749 	}
1750 
1751 	// Iterate while both lists are still non empty
1752 	while (left != NULL && right != NULL) {
1753 		if (left->destination->ID <= right->destination->ID) {
1754 			tail->next = left;
1755 			left->previous = tail;
1756 			left = left->next;
1757 		} else {
1758 			tail->next = right;
1759 			right->previous = tail;
1760 			right = right->next;
1761 		}
1762 
1763 		tail = tail->next;
1764 	}
1765 
1766 	// Concatenate the remaining list at the end of the merged list
1767 	if (left != NULL) {
1768 		tail->next = left;
1769 		left->previous = tail;
1770 	}
1771 
1772 	if (right != NULL) {
1773 		tail->next = right;
1774 		right->previous = tail;
1775 	}
1776 
1777 	return mergedList;
1778 }
1779 
arcMergeSort(Arc ** arcPtr,IDnum count)1780 static void arcMergeSort(Arc ** arcPtr, IDnum count)
1781 {
1782 
1783 	IDnum half = count / 2;
1784 	Arc *left = *arcPtr;
1785 	Arc *ptr = left;
1786 	Arc *right;
1787 	IDnum index;
1788 
1789 	if (count == 0 || count == 1)
1790 		return;
1791 
1792 	if (count == 2) {
1793 		if ((*arcPtr)->destination->ID >
1794 		    (*arcPtr)->next->destination->ID) {
1795 			(*arcPtr)->next->next = *arcPtr;
1796 			(*arcPtr)->previous = (*arcPtr)->next;
1797 			*arcPtr = (*arcPtr)->next;
1798 			(*arcPtr)->next->next = NULL;
1799 			(*arcPtr)->previous = NULL;
1800 		}
1801 		return;
1802 	}
1803 
1804 	for (index = 0; index < half - 1; index++) {
1805 		ptr = ptr->next;
1806 		if (ptr == NULL)
1807 			return;
1808 	}
1809 
1810 	right = ptr->next;
1811 	ptr->next = NULL;
1812 	right->previous = NULL;
1813 
1814 	arcMergeSort(&left, half);
1815 	arcMergeSort(&right, count - half);
1816 	*arcPtr = mergeArcLists(left, right);
1817 }
1818 
sortNodeArcs(Node * node)1819 static void sortNodeArcs(Node * node)
1820 {
1821 	Arc *arc;
1822 	IDnum count = 0;
1823 
1824 	for (arc = getArc(node); arc != NULL; arc = getNextArc(arc))
1825 		count++;
1826 
1827 	if (count == 0)
1828 		return;
1829 
1830 	arc = getArc(node);
1831 	arcMergeSort(&arc, count);
1832 
1833 	node->arc = arc;
1834 }
1835 
1836 // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
mergeGapMarkerLists(GapMarker * left,GapMarker * right)1837 static GapMarker *mergeGapMarkerLists(GapMarker * left, GapMarker * right)
1838 {
1839 	GapMarker *mergedList = NULL;
1840 	GapMarker *tail = NULL;
1841 
1842 	// Choose first element:
1843 	if (left->position <= right->position) {
1844 		mergedList = left;
1845 		tail = left;
1846 		left = left->next;
1847 	} else {
1848 		mergedList = right;
1849 		tail = right;
1850 		right = right->next;
1851 	}
1852 
1853 	// Iterate while both lists are still non empty
1854 	while (left != NULL && right != NULL) {
1855 		if (left->position <= right->position) {
1856 			tail->next = left;
1857 			left = left->next;
1858 		} else {
1859 			tail->next = right;
1860 			right = right->next;
1861 		}
1862 
1863 		tail = tail->next;
1864 	}
1865 
1866 	// Concatenate the remaining list at the end of the merged list
1867 	if (left != NULL)
1868 		tail->next = left;
1869 
1870 	if (right != NULL)
1871 		tail->next = right;
1872 
1873 	return mergedList;
1874 }
1875 
gapMergeSort(GapMarker ** gapPtr,IDnum count)1876 static void gapMergeSort(GapMarker ** gapPtr, IDnum count)
1877 {
1878 
1879 	IDnum half = count / 2;
1880 	GapMarker *left = *gapPtr;
1881 	GapMarker *ptr = left;
1882 	GapMarker *right;
1883 	IDnum index;
1884 
1885 	if (count == 0 || count == 1)
1886 		return;
1887 
1888 	if (count == 2) {
1889 		if ((*gapPtr)->position > (*gapPtr)->next->position) {
1890 			(*gapPtr)->next->next = *gapPtr;
1891 			*gapPtr = (*gapPtr)->next;
1892 			(*gapPtr)->next->next = NULL;
1893 		}
1894 		return;
1895 	}
1896 
1897 	for (index = 0; index < half - 1; index++) {
1898 		ptr = ptr->next;
1899 		if (ptr == NULL)
1900 			return;
1901 	}
1902 
1903 	right = ptr->next;
1904 	ptr->next = NULL;
1905 
1906 	gapMergeSort(&left, half);
1907 	gapMergeSort(&right, count - half);
1908 	*gapPtr = mergeGapMarkerLists(left, right);
1909 }
1910 
sortNodeGapMarkers(Node * node,Graph * graph)1911 static void sortNodeGapMarkers(Node * node, Graph * graph)
1912 {
1913 	GapMarker *gap;
1914 	IDnum count = 0;
1915 	IDnum ID = getNodeID(node);
1916 
1917 	if (ID < 0)
1918 		ID = -ID;
1919 
1920 	for (gap = graph->gapMarkers[ID]; gap != NULL; gap = gap->next)
1921 		count++;
1922 
1923 	if (count == 0)
1924 		return;
1925 
1926 	gap = graph->gapMarkers[ID];
1927 	gapMergeSort(&gap, count);
1928 
1929 	graph->gapMarkers[ID] = gap;
1930 }
1931 
sortGapMarkers(Graph * graph)1932 void sortGapMarkers(Graph * graph)
1933 {
1934 	IDnum index;
1935 	Node *node;
1936 
1937 	if (graph->gapMarkers == NULL)
1938 		return;
1939 
1940 	for (index = 1; index <= nodeCount(graph); index++) {
1941 		node = getNodeInGraph(graph, index);
1942 		if (node)
1943 			sortNodeGapMarkers(node, graph);
1944 	}
1945 }
1946 
exportGraph(char * filename,Graph * graph,TightString * sequences)1947 void exportGraph(char *filename, Graph * graph, TightString * sequences)
1948 {
1949 	IDnum index;
1950 	FILE *outfile;
1951 	Node *node;
1952 	Arc *arc;
1953 	PassageMarkerI marker;
1954 	ShortReadMarker *reads;
1955 	IDnum readCount, readIndex;
1956 
1957 	if (graph == NULL) {
1958 		return;
1959 	}
1960 
1961 	outfile = fopen(filename, "w");
1962 	if (outfile == NULL) {
1963 		velvetLog("Couldn't open file, sorry\n");
1964 		return;
1965 	} else
1966 		velvetLog("Writing into graph file %s...\n", filename);
1967 
1968 	// General data
1969 	velvetFprintf(outfile, "%li\t%li\t%i\t%i\n", (long) graph->nodeCount,
1970 		(long) graph->sequenceCount, graph->wordLength, (int) graph->double_stranded);
1971 
1972 	// Node info
1973 	for (index = 1; index <= graph->nodeCount; index++) {
1974 		node = getNodeInGraph(graph, index);
1975 		exportNode(outfile, node, (void *) sequences);
1976 	}
1977 
1978 	// Arc info
1979 	for (index = 1; index <= graph->nodeCount; index++) {
1980 		node = getNodeInGraph(graph, index);
1981 		if (node == NULL)
1982 			continue;
1983 
1984 		sortNodeArcs(node);
1985 		sortNodeArcs(getTwinNode(node));
1986 
1987 		for (arc = node->arc; arc != NULL; arc = arc->next)
1988 			exportArc(outfile, arc);
1989 		for (arc = node->twinNode->arc; arc != NULL;
1990 		     arc = arc->next)
1991 			exportArc(outfile, arc);
1992 	}
1993 
1994 	// Sequence info
1995 	for (index = 1; index <= graph->nodeCount; index++) {
1996 		node = getNodeInGraph(graph, index);
1997 		if (node == NULL)
1998 			continue;
1999 		for (marker = node->marker; marker != NULL_IDX;
2000 		     marker = getNextInNode(marker))
2001 			exportMarker(outfile, marker, sequences,
2002 				     graph->wordLength);
2003 	}
2004 
2005 	// Node reads
2006 	if (readStartsAreActivated(graph)) {
2007 		for (index = 0; index <= graph->nodeCount * 2; index++) {
2008 			readCount = graph->nodeReadCounts[index];
2009 			if (readCount == 0)
2010 				continue;
2011 
2012 			velvetFprintf(outfile, "NR\t%li\t%li\n",
2013 				(long) (index - graph->nodeCount), (long) readCount);
2014 
2015 			reads = graph->nodeReads[index];
2016 			for (readIndex = 0; readIndex < readCount;
2017 			     readIndex++)
2018 				velvetFprintf(outfile, "%ld\t%lld\t%d\n",
2019 					(long) reads[readIndex].readID,
2020 					(long long) reads[readIndex].position,
2021 					(int) reads[readIndex].offset);
2022 		}
2023 	}
2024 
2025 	fclose(outfile);
2026 }
2027 
importGraph(char * filename)2028 Graph *importGraph(char *filename)
2029 {
2030 	FILE *file = fopen(filename, "r");
2031 	const int maxline = MAXLINE;
2032 	char line[MAXLINE];
2033 	Graph *graph;
2034 	Coordinate coverage;
2035 	IDnum nodeCounter, sequenceCount;
2036 	Node *node, *twin;
2037 	Arc *arc;
2038 	IDnum originID, destinationID, multiplicity;
2039 	PassageMarkerI newMarker, marker;
2040 	IDnum nodeID, seqID;
2041 	Coordinate index;
2042 	Coordinate start, finish;
2043 	Coordinate startOffset, finishOffset;
2044 	boolean finished = false;
2045 	size_t arrayLength;
2046 	IDnum readCount;
2047 	ShortReadMarker *array;
2048 	int wordLength, sCount;
2049 	ShortLength length;
2050 	long long_var, long_var2, long_var3;
2051 	long long longlong_var, longlong_var2, longlong_var3, longlong_var4;
2052 	short short_var;
2053 	char c;
2054 
2055 	if (file == NULL)
2056 		exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
2057 
2058 	velvetLog("Reading graph file %s\n", filename);
2059 
2060 	// First  line
2061 	if (!fgets(line, maxline, file))
2062 		exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2063 	sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2064 	       &wordLength, &short_var);
2065 	nodeCounter = (IDnum) long_var;
2066 	sequenceCount = (IDnum) long_var2;
2067 	graph = emptyGraph(sequenceCount, wordLength);
2068 	graph->double_stranded = (boolean) short_var;
2069 	resetWordFilter(wordLength);
2070 	allocateNodeSpace(graph, nodeCounter);
2071 
2072 	velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2073 	       (long) sequenceCount);
2074 
2075 	if (nodeCounter == 0)
2076 		return graph;
2077 
2078 	// Read nodes
2079 	if (!fgets(line, maxline, file))
2080 		exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2081 	while (!finished && strncmp(line, "NODE", 4) == 0) {
2082 		strtok(line, "\t\n");
2083 		sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2084 		nodeID = (IDnum) long_var;
2085 		node = addEmptyNodeToGraph(graph, nodeID);
2086 		sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2087 		node->length = (Coordinate) longlong_var;
2088 
2089 #ifndef SINGLE_COV_CAT
2090 		Category cat;
2091 		Coordinate originalCoverage;
2092 		for (cat = 0; cat < CATEGORIES; cat++) {
2093 			sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2094 			coverage = (Coordinate) longlong_var;
2095 			setVirtualCoverage(node, cat, coverage);
2096 			sscanf(strtok(NULL, "\t\n"), "%lld",
2097 			       &longlong_var);
2098 			originalCoverage = (Coordinate) longlong_var;
2099 			setOriginalVirtualCoverage(node, cat,
2100 						   originalCoverage);
2101 		}
2102 #else
2103 		sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2104 		coverage = (Coordinate) longlong_var;
2105 		setVirtualCoverage(node, coverage);
2106 #endif
2107 
2108 		arrayLength = node->length / 4;
2109 		if (node->length % 4 > 0)
2110 			arrayLength++;
2111 		node->descriptor =
2112 		    callocOrExit(arrayLength, Descriptor);
2113 
2114 		index = 0;
2115 		while ((c = fgetc(file)) != '\n' && c != EOF) {
2116 			if (c == 'A')
2117 				writeNucleotideInDescriptor(ADENINE,
2118 							    node->
2119 							    descriptor,
2120 							    index++);
2121 			else if (c == 'C')
2122 				writeNucleotideInDescriptor(CYTOSINE,
2123 							    node->
2124 							    descriptor,
2125 							    index++);
2126 			else if (c == 'G')
2127 				writeNucleotideInDescriptor(GUANINE,
2128 							    node->
2129 							    descriptor,
2130 							    index++);
2131 			else if (c == 'T')
2132 				writeNucleotideInDescriptor(THYMINE,
2133 							    node->
2134 							    descriptor,
2135 							    index++);
2136 		}
2137 
2138 		twin = node->twinNode;
2139 		twin->length = node->length;
2140 		twin->descriptor =
2141 		    callocOrExit(arrayLength, Descriptor);
2142 		index = 0;
2143 		while ((c = fgetc(file)) != '\n' && c != EOF) {
2144 			if (c == 'A')
2145 				writeNucleotideInDescriptor(ADENINE,
2146 							    twin->
2147 							    descriptor,
2148 							    index++);
2149 			else if (c == 'C')
2150 				writeNucleotideInDescriptor(CYTOSINE,
2151 							    twin->
2152 							    descriptor,
2153 							    index++);
2154 			else if (c == 'G')
2155 				writeNucleotideInDescriptor(GUANINE,
2156 							    twin->
2157 							    descriptor,
2158 							    index++);
2159 			else if (c == 'T')
2160 				writeNucleotideInDescriptor(THYMINE,
2161 							    twin->
2162 							    descriptor,
2163 							    index++);
2164 		}
2165 
2166 		if (fgets(line, maxline, file) == NULL)
2167 			finished = true;
2168 	}
2169 
2170 	// Read arcs
2171 	while (!finished && line[0] == 'A') {
2172 		sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2173 		       &long_var2, &long_var3);
2174 		originID = (IDnum) long_var;
2175 		destinationID = (IDnum) long_var2;
2176 		multiplicity = (IDnum) long_var3;
2177 		arc =
2178 		    createArc(getNodeInGraph(graph, originID),
2179 			      getNodeInGraph(graph, destinationID), graph);
2180 		setMultiplicity(arc, multiplicity);
2181 		if (fgets(line, maxline, file) == NULL)
2182 			finished = true;
2183 	}
2184 
2185 	// Read sequences
2186 	while (!finished && line[0] != 'N') {
2187 		sscanf(line, "SEQ\t%ld\n", &long_var);
2188 		seqID = (IDnum) long_var;
2189 		marker = NULL_IDX;
2190 		if (!fgets(line, maxline, file))
2191 			exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2192 
2193 		while (!finished && line[0] != 'N' && line[0] != 'S') {
2194 			sCount =
2195 			    sscanf(line, "%ld\t%lld\t%lld\t%lld\t%lld\n",
2196 				   &long_var, &longlong_var, &longlong_var2, &longlong_var3,
2197 				   &longlong_var4);
2198 			nodeID = (IDnum) long_var;
2199 			startOffset = (Coordinate) longlong_var;
2200 			start = (Coordinate) longlong_var2;
2201 			finish = (Coordinate) longlong_var3;
2202 			finishOffset = (Coordinate) longlong_var4;
2203 			if (sCount != 5) {
2204 				velvetLog
2205 				    ("ERROR: reading in graph - only %d items read for line '%s'",
2206 				     sCount, line);
2207 #ifdef DEBUG
2208 				abort();
2209 #endif
2210 				exit(1);
2211 			}
2212 			newMarker =
2213 			    newPassageMarker(seqID, start, finish,
2214 					     startOffset, finishOffset);
2215 			transposePassageMarker(newMarker,
2216 					       getNodeInGraph(graph,
2217 							      nodeID));
2218 			connectPassageMarkers(marker, newMarker, graph);
2219 			marker = newMarker;
2220 			if (fgets(line, maxline, file) == NULL)
2221 				finished = true;
2222 		}
2223 	}
2224 
2225 	// Node reads
2226 	while (!finished) {
2227 		sscanf(line, "NR\t%ld\t%ld\n", &long_var, &long_var2);
2228 		nodeID = (IDnum) long_var;
2229 		readCount = (IDnum) long_var2;
2230 		if (!readStartsAreActivated(graph))
2231 			activateReadStarts(graph);
2232 
2233 		graph->nodeReadCounts[nodeID + graph->nodeCount] =
2234 		    readCount;
2235 		array = mallocOrExit(readCount, ShortReadMarker);
2236 		graph->nodeReads[nodeID + graph->nodeCount] = array;
2237 
2238 		readCount = 0;
2239 		if (!fgets(line, maxline, file))
2240 			exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2241 		while (!finished && line[0] != 'N') {
2242 			sscanf(line, "%ld\t%lld\t%hd\n", &long_var,
2243 			       &longlong_var, &short_var);
2244 			seqID = (IDnum) long_var;
2245 			startOffset = (Coordinate) longlong_var;
2246 			length = (ShortLength) short_var;
2247 			array[readCount].readID = seqID;
2248 			array[readCount].position = startOffset;
2249 			array[readCount].offset = length;
2250 			readCount++;
2251 			if (fgets(line, maxline, file) == NULL)
2252 				finished = true;
2253 		}
2254 	}
2255 
2256 	//velvetLog("New graph has %d nodes\n", graph->nodeCount);
2257 
2258 	fclose(file);
2259 	//velvetLog("Done, exiting\n");
2260 	return graph;
2261 }
2262 
readPreGraphFile(char * preGraphFilename,boolean * double_strand)2263 Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
2264 {
2265 	FILE *file = fopen(preGraphFilename, "r");
2266 	const int maxline = MAXLINE;
2267 	char line[MAXLINE];
2268 
2269 	Graph *graph;
2270 	IDnum nodeCounter, sequenceCount;
2271 
2272 	Node *node, *twin;
2273 	IDnum nodeID = 0;
2274 	Coordinate index, nodeLength;
2275 	char c;
2276 	int wordLength, wordShift;
2277 	size_t arrayLength;
2278 	short short_var;
2279 	long long_var, long_var2;
2280 	long long longlong_var;
2281 
2282 	if (file == NULL)
2283 		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
2284 
2285 	velvetLog("Reading pre-graph file %s\n", preGraphFilename);
2286 
2287 	// First  line
2288 	if (!fgets(line, maxline, file))
2289 		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2290 	sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2291 	       &wordLength, &short_var);
2292 	nodeCounter = (IDnum) long_var;
2293 	sequenceCount = (IDnum) long_var2;
2294 	*double_strand = (boolean) short_var;
2295 	wordShift = wordLength - 1;
2296 	graph = emptyGraph(sequenceCount, wordLength);
2297 	graph->double_stranded = *double_strand;
2298 	resetWordFilter(wordLength);
2299 	allocateNodeSpace(graph, nodeCounter);
2300 	velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2301 	       (long) sequenceCount);
2302 
2303 	// Read nodes
2304 	if (nodeCounter == 0)
2305 		return graph;
2306 
2307 	if (!fgets(line, maxline, file))
2308 		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2309 	while (line[0] == 'N') {
2310 		nodeID++;
2311 		node = addEmptyNodeToGraph(graph, nodeID);
2312 
2313 		sscanf(line, "%*s\t%*i\t%lli\n", &longlong_var);
2314 		node->length = (Coordinate) longlong_var;
2315 		nodeLength = node->length;
2316 		arrayLength = node->length / 4;
2317 		if (node->length % 4 > 0)
2318 			arrayLength++;
2319 		node->descriptor =
2320 		    callocOrExit(arrayLength, Descriptor);
2321 
2322 		twin = node->twinNode;
2323 		twin->length = nodeLength;
2324 		twin->descriptor =
2325 		    callocOrExit(arrayLength, Descriptor);
2326 
2327 
2328 		index = 0;
2329 		while ((c = getc(file)) != '\n') {
2330 			if (c == 'A') {
2331 				if (index - wordShift >= 0)
2332 					writeNucleotideInDescriptor(ADENINE,
2333 								    node->
2334 								    descriptor,
2335 								    index - wordShift);
2336 				if (nodeLength - index - 1 >= 0) {
2337 #ifndef COLOR
2338 					writeNucleotideInDescriptor(THYMINE,
2339 								    twin->
2340 								    descriptor,
2341 								    nodeLength - index - 1);
2342 #else
2343 					writeNucleotideInDescriptor(ADENINE,
2344 								    twin->
2345 								    descriptor,
2346 								    nodeLength - index - 1);
2347 #endif
2348 				}
2349 			} else if (c == 'C') {
2350 				if (index - wordShift >= 0)
2351 					writeNucleotideInDescriptor(CYTOSINE,
2352 								    node->
2353 								    descriptor,
2354 								    index - wordShift);
2355 				if (nodeLength - index - 1 >= 0) {
2356 #ifndef COLOR
2357 					writeNucleotideInDescriptor(GUANINE,
2358 								    twin->
2359 								    descriptor,
2360 								    nodeLength - index - 1);
2361 #else
2362 					writeNucleotideInDescriptor(CYTOSINE,
2363 								    twin->
2364 								    descriptor,
2365 								    nodeLength - index - 1);
2366 #endif
2367 				}
2368 			} else if (c == 'G') {
2369 				if (index - wordShift >= 0)
2370 					writeNucleotideInDescriptor(GUANINE,
2371 								    node->
2372 								    descriptor,
2373 								    index - wordShift);
2374 				if (nodeLength - index - 1 >= 0) {
2375 #ifndef COLOR
2376 					writeNucleotideInDescriptor(CYTOSINE,
2377 								    twin->
2378 								    descriptor,
2379 								    nodeLength - index - 1);
2380 #else
2381 					writeNucleotideInDescriptor(GUANINE,
2382 								    twin->
2383 								    descriptor,
2384 								    nodeLength - index - 1);
2385 #endif
2386 				}
2387 			} else if (c == 'T') {
2388 				if (index - wordShift >= 0)
2389 					writeNucleotideInDescriptor(THYMINE,
2390 								    node->
2391 								    descriptor,
2392 								    index - wordShift);
2393 				if (nodeLength - index - 1 >= 0) {
2394 #ifndef COLOR
2395 					writeNucleotideInDescriptor(ADENINE,
2396 								    twin->
2397 								    descriptor,
2398 								    nodeLength - index - 1);
2399 #else
2400 					writeNucleotideInDescriptor(THYMINE,
2401 								    twin->
2402 								    descriptor,
2403 								    nodeLength - index - 1);
2404 #endif
2405 				}
2406 			}
2407 
2408 			index++;
2409 		}
2410 
2411 		if (fgets(line, maxline, file) == NULL) {
2412 			fclose(file);
2413 			return graph;
2414 		}
2415 	}
2416 
2417 	fclose(file);
2418 	return graph;
2419 }
2420 
readConnectedGraphFile(char * connectedGraphFilename,boolean * double_strand)2421 Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_strand)
2422 {
2423 	FILE *file = fopen(connectedGraphFilename, "r");
2424 	const int maxline = MAXLINE;
2425 	char line[MAXLINE];
2426 	Coordinate coverage;
2427 	Arc *arc;
2428 	IDnum originID, destinationID, multiplicity;
2429 	boolean finished = false;
2430 	long long_var3;
2431 
2432 	Graph *graph;
2433 	IDnum nodeCounter, sequenceCount;
2434 
2435 	Node *node, *twin;
2436 	IDnum nodeID = 0;
2437 	Coordinate index, nodeLength;
2438 	char c;
2439 	int wordLength, wordShift;
2440 	size_t arrayLength;
2441 	short short_var;
2442 	long long_var, long_var2;
2443 	long long longlong_var;
2444 
2445 	if (file == NULL)
2446 		exitErrorf(EXIT_FAILURE, true, "Could not open %s", connectedGraphFilename);
2447 
2448 	velvetLog("Reading connected graph file %s\n", connectedGraphFilename);
2449 
2450 	// First  line
2451 	if (!fgets(line, maxline, file))
2452 		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2453 	sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2454 			&wordLength, &short_var);
2455 	nodeCounter = (IDnum) long_var;
2456 	sequenceCount = (IDnum) long_var2;
2457 	*double_strand = (boolean) short_var;
2458 	wordShift = wordLength - 1;
2459 	graph = emptyGraph(sequenceCount, wordLength);
2460 	graph->double_stranded = *double_strand;
2461 	resetWordFilter(wordLength);
2462 	allocateNodeSpace(graph, nodeCounter);
2463 	velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2464 			(long) sequenceCount);
2465 
2466 	// Read nodes
2467 	if (nodeCounter == 0)
2468 		return graph;
2469 
2470 	if (!fgets(line, maxline, file))
2471 		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2472 	while (!finished && strncmp(line, "NODE", 4) == 0) {
2473 		strtok(line, "\t\n");
2474 		sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2475 		nodeID = (IDnum) long_var;
2476 		node = addEmptyNodeToGraph(graph, nodeID);
2477 		sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2478 		node->length = (Coordinate) longlong_var;
2479 		nodeLength = node->length;
2480 
2481 #ifndef SINGLE_COV_CAT
2482 		Category cat;
2483 		Coordinate originalCoverage;
2484 		for (cat = 0; cat < CATEGORIES; cat++) {
2485 			sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2486 			coverage = (Coordinate) longlong_var;
2487 			setVirtualCoverage(node, cat, coverage);
2488 			sscanf(strtok(NULL, "\t\n"), "%lld",
2489 					&longlong_var);
2490 			originalCoverage = (Coordinate) longlong_var;
2491 			setOriginalVirtualCoverage(node, cat,
2492 					originalCoverage);
2493 		}
2494 #else
2495 		sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2496 		coverage = (Coordinate) longlong_var;
2497 		setVirtualCoverage(node, coverage);
2498 #endif
2499 
2500 		arrayLength = node->length / 4;
2501 		if (node->length % 4 > 0)
2502 			arrayLength++;
2503 		node->descriptor =
2504 			callocOrExit(arrayLength, Descriptor);
2505 
2506 		twin = node->twinNode;
2507 		twin->length = node->length;
2508 		twin->descriptor =
2509 			callocOrExit(arrayLength, Descriptor);
2510 
2511 		index = 0;
2512 		while ((c = getc(file)) != '\n') {
2513 			if (c == 'A') {
2514 				if (index - wordShift >= 0)
2515 					writeNucleotideInDescriptor(ADENINE,
2516 							node->
2517 							descriptor,
2518 							index - wordShift);
2519 				if (nodeLength - index - 1 >= 0) {
2520 #ifndef COLOR
2521 					writeNucleotideInDescriptor(THYMINE,
2522 							twin->
2523 							descriptor,
2524 							nodeLength - index - 1);
2525 #else
2526 					writeNucleotideInDescriptor(ADENINE,
2527 							twin->
2528 							descriptor,
2529 							nodeLength - index - 1);
2530 #endif
2531 				}
2532 			} else if (c == 'C') {
2533 				if (index - wordShift >= 0)
2534 					writeNucleotideInDescriptor(CYTOSINE,
2535 							node->
2536 							descriptor,
2537 							index - wordShift);
2538 				if (nodeLength - index - 1 >= 0) {
2539 #ifndef COLOR
2540 					writeNucleotideInDescriptor(GUANINE,
2541 							twin->
2542 							descriptor,
2543 							nodeLength - index - 1);
2544 #else
2545 					writeNucleotideInDescriptor(CYTOSINE,
2546 							twin->
2547 							descriptor,
2548 							nodeLength - index - 1);
2549 #endif
2550 				}
2551 			} else if (c == 'G') {
2552 				if (index - wordShift >= 0)
2553 					writeNucleotideInDescriptor(GUANINE,
2554 							node->
2555 							descriptor,
2556 							index - wordShift);
2557 				if (nodeLength - index - 1 >= 0) {
2558 #ifndef COLOR
2559 					writeNucleotideInDescriptor(CYTOSINE,
2560 							twin->
2561 							descriptor,
2562 							nodeLength - index - 1);
2563 #else
2564 					writeNucleotideInDescriptor(GUANINE,
2565 							twin->
2566 							descriptor,
2567 							nodeLength - index - 1);
2568 #endif
2569 				}
2570 			} else if (c == 'T') {
2571 				if (index - wordShift >= 0)
2572 					writeNucleotideInDescriptor(THYMINE,
2573 							node->
2574 							descriptor,
2575 							index - wordShift);
2576 				if (nodeLength - index - 1 >= 0) {
2577 #ifndef COLOR
2578 					writeNucleotideInDescriptor(ADENINE,
2579 							twin->
2580 							descriptor,
2581 							nodeLength - index - 1);
2582 #else
2583 					writeNucleotideInDescriptor(THYMINE,
2584 							twin->
2585 							descriptor,
2586 							nodeLength - index - 1);
2587 #endif
2588 				}
2589 			}
2590 
2591 			index++;
2592 		}
2593 
2594 		if (fgets(line, maxline, file) == NULL) {
2595 			finished = true;
2596 		}
2597 	}
2598 
2599 	// Read arcs
2600 	while (!finished && line[0] == 'A') {
2601 		sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2602 				&long_var2, &long_var3);
2603 		originID = (IDnum) long_var;
2604 		destinationID = (IDnum) long_var2;
2605 		multiplicity = (IDnum) long_var3;
2606 		arc =
2607 			createArc(getNodeInGraph(graph, originID),
2608 					getNodeInGraph(graph, destinationID), graph);
2609 		setMultiplicity(arc, multiplicity);
2610 		if (fgets(line, maxline, file) == NULL)
2611 			finished = true;
2612 	}
2613 
2614 	fclose(file);
2615 	return graph;
2616 }
2617 
2618 // Prints out the information relative to the topology of a node into a new file
2619 // Internal to exportDOTGraph()
DOTNode(Node * node,FILE * outfile)2620 void DOTNode(Node * node, FILE * outfile)
2621 {
2622 	IDnum ID;
2623 	Arc *arc;
2624 	Node *otherNode;
2625 
2626 	ID = node->ID;
2627 	if (ID < 0)
2628 		return;
2629 
2630 	velvetFprintf(outfile, "\t%li [label=\"<left>|%li|<right>\"]\n", (long) ID, (long) ID);
2631 
2632 	for (arc = node->arc; arc != NULL; arc = arc->next) {
2633 		otherNode = arc->destination;
2634 		if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2635 			continue;
2636 		}
2637 
2638 		if (otherNode->ID > 0)
2639 			velvetFprintf(outfile, "\t%li:right -> %li:left\n", (long) ID,
2640 				(long) otherNode->ID);
2641 		else
2642 			velvetFprintf(outfile, "\t%li:right -> %li:right\n", (long) ID,
2643 				(long) -otherNode->ID);
2644 	}
2645 
2646 	for (arc = node->twinNode->arc; arc != NULL; arc = arc->next) {
2647 		otherNode = arc->destination;
2648 		if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2649 			continue;
2650 		}
2651 
2652 		if (otherNode->ID > 0)
2653 			velvetFprintf(outfile, "\t%li:left -> %li:left\n", (long) ID,
2654 				(long) otherNode->ID);
2655 		else
2656 			velvetFprintf(outfile, "\t%li:left -> %li:right\n", (long) ID,
2657 				(long) -otherNode->ID);
2658 	}
2659 }
2660 
expandNode(Node * node,int WORDLENGTH)2661 TightString *expandNode(Node * node, int WORDLENGTH)
2662 {
2663 	Nucleotide nucleotide;
2664 	Coordinate index;
2665 	TightString *tString =
2666 	    newTightString(node->length + WORDLENGTH - 1);
2667 	Node *twin = node->twinNode;
2668 	Coordinate length = node->length;
2669 
2670 	for (index = 0; index < WORDLENGTH; index++) {
2671 		nucleotide =
2672 		    getNucleotideInDescriptor(twin->descriptor,
2673 					      length - index - 1);
2674 #ifndef COLOR
2675 		writeNucleotideAtPosition(3 - nucleotide, index, tString);
2676 #else
2677 		writeNucleotideAtPosition(nucleotide, index, tString);
2678 #endif
2679 	}
2680 
2681 	for (index = 1; index < node->length; index++) {
2682 		nucleotide =
2683 		    getNucleotideInDescriptor(node->descriptor, index);
2684 		writeNucleotideAtPosition(nucleotide,
2685 					  index + WORDLENGTH - 1, tString);
2686 	}
2687 
2688 	return tString;
2689 }
2690 
expandNodeFragment(Node * node,Coordinate contigStart,Coordinate contigFinish,int wordLength)2691 char *expandNodeFragment(Node * node, Coordinate contigStart,
2692 			 Coordinate contigFinish, int wordLength)
2693 {
2694 	Nucleotide nucleotide;
2695 	Coordinate index;
2696 	Node *twin = node->twinNode;
2697 	Coordinate length = contigFinish - contigStart;
2698 	int wordShift = wordLength - 1;
2699 	char *string;
2700 
2701 	if (length >= wordShift) {
2702 		string = callocOrExit(length + wordLength, char);
2703 
2704 		for (index = 0; index < wordShift; index++) {
2705 			nucleotide =
2706 			    getNucleotideInDescriptor(twin->descriptor,
2707 						      twin->length - contigStart -
2708 						      index - 1);
2709 	#ifndef COLOR
2710 			nucleotide = 3 - nucleotide;
2711 	#endif
2712 
2713 			switch (nucleotide) {
2714 			case ADENINE:
2715 				string[index] = 'A';
2716 				break;
2717 			case CYTOSINE:
2718 				string[index] = 'C';
2719 				break;
2720 			case GUANINE:
2721 				string[index] = 'G';
2722 				break;
2723 			case THYMINE:
2724 				string[index] = 'T';
2725 				break;
2726 			}
2727 
2728 		}
2729 
2730 		for (index = 0; index < length; index++) {
2731 			nucleotide =
2732 			    getNucleotideInDescriptor(node->descriptor,
2733 						      contigStart + index);
2734 			switch (nucleotide) {
2735 			case ADENINE:
2736 				string[index + wordShift] = 'A';
2737 				break;
2738 			case CYTOSINE:
2739 				string[index + wordShift] = 'C';
2740 				break;
2741 			case GUANINE:
2742 				string[index + wordShift] = 'G';
2743 				break;
2744 			case THYMINE:
2745 				string[index + wordShift] = 'T';
2746 				break;
2747 			}
2748 		}
2749 
2750 		string[length + wordShift] = '\0';
2751 	} else {
2752 		string = callocOrExit(length + 1, char);
2753 
2754 		for (index = 0; index < length; index++) {
2755 			nucleotide =
2756 			    getNucleotideInDescriptor(node->descriptor, contigStart + index);
2757 			switch (nucleotide) {
2758 			case ADENINE:
2759 				string[index] = 'A';
2760 				break;
2761 			case CYTOSINE:
2762 				string[index] = 'C';
2763 				break;
2764 			case GUANINE:
2765 				string[index] = 'G';
2766 				break;
2767 			case THYMINE:
2768 				string[index] = 'T';
2769 				break;
2770 			}
2771 		}
2772 
2773 		string[length] = '\0';
2774 	}
2775 
2776 	return string;
2777 }
2778 
readStartsAreActivated(Graph * graph)2779 boolean readStartsAreActivated(Graph * graph)
2780 {
2781 	return graph->nodeReads != NULL;
2782 }
2783 
activateReadStarts(Graph * graph)2784 void activateReadStarts(Graph * graph)
2785 {
2786 	graph->nodeReads =
2787 	    callocOrExit(2 * graph->nodeCount + 1, ShortReadMarker *);
2788 	graph->nodeReadCounts =
2789 	    callocOrExit(2 * graph->nodeCount + 1, IDnum);
2790 }
2791 
deactivateReadStarts(Graph * graph)2792 void deactivateReadStarts(Graph * graph)
2793 {
2794 	free(graph->nodeReads);
2795 	free(graph->nodeReadCounts);
2796 
2797 	graph->nodeReads = NULL;
2798 	graph->nodeReadCounts = NULL;
2799 }
2800 
findIDnumInArray(IDnum query,IDnum * array,IDnum arrayLength)2801 boolean findIDnumInArray(IDnum query, IDnum * array, IDnum arrayLength)
2802 {
2803 	IDnum leftIndex = 0;
2804 	IDnum rightIndex = arrayLength;
2805 	IDnum middleIndex;
2806 
2807 	if (arrayLength == 0)
2808 		return false;
2809 
2810 	while (true) {
2811 		middleIndex = leftIndex + (rightIndex - leftIndex) / 2;
2812 
2813 		if (array[middleIndex] == query)
2814 			return true;
2815 		else if (leftIndex >= rightIndex)
2816 			return false;
2817 		else if (array[middleIndex] > query)
2818 			rightIndex = middleIndex;
2819 		else if (leftIndex == middleIndex)
2820 			leftIndex++;
2821 		else
2822 			leftIndex = middleIndex;
2823 	}
2824 }
2825 
compareShortReadMarkers(const void * A,const void * B)2826 static inline int compareShortReadMarkers(const void *A, const void *B)
2827 {
2828 	IDnum a = ((ShortReadMarker *) A)->readID;
2829 	IDnum b = ((ShortReadMarker *) B)->readID;
2830 
2831 	if (a > b)
2832 		return 1;
2833 	if (a == b)
2834 		return 0;
2835 	return -1;
2836 }
2837 
compareIDnums(const void * A,const void * B)2838 static inline int compareIDnums(const void *A, const void *B)
2839 {
2840 	IDnum a = *((IDnum *) A);
2841 	IDnum b = *((IDnum *) B);
2842 
2843 	if (a > b)
2844 		return 1;
2845 	if (a == b)
2846 		return 0;
2847 	return -1;
2848 }
2849 
incrementReadStartCount(Node * node,Graph * graph)2850 void incrementReadStartCount(Node * node, Graph * graph)
2851 {
2852 	graph->nodeReadCounts[node->ID + graph->nodeCount]++;
2853 }
2854 
createNodeReadStartArrays(Graph * graph)2855 void createNodeReadStartArrays(Graph * graph)
2856 {
2857 	IDnum index;
2858 
2859 	if (graph->nodeReads == NULL)
2860 		return;
2861 
2862 	for (index = 0; index <= 2 * (graph->nodeCount); index++) {
2863 		if (graph->nodeReadCounts[index] != 0) {
2864 			graph->nodeReads[index] =
2865 			    mallocOrExit(graph->nodeReadCounts[index],
2866 				   ShortReadMarker);
2867 			graph->nodeReadCounts[index] = 0;
2868 		} else {
2869 			graph->nodeReads[index] = NULL;
2870 		}
2871 	}
2872 }
2873 
orderNodeReadStartArrays(Graph * graph)2874 void orderNodeReadStartArrays(Graph * graph)
2875 {
2876 	IDnum index;
2877 
2878 	if (graph->nodeReads == NULL)
2879 		return;
2880 
2881 	for (index = 0; index <= 2 * (graph->nodeCount); index++)
2882 		if (graph->nodeReadCounts[index] != 0)
2883 			qsort(graph->nodeReads[index],
2884 			      graph->nodeReadCounts[index],
2885 			      sizeof(ShortReadMarker),
2886 			      compareShortReadMarkers);
2887 }
2888 
addReadStart(Node * node,IDnum seqID,Coordinate position,Graph * graph,Coordinate offset)2889 void addReadStart(Node * node, IDnum seqID, Coordinate position,
2890 		  Graph * graph, Coordinate offset)
2891 {
2892 	IDnum nodeIndex = getNodeID(node) + graph->nodeCount;
2893 
2894 	ShortReadMarker *array = graph->nodeReads[nodeIndex];
2895 	IDnum arrayLength = graph->nodeReadCounts[nodeIndex];
2896 
2897 	array[arrayLength].readID = seqID;
2898 	array[arrayLength].position = position;
2899 	array[arrayLength].offset = (ShortLength) offset;
2900 	// printf("node %d, seq %d, pos %ld, offset %ld\n", getNodeID(node), seqID, position, offset);
2901 	graph->nodeReadCounts[nodeIndex]++;
2902 }
2903 
blurLastShortReadMarker(Node * node,Graph * graph)2904 void blurLastShortReadMarker(Node * node, Graph * graph)
2905 {
2906 	IDnum nodeIndex = getNodeID(node) + nodeCount(graph);
2907 	IDnum index = graph->nodeReadCounts[nodeIndex] - 1;
2908 	ShortReadMarker *marker;
2909 
2910 	if (index >= 0)
2911 		marker = &(graph->nodeReads[nodeIndex][index]);
2912 	else
2913 		abort();
2914 
2915 	setShortReadMarkerPosition(marker, -1);
2916 }
2917 
commonNodeReads(Node * nodeA,Node * nodeB,Graph * graph,IDnum * length)2918 ShortReadMarker *commonNodeReads(Node * nodeA, Node * nodeB, Graph * graph,
2919 				 IDnum * length)
2920 {
2921 	IDnum targetID, targetLength, targetIndex, targetVal;
2922 	IDnum sourceID, sourceLength, sourceIndex, sourceVal;
2923 	IDnum mergeLength;
2924 	ShortReadMarker *mergeArray, *targetArray, *sourceArray;
2925 
2926 	if (graph->nodeReads == NULL) {
2927 		*length = 0;
2928 		return NULL;
2929 	}
2930 
2931 	if (nodeA == NULL || nodeB == NULL) {
2932 		*length = 0;
2933 		return NULL;
2934 	}
2935 
2936 	targetID = getNodeID(nodeA) + graph->nodeCount;
2937 	targetArray = graph->nodeReads[targetID];
2938 	targetLength = graph->nodeReadCounts[targetID];
2939 
2940 	sourceID = getNodeID(nodeB) + graph->nodeCount;
2941 	sourceArray = graph->nodeReads[sourceID];
2942 	sourceLength = graph->nodeReadCounts[sourceID];
2943 
2944 	if (sourceArray == NULL || targetArray == NULL) {
2945 		*length = 0;
2946 		return NULL;
2947 	}
2948 
2949 	mergeArray =
2950 	    mallocOrExit(sourceLength +
2951 		    targetLength, ShortReadMarker);
2952 
2953 	mergeLength = 0;
2954 	sourceIndex = 0;
2955 	targetIndex = 0;
2956 	sourceVal = sourceArray[0].readID;
2957 	targetVal = targetArray[0].readID;
2958 
2959 	while (sourceIndex < sourceLength && targetIndex < targetLength) {
2960 		switch (compareIDnums(&sourceVal, &targetVal)) {
2961 		case -1:
2962 			mergeArray[mergeLength].readID = sourceVal;
2963 			mergeArray[mergeLength].position = -1;
2964 			mergeArray[mergeLength].offset = -1;
2965 			mergeLength++;
2966 			sourceIndex++;
2967 			if (sourceIndex < sourceLength)
2968 				sourceVal =
2969 				    sourceArray[sourceIndex].readID;
2970 			break;
2971 		case 0:
2972 			mergeArray[mergeLength].readID = sourceVal;
2973 			mergeArray[mergeLength].position = -1;
2974 			mergeArray[mergeLength].offset = -1;
2975 			mergeLength++;
2976 			sourceIndex++;
2977 			if (sourceIndex < sourceLength)
2978 				sourceVal =
2979 				    sourceArray[sourceIndex].readID;
2980 			targetIndex++;
2981 			if (targetIndex < targetLength)
2982 				targetVal =
2983 				    targetArray[targetIndex].readID;
2984 			break;
2985 		case 1:
2986 			mergeArray[mergeLength].readID = targetVal;
2987 			mergeArray[mergeLength].position = -1;
2988 			mergeArray[mergeLength].offset = -1;
2989 			mergeLength++;
2990 			targetIndex++;
2991 			if (targetIndex < targetLength)
2992 				targetVal =
2993 				    targetArray[targetIndex].readID;
2994 		}
2995 	}
2996 
2997 	while (sourceIndex < sourceLength) {
2998 		mergeArray[mergeLength].readID =
2999 		    sourceArray[sourceIndex].readID;
3000 		mergeArray[mergeLength].position = -1;
3001 		mergeArray[mergeLength].offset = -1;
3002 		mergeLength++;
3003 		sourceIndex++;
3004 	}
3005 
3006 	while (targetIndex < targetLength) {
3007 		mergeArray[mergeLength].readID =
3008 		    targetArray[targetIndex].readID;
3009 		mergeArray[mergeLength].position = -1;
3010 		mergeArray[mergeLength].offset = -1;
3011 		mergeLength++;
3012 		targetIndex++;
3013 	}
3014 
3015 	*length = mergeLength;
3016 	return mergeArray;
3017 }
3018 
extractFrontOfNodeReads(Node * node,Coordinate breakpoint,Graph * graph,IDnum * length,PassageMarkerI sourceMarker,ShortLength * lengths)3019 ShortReadMarker *extractFrontOfNodeReads(Node * node,
3020 					 Coordinate breakpoint,
3021 					 Graph * graph, IDnum * length,
3022 					 PassageMarkerI sourceMarker,
3023 					 ShortLength * lengths)
3024 {
3025 	IDnum sourceID;
3026 	IDnum mergeLength, newLength, sourceLength;
3027 	IDnum sourceIndex;
3028 	ShortReadMarker *mergeArray, *sourceArray, *newArray;
3029 	ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3030 	Coordinate finish;
3031 	Coordinate revBreakpoint;
3032 
3033 	if (graph->nodeReads == NULL) {
3034 		*length = 0;
3035 		return NULL;
3036 	}
3037 
3038 	if (node == NULL) {
3039 		*length = 0;
3040 		return NULL;
3041 	}
3042 
3043 	if (breakpoint == 0) {
3044 		return commonNodeReads(node,
3045 				       getTwinNode(getNode
3046 						   (getPreviousInSequence
3047 						    (sourceMarker))),
3048 				       graph, length);
3049 	}
3050 
3051 	sourceID = getNodeID(node) + graph->nodeCount;
3052 	sourceArray = graph->nodeReads[sourceID];
3053 	sourceLength = graph->nodeReadCounts[sourceID];
3054 
3055 	if (sourceArray == NULL) {
3056 		*length = 0;
3057 		return NULL;
3058 	}
3059 
3060 	revBreakpoint = node->length - breakpoint;
3061 
3062 	mergeLength = 0;
3063 	newLength = 0;
3064 	sourcePtr = sourceArray;
3065 	for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3066 		if (sourcePtr->position == -1) {
3067 			newLength++;
3068 			mergeLength++;
3069 		} else {
3070 			finish =
3071 			    sourcePtr->position - sourcePtr->offset +
3072 			    lengths[sourcePtr->readID - 1];
3073 			if (sourcePtr->position < revBreakpoint)
3074 				newLength++;
3075 			if (finish > revBreakpoint)
3076 				mergeLength++;
3077 		}
3078 		sourcePtr++;
3079 	}
3080 
3081 	newArray = mallocOrExit(newLength, ShortReadMarker);
3082 	mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3083 
3084 	mergePtr = mergeArray;
3085 	newPtr = newArray;
3086 	sourcePtr = sourceArray;
3087 	mergeLength = 0;
3088 	newLength = 0;
3089 	for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3090 		if (sourcePtr->position == -1) {
3091 			mergePtr->readID = sourcePtr->readID;
3092 			setShortReadMarkerPosition(mergePtr, -1);
3093 			setShortReadMarkerOffset(mergePtr, -1);
3094 			mergePtr++;
3095 			mergeLength++;
3096 			newPtr->readID = sourcePtr->readID;
3097 			setShortReadMarkerPosition(newPtr, -1);
3098 			setShortReadMarkerOffset(newPtr, -1);
3099 			newPtr++;
3100 			newLength++;
3101 		} else {
3102 			finish =
3103 			    sourcePtr->position - sourcePtr->offset +
3104 			    lengths[sourcePtr->readID - 1];
3105 			if (sourcePtr->position < revBreakpoint) {
3106 				newPtr->readID = sourcePtr->readID;
3107 				setShortReadMarkerPosition(newPtr,
3108 							   sourcePtr->
3109 							   position);
3110 				setShortReadMarkerOffset(newPtr,
3111 							 sourcePtr->
3112 							 offset);
3113 				newPtr++;
3114 				newLength++;
3115 
3116 				// Saddle back reads:
3117 				if (finish > revBreakpoint) {
3118 					mergePtr->readID =
3119 					    sourcePtr->readID;
3120 					setShortReadMarkerPosition
3121 					    (mergePtr, 0);
3122 					setShortReadMarkerOffset(mergePtr,
3123 								 sourcePtr->
3124 								 offset +
3125 								 revBreakpoint
3126 								 -
3127 								 sourcePtr->
3128 								 position);
3129 					mergePtr++;
3130 				}
3131 			} else if (finish > revBreakpoint) {
3132 				mergePtr->readID = sourcePtr->readID;
3133 				setShortReadMarkerPosition(mergePtr,
3134 							   sourcePtr->
3135 							   position - revBreakpoint);
3136 				setShortReadMarkerOffset(mergePtr,
3137 							 sourcePtr->
3138 							 offset);
3139 				mergePtr++;
3140 				mergeLength++;
3141 			}
3142 		}
3143 
3144 		sourcePtr++;
3145 	}
3146 
3147 	free(sourceArray);
3148 	graph->nodeReads[sourceID] = newArray;
3149 	graph->nodeReadCounts[sourceID] = newLength;
3150 
3151 	*length = mergeLength;
3152 	return mergeArray;
3153 }
3154 
extractBackOfNodeReads(Node * node,Coordinate breakpoint,Graph * graph,IDnum * length,PassageMarkerI sourceMarker,ShortLength * lengths)3155 ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
3156 					Graph * graph, IDnum * length,
3157 					PassageMarkerI sourceMarker,
3158 					ShortLength  * lengths)
3159 {
3160 	IDnum sourceID;
3161 	IDnum mergeLength, newLength, sourceLength;
3162 	IDnum sourceIndex;
3163 	ShortReadMarker *mergeArray, *sourceArray, *newArray;
3164 	ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3165 	Coordinate finish;
3166 
3167 	if (graph->nodeReads == NULL) {
3168 		*length = 0;
3169 		return NULL;
3170 	}
3171 
3172 	if (node == NULL) {
3173 		*length = 0;
3174 		return NULL;
3175 	}
3176 
3177 	if (breakpoint == 0) {
3178 		return
3179 		    commonNodeReads(getNode
3180 				    (getPreviousInSequence(sourceMarker)),
3181 				    node, graph, length);
3182 	}
3183 
3184 	sourceID = getNodeID(node) + graph->nodeCount;
3185 	sourceArray = graph->nodeReads[sourceID];
3186 	sourceLength = graph->nodeReadCounts[sourceID];
3187 
3188 	if (sourceArray == NULL) {
3189 		*length = 0;
3190 		return NULL;
3191 	}
3192 
3193 	mergeLength = 0;
3194 	newLength = 0;
3195 	sourcePtr = sourceArray;
3196 	for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3197 		if (sourcePtr->position == -1) {
3198 			mergeLength++;
3199 			newLength++;
3200 		} else {
3201 			finish =
3202 			    sourcePtr->position - sourcePtr->offset +
3203 			    lengths[sourcePtr->readID - 1];
3204 			if (sourcePtr->position < breakpoint)
3205 				mergeLength++;
3206 			if (finish > breakpoint)
3207 				newLength++;
3208 		}
3209 		sourcePtr++;
3210 	}
3211 
3212 	newArray = mallocOrExit(newLength, ShortReadMarker);
3213 	mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3214 
3215 	mergePtr = mergeArray;
3216 	newPtr = newArray;
3217 	sourcePtr = sourceArray;
3218 	for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3219 		if (sourcePtr->position == -1) {
3220 			mergePtr->readID = sourcePtr->readID;
3221 			setShortReadMarkerPosition(mergePtr, -1);
3222 			setShortReadMarkerOffset(mergePtr, -1);
3223 			mergePtr++;
3224 
3225 			newPtr->readID = sourcePtr->readID;
3226 			setShortReadMarkerPosition(newPtr, -1);
3227 			setShortReadMarkerOffset(newPtr, -1);
3228 			newPtr++;
3229 
3230 			sourcePtr++;
3231 			continue;
3232 		} else {
3233 			finish =
3234 			    sourcePtr->position - sourcePtr->offset +
3235 			    lengths[sourcePtr->readID - 1];
3236 
3237 			if (sourcePtr->position < breakpoint) {
3238 				mergePtr->readID = sourcePtr->readID;
3239 				setShortReadMarkerPosition(mergePtr,
3240 							   sourcePtr->
3241 							   position);
3242 				setShortReadMarkerOffset(mergePtr,
3243 							 sourcePtr->
3244 							 offset);
3245 				mergePtr++;
3246 
3247 				// Saddle back reads:
3248 				if (finish > breakpoint) {
3249 					newPtr->readID = sourcePtr->readID;
3250 					setShortReadMarkerPosition(newPtr,
3251 								   0);
3252 					setShortReadMarkerOffset(newPtr,
3253 								 sourcePtr->
3254 								 offset +
3255 								 breakpoint
3256 								 -
3257 								 sourcePtr->
3258 								 position);
3259 					newPtr++;
3260 				}
3261 			} else if (finish > breakpoint) {
3262 				newPtr->readID = sourcePtr->readID;
3263 				setShortReadMarkerPosition(newPtr,
3264 							   sourcePtr->
3265 							   position -
3266 							   breakpoint);
3267 				setShortReadMarkerOffset(newPtr,
3268 							 sourcePtr->
3269 							 offset);
3270 				newPtr++;
3271 			}
3272 		}
3273 
3274 		sourcePtr++;
3275 	}
3276 
3277 	free(sourceArray);
3278 	graph->nodeReads[sourceID] = newArray;
3279 	graph->nodeReadCounts[sourceID] = newLength;
3280 
3281 	*length = mergeLength;
3282 	return mergeArray;
3283 }
3284 
spreadReadIDs(ShortReadMarker * reads,IDnum readCount,Node * node,Graph * graph)3285 void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
3286 		   Graph * graph)
3287 {
3288 	IDnum targetID, targetLength, targetIndex, targetVal;
3289 	IDnum sourceLength, sourceIndex, sourceVal;
3290 	IDnum mergeLength;
3291 	ShortReadMarker *sourceArray, *targetArray, *mergeArray;
3292 	ShortReadMarker *sourcePtr, *targetPtr, *mergePtr;
3293 	Coordinate targetPosition;
3294 	//ShortLength nodeLength = (ShortLength) getNodeLength(node);
3295 	ShortLength targetOffset;
3296 
3297 	if (graph->nodeReads == NULL || reads == NULL || node == NULL)
3298 		return;
3299 
3300 	targetID = getNodeID(node) + graph->nodeCount;
3301 	targetArray = graph->nodeReads[targetID];
3302 	targetLength = graph->nodeReadCounts[targetID];
3303 	targetPtr = targetArray;
3304 
3305 	sourceArray = reads;
3306 	sourceLength = readCount;
3307 	sourcePtr = sourceArray;
3308 
3309 	if (targetArray == NULL) {
3310 		mergeArray =
3311 		    mallocOrExit(sourceLength, ShortReadMarker);
3312 		mergePtr = mergeArray;
3313 
3314 		sourceIndex = 0;
3315 		while (sourceIndex < sourceLength) {
3316 			mergePtr->readID = sourcePtr->readID;
3317 			setShortReadMarkerPosition(mergePtr, -1);
3318 			setShortReadMarkerOffset(mergePtr, -1);
3319 			mergePtr++;
3320 			sourcePtr++;
3321 			sourceIndex++;
3322 		}
3323 
3324 		graph->nodeReads[targetID] = mergeArray;
3325 		graph->nodeReadCounts[targetID] = sourceLength;
3326 		return;
3327 	}
3328 
3329 	mergeArray =
3330 	    mallocOrExit(sourceLength +
3331 		    targetLength, ShortReadMarker);
3332 	mergePtr = mergeArray;
3333 
3334 	mergeLength = 0;
3335 	sourceIndex = 0;
3336 	targetIndex = 0;
3337 	sourceVal = sourcePtr->readID;
3338 	targetVal = targetPtr->readID;
3339 	targetPosition = targetPtr->position;
3340 	targetOffset = targetPtr->offset;
3341 
3342 	while (sourceIndex < sourceLength && targetIndex < targetLength) {
3343 		if (sourceVal < targetVal) {
3344 			mergePtr->readID = sourceVal;
3345 			setShortReadMarkerPosition(mergePtr, -1);
3346 			setShortReadMarkerOffset(mergePtr, -1);
3347 			sourceIndex++;
3348 			sourcePtr++;
3349 			if (sourceIndex < sourceLength)
3350 				sourceVal = sourcePtr->readID;
3351 		} else if (sourceVal == targetVal) {
3352 			mergePtr->readID = sourceVal;
3353 			setShortReadMarkerPosition(mergePtr, -1);
3354 			setShortReadMarkerOffset(mergePtr, -1);
3355 			sourceIndex++;
3356 			sourcePtr++;
3357 			if (sourceIndex < sourceLength)
3358 				sourceVal = sourcePtr->readID;
3359 			targetIndex++;
3360 			targetPtr++;
3361 			if (targetIndex < targetLength) {
3362 				targetVal = targetPtr->readID;
3363 				targetPosition = targetPtr->position;
3364 				targetOffset = targetPtr->offset;
3365 			}
3366 		} else {
3367 			mergePtr->readID = targetVal;
3368 			setShortReadMarkerPosition(mergePtr,
3369 						   targetPosition);
3370 			setShortReadMarkerOffset(mergePtr, targetOffset);
3371 			targetIndex++;
3372 			targetPtr++;
3373 			if (targetIndex < targetLength) {
3374 				targetVal = targetPtr->readID;
3375 				targetPosition = targetPtr->position;
3376 				targetOffset = targetPtr->offset;
3377 			}
3378 		}
3379 
3380 		mergeLength++;
3381 		mergePtr++;
3382 	}
3383 
3384 	while (sourceIndex < sourceLength) {
3385 		mergePtr->readID = sourcePtr->readID;
3386 		setShortReadMarkerPosition(mergePtr, -1);
3387 		setShortReadMarkerOffset(mergePtr, -1);
3388 		mergeLength++;
3389 		mergePtr++;
3390 		sourceIndex++;
3391 		sourcePtr++;
3392 	}
3393 
3394 	while (targetIndex < targetLength) {
3395 		mergePtr->readID = targetPtr->readID;
3396 		setShortReadMarkerPosition(mergePtr, targetPtr->position);
3397 		setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3398 		mergeLength++;
3399 		mergePtr++;
3400 		targetIndex++;
3401 		targetPtr++;
3402 	}
3403 
3404 	free(targetArray);
3405 	graph->nodeReads[targetID] = mergeArray;
3406 	graph->nodeReadCounts[targetID] = mergeLength;
3407 }
3408 
min(Coordinate A,Coordinate B)3409 static inline Coordinate min(Coordinate A, Coordinate B)
3410 {
3411 	return A < B ? A : B;
3412 }
3413 
min_short(ShortLength A,ShortLength B)3414 static inline ShortLength min_short(ShortLength A, ShortLength B)
3415 {
3416 	return A < B ? A : B;
3417 }
3418 
injectShortReads(ShortReadMarker * sourceArray,IDnum sourceLength,Node * target,Graph * graph)3419 void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
3420 		      Node * target, Graph * graph)
3421 {
3422 	IDnum targetID = getNodeID(target) + graph->nodeCount;
3423 	ShortReadMarker *targetArray = graph->nodeReads[targetID];
3424 	IDnum targetLength = graph->nodeReadCounts[targetID];
3425 	ShortReadMarker *targetPtr = targetArray;
3426 	ShortReadMarker *sourcePtr = sourceArray;
3427 	ShortReadMarker *mergeArray, *mergePtr;
3428 	IDnum mergeLength;
3429 	Coordinate targetPosition, sourcePosition;
3430 	ShortLength targetOffset, sourceOffset;
3431 	IDnum targetIndex, targetVal, sourceIndex, sourceVal;
3432 
3433 	if (sourceLength == 0) {
3434 		free(sourceArray);
3435 		return;
3436 	}
3437 
3438 	if (targetLength == 0) {
3439 		free(targetArray);
3440 		graph->nodeReads[targetID] = sourceArray;
3441 		graph->nodeReadCounts[targetID] = sourceLength;
3442 		return;
3443 	}
3444 
3445 	mergeArray =
3446 	    mallocOrExit(sourceLength +
3447 		    targetLength, ShortReadMarker);
3448 	mergePtr = mergeArray;
3449 
3450 	mergeLength = 0;
3451 	sourceIndex = 0;
3452 	targetIndex = 0;
3453 	targetVal = targetPtr->readID;
3454 	targetPosition = targetPtr->position;
3455 	targetOffset = targetPtr->offset;
3456 	sourceVal = sourcePtr->readID;
3457 	sourcePosition = sourcePtr->position;
3458 	sourceOffset = sourcePtr->offset;
3459 
3460 	while (sourceIndex < sourceLength && targetIndex < targetLength) {
3461 		if (sourceVal < targetVal) {
3462 			mergePtr->readID = sourceVal;
3463 			setShortReadMarkerPosition(mergePtr,
3464 						   sourcePosition);
3465 			setShortReadMarkerOffset(mergePtr, sourceOffset);
3466 			sourceIndex++;
3467 			if (sourceIndex < sourceLength) {
3468 				sourcePtr++;
3469 				sourceVal = sourcePtr->readID;
3470 				sourcePosition = sourcePtr->position;
3471 				sourceOffset = sourcePtr->offset;
3472 			}
3473 		} else if (sourceVal == targetVal) {
3474 			mergePtr->readID = sourceVal;
3475 			if (sourcePosition == -1 && targetPosition == -1) {
3476 				setShortReadMarkerPosition(mergePtr, -1);
3477 				setShortReadMarkerOffset(mergePtr, -1);
3478 			} else if (sourcePosition == -1) {
3479 				setShortReadMarkerPosition(mergePtr,
3480 							   targetPosition);
3481 				setShortReadMarkerOffset(mergePtr,
3482 							 targetOffset);
3483 			} else if (targetPosition == -1) {
3484 				setShortReadMarkerPosition(mergePtr,
3485 							   sourcePosition);
3486 				setShortReadMarkerOffset(mergePtr,
3487 							 sourceOffset);
3488 			} else {
3489 				setShortReadMarkerPosition(mergePtr,
3490 							   min
3491 							   (sourcePosition,
3492 							    targetPosition));
3493 				setShortReadMarkerOffset(mergePtr,
3494 							 min_short
3495 							 (sourceOffset,
3496 							  targetOffset));
3497 			}
3498 			sourceIndex++;
3499 			if (sourceIndex < sourceLength) {
3500 				sourcePtr++;
3501 				sourceVal = sourcePtr->readID;
3502 				sourcePosition = sourcePtr->position;
3503 				sourceOffset = sourcePtr->offset;
3504 			}
3505 			targetIndex++;
3506 			if (targetIndex < targetLength) {
3507 				targetPtr++;
3508 				targetVal = targetPtr->readID;
3509 				targetPosition = targetPtr->position;
3510 				targetOffset = targetPtr->offset;
3511 			}
3512 		} else {
3513 			mergePtr->readID = targetVal;
3514 			setShortReadMarkerPosition(mergePtr,
3515 						   targetPosition);
3516 			setShortReadMarkerOffset(mergePtr, targetOffset);
3517 			targetIndex++;
3518 			if (targetIndex < targetLength) {
3519 				targetPtr++;
3520 				targetVal = targetPtr->readID;
3521 				targetPosition = targetPtr->position;
3522 				targetOffset = targetPtr->offset;
3523 			}
3524 		}
3525 
3526 		mergeLength++;
3527 		mergePtr++;
3528 	}
3529 
3530 	while (sourceIndex < sourceLength) {
3531 		mergePtr->readID = sourcePtr->readID;
3532 		setShortReadMarkerPosition(mergePtr, sourcePtr->position);
3533 		setShortReadMarkerOffset(mergePtr, sourcePtr->offset);
3534 		mergeLength++;
3535 		mergePtr++;
3536 		sourceIndex++;
3537 		sourcePtr++;
3538 	}
3539 
3540 	while (targetIndex < targetLength) {
3541 		mergePtr->readID = targetPtr->readID;
3542 		setShortReadMarkerPosition(mergePtr, targetPtr->position);
3543 		setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3544 		mergeLength++;
3545 		mergePtr++;
3546 		targetIndex++;
3547 		targetPtr++;
3548 	}
3549 
3550 	free(targetArray);
3551 	graph->nodeReads[targetID] = mergeArray;
3552 	graph->nodeReadCounts[targetID] = mergeLength;
3553 
3554 	free(sourceArray);
3555 }
3556 
mergeNodeReads(Node * target,Node * source,Graph * graph)3557 void mergeNodeReads(Node * target, Node * source, Graph * graph)
3558 {
3559 	IDnum sourceID, sourceLength;
3560 	ShortReadMarker *sourceArray;
3561 
3562 	if (graph->nodeReads == NULL || source == NULL || target == NULL)
3563 		return;
3564 
3565 	sourceID = getNodeID(source) + graph->nodeCount;
3566 	sourceArray = graph->nodeReads[sourceID];
3567 	sourceLength = graph->nodeReadCounts[sourceID];
3568 
3569 	if (sourceArray == NULL)
3570 		return;
3571 
3572 	graph->nodeReads[sourceID] = NULL;
3573 	graph->nodeReadCounts[sourceID] = 0;
3574 
3575 	injectShortReads(sourceArray, sourceLength, target, graph);
3576 }
3577 
foldSymmetricalNodeReads(Node * node,Graph * graph)3578 void foldSymmetricalNodeReads(Node * node, Graph * graph)
3579 {
3580 	IDnum targetID, targetLength, targetIndex;
3581 	IDnum sourceID, sourceLength, sourceIndex;
3582 	IDnum targetVal = 0;
3583 	IDnum sourceVal = 0;
3584 	IDnum mergeLength;
3585 	ShortReadMarker *sourceArray, *targetArray, *mergeArray,
3586 	    *mergeArray2;
3587 	ShortReadMarker *sourcePtr, *targetPtr, *mergePtr, *mergePtr2;
3588 
3589 	if (graph->nodeReads == NULL || node == NULL)
3590 		return;
3591 
3592 	sourceID = getNodeID(node) + graph->nodeCount;
3593 	sourceArray = graph->nodeReads[sourceID];
3594 	sourceLength = graph->nodeReadCounts[sourceID];
3595 	sourcePtr = sourceArray;
3596 
3597 	targetID = -getNodeID(node) + graph->nodeCount;
3598 	targetArray = graph->nodeReads[targetID];
3599 	targetLength = graph->nodeReadCounts[targetID];
3600 	targetPtr = targetArray;
3601 
3602 	if (sourceArray == NULL && targetArray == NULL)
3603 		return;
3604 
3605 	mergeArray =
3606 	    mallocOrExit(sourceLength +
3607 		    targetLength, ShortReadMarker);
3608 	mergeArray2 =
3609 	    mallocOrExit(sourceLength +
3610 		    targetLength, ShortReadMarker);
3611 	mergePtr = mergeArray;
3612 	mergePtr2 = mergeArray2;
3613 
3614 	mergeLength = 0;
3615 	sourceIndex = 0;
3616 	targetIndex = 0;
3617 	if (targetIndex < targetLength)
3618 		targetVal = targetPtr->readID;
3619 	if (sourceIndex < sourceLength)
3620 		sourceVal = sourcePtr->readID;
3621 
3622 	while (sourceIndex < sourceLength && targetIndex < targetLength) {
3623 		if (sourceVal < targetVal) {
3624 			mergePtr->readID = sourceVal;
3625 			setShortReadMarkerPosition(mergePtr, -1);
3626 			setShortReadMarkerOffset(mergePtr, -1);
3627 			mergePtr2->readID = sourceVal;
3628 			setShortReadMarkerPosition(mergePtr2, -1);
3629 			setShortReadMarkerOffset(mergePtr2, -1);
3630 			sourceIndex++;
3631 			sourcePtr++;
3632 			if (sourceIndex < sourceLength)
3633 				sourceVal = sourcePtr->readID;
3634 		} else if (sourceVal == targetVal) {
3635 			mergePtr->readID = sourceVal;
3636 			setShortReadMarkerPosition(mergePtr, -1);
3637 			setShortReadMarkerOffset(mergePtr, -1);
3638 			mergePtr2->readID = sourceVal;
3639 			setShortReadMarkerPosition(mergePtr2, -1);
3640 			setShortReadMarkerOffset(mergePtr2, -1);
3641 			sourceIndex++;
3642 			sourcePtr++;
3643 			if (sourceIndex < sourceLength)
3644 				sourceVal = sourcePtr->readID;
3645 			targetIndex++;
3646 			targetPtr++;
3647 			if (targetIndex < targetLength)
3648 				targetVal = targetPtr->readID;
3649 		} else {
3650 			mergePtr->readID = targetVal;
3651 			setShortReadMarkerPosition(mergePtr, -1);
3652 			setShortReadMarkerOffset(mergePtr, -1);
3653 			mergePtr2->readID = targetVal;
3654 			setShortReadMarkerPosition(mergePtr2, -1);
3655 			setShortReadMarkerOffset(mergePtr2, -1);
3656 			targetIndex++;
3657 			targetPtr++;
3658 			if (targetIndex < targetLength)
3659 				targetVal = targetPtr->readID;
3660 		}
3661 
3662 		mergeLength++;
3663 		mergePtr++;
3664 		mergePtr2++;
3665 	}
3666 
3667 	while (sourceIndex < sourceLength) {
3668 		mergePtr->readID = sourcePtr->readID;
3669 		setShortReadMarkerPosition(mergePtr, -1);
3670 		setShortReadMarkerOffset(mergePtr, -1);
3671 		mergePtr2->readID = sourcePtr->readID;
3672 		setShortReadMarkerPosition(mergePtr2, -1);
3673 		setShortReadMarkerOffset(mergePtr2, -1);
3674 		mergeLength++;
3675 		mergePtr++;
3676 		mergePtr2++;
3677 		sourceIndex++;
3678 		sourcePtr++;
3679 	}
3680 
3681 	while (targetIndex < targetLength) {
3682 		mergePtr->readID = targetPtr->readID;
3683 		setShortReadMarkerPosition(mergePtr, -1);
3684 		setShortReadMarkerOffset(mergePtr, -1);
3685 		mergePtr2->readID = targetPtr->readID;
3686 		setShortReadMarkerPosition(mergePtr2, -1);
3687 		setShortReadMarkerOffset(mergePtr2, -1);
3688 		mergeLength++;
3689 		mergePtr++;
3690 		mergePtr2++;
3691 		targetIndex++;
3692 		targetPtr++;
3693 	}
3694 
3695 	free(targetArray);
3696 	graph->nodeReads[targetID] = mergeArray;
3697 	graph->nodeReadCounts[targetID] = mergeLength;
3698 
3699 	free(sourceArray);
3700 	graph->nodeReads[sourceID] = mergeArray2;
3701 	graph->nodeReadCounts[sourceID] = mergeLength;
3702 }
3703 
shareReadStarts(Node * target,Node * source,Graph * graph)3704 void shareReadStarts(Node * target, Node * source, Graph * graph)
3705 {
3706 	ShortReadMarker *sourceArray;
3707 	IDnum sourceLength, sourceID;
3708 
3709 	if (graph->nodeReads == NULL)
3710 		return;
3711 
3712 	if (target == NULL || source == NULL)
3713 		return;
3714 
3715 	sourceID = source->ID + graph->nodeCount;
3716 	sourceArray = graph->nodeReads[sourceID];
3717 	sourceLength = graph->nodeReadCounts[sourceID];
3718 
3719 	if (sourceArray == NULL)
3720 		return;
3721 
3722 	spreadReadIDs(sourceArray, sourceLength, target, graph);
3723 }
3724 
getNodeToReadMappings(Graph * graph)3725 ShortReadMarker **getNodeToReadMappings(Graph * graph)
3726 {
3727 	return graph->nodeReads;
3728 }
3729 
getShortReadMarkerID(ShortReadMarker * marker)3730 IDnum getShortReadMarkerID(ShortReadMarker * marker)
3731 {
3732 	return marker->readID;
3733 }
3734 
getShortReadMarkerOffset(ShortReadMarker * marker)3735 inline ShortLength getShortReadMarkerOffset(ShortReadMarker * marker)
3736 {
3737 	return marker->offset;
3738 }
3739 
setShortReadMarkerOffset(ShortReadMarker * marker,ShortLength offset)3740 inline void setShortReadMarkerOffset(ShortReadMarker * marker,
3741 				     ShortLength offset)
3742 {
3743 	marker->offset = offset;
3744 }
3745 
getNodeReadCounts(Graph * graph)3746 IDnum *getNodeReadCounts(Graph * graph)
3747 {
3748 	return graph->nodeReadCounts;
3749 }
3750 
getWordLength(Graph * graph)3751 int getWordLength(Graph * graph)
3752 {
3753 	return graph->wordLength;
3754 }
3755 
getNodeReads(Node * node,Graph * graph)3756 ShortReadMarker *getNodeReads(Node * node, Graph * graph)
3757 {
3758 	IDnum id = node->ID + graph->nodeCount;
3759 	return graph->nodeReads[id];
3760 }
3761 
getNodeReadCount(Node * node,Graph * graph)3762 IDnum getNodeReadCount(Node * node, Graph * graph)
3763 {
3764 	IDnum id = node->ID + graph->nodeCount;
3765 	return graph->nodeReadCounts[id];
3766 }
3767 
getShortReadMarkerPosition(ShortReadMarker * marker)3768 inline Coordinate getShortReadMarkerPosition(ShortReadMarker * marker)
3769 {
3770 	return marker->position;
3771 }
3772 
setShortReadMarkerPosition(ShortReadMarker * marker,Coordinate position)3773 inline void setShortReadMarkerPosition(ShortReadMarker * marker,
3774 				       Coordinate position)
3775 {
3776 	if (position < -100)
3777 		return;
3778 
3779 	marker->position = position;
3780 }
3781 
getShortReadMarkerAtIndex(ShortReadMarker * array,IDnum index)3782 ShortReadMarker *getShortReadMarkerAtIndex(ShortReadMarker * array,
3783 					   IDnum index)
3784 {
3785 	return &(array[index]);
3786 }
3787 
destroyGraph(Graph * graph)3788 void destroyGraph(Graph * graph)
3789 {
3790 	IDnum index;
3791 	Node *node;
3792 	for (index = 1; index <= graph->nodeCount; index++) {
3793 		node = getNodeInGraph(graph, index);
3794 		if (node != NULL)
3795 			destroyNode(node, graph);
3796 	}
3797 
3798 	if (graph->gapMarkers)
3799 		deactivateGapMarkers(graph);
3800 
3801 	free(graph->nodes);
3802 	destroyRecycleBin(nodeMemory);
3803 	destroyRecycleBin(arcMemory);
3804 	destroyAllPassageMarkers();
3805 	free(graph->arcLookupTable);
3806 	free(graph->nodeReads);
3807 	free(graph->nodeReadCounts);
3808 	free(graph);
3809 }
3810 
setInsertLengths(Graph * graph,Category cat,Coordinate insertLength,Coordinate insertLength_std_dev)3811 void setInsertLengths(Graph * graph, Category cat, Coordinate insertLength,
3812 		      Coordinate insertLength_std_dev)
3813 {
3814 	graph->insertLengths[cat] = insertLength;
3815 	graph->insertLengths_var[cat] =
3816 	    insertLength_std_dev * insertLength_std_dev;
3817 }
3818 
getInsertLength(Graph * graph,Category cat)3819 Coordinate getInsertLength(Graph * graph, Category cat)
3820 {
3821 	return graph->insertLengths[cat / 2];
3822 }
3823 
getInsertLength_var(Graph * graph,Category cat)3824 double getInsertLength_var(Graph * graph, Category cat)
3825 {
3826 	return graph->insertLengths_var[cat / 2];
3827 }
3828 
activateGapMarkers(Graph * graph)3829 void activateGapMarkers(Graph * graph)
3830 {
3831 	graph->gapMarkers =
3832 	    callocOrExit(graph->nodeCount + 1, GapMarker *);
3833 	gapMarkerMemory = newRecycleBin(sizeof(GapMarker), GAPBLOCKSIZE);
3834 }
3835 
deactivateGapMarkers(Graph * graph)3836 void deactivateGapMarkers(Graph * graph)
3837 {
3838 	free(graph->gapMarkers);
3839 	graph->gapMarkers = NULL;
3840 	destroyRecycleBin(gapMarkerMemory);
3841 	gapMarkerMemory = NULL;
3842 }
3843 
allocateGapMarker()3844 static GapMarker *allocateGapMarker()
3845 {
3846 	return (GapMarker *) allocatePointer(gapMarkerMemory);
3847 }
3848 
appendGap(Node * node,Coordinate length,Graph * graph)3849 void appendGap(Node * node, Coordinate length, Graph * graph)
3850 {
3851 	IDnum nodeID = getNodeID(node);
3852 	GapMarker *marker = allocateGapMarker();
3853 	GapMarker *tmp;
3854 
3855 	marker->length = length;
3856 
3857 	if (nodeID > 0) {
3858 		marker->position = node->length;
3859 		marker->next = graph->gapMarkers[nodeID];
3860 		graph->gapMarkers[nodeID] = marker;
3861 	} else {
3862 		for (tmp = graph->gapMarkers[-nodeID]; tmp != NULL;
3863 		     tmp = tmp->next)
3864 			tmp->position += length;
3865 
3866 		marker->position = 0;
3867 		marker->next = graph->gapMarkers[-nodeID];
3868 		graph->gapMarkers[-nodeID] = marker;
3869 	}
3870 
3871 	addBufferToDescriptor(node, length);
3872 }
3873 
appendNodeGaps(Node * destination,Node * source,Graph * graph)3874 void appendNodeGaps(Node * destination, Node * source, Graph * graph)
3875 {
3876 	IDnum destinationID = getNodeID(destination);
3877 	IDnum sourceID = getNodeID(source);
3878 	GapMarker *marker;
3879 
3880 	if (graph->gapMarkers == NULL)
3881 		return;
3882 
3883 	if (destinationID > 0 && sourceID > 0) {
3884 		for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3885 		     marker = marker->next)
3886 			marker->position += destination->length;
3887 	} else if (destinationID > 0 && sourceID < 0) {
3888 		sourceID = -sourceID;
3889 		for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3890 		     marker = marker->next)
3891 			marker->position =
3892 			    source->length + destination->length -
3893 			    marker->position - marker->length;
3894 	} else if (destinationID < 0 && sourceID > 0) {
3895 		destinationID = -destinationID;
3896 		for (marker = graph->gapMarkers[destinationID];
3897 		     marker != NULL; marker = marker->next)
3898 			marker->position += source->length;
3899 
3900 		for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3901 		     marker = marker->next)
3902 			marker->position =
3903 			    source->length - marker->position -
3904 			    marker->length;
3905 	} else {
3906 		destinationID = -destinationID;
3907 		sourceID = -sourceID;
3908 		for (marker = graph->gapMarkers[destinationID];
3909 		     marker != NULL; marker = marker->next)
3910 			marker->position += source->length;
3911 	}
3912 
3913 	if (graph->gapMarkers[destinationID] == NULL)
3914 		graph->gapMarkers[destinationID] =
3915 		    graph->gapMarkers[sourceID];
3916 	else {
3917 		marker = graph->gapMarkers[destinationID];
3918 		while (marker->next != NULL)
3919 			marker = marker->next;
3920 		marker->next = graph->gapMarkers[sourceID];
3921 	}
3922 
3923 	graph->gapMarkers[sourceID] = NULL;
3924 }
3925 
getGap(Node * node,Graph * graph)3926 GapMarker *getGap(Node * node, Graph * graph)
3927 {
3928 	IDnum nodeID = getNodeID(node);
3929 
3930 	if (graph->gapMarkers == NULL)
3931 		return NULL;
3932 
3933 	if (nodeID < 0)
3934 		nodeID = -nodeID;
3935 
3936 	return graph->gapMarkers[nodeID];
3937 }
3938 
getNextGap(GapMarker * marker)3939 GapMarker *getNextGap(GapMarker * marker)
3940 {
3941 	return marker->next;
3942 }
3943 
getGapStart(GapMarker * marker)3944 Coordinate getGapStart(GapMarker * marker)
3945 {
3946 	return marker->position;
3947 }
3948 
getGapFinish(GapMarker * marker)3949 Coordinate getGapFinish(GapMarker * marker)
3950 {
3951 	return marker->position + marker->length;
3952 }
3953 
reallocateNodeDescriptor(Node * node,Coordinate length)3954 void reallocateNodeDescriptor(Node * node, Coordinate length) {
3955 	Coordinate arrayLength, index, shift;
3956 	Node * twin = node->twinNode;
3957 	Descriptor * array;
3958 	Nucleotide nucleotide;
3959 
3960 	if (length < node->length)
3961 		exitErrorf(EXIT_FAILURE, true, "Sum of node lengths smaller than first!");
3962 
3963 	shift = length - node->length;
3964 
3965 	arrayLength = length / 4;
3966 	if (length % 4)
3967 		arrayLength++;
3968 
3969 	node->descriptor = reallocOrExit(node->descriptor, arrayLength, Descriptor);
3970 
3971 	array = callocOrExit(arrayLength, Descriptor);
3972 	for (index = node->length - 1; index >= 0; index--) {
3973 		nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
3974 		writeNucleotideInDescriptor(nucleotide, array, index + shift);
3975 	}
3976 
3977 	free(twin->descriptor);
3978 	twin->descriptor = array;
3979 }
3980 
doubleStrandedGraph(Graph * graph)3981 boolean doubleStrandedGraph(Graph * graph) {
3982 	return graph->double_stranded;
3983 }
3984