1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3 
4     This file is part of Velvet.
5 
6     Velvet is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     Velvet is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with Velvet; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 
26 #include "globals.h"
27 #include "preGraph.h"
28 #include "recycleBin.h"
29 #include "roadMap.h"
30 #include "readSet.h"
31 #include "concatenatedPreGraph.h"
32 #include "utility.h"
33 #include "kmer.h"
34 
35 #define ADENINE 0
36 #define CYTOSINE 1
37 #define GUANINE 2
38 #define THYMINE 3
39 
40 // Internal structure used to mark the ends of an Annotation
41 struct insertionMarker_st {
42 	Annotation *annot;
43 	boolean isStart;
44 };
45 
getInsertionMarkerPosition(InsertionMarker * marker)46 Coordinate getInsertionMarkerPosition(InsertionMarker * marker)
47 {
48 	if (marker->isStart)
49 		return getStart(marker->annot);
50 	else
51 		return getFinish(marker->annot);
52 }
53 
compareInsertionMarkers(const void * A,const void * B)54 int compareInsertionMarkers(const void *A, const void *B)
55 {
56 	Coordinate Apos =
57 	    getInsertionMarkerPosition((InsertionMarker *) A);
58 	Coordinate Bpos =
59 	    getInsertionMarkerPosition((InsertionMarker *) B);
60 
61 	if (Apos < Bpos)
62 		return -1;
63 	else if (Apos == Bpos)
64 		return 0;
65 	else
66 		return 1;
67 }
68 
69 // Applies mergeSort to each insertion marker list (in order of position)
70 static void
orderInsertionMarkers(InsertionMarker ** insMarkers,IDnum * markerCounters,RoadMapArray * rdmaps)71 orderInsertionMarkers(InsertionMarker ** insMarkers,
72 		      IDnum * markerCounters, RoadMapArray * rdmaps)
73 {
74 	IDnum sequenceIndex;
75 	IDnum sequenceCounter = rdmaps->length;
76 
77 	puts("Ordering insertion markers");
78 	for (sequenceIndex = 1; sequenceIndex <= sequenceCounter;
79 	     sequenceIndex++) {
80 		qsort(insMarkers[sequenceIndex],
81 		      markerCounters[sequenceIndex],
82 		      sizeof(InsertionMarker), compareInsertionMarkers);
83 	}
84 }
85 
86 // Creates insertion marker lists
87 static void
setInsertionMarkers(RoadMapArray * rdmaps,IDnum * markerCounters,InsertionMarker ** veryLastMarker,InsertionMarker ** insertionMarkers)88 setInsertionMarkers(RoadMapArray * rdmaps,
89 		    IDnum * markerCounters,
90 		    InsertionMarker ** veryLastMarker,
91 		    InsertionMarker ** insertionMarkers)
92 {
93 	IDnum sequenceCounter = rdmaps->length;
94 	IDnum sequenceIndex, sequenceIndex2;
95 	IDnum totalCount = 0;
96 	RoadMap *rdmap;
97 	Annotation *annot = rdmaps->annotations;
98 	InsertionMarker *nextMarker, *newMarker;
99 	IDnum annotIndex, lastAnnotIndex;
100 	InsertionMarker **insMarkers =
101 	    callocOrExit(rdmaps->length + 1, InsertionMarker *);
102 	// Counting insertion markers
103 	for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
104 	     sequenceIndex++) {
105 		//printf("Going through sequence %d\n", sequenceIndex);
106 		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
107 		lastAnnotIndex = getAnnotationCount(rdmap);
108 
109 		// Set insertion markers in previous sequences :
110 
111 		for (annotIndex = 0; annotIndex < lastAnnotIndex;
112 		     annotIndex++) {
113 			if (getAnnotSequenceID(annot) > 0) {
114 				markerCounters[getAnnotSequenceID(annot)]
115 				    += 2;
116 			} else {
117 				markerCounters[-getAnnotSequenceID(annot)]
118 				    += 2;
119 			}
120 			totalCount += 2;
121 			annot = getNextAnnotation(annot);
122 		}
123 	}
124 
125 	// Allocating space
126 	*insertionMarkers = callocOrExit(totalCount, InsertionMarker);
127 	*veryLastMarker = *insertionMarkers + totalCount;
128 
129 	// Pointing each node to its space
130 	nextMarker = *insertionMarkers;
131 	for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
132 	     sequenceIndex++) {
133 		insMarkers[sequenceIndex] = nextMarker;
134 		nextMarker = nextMarker + markerCounters[sequenceIndex];
135 		markerCounters[sequenceIndex] = 0;
136 	}
137 
138 	// Filling up space with data
139 	annot = rdmaps->annotations;
140 	for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
141 	     sequenceIndex++) {
142 		//printf("Going through sequence %d\n", sequenceIndex);
143 		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
144 		lastAnnotIndex = getAnnotationCount(rdmap);
145 
146 		// Set insertion markers in previous sequences :
147 
148 		for (annotIndex = 0; annotIndex < lastAnnotIndex;
149 		     annotIndex++) {
150 			sequenceIndex2 = getAnnotSequenceID(annot);
151 			if (sequenceIndex2 > 0) {
152 				newMarker =
153 				    insMarkers[sequenceIndex2] +
154 				    (markerCounters[sequenceIndex2])++;
155 				newMarker->annot = annot;
156 				newMarker->isStart = true;
157 
158 				newMarker =
159 				    insMarkers[sequenceIndex2] +
160 				    (markerCounters[sequenceIndex2])++;
161 				newMarker->annot = annot;
162 				newMarker->isStart = false;
163 			} else {
164 				incrementAnnotationCoordinates(annot);
165 
166 				newMarker =
167 				    insMarkers[-sequenceIndex2] +
168 				    (markerCounters[-sequenceIndex2])++;
169 				newMarker->annot = annot;
170 				newMarker->isStart = true;
171 
172 				newMarker =
173 				    insMarkers[-sequenceIndex2] +
174 				    (markerCounters[-sequenceIndex2])++;
175 				newMarker->annot = annot;
176 				newMarker->isStart = false;
177 			}
178 			annot = getNextAnnotation(annot);
179 		}
180 	}
181 
182 	orderInsertionMarkers(insMarkers, markerCounters, rdmaps);
183 	free(insMarkers);
184 }
185 
186 // Counts how many preNodes are to be created to allocate appropriate memory
187 static void
countPreNodes(RoadMapArray * rdmaps,PreGraph * preGraph,IDnum * markerCounters,InsertionMarker * insertionMarkers,InsertionMarker * veryLastMarker)188 countPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
189 	      IDnum * markerCounters, InsertionMarker * insertionMarkers,
190 	      InsertionMarker * veryLastMarker)
191 {
192 	Annotation *annot = rdmaps->annotations;
193 	InsertionMarker *currentMarker = insertionMarkers;
194 	IDnum markerIndex, lastMarkerIndex;
195 	IDnum sequenceIndex;
196 	Coordinate currentPosition, nextStop;
197 	IDnum preNodeCounter = 0;
198 	RoadMap *rdmap;
199 	IDnum annotIndex, lastAnnotIndex;
200 
201 	// Now that we have read all of the annotations, we go on to create the preNodes and tie them up
202 	for (sequenceIndex = 1;
203 	     sequenceIndex <= sequenceCount_pg(preGraph);
204 	     sequenceIndex++) {
205 		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
206 		annotIndex = 0;
207 		lastAnnotIndex = getAnnotationCount(rdmap);
208 		markerIndex = 0;
209 		lastMarkerIndex = markerCounters[sequenceIndex];
210 		currentPosition = 0;
211 
212 
213 		while (annotIndex < lastAnnotIndex) {
214 			if (markerIndex == lastMarkerIndex
215 			    || getPosition(annot) <=
216 			    getInsertionMarkerPosition(currentMarker))
217 				nextStop = getPosition(annot);
218 			else
219 				nextStop =
220 				    getInsertionMarkerPosition
221 				    (currentMarker);
222 
223 			if (currentPosition != nextStop) {
224 				preNodeCounter++;
225 				currentPosition = nextStop;
226 			}
227 
228 			while (markerIndex < lastMarkerIndex
229 			       && getInsertionMarkerPosition(currentMarker)
230 			       == currentPosition) {
231 				currentMarker++;
232 				markerIndex++;
233 			}
234 
235 			while (annotIndex < lastAnnotIndex
236 			       && getPosition(annot) == currentPosition) {
237 				annot = getNextAnnotation(annot);
238 				annotIndex++;
239 			}
240 
241 		}
242 
243 		while (markerIndex < lastMarkerIndex) {
244 			if (currentPosition ==
245 			    getInsertionMarkerPosition(currentMarker)) {
246 				currentMarker++;
247 				markerIndex++;
248 			} else {
249 				preNodeCounter++;
250 				currentPosition =
251 				    getInsertionMarkerPosition
252 				    (currentMarker);
253 			}
254 		}
255 	}
256 
257 	allocatePreNodeSpace_pg(preGraph, preNodeCounter);
258 }
259 
convertInsertionMarkers(InsertionMarker * insertionMarkers,InsertionMarker * veryLastMarker,IDnum * chains)260 static void convertInsertionMarkers(InsertionMarker * insertionMarkers,
261 				    InsertionMarker * veryLastMarker,
262 				    IDnum * chains)
263 {
264 	InsertionMarker *marker;
265 	Annotation *annot;
266 
267 	for (marker = insertionMarkers; marker != veryLastMarker; marker++) {
268 		annot = marker->annot;
269 
270 		if (getAnnotSequenceID(annot) > 0) {
271 			if (marker->isStart) {
272 				if (getStartID(annot) == 0)
273 					setStartID(annot,
274 						   chains
275 						   [getAnnotSequenceID
276 						    (annot)]);
277 				else
278 					setStartID(annot,
279 						   getStartID(annot) + 1);
280 			}
281 		} else {
282 			if (marker->isStart)
283 				setStartID(annot, -getStartID(annot));
284 			else {
285 				if (getFinishID(annot) == 0)
286 					setFinishID(annot,
287 						    -chains
288 						    [-getAnnotSequenceID
289 						     (annot)]);
290 				else
291 					setFinishID(annot,
292 						    -getFinishID(annot) -
293 						    1);
294 			}
295 		}
296 	}
297 
298 	free(insertionMarkers);
299 }
300 
convertMarker(InsertionMarker * marker,IDnum nodeID)301 static void convertMarker(InsertionMarker * marker, IDnum nodeID)
302 {
303 	if (marker->isStart)
304 		setStartID(marker->annot, nodeID);
305 	else
306 		setFinishID(marker->annot, nodeID);
307 }
308 
309 // Creates the preNode using insertion marker and annotation lists for each sequence
310 static void
createPreNodes(RoadMapArray * rdmaps,PreGraph * preGraph,IDnum * markerCounters,InsertionMarker * insertionMarkers,InsertionMarker * veryLastMarker,IDnum * chains,char * sequenceFilename,int WORDLENGTH)311 createPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
312 	       IDnum * markerCounters, InsertionMarker * insertionMarkers,
313 	       InsertionMarker * veryLastMarker, IDnum * chains,
314 	       char *sequenceFilename, int WORDLENGTH)
315 {
316 	Annotation *annot = rdmaps->annotations;
317 	IDnum latestPreNodeID;
318 	InsertionMarker *currentMarker = insertionMarkers;
319 	IDnum sequenceIndex;
320 	Coordinate currentPosition, nextStop;
321 	IDnum preNodeCounter = 1;
322 	FILE *file = fopen(sequenceFilename, "r");
323 	char line[50000];
324 	int lineLength = 50000;
325 	Coordinate readIndex;
326 	boolean tooShort;
327 	Kmer initialKmer;
328 	char c;
329 	RoadMap *rdmap;
330 	IDnum annotIndex, lastAnnotIndex;
331 	IDnum markerIndex, lastMarkerIndex;
332 
333 	if (file == NULL)
334 		exitErrorf(EXIT_FAILURE, true, "Could not read %s", sequenceFilename);
335 	// Reading sequence descriptor in first line
336 	if (!fgets(line, lineLength, file))
337 		exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
338 
339 	// Now that we have read all of the annotations, we go on to create the preNodes and tie them up
340 	for (sequenceIndex = 1;
341 	     sequenceIndex <= sequenceCount_pg(preGraph);
342 	     sequenceIndex++) {
343 		if (sequenceIndex % 100000 == 0)
344 			printf("Sequence %d / %d\n", sequenceIndex,
345 			       sequenceCount_pg(preGraph));
346 
347 		while (line[0] != '>')
348 			if (!fgets(line, lineLength, file))
349 				exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
350 
351 		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
352 		annotIndex = 0;
353 		lastAnnotIndex = getAnnotationCount(rdmap);
354 		markerIndex = 0;
355 		lastMarkerIndex = markerCounters[sequenceIndex];
356 		currentPosition = 0;
357 
358 		// Reading first (k-1) nucleotides
359 		tooShort = false;
360 		clearKmer(&initialKmer);
361 		//printf("Initial kmer: ");
362 		for (readIndex = 0; readIndex < WORDLENGTH - 1;
363 		     readIndex++) {
364 			if (!isalpha((c = getc(file)))) {
365 				if (c == '>') {
366 					ungetc(c, file);
367 					tooShort = true;
368 					break;
369 				} else {
370 					continue;
371 				}
372 			}
373 			//printf("%c", c);
374 			switch (c) {
375 			case 'A':
376 				pushNucleotide(&initialKmer, ADENINE);
377 				break;
378 			case 'C':
379 				pushNucleotide(&initialKmer, CYTOSINE);
380 				break;
381 			case 'G':
382 				pushNucleotide(&initialKmer, GUANINE);
383 				break;
384 			case 'T':
385 				pushNucleotide(&initialKmer, THYMINE);
386 				break;
387 			default:
388 				printf
389 				    ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n");
390 				fflush(stdout);
391 				abort();
392 			}
393 		}
394 		//puts("");
395 
396 		if (tooShort) {
397 			//printf("Skipping short read.. %d\n", sequenceIndex);
398 			chains[sequenceIndex] = preNodeCounter;
399 			if (!fgets(line, lineLength, file))
400 				exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
401 			continue;
402 		}
403 
404 		latestPreNodeID = 0;
405 
406 		while (annotIndex < lastAnnotIndex) {
407 			if (markerIndex == lastMarkerIndex
408 			    || getPosition(annot) <=
409 			    getInsertionMarkerPosition(currentMarker))
410 				nextStop = getPosition(annot);
411 			else {
412 				nextStop =
413 				    getInsertionMarkerPosition
414 				    (currentMarker);
415 			}
416 
417 			if (currentPosition != nextStop) {
418 				addPreNodeToPreGraph_pg(preGraph,
419 							currentPosition,
420 							nextStop,
421 							file,
422 							&initialKmer,
423 							preNodeCounter);
424 				if (latestPreNodeID == 0) {
425 					chains[sequenceIndex] =
426 					    preNodeCounter;
427 				}
428 				latestPreNodeID = preNodeCounter++;
429 				currentPosition = nextStop;
430 			}
431 
432 			while (markerIndex < lastMarkerIndex
433 			       && getInsertionMarkerPosition(currentMarker)
434 			       == nextStop) {
435 				convertMarker(currentMarker,
436 					      latestPreNodeID);
437 				currentMarker++;
438 				markerIndex++;
439 			}
440 
441 			while (annotIndex < lastAnnotIndex
442 			       && getPosition(annot) == nextStop) {
443 				for (readIndex = 0;
444 				     readIndex <
445 				     getAnnotationLength(annot);
446 				     readIndex++) {
447 					c = getc(file);
448 					while (!isalpha(c))
449 						c = getc(file);
450 
451 					//printf("(%c)", c);
452 					switch (c) {
453 					case 'A':
454 						pushNucleotide(&initialKmer, ADENINE);
455 						break;
456 					case 'C':
457 						pushNucleotide(&initialKmer, CYTOSINE);
458 						break;
459 					case 'G':
460 						pushNucleotide(&initialKmer, GUANINE);
461 						break;
462 					case 'T':
463 						pushNucleotide(&initialKmer, THYMINE);
464 						break;
465 					default:
466 						printf
467 						    ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n");
468 						fflush(stdout);
469 						exit(1);
470 					}
471 				}
472 
473 				annot = getNextAnnotation(annot);
474 				annotIndex++;
475 			}
476 
477 		}
478 
479 		while (markerIndex < lastMarkerIndex) {
480 			if (currentPosition ==
481 			    getInsertionMarkerPosition(currentMarker)) {
482 				convertMarker(currentMarker,
483 					      latestPreNodeID);
484 				currentMarker++;
485 				markerIndex++;
486 			} else {
487 				nextStop =
488 				    getInsertionMarkerPosition
489 				    (currentMarker);
490 				addPreNodeToPreGraph_pg(preGraph,
491 							currentPosition,
492 							nextStop, file,
493 							&initialKmer,
494 							preNodeCounter);
495 				if (latestPreNodeID == 0)
496 					chains[sequenceIndex] =
497 					    preNodeCounter;
498 				latestPreNodeID = preNodeCounter++;
499 				currentPosition =
500 				    getInsertionMarkerPosition
501 				    (currentMarker);
502 			}
503 		}
504 
505 		// End of sequence
506 		if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph))
507 			exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
508 		//puts(" ");
509 
510 		if (latestPreNodeID == 0)
511 			chains[sequenceIndex] = preNodeCounter;
512 	}
513 
514 	free(markerCounters);
515 	fclose(file);
516 
517 }
518 
connectPreNodeToTheNext(IDnum * currentPreNodeID,IDnum nextPreNodeID,Coordinate * currentPosition,PassageMarker ** latestPassageMarker,IDnum sequenceIndex,PreGraph * preGraph)519 static void connectPreNodeToTheNext(IDnum * currentPreNodeID,
520 				    IDnum nextPreNodeID,
521 				    Coordinate * currentPosition,
522 				    PassageMarker ** latestPassageMarker,
523 				    IDnum sequenceIndex,
524 				    PreGraph * preGraph)
525 {
526 	if (nextPreNodeID == 0)
527 		return;
528 
529 	if (*currentPreNodeID != 0)
530 		createPreArc_pg(*currentPreNodeID, nextPreNodeID,
531 				preGraph);
532 
533 	*currentPreNodeID = nextPreNodeID;
534 
535 	*currentPosition +=
536 	    getPreNodeLength_pg(*currentPreNodeID, preGraph);
537 
538 }
539 
chooseNextInternalPreNode(IDnum currentPreNodeID,IDnum sequenceIndex,PreGraph * preGraph,IDnum * chains)540 static IDnum chooseNextInternalPreNode(IDnum currentPreNodeID,
541 				       IDnum sequenceIndex,
542 				       PreGraph * preGraph, IDnum * chains)
543 {
544 	if (currentPreNodeID >= preNodeCount_pg(preGraph))
545 		return 0;
546 	if (sequenceIndex >= sequenceCount_pg(preGraph))
547 		return currentPreNodeID + 1;
548 	if (currentPreNodeID + 1 < chains[sequenceIndex + 1])
549 		return currentPreNodeID + 1;
550 	return 0;
551 }
552 
connectAnnotation(IDnum * currentPreNodeID,Annotation * annot,Coordinate * currentPosition,PassageMarker ** latestPassageMarker,IDnum sequenceIndex,PreGraph * preGraph)553 static void connectAnnotation(IDnum * currentPreNodeID, Annotation * annot,
554 			      Coordinate * currentPosition,
555 			      PassageMarker ** latestPassageMarker,
556 			      IDnum sequenceIndex, PreGraph * preGraph)
557 {
558 	IDnum nextPreNodeID = getStartID(annot);
559 
560 	connectPreNodeToTheNext(currentPreNodeID, nextPreNodeID,
561 				currentPosition, latestPassageMarker,
562 				sequenceIndex, preGraph);
563 
564 	while (*currentPreNodeID != getFinishID(annot)) {
565 		nextPreNodeID = (*currentPreNodeID) + 1;
566 
567 		connectPreNodeToTheNext(currentPreNodeID, nextPreNodeID,
568 					currentPosition,
569 					latestPassageMarker, sequenceIndex,
570 					preGraph);
571 	}
572 }
573 
574 // Threads each sequences and creates preArcs according to road map indications
connectPreNodes(RoadMapArray * rdmaps,PreGraph * preGraph,IDnum * chains)575 static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
576 			    IDnum * chains)
577 {
578 	Coordinate currentPosition, currentInternalPosition;
579 	IDnum sequenceIndex;
580 	Annotation *annot = rdmaps->annotations;
581 	IDnum currentPreNodeID, nextInternalPreNodeID;
582 	PassageMarker *latestPassageMarker;
583 	RoadMap *rdmap;
584 	IDnum annotIndex, lastAnnotIndex;
585 
586 	for (sequenceIndex = 1;
587 	     sequenceIndex <= sequenceCount_pg(preGraph);
588 	     sequenceIndex++) {
589 
590 		if (sequenceIndex % 100000 == 0)
591 			printf("Connecting %d / %d\n", sequenceIndex,
592 			       sequenceCount_pg(preGraph));
593 
594 		rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
595 		annotIndex = 0;
596 		lastAnnotIndex = getAnnotationCount(rdmap);
597 		nextInternalPreNodeID = chooseNextInternalPreNode
598 		    (chains[sequenceIndex] - 1, sequenceIndex,
599 		     preGraph, chains);
600 
601 		currentPosition = 0;
602 		currentInternalPosition = 0;
603 		currentPreNodeID = 0;
604 		latestPassageMarker = NULL;
605 		// Recursion up to last annotation
606 		while (annotIndex < lastAnnotIndex
607 		       || nextInternalPreNodeID != 0) {
608 			if (annotIndex == lastAnnotIndex
609 			    || (nextInternalPreNodeID != 0
610 				&& currentInternalPosition <
611 				getPosition(annot))) {
612 				connectPreNodeToTheNext(&currentPreNodeID,
613 							nextInternalPreNodeID,
614 							&currentPosition,
615 							&latestPassageMarker,
616 							sequenceIndex,
617 							preGraph);
618 				nextInternalPreNodeID =
619 				    chooseNextInternalPreNode
620 				    (currentPreNodeID, sequenceIndex,
621 				     preGraph, chains);
622 				currentInternalPosition +=
623 				    getPreNodeLength_pg(currentPreNodeID,
624 							preGraph);
625 
626 			} else {
627 				connectAnnotation(&currentPreNodeID, annot,
628 						  &currentPosition,
629 						  &latestPassageMarker,
630 						  sequenceIndex, preGraph);
631 				annot = getNextAnnotation(annot);
632 				annotIndex++;
633 			}
634 		}
635 	}
636 }
637 
638 // Post construction memory deallocation routine (of sorts, could certainly be optimized)
639 static void
cleanUpMemory(PreGraph * preGraph,RoadMapArray * rdmaps,IDnum * chains)640 cleanUpMemory(PreGraph * preGraph, RoadMapArray * rdmaps, IDnum * chains)
641 {
642 	// Killing off roadmaps
643 	free(rdmaps->annotations);
644 	free(rdmaps->array);
645 	free(rdmaps);
646 
647 	// Finishing off the chain markers
648 	free(chains);
649 }
650 
651 // The full monty, wrapped up in one function
newPreGraph_pg(RoadMapArray * rdmapArray,char * sequenceFilename)652 PreGraph *newPreGraph_pg(RoadMapArray * rdmapArray, char *sequenceFilename)
653 {
654 	int WORDLENGTH = rdmapArray->WORDLENGTH;
655 	IDnum sequenceCount = rdmapArray->length;
656 	IDnum *markerCounters = callocOrExit(sequenceCount + 1, IDnum);
657 	IDnum *chains = callocOrExit(sequenceCount + 1, IDnum);
658 	InsertionMarker *insertionMarkers;
659 	InsertionMarker *veryLastMarker;
660 
661 	PreGraph *preGraph =
662 	    emptyPreGraph_pg(sequenceCount, rdmapArray->WORDLENGTH, rdmapArray->double_strand);
663 
664 	puts("Creating insertion markers");
665 	setInsertionMarkers(rdmapArray, markerCounters, &veryLastMarker,
666 			    &insertionMarkers);
667 
668 	puts("Counting preNodes");
669 	countPreNodes(rdmapArray, preGraph, markerCounters,
670 		      insertionMarkers, veryLastMarker);
671 
672 	printf("%d preNodes counted, creating them now\n",
673 	       preNodeCount_pg(preGraph));
674 	createPreNodes(rdmapArray, preGraph, markerCounters,
675 		       insertionMarkers, veryLastMarker, chains,
676 		       sequenceFilename, WORDLENGTH);
677 
678 	puts("Adjusting marker info...");
679 	convertInsertionMarkers(insertionMarkers, veryLastMarker, chains);
680 
681 	puts("Connecting preNodes");
682 	connectPreNodes(rdmapArray, preGraph, chains);
683 
684 	puts("Cleaning up memory");
685 	cleanUpMemory(preGraph, rdmapArray, chains);
686 	puts("Concatenating preGraph");
687 	concatenatePreGraph_pg(preGraph);
688 	puts("Done creating preGraph");
689 
690 	return preGraph;
691 }
692