1 /*
2 Copyright 2010 Daniel Zerbino (zerbino@ebi.ac.uk)
3 
4     This file is part of Velvet.
5 
6     Velvet is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     Velvet is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with Velvet; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <limits.h>
25 
26 #include "globals.h"
27 #include "graph.h"
28 #include "passageMarker.h"
29 #include "readSet.h"
30 #include "tightString.h"
31 #include "recycleBin.h"
32 #include "utility.h"
33 #include "kmer.h"
34 
35 // Internal structure used to mark the ends of an Annotation
36 struct kmerOccurence_st {
37 	IDnum position;
38 	IDnum nodeID;
39 	IDnum offset;
40 	Kmer kmer;
41 } ATTRIBUTE_PACKED;
42 
43 struct kmerOccurenceTable_st {
44 	KmerOccurence *kmerTable;
45 	KmerOccurence * kmerOccurencePtr;
46 	IDnum *accelerationTable;
47 	IDnum kmerTableSize;
48 	IDnum kmerOccurenceIndex;
49 	short int accelerationShift;
50 	short int accelerationBits;
51 };
52 
compareKmerOccurences(void const * A,void const * B)53 int compareKmerOccurences(void const *A, void const *B)
54 {
55 	KmerOccurence *a = (KmerOccurence *) A;
56 	KmerOccurence *b = (KmerOccurence *) B;
57 	return compareKmers(&(a->kmer), &(b->kmer));
58 }
59 
keyInAccelerationTable(Kmer * kmer,KmerOccurenceTable * table)60 static inline KmerKey keyInAccelerationTable(Kmer * kmer,
61 					  KmerOccurenceTable * table)
62 {
63 	return getKmerKey(kmer);
64 }
65 
findKmerInKmerOccurenceTable(Kmer * kmer,KmerOccurenceTable * table)66 KmerOccurence *findKmerInKmerOccurenceTable(Kmer * kmer,
67 						     KmerOccurenceTable *
68 						     table)
69 {
70 	KmerOccurence *array = table->kmerTable;
71 	KmerKey key = keyInAccelerationTable(kmer, table);
72 	Coordinate leftIndex, rightIndex, middleIndex;
73 	int diff;
74 
75 	if (table->accelerationTable != NULL) {
76 		leftIndex = table->accelerationTable[key];
77 		rightIndex = table->accelerationTable[key + 1];
78 	} else {
79 		leftIndex = 0;
80 		rightIndex = table->kmerTableSize;
81 	}
82 
83 	while (true) {
84 		middleIndex = (rightIndex + leftIndex) / 2;
85 
86 		if (leftIndex >= rightIndex)
87 			return NULL;
88 
89 		diff = compareKmers(&(array[middleIndex].kmer), kmer);
90 
91 		if (diff == 0) {
92 			middleIndex -= array[middleIndex].offset;
93 			return &(array[middleIndex]);
94 		} else if (leftIndex == middleIndex)
95 			return NULL;
96 		else if (diff > 0)
97 			rightIndex = middleIndex;
98 		else
99 			leftIndex = middleIndex;
100 	}
101 }
102 
newKmerOccurenceTable(short int accelerationBits,int wordLength)103 KmerOccurenceTable * newKmerOccurenceTable(short int accelerationBits, int wordLength) {
104 	KmerOccurenceTable * kmerTable = mallocOrExit(1, KmerOccurenceTable);
105 
106 	if (accelerationBits > 2 * wordLength)
107 		accelerationBits = 2 * wordLength;
108 
109 	if (accelerationBits > 32)
110 		accelerationBits = 32;
111 
112 	if (accelerationBits > 0) {
113 		resetKeyFilter(accelerationBits);
114 		kmerTable->accelerationBits = accelerationBits;
115 		kmerTable->accelerationTable =
116 		    callocOrExit((((size_t) 1) << accelerationBits) + 1,
117 			   IDnum);
118 		kmerTable->accelerationShift =
119 		    (short int) 2 *wordLength - accelerationBits;
120 	} else {
121 		kmerTable->accelerationBits = 0;
122 		kmerTable->accelerationTable = NULL;
123 		kmerTable->accelerationShift = 0;
124 	}
125 
126 	return kmerTable;
127 }
128 
allocateKmerOccurences(IDnum kmerCount,KmerOccurenceTable * table)129 void allocateKmerOccurences(IDnum kmerCount, KmerOccurenceTable * table) {
130 	KmerOccurence * kmerOccurences = callocOrExit(kmerCount + 1, KmerOccurence);
131 	kmerOccurences[kmerCount].position = -1;
132 	kmerOccurences[kmerCount].nodeID = 0;
133 
134 	table->kmerTable = kmerOccurences;
135 	table->kmerTableSize = kmerCount;
136 	table->kmerOccurencePtr = kmerOccurences;
137 	table->kmerOccurenceIndex = 0;
138 }
139 
recordKmerOccurence(Kmer * kmer,IDnum nodeID,Coordinate position,KmerOccurenceTable * table)140 void recordKmerOccurence(Kmer * kmer, IDnum nodeID, Coordinate position, KmerOccurenceTable * table) {
141 	KmerOccurence * kmerOccurence;
142 
143 #ifdef _OPENMP
144 	#pragma omp critical
145 #endif
146 	{
147 		kmerOccurence = table->kmerOccurencePtr++;
148 		table->kmerOccurenceIndex++;
149 	}
150 
151 	copyKmers(&(kmerOccurence->kmer), kmer);
152 	kmerOccurence->nodeID = nodeID;
153 	kmerOccurence->position = position;
154 
155 }
156 
sortKmerOccurenceTable(KmerOccurenceTable * table)157 void sortKmerOccurenceTable(KmerOccurenceTable * table) {
158 	KmerKey lastHeader = 0;
159 	KmerKey header;
160 	IDnum *accelPtr = NULL;
161 	IDnum kmerOccurenceIndex;
162 	KmerOccurence * kmerOccurence, * previous;
163 
164 	velvetLog("Sorting kmer occurence table ... \n");
165 
166 	qsort(table->kmerTable, table->kmerTableSize, sizeof(KmerOccurence),
167 	      compareKmerOccurences);
168 
169 	velvetLog("Sorting done.\n");
170 
171 	velvetLog("Computing acceleration table... \n");
172 
173 	// Fill up acceleration table
174 	if (table->accelerationTable != NULL) {
175 		accelPtr = table->accelerationTable;
176 		*accelPtr = (IDnum) 0;
177 		for (kmerOccurenceIndex = 0;
178 		     kmerOccurenceIndex < table->kmerTableSize;
179 		     kmerOccurenceIndex++) {
180 			header =
181 			    keyInAccelerationTable(&table->kmerTable
182 						   [kmerOccurenceIndex].
183 						   kmer, table);
184 			while (lastHeader < header) {
185 				lastHeader++;
186 				accelPtr++;
187 				*accelPtr = kmerOccurenceIndex;
188 			}
189 		}
190 
191 		while (lastHeader < (KmerKey) 1 << table->accelerationBits) {
192 			lastHeader++;
193 			accelPtr++;
194 			*accelPtr = table->kmerTableSize;
195 		}
196 	}
197 
198 	velvetLog("Computing offsets... \n");
199 
200 	// Compute offsets
201 	kmerOccurence = table->kmerTable;
202 	previous = NULL;
203 	for (kmerOccurenceIndex = 1;
204 	     kmerOccurenceIndex < table->kmerTableSize;
205 	     kmerOccurenceIndex++) {
206 		if (previous && compareKmerOccurences(kmerOccurence, previous) == 0)
207 			kmerOccurence->offset = previous->offset + 1;
208 		previous = kmerOccurence;
209 		kmerOccurence++;
210 	}
211 }
212 
getNextKmerOccurence(KmerOccurence * current)213 KmerOccurence * getNextKmerOccurence(KmerOccurence * current) {
214 	register KmerOccurence * next = current + 1;
215 	if (next->nodeID == 0 || next->offset == 0)
216 		return NULL;
217 	else
218 		return next;
219 }
220 
destroyKmerOccurenceTable(KmerOccurenceTable * kmerTable)221 void destroyKmerOccurenceTable(KmerOccurenceTable * kmerTable) {
222 	if (kmerTable == NULL)
223 		return;
224 
225 	free(kmerTable->kmerTable);
226 	free(kmerTable->accelerationTable);
227 	free(kmerTable);
228 }
229 
getKmerOccurenceNodeID(KmerOccurence * occurence)230 IDnum getKmerOccurenceNodeID(KmerOccurence * occurence) {
231 	return occurence->nodeID;
232 }
233 
getKmerOccurencePosition(KmerOccurence * occurence)234 Coordinate getKmerOccurencePosition(KmerOccurence * occurence) {
235 	return occurence->position;
236 }
237