1 /*
2 Copyright 2007, 2008, 2009 Daniel Zerbino (zerbino@ebi.ac.uk)
3 
4     This file is part of Velvet.
5 
6     Velvet is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     Velvet is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with Velvet; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 
24 #include "globals.h"
25 #include "kmer.h"
26 #include "utility.h"
27 
28 static const uint64_t longLongLeftFilter = (uint64_t) 3 << 62;
29 static const uint32_t longLeftFilter = (uint32_t) 3 << 30;
30 static const uint16_t intLeftFilter = (uint16_t) 3 << 14;
31 static const uint8_t charLeftFilter = (uint8_t) 3 << 6;
32 
33 static uint64_t longLongWordFilter = (uint64_t) ((int64_t) -1);
34 static uint32_t longWordFilter = (uint32_t) ((int32_t) -1);
35 static uint16_t intWordFilter = (uint16_t) ((int16_t) -1);
36 static uint8_t charWordFilter = (uint8_t) ((int8_t) -1);
37 
38 #define UNDEFINED 0
39 #define CHARS 1
40 #define INTS 2
41 #define LONGS 3
42 #define LONGLONGS 4
43 static int kmerFilterIndex = UNDEFINED;
44 static int kmerFilterOffset = 0;
45 static int longLongKmerFilterIndex = KMER_LONGLONGS;
46 static uint64_t longLongKmerFilter = (uint64_t) ((int64_t) -1);
47 
resetWordFilter(int wordLength)48 void resetWordFilter(int wordLength) {
49 	int kmer_bit_size = wordLength * 2;
50 	int i;
51 
52 	if (wordLength > MAXKMERLENGTH)
53 		exitErrorf(EXIT_FAILURE, true, "Word length %i greater than max allowed value (%i).\nRecompile Velvet to deal with this word length.", wordLength, MAXKMERLENGTH);
54 
55 #if KMER_LONGLONGS
56 	for (i = 0; i < KMER_LONGLONGS; i++) {
57 		if (kmer_bit_size > 64) {
58 			kmer_bit_size -= 64;
59 			continue;
60 		} else if (kmer_bit_size == 64) {
61 			longLongKmerFilterIndex = i;
62 			longLongKmerFilter = longLongWordFilter;
63 			kmerFilterIndex = LONGLONGS;
64 			kmerFilterOffset = kmer_bit_size - 2;
65 			longWordFilter = 0;
66 			intWordFilter = 0;
67 			charWordFilter = 0;
68 			return;
69 		} else {
70 			longLongKmerFilterIndex = i;
71 			longLongKmerFilter = (((uint64_t) 1) << kmer_bit_size) - 1;
72 			kmerFilterIndex = LONGLONGS;
73 			kmerFilterOffset = kmer_bit_size - 2;
74 			longWordFilter = 0;
75 			intWordFilter = 0;
76 			charWordFilter = 0;
77 			return;
78 		}
79 	}
80 #endif
81 #if KMER_LONGS
82 	if (kmer_bit_size > 32)
83 		kmer_bit_size -= 32;
84 	else if (kmer_bit_size == 32) {
85 		kmerFilterIndex = LONGS;
86 		kmerFilterOffset = kmer_bit_size - 2;
87 		intWordFilter = 0;
88 		charWordFilter = 0;
89 		return;
90 	} else {
91 		longWordFilter = (((uint32_t) 1) << kmer_bit_size) - 1;
92 		kmerFilterIndex = LONGS;
93 		kmerFilterOffset = kmer_bit_size - 2;
94 		intWordFilter = 0;
95 		charWordFilter = 0;
96 		return;
97 	}
98 #endif
99 #if KMER_INTS
100 	if (kmer_bit_size > 16)
101 		kmer_bit_size -= 16;
102 	else if (kmer_bit_size == 16) {
103 		kmerFilterIndex = INTS;
104 		kmerFilterOffset = kmer_bit_size - 2;
105 		charWordFilter = 0;
106 		return;
107 	} else {
108 		intWordFilter = (((uint16_t) 1) << kmer_bit_size) - 1;
109 		kmerFilterIndex = INTS;
110 		kmerFilterOffset = kmer_bit_size - 2;
111 		charWordFilter = 0;
112 		return;
113 	}
114 
115 #endif
116 #if KMER_CHARS
117 	if (kmer_bit_size < 8)
118 		charWordFilter = (((uint8_t) 1) << kmer_bit_size) - 1;
119 
120 	kmerFilterIndex = CHARS;
121 	kmerFilterOffset = kmer_bit_size - 2;
122 #endif
123 
124 }
125 
shiftRight(Kmer * kmer)126 static void shiftRight(Kmer * kmer) {
127 	int i;
128 	uint64_t leftBits = 0;
129 	uint64_t rightBits;
130 
131 #if KMER_CHARS
132 
133 #if KMER_INTS | KMER_LONGS | KMER_LONGLONGS
134 	rightBits = kmer->chars & 3;
135 #endif
136 
137 	kmer->chars >>= 2;
138 	kmer->chars += (uint8_t) leftBits;
139 
140 #if KMER_INTS | KMER_LONGS | KMER_LONGLONGS
141 	leftBits = rightBits;
142 #endif
143 #endif
144 
145 #if KMER_INTS
146 
147 #if KMER_LONGS | KMER_LONGLONGS
148 	rightBits = kmer->ints & 3;
149 #endif
150 
151 	leftBits <<= 14;
152 	kmer->ints >>= 2;
153 	kmer->ints += (uint16_t) leftBits;
154 
155 #if KMER_LONGS | KMER_LONGLONGS
156 	leftBits = rightBits;
157 #endif
158 #endif
159 
160 #if KMER_LONGS
161 
162 #if KMER_LONGLONGS
163 	rightBits = kmer->longs & 3;
164 #endif
165 
166 	leftBits <<= 30;
167 	kmer->longs >>= 2;
168 	kmer->longs += (uint32_t) leftBits;
169 
170 #if KMER_LONGLONGS
171 	leftBits = rightBits;
172 #endif
173 #endif
174 
175 #if KMER_LONGLONGS
176 	for (i = KMER_LONGLONGS - 1; i >= 0; i--) {
177 		rightBits = kmer->longlongs[i] & 3;
178 		leftBits <<= 62;
179 		kmer->longlongs[i] >>= 2;
180 		kmer->longlongs[i] += leftBits;
181 		leftBits = rightBits;
182 	}
183 #endif
184 }
185 
copyKmers(Kmer * k1,Kmer * k2)186 void copyKmers(Kmer* k1, Kmer* k2) {
187 	int i;
188 
189 #if KMER_LONGLONGS
190 	for (i = 0; i < KMER_LONGLONGS; i++)
191 		k1->longlongs[i] = k2->longlongs[i];
192 #endif
193 #if KMER_LONGS
194 	k1->longs = k2->longs;
195 #endif
196 #if KMER_INTS
197 	k1->ints = k2->ints;
198 #endif
199 #if KMER_CHARS
200 	k1->chars = k2->chars;
201 #endif
202 }
203 
compareKmers(Kmer * k1,Kmer * k2)204 int compareKmers(Kmer* k1, Kmer* k2) {
205 #if KMER_LONGLONGS
206 	int i;
207 #endif
208 
209 #if KMER_CHARS
210 	if (k1->chars == k2->chars)
211 		;
212 	else if (k1->chars > k2->chars)
213 		return 1;
214 	else
215 		return -1;
216 #endif
217 #if KMER_INTS
218 	if (k1->ints == k2->ints)
219 		;
220 	else if (k1->ints > k2->ints)
221 		return 1;
222 	else
223 		return -1;
224 #endif
225 #if KMER_LONGS
226 	if (k1->longs == k2->longs)
227 		;
228 	else if (k1->longs > k2->longs)
229 		return 1;
230 	else
231 		return -1;
232 #endif
233 #if KMER_LONGLONGS
234 	for (i = KMER_LONGLONGS - 1; i >= 0; i--) {
235 		if (k1->longlongs[i] == k2->longlongs[i])
236 			continue;
237 		else if (k1->longlongs[i] > k2->longlongs[i])
238 			return 1;
239 		else
240 			return -1;
241 	}
242 #endif
243 
244 	return 0;
245 }
246 
clearKmer(Kmer * kmer)247 void clearKmer(Kmer * kmer) {
248 	int i;
249 
250 #if KMER_LONGLONGS
251 	for (i = 0; i < KMER_LONGLONGS; i++)
252 		kmer->longlongs[i] = 0;
253 #endif
254 #if KMER_LONGS
255 	kmer->longs = 0;
256 #endif
257 #if KMER_INTS
258 	kmer->ints = 0;
259 #endif
260 #if KMER_CHARS
261 	kmer->chars = 0;
262 #endif
263 }
264 
printKmer(Kmer * kmer)265 void printKmer(Kmer * kmer) {
266 	int i;
267 
268 #if KMER_CHARS
269 	printf("%hx\t", kmer->chars);
270 #endif
271 #if KMER_INTS
272 	printf("%x\t", kmer->ints);
273 #endif
274 #if KMER_LONGS
275 	printf("%x\t", kmer->longs);
276 #endif
277 #if KMER_LONGLONGS
278 	for (i = KMER_LONGLONGS - 1; i >= 0; i--)
279 		printf("%llx\t", (long long) kmer->longlongs[i]);
280 #endif
281 	puts("");
282 }
283 
testKmers(int argc,char ** argv)284 void testKmers(int argc, char** argv) {
285 	Kmer kmer;
286 	Kmer *k2;
287 	Kmer k4;
288 
289 	k2 = &k4;
290 	int i;
291 
292 	printf("FORMATS %u %u %u %u\n", KMER_CHARS, KMER_INTS, KMER_LONGS, KMER_LONGLONGS);
293 	printf("FILTERS %hx %x %lx %llx\n", (short) charLeftFilter, (int) intLeftFilter, (long) longLeftFilter, (long long) longLongLeftFilter);
294 	printf("FILTERS %hx %x %lx %llx\n", (short) charWordFilter, (int) intWordFilter, (long) longWordFilter, (long long) longLongWordFilter);
295 	printKmer(&kmer);
296 	puts("Clear");
297 	clearKmer(&kmer);
298 	printKmer(&kmer);
299 
300 	puts("Fill up");
301 	for (i = 0; i < MAXKMERLENGTH; i++) {
302 		pushNucleotide(&kmer, ((i + 1)  % 4));
303 		printKmer(&kmer);
304 	}
305 
306 	puts("Shift right");
307 	for (i = 0; i < MAXKMERLENGTH; i++) {
308 		popNucleotide(&kmer);
309 		printKmer(&kmer);
310 	}
311 
312 	puts("Reverse complement");
313 	resetWordFilter(9);
314 	clearKmer(&kmer);
315 	for (i = 0; i < MAXKMERLENGTH; i++) {
316 		reversePushNucleotide(&kmer, ((i + 1)  % 4));
317 		printKmer(&kmer);
318 	}
319 
320 	puts("Copy");
321 	copyKmers(k2, &kmer);
322 	printKmer(k2);
323 	printf("%i\n", compareKmers(k2, &kmer));
324 
325 }
326 
pushNucleotide(Kmer * kmer,Nucleotide nucleotide)327 void pushNucleotide(Kmer * kmer, Nucleotide nucleotide) {
328 	register int i;
329 
330 #if KMER_LONGLONGS
331 	register uint64_t * ptr;
332 #endif
333 #if KMER_LONGLONGS > 1 | KMER_LONGS | KMER_INTS | KMER_CHARS
334 	uint64_t leftBits;
335 #endif
336 	uint64_t rightBits = 0;
337 
338 #if KMER_LONGLONGS
339 	ptr = kmer->longlongs;
340 
341 #if KMER_LONGLONGS > 1
342 	for (i = 0; i < longLongKmerFilterIndex; i++) {
343 		leftBits = (*ptr & longLongLeftFilter);
344 		leftBits >>= 62;
345 		*ptr <<= 2;
346 		*ptr += rightBits;
347 		*ptr &= longLongWordFilter;
348 		rightBits = leftBits;
349 		ptr++;
350 	}
351 #endif
352 
353 #if KMER_LONGS | KMER_INTS | KMER_CHARS
354 	leftBits = (*ptr & longLongLeftFilter);
355 	leftBits >>= 62;
356 #endif
357 
358 	*ptr <<= 2;
359 	*ptr += rightBits;
360 	*ptr &= longLongKmerFilter;
361 
362 #if KMER_LONGS | KMER_INTS | KMER_CHARS
363 	rightBits = leftBits;
364 #endif
365 #endif
366 
367 #if KMER_LONGS
368 
369 #if KMER_INTS | KMER_CHARS
370 	leftBits = kmer->longs & longLeftFilter;
371 	leftBits >>= 30;
372 #endif
373 	kmer->longs <<= 2;
374 	kmer->longs += rightBits;
375 	kmer->longs &= longWordFilter;
376 
377 #if KMER_INTS | KMER_CHARS
378 	rightBits = leftBits;
379 #endif
380 
381 #endif
382 
383 #if KMER_INTS
384 
385 #if KMER_CHARS
386 	leftBits = kmer->ints & intLeftFilter;
387 	leftBits >>= 14;
388 #endif
389 	kmer->ints <<= 2;
390 	kmer->ints += rightBits;
391 	kmer->ints &= intWordFilter;
392 
393 #if KMER_CHARS
394 	rightBits = leftBits;
395 #endif
396 
397 #endif
398 
399 #if KMER_CHARS
400 	kmer->chars <<= 2;
401 	kmer->chars += rightBits;
402 	kmer->chars &= charWordFilter;
403 #endif
404 
405 #if KMER_LONGLONGS
406 	kmer->longlongs[0] += nucleotide;
407 	if (kmer->longlongs[0] >= nucleotide)
408 		return;
409 
410 	for (i = 1; i < KMER_LONGLONGS; i++)
411 		if (++kmer->longlongs[i])
412 			return;
413 #if KMER_LONGS
414 	if (++kmer->longs)
415 		return;
416 #endif
417 #if KMER_INTS
418 	if (++kmer->ints)
419 		return;
420 #endif
421 #if KMER_CHARS
422 	++kmer->chars;
423 #endif
424 
425 #else
426 
427 #if KMER_LONGS
428 	kmer->longs += nucleotide;
429 	if (kmer->longs >= nucleotide)
430 		return;
431 #if KMER_INTS
432 	if (++kmer->ints)
433 		return;
434 #endif
435 #if KMER_CHARS
436 	++kmer->chars;
437 #endif
438 
439 #else
440 
441 #if KMER_INTS
442 	kmer->ints += nucleotide;
443 	if (kmer->ints >= nucleotide)
444 		return;
445 #if KMER_CHARS
446 	++kmer->chars;
447 #endif
448 
449 #else
450 
451 #if KMER_CHARS
452 	kmer->chars += nucleotide;
453 #endif
454 
455 #endif
456 #endif
457 #endif
458 }
459 
popNucleotide(Kmer * kmer)460 Nucleotide popNucleotide(Kmer * kmer) {
461 	Nucleotide nucl;
462 
463 #if KMER_LONGLONGS
464 	nucl = kmer->longlongs[0] & 3;
465 #elif KMER_LONGS
466 	nucl = kmer->longs & 3;
467 #elif KMER_INTS
468 	nucl = kmer->ints & 3;
469 #elif KMER_CHARS
470 	nucl = kmer->chars & 3;
471 #endif
472 
473 	shiftRight(kmer);
474 	return nucl;
475 }
476 
reversePushNucleotide(Kmer * kmer,Nucleotide nucleotide)477 void reversePushNucleotide(Kmer * kmer, Nucleotide nucleotide) {
478 	uint64_t templongLong = nucleotide;
479 
480 	shiftRight(kmer);
481 
482 	switch(kmerFilterIndex) {
483 		case UNDEFINED:
484 			abort();
485 #if KMER_LONGLONGS
486 		case LONGLONGS:
487 			kmer->longlongs[longLongKmerFilterIndex] += templongLong << kmerFilterOffset;
488 			return;
489 #endif
490 #if KMER_LONGS
491 		case LONGS:
492 			kmer->longs += templongLong << kmerFilterOffset;
493 			return;
494 #endif
495 #if KMER_INTS
496 		case INTS:
497 			kmer->ints += templongLong << kmerFilterOffset;
498 			return;
499 #endif
500 #if KMER_CHARS
501 		case CHARS:
502 			kmer->chars += templongLong << kmerFilterOffset;
503 			return;
504 #endif
505 	}
506 
507 	exitErrorf(EXIT_FAILURE, true, "Anomaly in k-mer filering");
508 }
509