1 /*
2 Copyright 2007, 2008, 2009 Daniel Zerbino (zerbino@ebi.ac.uk)
3
4 This file is part of Velvet.
5
6 Velvet is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 Velvet is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Velvet; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23
24 #include "globals.h"
25 #include "kmer.h"
26 #include "utility.h"
27
28 static const uint64_t longLongLeftFilter = (uint64_t) 3 << 62;
29 static const uint32_t longLeftFilter = (uint32_t) 3 << 30;
30 static const uint16_t intLeftFilter = (uint16_t) 3 << 14;
31 static const uint8_t charLeftFilter = (uint8_t) 3 << 6;
32
33 static uint64_t longLongWordFilter = (uint64_t) ((int64_t) -1);
34 static uint32_t longWordFilter = (uint32_t) ((int32_t) -1);
35 static uint16_t intWordFilter = (uint16_t) ((int16_t) -1);
36 static uint8_t charWordFilter = (uint8_t) ((int8_t) -1);
37
38 #define UNDEFINED 0
39 #define CHARS 1
40 #define INTS 2
41 #define LONGS 3
42 #define LONGLONGS 4
43 static int kmerFilterIndex = UNDEFINED;
44 static int kmerFilterOffset = 0;
45 static int longLongKmerFilterIndex = KMER_LONGLONGS;
46 static uint64_t longLongKmerFilter = (uint64_t) ((int64_t) -1);
47
resetWordFilter(int wordLength)48 void resetWordFilter(int wordLength) {
49 int kmer_bit_size = wordLength * 2;
50 int i;
51
52 if (wordLength > MAXKMERLENGTH)
53 exitErrorf(EXIT_FAILURE, true, "Word length %i greater than max allowed value (%i).\nRecompile Velvet to deal with this word length.", wordLength, MAXKMERLENGTH);
54
55 #if KMER_LONGLONGS
56 for (i = 0; i < KMER_LONGLONGS; i++) {
57 if (kmer_bit_size > 64) {
58 kmer_bit_size -= 64;
59 continue;
60 } else if (kmer_bit_size == 64) {
61 longLongKmerFilterIndex = i;
62 longLongKmerFilter = longLongWordFilter;
63 kmerFilterIndex = LONGLONGS;
64 kmerFilterOffset = kmer_bit_size - 2;
65 longWordFilter = 0;
66 intWordFilter = 0;
67 charWordFilter = 0;
68 return;
69 } else {
70 longLongKmerFilterIndex = i;
71 longLongKmerFilter = (((uint64_t) 1) << kmer_bit_size) - 1;
72 kmerFilterIndex = LONGLONGS;
73 kmerFilterOffset = kmer_bit_size - 2;
74 longWordFilter = 0;
75 intWordFilter = 0;
76 charWordFilter = 0;
77 return;
78 }
79 }
80 #endif
81 #if KMER_LONGS
82 if (kmer_bit_size > 32)
83 kmer_bit_size -= 32;
84 else if (kmer_bit_size == 32) {
85 kmerFilterIndex = LONGS;
86 kmerFilterOffset = kmer_bit_size - 2;
87 intWordFilter = 0;
88 charWordFilter = 0;
89 return;
90 } else {
91 longWordFilter = (((uint32_t) 1) << kmer_bit_size) - 1;
92 kmerFilterIndex = LONGS;
93 kmerFilterOffset = kmer_bit_size - 2;
94 intWordFilter = 0;
95 charWordFilter = 0;
96 return;
97 }
98 #endif
99 #if KMER_INTS
100 if (kmer_bit_size > 16)
101 kmer_bit_size -= 16;
102 else if (kmer_bit_size == 16) {
103 kmerFilterIndex = INTS;
104 kmerFilterOffset = kmer_bit_size - 2;
105 charWordFilter = 0;
106 return;
107 } else {
108 intWordFilter = (((uint16_t) 1) << kmer_bit_size) - 1;
109 kmerFilterIndex = INTS;
110 kmerFilterOffset = kmer_bit_size - 2;
111 charWordFilter = 0;
112 return;
113 }
114
115 #endif
116 #if KMER_CHARS
117 if (kmer_bit_size < 8)
118 charWordFilter = (((uint8_t) 1) << kmer_bit_size) - 1;
119
120 kmerFilterIndex = CHARS;
121 kmerFilterOffset = kmer_bit_size - 2;
122 #endif
123
124 }
125
shiftRight(Kmer * kmer)126 static void shiftRight(Kmer * kmer) {
127 int i;
128 uint64_t leftBits = 0;
129 uint64_t rightBits;
130
131 #if KMER_CHARS
132
133 #if KMER_INTS | KMER_LONGS | KMER_LONGLONGS
134 rightBits = kmer->chars & 3;
135 #endif
136
137 kmer->chars >>= 2;
138 kmer->chars += (uint8_t) leftBits;
139
140 #if KMER_INTS | KMER_LONGS | KMER_LONGLONGS
141 leftBits = rightBits;
142 #endif
143 #endif
144
145 #if KMER_INTS
146
147 #if KMER_LONGS | KMER_LONGLONGS
148 rightBits = kmer->ints & 3;
149 #endif
150
151 leftBits <<= 14;
152 kmer->ints >>= 2;
153 kmer->ints += (uint16_t) leftBits;
154
155 #if KMER_LONGS | KMER_LONGLONGS
156 leftBits = rightBits;
157 #endif
158 #endif
159
160 #if KMER_LONGS
161
162 #if KMER_LONGLONGS
163 rightBits = kmer->longs & 3;
164 #endif
165
166 leftBits <<= 30;
167 kmer->longs >>= 2;
168 kmer->longs += (uint32_t) leftBits;
169
170 #if KMER_LONGLONGS
171 leftBits = rightBits;
172 #endif
173 #endif
174
175 #if KMER_LONGLONGS
176 for (i = KMER_LONGLONGS - 1; i >= 0; i--) {
177 rightBits = kmer->longlongs[i] & 3;
178 leftBits <<= 62;
179 kmer->longlongs[i] >>= 2;
180 kmer->longlongs[i] += leftBits;
181 leftBits = rightBits;
182 }
183 #endif
184 }
185
copyKmers(Kmer * k1,Kmer * k2)186 void copyKmers(Kmer* k1, Kmer* k2) {
187 int i;
188
189 #if KMER_LONGLONGS
190 for (i = 0; i < KMER_LONGLONGS; i++)
191 k1->longlongs[i] = k2->longlongs[i];
192 #endif
193 #if KMER_LONGS
194 k1->longs = k2->longs;
195 #endif
196 #if KMER_INTS
197 k1->ints = k2->ints;
198 #endif
199 #if KMER_CHARS
200 k1->chars = k2->chars;
201 #endif
202 }
203
compareKmers(Kmer * k1,Kmer * k2)204 int compareKmers(Kmer* k1, Kmer* k2) {
205 #if KMER_LONGLONGS
206 int i;
207 #endif
208
209 #if KMER_CHARS
210 if (k1->chars == k2->chars)
211 ;
212 else if (k1->chars > k2->chars)
213 return 1;
214 else
215 return -1;
216 #endif
217 #if KMER_INTS
218 if (k1->ints == k2->ints)
219 ;
220 else if (k1->ints > k2->ints)
221 return 1;
222 else
223 return -1;
224 #endif
225 #if KMER_LONGS
226 if (k1->longs == k2->longs)
227 ;
228 else if (k1->longs > k2->longs)
229 return 1;
230 else
231 return -1;
232 #endif
233 #if KMER_LONGLONGS
234 for (i = KMER_LONGLONGS - 1; i >= 0; i--) {
235 if (k1->longlongs[i] == k2->longlongs[i])
236 continue;
237 else if (k1->longlongs[i] > k2->longlongs[i])
238 return 1;
239 else
240 return -1;
241 }
242 #endif
243
244 return 0;
245 }
246
clearKmer(Kmer * kmer)247 void clearKmer(Kmer * kmer) {
248 int i;
249
250 #if KMER_LONGLONGS
251 for (i = 0; i < KMER_LONGLONGS; i++)
252 kmer->longlongs[i] = 0;
253 #endif
254 #if KMER_LONGS
255 kmer->longs = 0;
256 #endif
257 #if KMER_INTS
258 kmer->ints = 0;
259 #endif
260 #if KMER_CHARS
261 kmer->chars = 0;
262 #endif
263 }
264
printKmer(Kmer * kmer)265 void printKmer(Kmer * kmer) {
266 int i;
267
268 #if KMER_CHARS
269 printf("%hx\t", kmer->chars);
270 #endif
271 #if KMER_INTS
272 printf("%x\t", kmer->ints);
273 #endif
274 #if KMER_LONGS
275 printf("%x\t", kmer->longs);
276 #endif
277 #if KMER_LONGLONGS
278 for (i = KMER_LONGLONGS - 1; i >= 0; i--)
279 printf("%llx\t", (long long) kmer->longlongs[i]);
280 #endif
281 puts("");
282 }
283
testKmers(int argc,char ** argv)284 void testKmers(int argc, char** argv) {
285 Kmer kmer;
286 Kmer *k2;
287 Kmer k4;
288
289 k2 = &k4;
290 int i;
291
292 printf("FORMATS %u %u %u %u\n", KMER_CHARS, KMER_INTS, KMER_LONGS, KMER_LONGLONGS);
293 printf("FILTERS %hx %x %lx %llx\n", (short) charLeftFilter, (int) intLeftFilter, (long) longLeftFilter, (long long) longLongLeftFilter);
294 printf("FILTERS %hx %x %lx %llx\n", (short) charWordFilter, (int) intWordFilter, (long) longWordFilter, (long long) longLongWordFilter);
295 printKmer(&kmer);
296 puts("Clear");
297 clearKmer(&kmer);
298 printKmer(&kmer);
299
300 puts("Fill up");
301 for (i = 0; i < MAXKMERLENGTH; i++) {
302 pushNucleotide(&kmer, ((i + 1) % 4));
303 printKmer(&kmer);
304 }
305
306 puts("Shift right");
307 for (i = 0; i < MAXKMERLENGTH; i++) {
308 popNucleotide(&kmer);
309 printKmer(&kmer);
310 }
311
312 puts("Reverse complement");
313 resetWordFilter(9);
314 clearKmer(&kmer);
315 for (i = 0; i < MAXKMERLENGTH; i++) {
316 reversePushNucleotide(&kmer, ((i + 1) % 4));
317 printKmer(&kmer);
318 }
319
320 puts("Copy");
321 copyKmers(k2, &kmer);
322 printKmer(k2);
323 printf("%i\n", compareKmers(k2, &kmer));
324
325 }
326
pushNucleotide(Kmer * kmer,Nucleotide nucleotide)327 void pushNucleotide(Kmer * kmer, Nucleotide nucleotide) {
328 register int i;
329
330 #if KMER_LONGLONGS
331 register uint64_t * ptr;
332 #endif
333 #if KMER_LONGLONGS > 1 | KMER_LONGS | KMER_INTS | KMER_CHARS
334 uint64_t leftBits;
335 #endif
336 uint64_t rightBits = 0;
337
338 #if KMER_LONGLONGS
339 ptr = kmer->longlongs;
340
341 #if KMER_LONGLONGS > 1
342 for (i = 0; i < longLongKmerFilterIndex; i++) {
343 leftBits = (*ptr & longLongLeftFilter);
344 leftBits >>= 62;
345 *ptr <<= 2;
346 *ptr += rightBits;
347 *ptr &= longLongWordFilter;
348 rightBits = leftBits;
349 ptr++;
350 }
351 #endif
352
353 #if KMER_LONGS | KMER_INTS | KMER_CHARS
354 leftBits = (*ptr & longLongLeftFilter);
355 leftBits >>= 62;
356 #endif
357
358 *ptr <<= 2;
359 *ptr += rightBits;
360 *ptr &= longLongKmerFilter;
361
362 #if KMER_LONGS | KMER_INTS | KMER_CHARS
363 rightBits = leftBits;
364 #endif
365 #endif
366
367 #if KMER_LONGS
368
369 #if KMER_INTS | KMER_CHARS
370 leftBits = kmer->longs & longLeftFilter;
371 leftBits >>= 30;
372 #endif
373 kmer->longs <<= 2;
374 kmer->longs += rightBits;
375 kmer->longs &= longWordFilter;
376
377 #if KMER_INTS | KMER_CHARS
378 rightBits = leftBits;
379 #endif
380
381 #endif
382
383 #if KMER_INTS
384
385 #if KMER_CHARS
386 leftBits = kmer->ints & intLeftFilter;
387 leftBits >>= 14;
388 #endif
389 kmer->ints <<= 2;
390 kmer->ints += rightBits;
391 kmer->ints &= intWordFilter;
392
393 #if KMER_CHARS
394 rightBits = leftBits;
395 #endif
396
397 #endif
398
399 #if KMER_CHARS
400 kmer->chars <<= 2;
401 kmer->chars += rightBits;
402 kmer->chars &= charWordFilter;
403 #endif
404
405 #if KMER_LONGLONGS
406 kmer->longlongs[0] += nucleotide;
407 if (kmer->longlongs[0] >= nucleotide)
408 return;
409
410 for (i = 1; i < KMER_LONGLONGS; i++)
411 if (++kmer->longlongs[i])
412 return;
413 #if KMER_LONGS
414 if (++kmer->longs)
415 return;
416 #endif
417 #if KMER_INTS
418 if (++kmer->ints)
419 return;
420 #endif
421 #if KMER_CHARS
422 ++kmer->chars;
423 #endif
424
425 #else
426
427 #if KMER_LONGS
428 kmer->longs += nucleotide;
429 if (kmer->longs >= nucleotide)
430 return;
431 #if KMER_INTS
432 if (++kmer->ints)
433 return;
434 #endif
435 #if KMER_CHARS
436 ++kmer->chars;
437 #endif
438
439 #else
440
441 #if KMER_INTS
442 kmer->ints += nucleotide;
443 if (kmer->ints >= nucleotide)
444 return;
445 #if KMER_CHARS
446 ++kmer->chars;
447 #endif
448
449 #else
450
451 #if KMER_CHARS
452 kmer->chars += nucleotide;
453 #endif
454
455 #endif
456 #endif
457 #endif
458 }
459
popNucleotide(Kmer * kmer)460 Nucleotide popNucleotide(Kmer * kmer) {
461 Nucleotide nucl;
462
463 #if KMER_LONGLONGS
464 nucl = kmer->longlongs[0] & 3;
465 #elif KMER_LONGS
466 nucl = kmer->longs & 3;
467 #elif KMER_INTS
468 nucl = kmer->ints & 3;
469 #elif KMER_CHARS
470 nucl = kmer->chars & 3;
471 #endif
472
473 shiftRight(kmer);
474 return nucl;
475 }
476
reversePushNucleotide(Kmer * kmer,Nucleotide nucleotide)477 void reversePushNucleotide(Kmer * kmer, Nucleotide nucleotide) {
478 uint64_t templongLong = nucleotide;
479
480 shiftRight(kmer);
481
482 switch(kmerFilterIndex) {
483 case UNDEFINED:
484 abort();
485 #if KMER_LONGLONGS
486 case LONGLONGS:
487 kmer->longlongs[longLongKmerFilterIndex] += templongLong << kmerFilterOffset;
488 return;
489 #endif
490 #if KMER_LONGS
491 case LONGS:
492 kmer->longs += templongLong << kmerFilterOffset;
493 return;
494 #endif
495 #if KMER_INTS
496 case INTS:
497 kmer->ints += templongLong << kmerFilterOffset;
498 return;
499 #endif
500 #if KMER_CHARS
501 case CHARS:
502 kmer->chars += templongLong << kmerFilterOffset;
503 return;
504 #endif
505 }
506
507 exitErrorf(EXIT_FAILURE, true, "Anomaly in k-mer filering");
508 }
509