1 /*
2 Copyright 2010 Daniel Zerbino (zerbino@ebi.ac.uk)
3
4 This file is part of Velvet.
5
6 Velvet is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 Velvet is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Velvet; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
20 */
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <limits.h>
25
26 #include "globals.h"
27 #include "graph.h"
28 #include "passageMarker.h"
29 #include "readSet.h"
30 #include "tightString.h"
31 #include "recycleBin.h"
32 #include "utility.h"
33 #include "kmer.h"
34
35 // Internal structure used to mark the ends of an Annotation
36 struct kmerOccurence_st {
37 IDnum position;
38 IDnum nodeID;
39 IDnum offset;
40 Kmer kmer;
41 } ATTRIBUTE_PACKED;
42
43 struct kmerOccurenceTable_st {
44 KmerOccurence *kmerTable;
45 KmerOccurence * kmerOccurencePtr;
46 IDnum *accelerationTable;
47 IDnum kmerTableSize;
48 IDnum kmerOccurenceIndex;
49 short int accelerationShift;
50 short int accelerationBits;
51 };
52
compareKmerOccurences(void const * A,void const * B)53 int compareKmerOccurences(void const *A, void const *B)
54 {
55 KmerOccurence *a = (KmerOccurence *) A;
56 KmerOccurence *b = (KmerOccurence *) B;
57 return compareKmers(&(a->kmer), &(b->kmer));
58 }
59
keyInAccelerationTable(Kmer * kmer,KmerOccurenceTable * table)60 static inline KmerKey keyInAccelerationTable(Kmer * kmer,
61 KmerOccurenceTable * table)
62 {
63 return getKmerKey(kmer);
64 }
65
findKmerInKmerOccurenceTable(Kmer * kmer,KmerOccurenceTable * table)66 KmerOccurence *findKmerInKmerOccurenceTable(Kmer * kmer,
67 KmerOccurenceTable *
68 table)
69 {
70 KmerOccurence *array = table->kmerTable;
71 KmerKey key = keyInAccelerationTable(kmer, table);
72 Coordinate leftIndex, rightIndex, middleIndex;
73 int diff;
74
75 if (table->accelerationTable != NULL) {
76 leftIndex = table->accelerationTable[key];
77 rightIndex = table->accelerationTable[key + 1];
78 } else {
79 leftIndex = 0;
80 rightIndex = table->kmerTableSize;
81 }
82
83 while (true) {
84 middleIndex = (rightIndex + leftIndex) / 2;
85
86 if (leftIndex >= rightIndex)
87 return NULL;
88
89 diff = compareKmers(&(array[middleIndex].kmer), kmer);
90
91 if (diff == 0) {
92 middleIndex -= array[middleIndex].offset;
93 return &(array[middleIndex]);
94 } else if (leftIndex == middleIndex)
95 return NULL;
96 else if (diff > 0)
97 rightIndex = middleIndex;
98 else
99 leftIndex = middleIndex;
100 }
101 }
102
newKmerOccurenceTable(short int accelerationBits,int wordLength)103 KmerOccurenceTable * newKmerOccurenceTable(short int accelerationBits, int wordLength) {
104 KmerOccurenceTable * kmerTable = mallocOrExit(1, KmerOccurenceTable);
105
106 if (accelerationBits > 2 * wordLength)
107 accelerationBits = 2 * wordLength;
108
109 if (accelerationBits > 32)
110 accelerationBits = 32;
111
112 if (accelerationBits > 0) {
113 resetKeyFilter(accelerationBits);
114 kmerTable->accelerationBits = accelerationBits;
115 kmerTable->accelerationTable =
116 callocOrExit((((size_t) 1) << accelerationBits) + 1,
117 IDnum);
118 kmerTable->accelerationShift =
119 (short int) 2 *wordLength - accelerationBits;
120 } else {
121 kmerTable->accelerationBits = 0;
122 kmerTable->accelerationTable = NULL;
123 kmerTable->accelerationShift = 0;
124 }
125
126 return kmerTable;
127 }
128
allocateKmerOccurences(IDnum kmerCount,KmerOccurenceTable * table)129 void allocateKmerOccurences(IDnum kmerCount, KmerOccurenceTable * table) {
130 KmerOccurence * kmerOccurences = callocOrExit(kmerCount + 1, KmerOccurence);
131 kmerOccurences[kmerCount].position = -1;
132 kmerOccurences[kmerCount].nodeID = 0;
133
134 table->kmerTable = kmerOccurences;
135 table->kmerTableSize = kmerCount;
136 table->kmerOccurencePtr = kmerOccurences;
137 table->kmerOccurenceIndex = 0;
138 }
139
recordKmerOccurence(Kmer * kmer,IDnum nodeID,Coordinate position,KmerOccurenceTable * table)140 void recordKmerOccurence(Kmer * kmer, IDnum nodeID, Coordinate position, KmerOccurenceTable * table) {
141 KmerOccurence * kmerOccurence;
142
143 #ifdef _OPENMP
144 #pragma omp critical
145 #endif
146 {
147 kmerOccurence = table->kmerOccurencePtr++;
148 table->kmerOccurenceIndex++;
149 }
150
151 copyKmers(&(kmerOccurence->kmer), kmer);
152 kmerOccurence->nodeID = nodeID;
153 kmerOccurence->position = position;
154
155 }
156
sortKmerOccurenceTable(KmerOccurenceTable * table)157 void sortKmerOccurenceTable(KmerOccurenceTable * table) {
158 KmerKey lastHeader = 0;
159 KmerKey header;
160 IDnum *accelPtr = NULL;
161 IDnum kmerOccurenceIndex;
162 KmerOccurence * kmerOccurence, * previous;
163
164 velvetLog("Sorting kmer occurence table ... \n");
165
166 qsort(table->kmerTable, table->kmerTableSize, sizeof(KmerOccurence),
167 compareKmerOccurences);
168
169 velvetLog("Sorting done.\n");
170
171 velvetLog("Computing acceleration table... \n");
172
173 // Fill up acceleration table
174 if (table->accelerationTable != NULL) {
175 accelPtr = table->accelerationTable;
176 *accelPtr = (IDnum) 0;
177 for (kmerOccurenceIndex = 0;
178 kmerOccurenceIndex < table->kmerTableSize;
179 kmerOccurenceIndex++) {
180 header =
181 keyInAccelerationTable(&table->kmerTable
182 [kmerOccurenceIndex].
183 kmer, table);
184 while (lastHeader < header) {
185 lastHeader++;
186 accelPtr++;
187 *accelPtr = kmerOccurenceIndex;
188 }
189 }
190
191 while (lastHeader < (KmerKey) 1 << table->accelerationBits) {
192 lastHeader++;
193 accelPtr++;
194 *accelPtr = table->kmerTableSize;
195 }
196 }
197
198 velvetLog("Computing offsets... \n");
199
200 // Compute offsets
201 kmerOccurence = table->kmerTable;
202 previous = NULL;
203 for (kmerOccurenceIndex = 1;
204 kmerOccurenceIndex < table->kmerTableSize;
205 kmerOccurenceIndex++) {
206 if (previous && compareKmerOccurences(kmerOccurence, previous) == 0)
207 kmerOccurence->offset = previous->offset + 1;
208 previous = kmerOccurence;
209 kmerOccurence++;
210 }
211 }
212
getNextKmerOccurence(KmerOccurence * current)213 KmerOccurence * getNextKmerOccurence(KmerOccurence * current) {
214 register KmerOccurence * next = current + 1;
215 if (next->nodeID == 0 || next->offset == 0)
216 return NULL;
217 else
218 return next;
219 }
220
destroyKmerOccurenceTable(KmerOccurenceTable * kmerTable)221 void destroyKmerOccurenceTable(KmerOccurenceTable * kmerTable) {
222 if (kmerTable == NULL)
223 return;
224
225 free(kmerTable->kmerTable);
226 free(kmerTable->accelerationTable);
227 free(kmerTable);
228 }
229
getKmerOccurenceNodeID(KmerOccurence * occurence)230 IDnum getKmerOccurenceNodeID(KmerOccurence * occurence) {
231 return occurence->nodeID;
232 }
233
getKmerOccurencePosition(KmerOccurence * occurence)234 Coordinate getKmerOccurencePosition(KmerOccurence * occurence) {
235 return occurence->position;
236 }
237