1 /*
2 Copyright (c) 2003 by Stefan Kurtz and The Institute for
3 Genomic Research. This is OSI Certified Open Source Software.
4 Please see the file LICENSE for licensing information and
5 the file ACKNOWLEDGEMENTS for names of contributors to the
6 code base.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <limits.h>
12 #include "streedef.h"
13 #include "streeacc.h"
14 #include "debugdef.h"
15 #include "arraydef.h"
16 #include "spacedef.h"
17 #include "protodef.h"
18
19 #ifdef STREELARGE
20
getlargelinkstree(Suffixtree * stree,Bref btptr,Uint depth)21 Uint getlargelinkstree(Suffixtree *stree,Bref btptr,Uint depth)
22 {
23 Uint succ;
24
25 if(depth == 1)
26 {
27 return 0;
28 }
29 if(ISSMALLDEPTH(depth))
30 {
31 return (((*(btptr+2) & LOWERLINKPATT) >> SMALLDEPTHBITS) |
32 ((*(btptr+3) & MIDDLELINKPATT) >> SHIFTMIDDLE) |
33 ((stree->leaftab[GETHEADPOS(btptr)] & EXTRAPATT)
34 >> SHIFTHIGHER)) << 1;
35 }
36 succ = GETCHILD(btptr);
37 while(!NILPTR(succ))
38 {
39 if(ISLEAF(succ))
40 {
41 succ = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(succ)]);
42 } else
43 {
44 succ = GETBROTHER(stree->branchtab + succ);
45 }
46 }
47 return succ & MAXINDEX;
48 }
49 #endif
50
51 #ifdef STREESMALL
getlargelinkstree(Suffixtree * stree,Bref btptr,Uint depth)52 Uint getlargelinkstree(Suffixtree *stree,Bref btptr,Uint depth)
53 {
54 Uint succ, slink = 0, headnodenum;
55
56 if(depth == 1)
57 {
58 return 0;
59 }
60 if(ISSMALLDEPTH(depth))
61 {
62 slink = (*(btptr+1) >> 25) & ((1 << 7) - 1);
63 headnodenum = BRADDR2NUM(stree,btptr);
64 if(headnodenum & (~((1 << 7) - 1)))
65 {
66 slink |= ((*btptr & (255 << 24)) >> 17);
67 if(headnodenum & (~((1 << 15) - 1)))
68 {
69 slink |= ((stree->leaftab[GETHEADPOS(btptr)] & (255 << 24)) >> 9);
70 }
71 }
72 return slink;
73 }
74 succ = GETCHILD(btptr);
75 while(!NILPTR(succ))
76 {
77 if(ISLEAF(succ))
78 {
79 succ = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(succ)]);
80 } else
81 {
82 succ = GETBROTHER(stree->branchtab + GETBRANCHINDEX(succ));
83 }
84 }
85 return succ & MAXINDEX;
86 }
87 #endif
88
89 #ifdef STREEHUGE
getlargelinkstree(Suffixtree * stree,Bref btptr,Uint depth)90 Uint getlargelinkstree(/*@unused@*/ Suffixtree *stree,Bref btptr,Uint depth)
91 {
92 if(depth == UintConst(1))
93 {
94 return 0;
95 }
96 return *(btptr+4);
97 }
98 #endif
99
int2ref(Suffixtree * stree,Reference * ref,Uint i)100 static void int2ref(Suffixtree *stree,Reference *ref,Uint i)
101 {
102 if(ISLEAF(i))
103 {
104 ref->toleaf = True;
105 ref->address = stree->leaftab + GETLEAFINDEX(i);
106 } else
107 {
108 ref->toleaf = False;
109 ref->address = stree->branchtab + GETBRANCHINDEX(i);
110 }
111 }
112
getleafinfostree(Suffixtree * stree,Leafinfo * leafinfo,Lref lptr)113 void getleafinfostree(Suffixtree *stree,Leafinfo *leafinfo,Lref lptr)
114 {
115 Uint node = LEAFBROTHERVAL(*lptr);
116
117 if(NILPTR(node))
118 {
119 leafinfo->address = NULL;
120 } else
121 {
122 int2ref(stree,leafinfo,node);
123 }
124 }
125
getbranchinfostree(Suffixtree * stree,Uint whichinfo,Branchinfo * branchinfo,Bref btptr)126 void getbranchinfostree(Suffixtree *stree,Uint whichinfo,
127 Branchinfo *branchinfo,Bref btptr)
128 {
129 Uint which = whichinfo, node, distance, *largeptr;
130
131 if(which & ACCESSSUFFIXLINK)
132 {
133 which |= ACCESSDEPTH;
134 }
135 if(which & (ACCESSDEPTH | ACCESSHEADPOS))
136 {
137 if(stree->chainstart != NULL && btptr >= stree->chainstart)
138 {
139 distance = DIVBYSMALLINTS((Uint) (stree->nextfreebranch - btptr));
140 branchinfo->depth = stree->currentdepth + distance;
141 branchinfo->headposition = stree->nextfreeleafnum - distance;
142 } else
143 {
144 if(ISLARGE(*btptr))
145 {
146 if(which & ACCESSDEPTH)
147 {
148 branchinfo->depth = GETDEPTH(btptr);
149 }
150 if(which & ACCESSHEADPOS)
151 {
152 branchinfo->headposition = GETHEADPOS(btptr);
153 }
154 } else
155 {
156 distance = GETDISTANCE(btptr);
157 GETCHAINEND(largeptr,btptr,distance);
158 if(which & ACCESSDEPTH)
159 {
160 branchinfo->depth = GETDEPTH(largeptr) + distance;
161 }
162 if(which & ACCESSHEADPOS)
163 {
164 branchinfo->headposition = GETHEADPOS(largeptr) - distance;
165 }
166 }
167 }
168 }
169 if(which & ACCESSSUFFIXLINK)
170 {
171 if((stree->chainstart != NULL && btptr >= stree->chainstart) ||
172 !ISLARGE(*btptr))
173 {
174 branchinfo->suffixlink = btptr + SMALLINTS;
175 } else
176 {
177 branchinfo->suffixlink = stree->branchtab +
178 getlargelinkstree(stree,btptr,
179 branchinfo->depth);
180 }
181 }
182 if(which & ACCESSFIRSTCHILD)
183 {
184 int2ref(stree,&(branchinfo->firstchild),GETCHILD(btptr));
185 }
186 if(which & ACCESSBRANCHBROTHER)
187 {
188 node = GETBROTHER(btptr);
189 if(NILPTR(node))
190 {
191 branchinfo->branchbrother.address = NULL;
192 } else
193 {
194 int2ref(stree,&(branchinfo->branchbrother),node);
195 }
196 }
197 }
198
getheadstringstree(Suffixtree * stree,Stringtype * str)199 void getheadstringstree(Suffixtree *stree,Stringtype *str)
200 {
201 Branchinfo branchinfo;
202 Reference ref;
203
204 if(stree->headend == NULL)
205 {
206 str->length = stree->headnodedepth;
207 if(stree->headnodedepth > 0)
208 {
209 getbranchinfostree(stree,ACCESSHEADPOS,&branchinfo,stree->headnode);
210 str->start = branchinfo.headposition;
211 } else
212 {
213 str->start = 0;
214 }
215 } else
216 {
217 str->length = stree->headnodedepth + (Uint) (stree->headend-stree->headstart+1);
218 int2ref(stree,&ref,stree->insertnode);
219 if(ref.toleaf)
220 {
221 str->start = LEAFADDR2NUM(stree,ref.address);
222 } else
223 {
224 getbranchinfostree(stree,ACCESSHEADPOS,&branchinfo,ref.address);
225 str->start = branchinfo.headposition;
226 }
227 }
228 }
229
getmaxtextlenstree(void)230 Uint getmaxtextlenstree(void)
231 {
232 return MAXTEXTLEN;
233 }
234
showpathstree(Suffixtree * stree,Bref bnode,void (* showchar)(SYMBOL,void *),void * info)235 void showpathstree(Suffixtree *stree,Bref bnode,
236 void (*showchar)(SYMBOL,void *),void *info)
237 {
238 Branchinfo branchinfo;
239 Uint i;
240
241 getbranchinfostree(stree,ACCESSHEADPOS | ACCESSDEPTH,&branchinfo,bnode);
242 for(i = 0; i < branchinfo.depth; i++)
243 {
244 showchar(stree->text[branchinfo.headposition + i],info);
245 }
246 }
247
248 #ifdef DEBUG
showsimplelocstree(Suffixtree * stree,Simpleloc * loc)249 void showsimplelocstree(Suffixtree *stree,Simpleloc *loc)
250 {
251 Uint bnode;
252
253 if(loc->textpos == stree->textlen)
254 {
255 printf("(~,");
256 } else
257 {
258 printf("(%s,",stree->showsymbolstree(stree->text[loc->textpos],stree->alphabet));
259 }
260 if(loc->remain > 0)
261 {
262 printf("(%lu,",(Showuint) loc->remain);
263 }
264 if(loc->nextnode.toleaf)
265 {
266 printf("Leaf %lu",(Showuint) LEAFADDR2NUM(stree,loc->nextnode.address));
267 } else
268 {
269 bnode = BRADDR2NUM(stree,loc->nextnode.address);
270 if(ISLARGE(*(loc->nextnode.address)))
271 {
272 printf("Large %lu",(Showuint) bnode);
273 } else
274 {
275 printf("Small %lu",(Showuint) bnode);
276 }
277 }
278 if(loc->remain > 0)
279 {
280 (void) putchar(')');
281 }
282 (void) putchar(')');
283 }
284
showsimplelocliststree(Suffixtree * stree,ArraySimpleloc * ll)285 void showsimplelocliststree(Suffixtree *stree,ArraySimpleloc *ll)
286 {
287 Uint i;
288
289 for(i=0; i<ll->nextfreeSimpleloc; i++)
290 {
291 showsimplelocstree(stree,ll->spaceSimpleloc+i);
292 if(i < ll->nextfreeSimpleloc-1)
293 {
294 (void) putchar(',');
295 }
296 }
297 (void) putchar('\n');
298 }
299 #endif
300
301 // use the following functions only for the root location.
302
rootsucclocationsstree(Suffixtree * stree,ArraySimpleloc * ll)303 void rootsucclocationsstree(Suffixtree *stree,ArraySimpleloc *ll)
304 {
305 Uint headpos, leafindex, depth, distance, node, ch, *largeptr, *nodeptr;
306 Simpleloc *llptr;
307
308 CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
309 for(ch = 0; ch <= UCHAR_MAX; ch++)
310 {
311 if((node = stree->rootchildren[ch]) != UNDEFINEDREFERENCE)
312 {
313 llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
314 if(ISLEAF(node))
315 {
316 leafindex = GETLEAFINDEX(node);
317 llptr->textpos = leafindex;
318 llptr->remain = stree->textlen - leafindex;
319 llptr->nextnode.toleaf = True;
320 llptr->nextnode.address = stree->leaftab + leafindex;
321 } else
322 {
323 nodeptr = stree->branchtab + GETBRANCHINDEX(node);
324 GETBOTH(depth,headpos,nodeptr);
325 llptr->textpos = headpos;
326 llptr->remain = depth - 1;
327 llptr->nextnode.toleaf = False;
328 llptr->nextnode.address = nodeptr;
329 }
330 CHECKADDR(stree,llptr->nextnode);
331 }
332 }
333 }
334
335 // use the following functions only for non root location.
336
succlocationsstree(Suffixtree * stree,BOOL nosentinel,Simpleloc * loc,ArraySimpleloc * ll)337 void succlocationsstree(Suffixtree *stree,BOOL nosentinel,Simpleloc *loc,
338 ArraySimpleloc *ll)
339 {
340 Uint succdepth, succ, leafindex, distance, depth, headpos,
341 remain, *succptr, *largeptr, *nodeptr;
342 Simpleloc *llptr;
343
344 DEBUG0(3,"succlocationsstree\n");
345 ll->nextfreeSimpleloc = 0;
346 CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
347 if(loc->remain > 0)
348 {
349 if(nosentinel && loc->nextnode.toleaf && loc->remain <= UintConst(1))
350 { // at the end of leaf edge: only a\$ remains
351 return;
352 }
353 llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
354 llptr->textpos = loc->textpos + 1;
355 llptr->remain = loc->remain - 1;
356 llptr->nextnode.address = loc->nextnode.address;
357 llptr->nextnode.toleaf = loc->nextnode.toleaf;
358 CHECKADDR(stree,llptr->nextnode);
359 return;
360 }
361 nodeptr = loc->nextnode.address;
362 GETONLYDEPTH(depth,nodeptr);
363 succ = GETCHILD(nodeptr);
364 do // traverse the list of successors
365 {
366 if(ISLEAF(succ)) // successor is leaf
367 {
368 leafindex = GETLEAFINDEX(succ);
369 remain = stree->textlen - (depth + leafindex);
370 if(!nosentinel || remain >= UintConst(1))
371 {
372 llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
373 llptr->remain = remain;
374 llptr->textpos = depth + leafindex;
375 llptr->nextnode.address = stree->leaftab + leafindex;
376 llptr->nextnode.toleaf = True;
377 CHECKADDR(stree,llptr->nextnode);
378 }
379 succ = LEAFBROTHERVAL(stree->leaftab[leafindex]);
380 } else // successor is branch node
381 {
382 succptr = stree->branchtab + GETBRANCHINDEX(succ);
383 GETBOTH(succdepth,headpos,succptr); // get info for branch node
384 llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
385 llptr->textpos = depth + headpos;
386 llptr->remain = succdepth - depth - 1;
387 llptr->nextnode.toleaf = False;
388 llptr->nextnode.address = succptr;
389 CHECKADDR(stree,llptr->nextnode);
390 succ = GETBROTHER(succptr);
391 }
392 } while(!NILPTR(succ));
393 }
394