1 /*
2   Copyright (c) 2003 by Stefan Kurtz and The Institute for
3   Genomic Research.  This is OSI Certified Open Source Software.
4   Please see the file LICENSE for licensing information and
5   the file ACKNOWLEDGEMENTS for names of contributors to the
6   code base.
7 */
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <limits.h>
12 #include "streedef.h"
13 #include "streeacc.h"
14 #include "debugdef.h"
15 #include "arraydef.h"
16 #include "spacedef.h"
17 #include "protodef.h"
18 
19 #ifdef STREELARGE
20 
getlargelinkstree(Suffixtree * stree,Bref btptr,Uint depth)21 Uint getlargelinkstree(Suffixtree *stree,Bref btptr,Uint depth)
22 {
23   Uint succ;
24 
25   if(depth == 1)
26   {
27     return 0;
28   }
29   if(ISSMALLDEPTH(depth))
30   {
31     return (((*(btptr+2) & LOWERLINKPATT) >> SMALLDEPTHBITS) |
32            ((*(btptr+3) & MIDDLELINKPATT) >> SHIFTMIDDLE)   |
33            ((stree->leaftab[GETHEADPOS(btptr)] & EXTRAPATT)
34               >> SHIFTHIGHER)) << 1;
35   }
36   succ = GETCHILD(btptr);
37   while(!NILPTR(succ))
38   {
39     if(ISLEAF(succ))
40     {
41       succ = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(succ)]);
42     } else
43     {
44       succ = GETBROTHER(stree->branchtab + succ);
45     }
46   }
47   return succ & MAXINDEX;
48 }
49 #endif
50 
51 #ifdef STREESMALL
getlargelinkstree(Suffixtree * stree,Bref btptr,Uint depth)52 Uint getlargelinkstree(Suffixtree *stree,Bref btptr,Uint depth)
53 {
54   Uint succ, slink = 0, headnodenum;
55 
56   if(depth == 1)
57   {
58     return 0;
59   }
60   if(ISSMALLDEPTH(depth))
61   {
62     slink = (*(btptr+1) >> 25) & ((1 << 7) - 1);
63     headnodenum = BRADDR2NUM(stree,btptr);
64     if(headnodenum & (~((1 << 7) - 1)))
65     {
66       slink |= ((*btptr & (255 << 24)) >> 17);
67       if(headnodenum & (~((1 << 15) - 1)))
68       {
69         slink |= ((stree->leaftab[GETHEADPOS(btptr)] & (255 << 24)) >> 9);
70       }
71     }
72     return slink;
73   }
74   succ = GETCHILD(btptr);
75   while(!NILPTR(succ))
76   {
77     if(ISLEAF(succ))
78     {
79       succ = LEAFBROTHERVAL(stree->leaftab[GETLEAFINDEX(succ)]);
80     } else
81     {
82       succ = GETBROTHER(stree->branchtab + GETBRANCHINDEX(succ));
83     }
84   }
85   return succ & MAXINDEX;
86 }
87 #endif
88 
89 #ifdef STREEHUGE
getlargelinkstree(Suffixtree * stree,Bref btptr,Uint depth)90 Uint getlargelinkstree(/*@unused@*/ Suffixtree *stree,Bref btptr,Uint depth)
91 {
92   if(depth == UintConst(1))
93   {
94     return 0;
95   }
96   return *(btptr+4);
97 }
98 #endif
99 
int2ref(Suffixtree * stree,Reference * ref,Uint i)100 static void int2ref(Suffixtree *stree,Reference *ref,Uint i)
101 {
102   if(ISLEAF(i))
103   {
104     ref->toleaf = True;
105     ref->address = stree->leaftab + GETLEAFINDEX(i);
106   } else
107   {
108     ref->toleaf = False;
109     ref->address = stree->branchtab + GETBRANCHINDEX(i);
110   }
111 }
112 
getleafinfostree(Suffixtree * stree,Leafinfo * leafinfo,Lref lptr)113 void getleafinfostree(Suffixtree *stree,Leafinfo *leafinfo,Lref lptr)
114 {
115   Uint node = LEAFBROTHERVAL(*lptr);
116 
117   if(NILPTR(node))
118   {
119     leafinfo->address = NULL;
120   } else
121   {
122     int2ref(stree,leafinfo,node);
123   }
124 }
125 
getbranchinfostree(Suffixtree * stree,Uint whichinfo,Branchinfo * branchinfo,Bref btptr)126 void getbranchinfostree(Suffixtree *stree,Uint whichinfo,
127                         Branchinfo *branchinfo,Bref btptr)
128 {
129   Uint which = whichinfo, node, distance, *largeptr;
130 
131   if(which & ACCESSSUFFIXLINK)
132   {
133     which |= ACCESSDEPTH;
134   }
135   if(which & (ACCESSDEPTH | ACCESSHEADPOS))
136   {
137     if(stree->chainstart != NULL && btptr >= stree->chainstart)
138     {
139       distance = DIVBYSMALLINTS((Uint) (stree->nextfreebranch - btptr));
140       branchinfo->depth = stree->currentdepth + distance;
141       branchinfo->headposition = stree->nextfreeleafnum - distance;
142     } else
143     {
144       if(ISLARGE(*btptr))
145       {
146         if(which & ACCESSDEPTH)
147         {
148           branchinfo->depth = GETDEPTH(btptr);
149         }
150         if(which & ACCESSHEADPOS)
151         {
152           branchinfo->headposition = GETHEADPOS(btptr);
153         }
154       } else
155       {
156         distance = GETDISTANCE(btptr);
157         GETCHAINEND(largeptr,btptr,distance);
158         if(which & ACCESSDEPTH)
159         {
160           branchinfo->depth = GETDEPTH(largeptr) + distance;
161         }
162         if(which & ACCESSHEADPOS)
163         {
164           branchinfo->headposition = GETHEADPOS(largeptr) - distance;
165         }
166       }
167     }
168   }
169   if(which & ACCESSSUFFIXLINK)
170   {
171     if((stree->chainstart != NULL && btptr >= stree->chainstart) ||
172        !ISLARGE(*btptr))
173     {
174       branchinfo->suffixlink = btptr + SMALLINTS;
175     } else
176     {
177       branchinfo->suffixlink = stree->branchtab +
178                                getlargelinkstree(stree,btptr,
179                                                  branchinfo->depth);
180     }
181   }
182   if(which & ACCESSFIRSTCHILD)
183   {
184     int2ref(stree,&(branchinfo->firstchild),GETCHILD(btptr));
185   }
186   if(which & ACCESSBRANCHBROTHER)
187   {
188     node = GETBROTHER(btptr);
189     if(NILPTR(node))
190     {
191       branchinfo->branchbrother.address = NULL;
192     } else
193     {
194       int2ref(stree,&(branchinfo->branchbrother),node);
195     }
196   }
197 }
198 
getheadstringstree(Suffixtree * stree,Stringtype * str)199 void getheadstringstree(Suffixtree *stree,Stringtype *str)
200 {
201   Branchinfo branchinfo;
202   Reference ref;
203 
204   if(stree->headend == NULL)
205   {
206     str->length = stree->headnodedepth;
207     if(stree->headnodedepth > 0)
208     {
209       getbranchinfostree(stree,ACCESSHEADPOS,&branchinfo,stree->headnode);
210       str->start = branchinfo.headposition;
211     } else
212     {
213       str->start = 0;
214     }
215   } else
216   {
217     str->length = stree->headnodedepth + (Uint) (stree->headend-stree->headstart+1);
218     int2ref(stree,&ref,stree->insertnode);
219     if(ref.toleaf)
220     {
221       str->start = LEAFADDR2NUM(stree,ref.address);
222     } else
223     {
224       getbranchinfostree(stree,ACCESSHEADPOS,&branchinfo,ref.address);
225       str->start = branchinfo.headposition;
226     }
227   }
228 }
229 
getmaxtextlenstree(void)230 Uint getmaxtextlenstree(void)
231 {
232   return MAXTEXTLEN;
233 }
234 
showpathstree(Suffixtree * stree,Bref bnode,void (* showchar)(SYMBOL,void *),void * info)235 void showpathstree(Suffixtree *stree,Bref bnode,
236                    void (*showchar)(SYMBOL,void *),void *info)
237 {
238   Branchinfo branchinfo;
239   Uint i;
240 
241   getbranchinfostree(stree,ACCESSHEADPOS | ACCESSDEPTH,&branchinfo,bnode);
242   for(i = 0; i < branchinfo.depth; i++)
243   {
244     showchar(stree->text[branchinfo.headposition + i],info);
245   }
246 }
247 
248 #ifdef DEBUG
showsimplelocstree(Suffixtree * stree,Simpleloc * loc)249 void showsimplelocstree(Suffixtree *stree,Simpleloc *loc)
250 {
251   Uint bnode;
252 
253   if(loc->textpos == stree->textlen)
254   {
255     printf("(~,");
256   } else
257   {
258     printf("(%s,",stree->showsymbolstree(stree->text[loc->textpos],stree->alphabet));
259   }
260   if(loc->remain > 0)
261   {
262     printf("(%lu,",(Showuint) loc->remain);
263   }
264   if(loc->nextnode.toleaf)
265   {
266     printf("Leaf %lu",(Showuint) LEAFADDR2NUM(stree,loc->nextnode.address));
267   } else
268   {
269     bnode = BRADDR2NUM(stree,loc->nextnode.address);
270     if(ISLARGE(*(loc->nextnode.address)))
271     {
272       printf("Large %lu",(Showuint) bnode);
273     } else
274     {
275       printf("Small %lu",(Showuint) bnode);
276     }
277   }
278   if(loc->remain > 0)
279   {
280     (void) putchar(')');
281   }
282   (void) putchar(')');
283 }
284 
showsimplelocliststree(Suffixtree * stree,ArraySimpleloc * ll)285 void showsimplelocliststree(Suffixtree *stree,ArraySimpleloc *ll)
286 {
287   Uint i;
288 
289   for(i=0; i<ll->nextfreeSimpleloc; i++)
290   {
291     showsimplelocstree(stree,ll->spaceSimpleloc+i);
292     if(i < ll->nextfreeSimpleloc-1)
293     {
294       (void) putchar(',');
295     }
296   }
297   (void) putchar('\n');
298 }
299 #endif
300 
301 // use the following functions only for the root location.
302 
rootsucclocationsstree(Suffixtree * stree,ArraySimpleloc * ll)303 void rootsucclocationsstree(Suffixtree *stree,ArraySimpleloc *ll)
304 {
305   Uint headpos, leafindex, depth, distance, node, ch, *largeptr, *nodeptr;
306   Simpleloc *llptr;
307 
308   CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
309   for(ch = 0; ch <= UCHAR_MAX; ch++)
310   {
311     if((node = stree->rootchildren[ch]) != UNDEFINEDREFERENCE)
312     {
313       llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
314       if(ISLEAF(node))
315       {
316         leafindex = GETLEAFINDEX(node);
317         llptr->textpos = leafindex;
318         llptr->remain = stree->textlen - leafindex;
319         llptr->nextnode.toleaf = True;
320         llptr->nextnode.address = stree->leaftab + leafindex;
321       } else
322       {
323         nodeptr = stree->branchtab + GETBRANCHINDEX(node);
324         GETBOTH(depth,headpos,nodeptr);
325         llptr->textpos = headpos;
326         llptr->remain = depth - 1;
327         llptr->nextnode.toleaf = False;
328         llptr->nextnode.address = nodeptr;
329       }
330       CHECKADDR(stree,llptr->nextnode);
331     }
332   }
333 }
334 
335 // use the following functions only for non root location.
336 
succlocationsstree(Suffixtree * stree,BOOL nosentinel,Simpleloc * loc,ArraySimpleloc * ll)337 void succlocationsstree(Suffixtree *stree,BOOL nosentinel,Simpleloc *loc,
338                         ArraySimpleloc *ll)
339 {
340   Uint succdepth, succ, leafindex, distance, depth, headpos,
341        remain, *succptr, *largeptr, *nodeptr;
342   Simpleloc *llptr;
343 
344   DEBUG0(3,"succlocationsstree\n");
345   ll->nextfreeSimpleloc = 0;
346   CHECKARRAYSPACE(ll,Simpleloc,stree->alphasize+1);
347   if(loc->remain > 0)
348   {
349     if(nosentinel && loc->nextnode.toleaf && loc->remain <= UintConst(1))
350     {  // at the end of leaf edge: only a\$ remains
351       return;
352     }
353     llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
354     llptr->textpos = loc->textpos + 1;
355     llptr->remain = loc->remain - 1;
356     llptr->nextnode.address = loc->nextnode.address;
357     llptr->nextnode.toleaf = loc->nextnode.toleaf;
358     CHECKADDR(stree,llptr->nextnode);
359     return;
360   }
361   nodeptr = loc->nextnode.address;
362   GETONLYDEPTH(depth,nodeptr);
363   succ = GETCHILD(nodeptr);
364   do                   // traverse the list of successors
365   {
366     if(ISLEAF(succ))   // successor is leaf
367     {
368       leafindex = GETLEAFINDEX(succ);
369       remain = stree->textlen - (depth + leafindex);
370       if(!nosentinel || remain >= UintConst(1))
371       {
372         llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
373         llptr->remain = remain;
374         llptr->textpos = depth + leafindex;
375         llptr->nextnode.address = stree->leaftab + leafindex;
376         llptr->nextnode.toleaf = True;
377         CHECKADDR(stree,llptr->nextnode);
378       }
379       succ = LEAFBROTHERVAL(stree->leaftab[leafindex]);
380     } else   // successor is branch node
381     {
382       succptr = stree->branchtab + GETBRANCHINDEX(succ);
383       GETBOTH(succdepth,headpos,succptr);  // get info for branch node
384       llptr = ll->spaceSimpleloc + ll->nextfreeSimpleloc++;
385       llptr->textpos = depth + headpos;
386       llptr->remain = succdepth - depth - 1;
387       llptr->nextnode.toleaf = False;
388       llptr->nextnode.address = succptr;
389       CHECKADDR(stree,llptr->nextnode);
390       succ = GETBROTHER(succptr);
391     }
392   } while(!NILPTR(succ));
393 }
394