1 /*
2   Copyright (c) 2003 by Stefan Kurtz and The Institute for
3   Genomic Research.  This is OSI Certified Open Source Software.
4   Please see the file LICENSE for licensing information and
5   the file ACKNOWLEDGEMENTS for names of contributors to the
6   code base.
7 */
8 
9 #ifndef STREEACC_H
10 #define STREEACC_H
11 
12 #ifdef STREESMALL
13 #include "streesmall.h"
14 #endif
15 
16 #ifdef STREELARGE
17 #include "streelarge.h"
18 #endif
19 
20 #ifdef STREEHUGE
21 #include "streehuge.h"
22 #endif
23 
24 #ifdef DEBUG
25 #define SHOWVAL(S)    fprintf(stderr,"#%s %lu\n",#S,(Showuint) S)
26 #define SETVAL(E,VAL) *(E) = VAL;\
27                       if((E) > stree->maxset)\
28                       {\
29                         stree->maxset = E;\
30                       }
31 #else
32 
33 //}
34 
35 /*
36   This file contains some macros for retrieving depth, headpositions,
37   and suffix links.
38 */
39 
40 #define SETVAL(E,VAL) *(E) = VAL
41 
42 //\Ignore{
43 
44 #endif
45 
46 //}
47 
48 /*
49   \texttt{GETBOTH} retrieves the \emph{depth} and the \emph{headposition} of
50   a branching node referred to by \texttt{PT}. In case, we need these values
51   for a node of the current chain, the distance is not set. So we compute
52   it as the difference between the next free base address, and the base
53   address of the node the chain starts with. Then we refer to the current depth
54   and the number of the current leaf. In case, the node is large, we can
55   directly look up the values. In case, the node is small, we determine
56   the distance, and a pointer to the large node at the end of the chain.
57   Then we can retrieve the depth and the head positions from this, as
58   proved in \cite{KUR:1998}, Observation 7.
59 */
60 
61 #define GETBOTH(DP,HP,PT) \
62         if(stree->chainstart != NULL && (PT) >= stree->chainstart)\
63         {\
64           distance = 1 + \
65                      DIVBYSMALLINTS((Uint) (stree->nextfreebranch - (PT)));\
66           DP = stree->currentdepth + distance;\
67           HP = stree->nextfreeleafnum - distance;\
68         } else\
69         {\
70           if(ISLARGE(*(PT)))\
71           {\
72             DP = GETDEPTH(PT);\
73             HP = GETHEADPOS(PT);\
74           } else\
75           {\
76             distance = GETDISTANCE(PT);\
77             GETCHAINEND(largeptr,PT,distance);\
78             DP = GETDEPTH(largeptr) + distance;\
79             HP = GETHEADPOS(largeptr) - distance;\
80           }\
81         }
82 
83 /*
84   The macros \texttt{GETONLYHEADPOS}, \texttt{GETONLYDEPTH}, and
85   \texttt{GETDEPTHAFTERHEADPOS} retrieve the depth or the head position.
86   This is done as in the previous macro, and we omit it here.
87 */
88 
89 //\Ignore{
90 
91 #define GETONLYHEADPOS(HP,PT) \
92         if(stree->chainstart != NULL && (PT) >= stree->chainstart)\
93         {\
94           distance = 1 + DIVBYSMALLINTS((Uint) (stree->nextfreebranch - (PT)));\
95           HP = stree->nextfreeleafnum - distance;\
96         } else\
97         {\
98           if(ISLARGE(*(PT)))\
99           {\
100             HP = GETHEADPOS(PT);\
101           } else\
102           {\
103             distance = GETDISTANCE(PT);\
104             GETCHAINEND(largeptr,PT,distance);\
105             HP = GETHEADPOS(largeptr) - distance;\
106           }\
107         }
108 
109 #define GETONLYDEPTH(DP,PT) \
110         if(stree->chainstart != NULL && (PT) >= stree->chainstart)\
111         {\
112           distance = 1 + DIVBYSMALLINTS((Uint) (stree->nextfreebranch - (PT)));\
113           DP = stree->currentdepth  + distance;\
114         } else\
115         {\
116           if(ISLARGE(*(PT)))\
117           {\
118             DP = GETDEPTH(PT);\
119           } else\
120           {\
121             distance = GETDISTANCE(PT);\
122             GETCHAINEND(largeptr,PT,distance);\
123             DP = GETDEPTH(largeptr) + distance;\
124           }\
125         }
126 
127 #define GETDEPTHAFTERHEADPOS(DP,PT) \
128         if(stree->chainstart != NULL && (PT) >= stree->chainstart)\
129         {\
130           DP = stree->currentdepth + distance;\
131         } else\
132         {\
133           if(ISLARGE(*(PT)))\
134           {\
135             DP = GETDEPTH(PT);\
136           } else\
137           {\
138             DP = GETDEPTH(largeptr) + distance;\
139           }\
140         }
141 
142 #define GETHEADPOSAFTERDEPTH(HP,PT) \
143         if(stree->chainstart != NULL && (PT) >= stree->chainstart)\
144         {\
145           HP = stree->nextfreeleafnum - distance;\
146         } else\
147         {\
148           if(ISLARGE(*(PT)))\
149           {\
150             HP = GETHEADPOS(PT);\
151           } else\
152           {\
153             HP = GETHEADPOS(largeptr) - distance;\
154           }\
155         }
156 
157 #define NEXTNODE(PT)\
158         if(ISLARGE(*(PT)))\
159         {\
160           PT += LARGEINTS;\
161         } else\
162         {\
163           PT += SMALLINTS;\
164         }
165 
166 //}
167 
168 /*
169   The suffix link is always determined for the \emph{headnode}. If this
170   is large, the we have to retrieve it from that node. Otherwise, the
171   suffix link refers to the next node. In both cases, the depth of the
172   \emph{headnode} is decremented.
173 */
174 
175 #define FOLLOWSUFFIXLINK\
176         if(ISLARGE(*(stree->headnode)))\
177         {\
178           stree->headnode = stree->branchtab + GETSUFFIXLINK(stree->headnode);\
179         } else\
180         {\
181           stree->headnode += SMALLINTS;\
182         }\
183         stree->headnodedepth--
184 
185 /*
186   Whenever \emph{insertleaf} is called, \emph{onsuccpath} stores the
187   address of the new leaf.
188   Whenever \emph{insertbranch} is called, \emph{onsuccpath} stores the
189   address of the new branching node. Both nodes are a successor of the
190   node, for which a suffix link is possible to be computed in the
191   next step. In case linear retrieval of suffix links is required,
192   it is possible to start at the node referenced by \emph{onsuccpath}.
193 */
194 
195 #if defined(STREELARGE) || defined(STREESMALL)
196 #define RECALLSUCC(S)             stree->onsuccpath = S
197 #else
198 #define RECALLSUCC(S)             /* Nothing */
199 #endif
200 
201 /*
202   The following three macros handle the setting of the suffix link in a
203   nil reference. The \emph{RECALL}-macros store the address of the
204   reference. In case the reference is a new leaf, this is marked.
205 */
206 
207 #define RECALLNEWLEAFADDRESS(A)   stree->setlink = A;\
208                                   stree->setatnewleaf = True
209 #define RECALLLEAFADDRESS(A)      stree->setlink = A;\
210                                   stree->setatnewleaf = False
211 #define RECALLBRANCHADDRESS(A)    stree->setlink = (A) + 1;\
212                                   stree->setatnewleaf = False
213 
214 #ifdef STREEHUGE
215 #define SETNILBIT                 *(stree->setlink) = NILBIT
216 #else
217 #define SETNILBIT                 if(stree->setatnewleaf)\
218                                   {\
219                                     *(stree->setlink) = NILBIT;\
220                                   } else\
221                                   {\
222                                     *(stree->setlink) |= NILBIT;\
223                                   }
224 #endif
225 
226 #define SETMAXBRANCHDEPTH(D)      if((D) > stree->maxbranchdepth)\
227                                   {\
228                                     stree->maxbranchdepth = D;\
229                                   }
230 
231 //\Ignore{
232 
233 /*
234 #ifdef SHOWLEAD
235 #define LEADLEVEL 3
236 #else
237 #define LEADLEVEL 4
238 #endif
239 */
240 
241 #define LEADLEVEL 2
242 
243 #ifdef DEBUG
244 #define SHOWINDEX(NODE)\
245         if((NODE) == UNDEFINEDREFERENCE)\
246         {\
247           DEBUG0(LEADLEVEL,"UNDEFINEDREFERENCE");\
248         } else\
249         {\
250           if(NILPTR(NODE))\
251           {\
252             DEBUG0(LEADLEVEL,"NILPTR");\
253           } else\
254           {\
255             if(ISLEAF(NODE))\
256             {\
257               DEBUG1(LEADLEVEL,"Leaf %lu",(Showuint) GETLEAFINDEX(NODE));\
258             } else\
259             {\
260               if(ISLARGE(stree->branchtab[GETBRANCHINDEX(NODE)]))\
261               {\
262                 DEBUG1(LEADLEVEL,"Large %lu",(Showuint) GETBRANCHINDEX(NODE));\
263               } else\
264               {\
265                 DEBUG1(LEADLEVEL,"Small %lu",(Showuint) NODE);\
266               }\
267             }\
268           }\
269         }
270 #else
271 #define SHOWINDEX(NODE) /* Nothing */
272 #endif
273 
274 #ifdef DEBUG
275 void showtable(Suffixtree *stree,BOOL final);
276 void checkstree(Suffixtree *stree);
277 void showstate(Suffixtree *stree);
278 void showstree(Suffixtree *stree);
279 void enumlocations(Suffixtree *stree,void(*processloc)(Suffixtree *stree,Location *));
280 void checklocation(Suffixtree *stree,Location *loc);
281 #endif
282 
283 #endif
284 
285 //}
286