1/*
2  Copyright (c) 2003 by Stefan Kurtz and The Institute for
3  Genomic Research.  This is OSI Certified Open Source Software.
4  Please see the file LICENSE for licensing information and
5  the file ACKNOWLEDGEMENTS for names of contributors to the
6  code base.
7*/
8
9Sint constructstree(Suffixtree *stree,SYMBOL *text,Uint textlen)
10{
11
12}
13
14/*EE
15  In some applications it is convenient to perform some extra computations
16  during the suffix tree construction.  For this purpose, there are
17  two variations of \texttt{constructstree}:
18
19  The following function
20  additionally marks all branching nodes which are maximal.
21  A branching node \(\overline{v}\) is \emph{maximal} if
22  and only if there is
23  no suffix link to \(\overline{v}\). The maximal nodes can be
24  enumerated using the function \texttt{overmaximalstree}, see Section
25  \ref{Traverse}.
26*/
27
28Sint constructmarkmaxstree(Suffixtree *stree,SYMBOL *text,
29                           Uint textlen)
30{
31
32}
33
34/*EE
35  The following variation
36  constructs the suffix tree and calls for each \(j\in[0,n]\) the
37  function \texttt{processhead} with the second argument being \(j\),
38  as soon as \(\HD{j}\) is computed.
39  The first argument of \texttt{processhead} is the current suffix tree,
40  and the third argument is the pointer \texttt{processheadinfo}.
41*/
42
43Sint constructheadstree(Suffixtree *stree,SYMBOL *text,
44                        Uint textlen,
45                        void(*processhead)(Suffixtree *,Uint,
46                                           void *),
47                        void *processheadinfo)
48{
49}
50
51/*EE
52  The following variation
53  constructs the suffix tree for \texttt{text}. Additionally, whenever
54  \(textlen>\Numofcalls\), the function
55  \texttt{progress} is called after \(i\cdot textlen/\Numofcalls\)
56  construction
57  steps, for \(i\in[1,\Numofcalls]\). Moreover, the function
58  \texttt{finalprogress} is called at the end of the suffix tree
59  construction. When they are called, both functions are supplied
60  with the argument \texttt{info}.
61*/
62
63Sint constructprogressstree(Suffixtree *stree,SYMBOL *text,
64                            Uint textlen,
65                            void (*progress)(Uint,void *),
66                            void (*finalprogress)(void *),
67                            void *info)
68{
69
70}
71
72/*EE
73  For example, one could define \texttt{progress} and \texttt{finalprogress}
74  as follows, in order to write a line of 79 dots on the given file pointer,
75  to show the progress of the suffix tree construction:
76*/
77
78void progresswithdot(Uint nextstep,void *info)
79{
80  fputc('.',stderr);
81  fflush(stderr);
82}
83
84void finalprogress(void *info)
85{
86  fputc('\n',stderr);
87}
88
89/*EE
90  The following function stores for each branching node
91  \(\overline{v}\) the \emph{lefcount},
92  i.e.\ the number of leafs in the subtree below \(\overline{v}\).
93  The computation of the leafcounts is done in linear time. It can be
94  retrieved using the function \texttt{getleafcountstree}, described
95  below.
96*/
97
98void addleafcountsstree(Suffixtree *stree)
99{
100
101}
102
103/*EE
104  The following function frees the space allocated for \texttt{stree}.
105*/
106
107void freestree(Suffixtree *stree)
108{
109
110}
111
112/*EE
113  The following function
114  returns the maximal length of an input string allowed for the
115  suffix tree construction.
116*/
117
118Uint getmaxtextlenstree(void)
119{
120
121}
122
123/*EE
124 \subsection{Accessing the Stored Information}
125
126  The following function stores the information for leaf
127  \texttt{lref} in the structure \texttt{leafinfo}.
128*/
129
130void getleafinfostree(Suffixtree *stree,Leafinfo *leafinfo,
131                      Lref lptr)
132{
133
134}
135
136/*EE
137  The following function
138  stores the information for branching node \texttt{bnode} in
139  \texttt{branchinfo}. For efficiency reason, the function does not always
140  compute all 5 components stored for a branching node. Instead, the
141  components are delivered as specified by \texttt{whichinfo}. This
142  has the following possible bits:
143  \begin{center}
144  \begin{tabular}{l}
145  \texttt{ACCESSDEPTH}\\
146  \texttt{ACCESSHEADPOS}\\
147  \texttt{ACCESSSUFFIXLINK}\\
148  \texttt{ACCESSFIRSTCHILD}\\
149  \texttt{ACCESSBRANCHBROTHER}
150  \end{tabular}
151  \end{center}
152*/
153
154void getbranchinfostree(Suffixtree *stree,Uint whichinfo,
155                        Branchinfo *branchinfo,Bref btptr)
156{
157
158}
159
160/*EE
161  The following function
162  shows the path for the node \texttt{bnode}. A symbol \(a\) is shown
163  by applying the function \texttt{showchar} to it and the
164  additional argument \texttt{info}.
165*/
166
167void showpathstree(Suffixtree *stree,Bref bnode,
168                   void (*showchar)(SYMBOL,void *),void *info)
169{
170
171}
172
173/*EE
174  The following function stores a start position and the length of
175  \(\HD{j}\), for the current \(j\) in \texttt{str}. The start
176  position is smaller than \(j\).
177*/
178
179void getheadstringstree(Suffixtree *stree,Stringtype *str)
180{
181
182}
183
184/*EE
185  The following function returns \texttt{True}, if and only if the
186  branching node \texttt{bnode} has exactly two leaves, say with leaf
187  numbers \(i\) and \(j\), \(i<j\). In this case, \(i\) is stored in
188  \texttt{twoleaves->uint0} and \(j\) is stored in
189  \texttt{twoleaves->uint1}.
190*/
191
192BOOL exactlytwoleavesstree(Suffixtree *stree,
193                           PairUint *twoleaves,Bref start)
194{
195
196}
197
198/*EE
199  The following function
200  returns the leafcount for a node referenced by \texttt{nodeptr}.
201  The function \texttt{addleafcountstree} must have been called
202  exactly once before, in order to  compute the leafcounts.
203*/
204
205Uint getleafcountstree(Suffixtree *stree,Bref nodeptr)
206{
207
208}
209
210/*EE
211  The following function
212  computes a table \texttt{depthtab}, such that
213  \texttt{depthtab->spaceUint[i]}
214  holds the number of branching nodes in the given
215  suffix tree whose depth is
216  exactly \(i\). The maximal depth of any branching node is
217  \texttt{depthtab->nextfreeUint-1}.
218*/
219
220void makedepthtabstree(ArrayUint *depthtab,Suffixtree *stree)
221{
222
223}
224
225/*EE
226  \subsection{Traversing the Suffix Tree}\label{Traverse}
227
228  The following function
229  implements the function \emph{scanprefix}, restricted to the case that
230  the starting location is a node location. \texttt{stree} is the suffix
231  tree, \texttt{outloc} is the resulting location, \texttt{startnode} is
232  the branching node, the scanning starts from, and \texttt{left}
233  and \texttt{right} delimit the string, say \(s\), to be scanned.
234  The function returns \texttt{NULL}, if \(s\) is scanned completely.
235  Otherwise, it points to a suffix of \(s\).
236  \texttt{rescanlength} is the length of a prefix of
237  \(s\) that already occurs in the suffix tree. It is always safe
238  to set \texttt{rescanlength} to 0.
239*/
240
241SYMBOL *scanprefixfromnodestree(Suffixtree *stree,Location *loc,
242                                Bref btptr,SYMBOL *left,
243                                SYMBOL *right,Uint rescanlength)
244{
245
246}
247
248/*EE
249  The following function
250  implements the function \emph{scanprefix}.  \texttt{stree} is the suffix
251  tree, \texttt{outloc} is the resulting location, \texttt{inloc} is the
252  location, the scanning starts from, and \texttt{left}
253  and \texttt{right} delimit the string, say \(s\), to be scanned.
254  The function returns \texttt{NULL}, if \(s\) is scanned completely.
255  Otherwise, it points to a suffix of \(s\).
256  \texttt{rescanlength} is the length of a prefix of
257  \(s\) that already occurs in the suffix tree. It is always safe
258  to set \texttt{rescanlength} to 0.
259*/
260
261SYMBOL *scanprefixstree(Suffixtree *stree,Location *outloc,
262                        Location *inloc,SYMBOL *left,
263                        SYMBOL *right,Uint rescanlength)
264{
265
266}
267
268/*EE
269  The following function
270  is similar to the function \emph{scanprefixfromnodestree}. It additionally
271  stores in the array \texttt{path} the sequence of branching nodes
272  that has been visited during the scan to location \texttt{loc}.
273  The array \texttt{path} may not be empty, in which case the list of
274  visited branching nodes is appended to it.
275*/
276
277SYMBOL *findprefixpathfromnodestree(Suffixtree *stree,
278                                    ArrayPathinfo *path,
279                                    Location *loc,Bref btptr,
280                                    SYMBOL *left,SYMBOL *right,
281                                    Uint rescanlength)
282{
283
284}
285
286/*EE
287  The following function
288  is similar to the function \emph{scanprefixstree}. It additionally
289  stores in the array \texttt{path} the sequence of branching nodes
290  that has been visited during the scan to location \texttt{inloc}.
291  The array \texttt{path} may not be empty, in which case the list
292  of visited branching nodes is appended to it.
293*/
294
295SYMBOL *findprefixpathstree(Suffixtree *stree,
296                            ArrayPathinfo *path,
297                            Location *outloc,
298                            Location *inloc,
299                            SYMBOL *left,SYMBOL *right,
300                            Uint rescanlength);
301
302/*EE
303  The following function
304  implements the function \emph{rescan}. \texttt{stree} is the suffix
305  tree, \texttt{outloc} is the resulting location, \texttt{startnode}
306  is the branching node, the scanning starts from, and \texttt{left}
307  and \texttt{right} delimit the string, say \(s\), to be rescanned.
308*/
309
310void rescanstree(Suffixtree *stree,Location *loc,
311                 Bref btptr,SYMBOL *left,SYMBOL *right)
312{
313
314}
315
316/*EE
317  The following function
318  implements the function \emph{linkloc}. \texttt{stree} is the
319  suffix tree, \texttt{outloc} is the resulting location, and
320  \texttt{inloc} is the input location.
321*/
322
323void linklocstree(Suffixtree *stree,Location *outloc,
324                  Location *inloc)
325{
326
327}
328
329/*EE
330  The following function
331  applies the function \texttt{processnode} to all branching nodes
332  of the suffix tree \texttt{stree}. The \emph{root} is skipped, if
333  and only if \texttt{skiproot} is \texttt{True}. The arguments of
334  \texttt{processnode} are as follows: The suffix tree \texttt{stree},
335  the branching node, its depth, its head position, and the
336  pointer \texttt{info}.
337*/
338
339void overallstree(Suffixtree *stree,BOOL skiproot,
340                  void(*processnode)(Suffixtree *,Bref,
341                                     Uint,Uint,void *),
342                  void *info)
343{
344
345}
346
347/*EE
348  The following function
349  applies the function \texttt{processnode} to all maximal branching
350  nodes of the suffix tree \texttt{stree}. The arguments of
351  \texttt{processnode} are as follows: The suffix tree \texttt{stree},
352  the reference to the branching node, its depth, its head position,
353  and the pointer \texttt{info}.
354*/
355
356void overmaximalstree(Suffixtree *stree,
357                      void(*processnode)(Suffixtree *,Bref,
358                                         Uint,Uint,void *),
359                      void *info)
360{
361
362}
363
364/*EE
365  The following function
366  enumerates all immediate successors of the branching node \texttt{bnode}.
367  Suppose the leaf with leaf number \texttt{j} is a
368  successor of \texttt{bnode}. Then the function \texttt{processleaf} with
369  arguments \texttt{stree}, \texttt{j}, and \texttt{info} is called.
370  Suppose the branching node \texttt{bsucc} is a successor of
371  \texttt{bnode}. Then the function \texttt{processbranch} is called with
372  arguments \texttt{stree}, \texttt{bsucc}, and \texttt{info}.
373*/
374
375void oversuccsstree(Suffixtree *stree,Bref bnode,
376                    void(*processleaf)(Suffixtree *,Uint,void *),
377                    void(*processbranch)(Suffixtree *,Bref,void *),
378                    void *info)
379{
380}
381
382/*EE
383  The following function
384  performs a (possibly) limited depth first traversal of the suffix
385  tree rooted by \texttt{startnode}.
386  \texttt{startnode} can be a reference to
387  a leaf or a reference to a branching node. The nodes of the subtree are
388  enumerated in depth first left to right order. The argument
389  \texttt{processleaf}
390  is not allowed to be \texttt{NULL}. \texttt{processbranch1} and
391  \texttt{processbranch2} can either be both \texttt{NULL} or
392  both different from \texttt{NULL}.
393  \begin{itemize}
394  \item
395  Each time a leaf, say \(\overline{v}\), with leaf number
396  \texttt{j} is encountered, the function \texttt{processleaf} is called
397  with arguments \texttt{j}, \texttt{lca}, and
398  \texttt{info}. \texttt{lca} is the longest common ancestor of
399  \(\overline{v}\) and the previous leaf encountered during the traversal.
400  \texttt{lca} is \texttt{NULL}, if \(\overline{v}\) is the first leaf
401  encountered in the traversal. If \texttt{processleaf} returns a value
402  smaller than 0, then \texttt{depthfirststree} terminates with a
403  return value \texttt{-1}.
404  \item
405  Each time a branching node \texttt{bsucc} is visited for the
406  first time, the function \texttt{process\-branch1} is called with
407  arguments \texttt{bsucc} and \texttt{info}. If
408  \texttt{processbranch1} returns \texttt{False}, then
409  the entire subtree below \texttt{bsucc} is discarded. Otherwise the
410  depth first traversal continues with the subtree rooted by \texttt{bsucc}.
411  \item
412  Each time a branching node \texttt{bsucc}
413  is visited for the second time (i.e.\
414  the entire subtree below \texttt{bsucc} has been processed), the function
415  \texttt{processbranch2} is called with arguments \texttt{bsucc} and
416  \texttt{info}. If \texttt{processbranch2} returns a value smaller than
417  0, then \texttt{depthfirststree} returns with value \texttt{-1}.
418  \end{itemize}
419  Either \texttt{processleaf} or \texttt{processbranch1} and
420  \texttt{processbranch2} can be \texttt{NULL}, in which case there is no
421  function call. If \texttt{stoptraversal} is not \texttt{NULL}, then
422  after each call the function \texttt{stoptraversal} is applied
423  to \texttt{stopinfo}. If the return value of this call is \texttt{True},
424  then the depth first traversal stops.
425
426  In case everything goes right, \texttt{depthfirststree} returns 0.
427*/
428
429Sint depthfirststree(Suffixtree *stree,Reference *startnode,
430                     Sint (*processleaf)(Uint,Bref,void *),
431                     BOOL (*processbranch1)(Bref,void *),
432                     Sint (*processbranch2)(Bref,void *),
433                     BOOL (*stoptraversal)(void *),
434                     void *stopinfo,void *info)
435{
436
437}
438
439