1 /* 2 Copyright (c) 2003 by Stefan Kurtz and The Institute for 3 Genomic Research. This is OSI Certified Open Source Software. 4 Please see the file LICENSE for licensing information and 5 the file ACKNOWLEDGEMENTS for names of contributors to the 6 code base. 7 */ 8 9 #ifndef STREETYP_H 10 #define STREETYP_H 11 12 #include "types.h" 13 #include "arraydef.h" 14 15 //} 16 17 /* 18 A \texttt{Reference} consists of an \texttt{address} pointing a leaf, 19 or to a branching node. The boolean \texttt{toleaf} is \texttt{True} if 20 and only if \texttt{address} points to a leaf. 21 */ 22 23 typedef struct 24 { 25 BOOL toleaf; 26 Uint *address; 27 } Reference; // \Typedef{Reference} 28 29 /* 30 The following types are used for references to leaves and 31 branching nodes, respectively. We will always identify a leaf and 32 and branching node with their references. 33 */ 34 35 typedef Uint * Bref; // \Typedef{Bref} 36 typedef Uint * Lref; // \Typedef{Lref} 37 38 /* 39 For each branching node we store five values, as described in Section 40 \ref{Representation}. These values comprise the following structure. 41 */ 42 43 typedef struct 44 { 45 Uint headposition, // the head position of the branching node 46 depth; // the depth of the branching node 47 Bref suffixlink; // the suffix link is always to a branching node 48 Reference firstchild, // the reference to the first child 49 branchbrother; // the reference to the right brother; 50 // if this doesn't exist then it's \texttt{NULL} 51 } Branchinfo; // \Typedef{Branchinfo} 52 53 /* 54 For each leaf, we store a reference to its right brother, which is 55 \texttt{NULL}, if the right brother does not exist. This is 56 expressed in the type synonym \texttt{Leafinfo}. 57 */ 58 59 typedef Reference Leafinfo; // \Typedef{Leafinfo} 60 61 /* 62 A suffix tree is implemented by the type \texttt{Suffixtree}. 63 This structure contains several components which are mostly only used 64 during the suffix tree construction. For applications, assume the 65 following definition. Note that the input sequence is represented 66 as an array of elements of type \texttt{SYMBOL}. The latter 67 is a synonym for \texttt{Uchar} by default. 68 69 @typedef struct 70 @{ 71 @ SYMBOL *text; // points to the input string 72 @ Uint textlen; // the length of the input string 73 @ Uint *branchtab; // stores the infos for the branching nodes 74 @ Uint *leaftab; // stores the brother-references of the leaves 75 @} Suffixtree; // \Typedef{Suffixtree} 76 77 */ 78 79 //\Ignore{ 80 81 struct Suffixtreetype 82 { 83 Uint textlen, // the length of the input string 84 *leaftab, // stores the brother-references of the leafs 85 *branchtab, // table TBranch 86 *rootchildren; // references to successors of root 87 SYMBOL *text, // points to the input string 88 *sentinel; // points to the position of the \(\$\)-symbol 89 90 Uint nextfreeleafnum, // the number of the next leaf 91 headnodedepth, // the depth of the headnode 92 insertnode, // the node the split edge leads to 93 insertprev, // the edge preceeding the split edge 94 smallnotcompleted, // the number of small nodes in the current chain 95 nextfreebranchnum, // the number of the next free branch node 96 onsuccpath, // refers to node on success path of headnode 97 currentdepth, // depth of the new branch node 98 branchnodeoffset, // number of leafs in tree 99 alphasize, // the number of different characters in t 100 maxbranchdepth, // maximal depth of branching node 101 largenode, // number of large nodes 102 smallnode, // number of small nodes 103 *setlink, // address of a nil-reference 104 *nextfreeleafptr, // points to next free entry in leaftab 105 *chainstart, // address of the node, current chains starts at 106 *nextfreebranch, // reference to next free base addr. in branchtab 107 *headnode, // left component of head location 108 currentbranchtabsize, // current number of cells in branchtab 109 *firstnotallocated, // refers to the last address, such that at 110 // least \texttt{LARGEINTS} integers are 111 // available. So a large node can be stored in 112 // the available amount of space. 113 *nonmaximal, // bit table: if node with headposition \(i\) is 114 // not maximal, then \(nonmaximal[i]\) is set. 115 *leafcounts; // holds counts of the number of leafs in subtree 116 // indexed by headposition 117 BOOL setatnewleaf; // nil-reference is stored in new leaf 118 SYMBOL *headstart, // these references represent the right component 119 *headend, // of the head location \((\overline{u},v)\). 120 // \emph{headstart} refers to the first character 121 // of \(v\), and \emph{headend} to the last 122 // character. In case, \(v=\varepsilon\), 123 // \(\emph{headend}=\emph{NULL}\). 124 *tailptr; // points to the tail 125 126 #ifdef DEBUG 127 char * (*showsymbolstree)(SYMBOL,Uchar *); 128 Uchar *alphabet; 129 Uint splitleafedge, 130 splitinternaledge, 131 artificial, 132 insertleafcalls, 133 largelinks, 134 largelinkwork, 135 largelinklinkwork, 136 multiplications, 137 nodecount, 138 *maxset; 139 void *generalcounter; 140 #endif 141 #if (SYMBOLBYTES == 2) || (SYMBOLBYTES == 4) 142 Sint lastcharindex; 143 #endif 144 145 }; 146 147 typedef struct Suffixtreetype Suffixtree; 148 149 DECLAREARRAYSTRUCT(Bref); 150 151 //} 152 153 /* 154 A location is implemented by the type \texttt{Location}. 155 */ 156 157 typedef struct 158 { 159 Stringtype locstring; // string represented by location 160 Bref previousnode; // reference to previous node (which is branching) 161 SYMBOL *firstptr; // pointer to first character of edge label 162 Uint edgelen, // length of edge 163 remain; // number of remaining characters on edge 164 Reference nextnode; // reference to node the edge points to 165 } Location; // \Typedef{Location} 166 167 /* 168 If a location is a node \(\overline{u}\), we set \texttt{remain} to 0, and 169 store a reference to \(\overline{u}\) in \texttt{nextnode}. Moreover, we 170 store a position where \(u\) starts and its length in \texttt{locstring}. 171 If the location is of the form \((\overline{u},v,w,\overline{uvw})\), 172 then the components of the location satisfies the following values: 173 \begin{enumerate} 174 \item 175 \texttt{previousnode} is a reference to \(\overline{u}\) 176 \item 177 \texttt{firstptr} points to the first symbol of the edge label \(vw\). 178 \item 179 \(\texttt{edgelen}=\Size{vw}\) 180 \item 181 \(\texttt{remain}=\Size{w}\) 182 \item 183 \texttt{nextnode} is a reference to \(\overline{uvw}\). 184 \end{enumerate} 185 Since \(w\) is not empty, a location is a node location if and only if 186 \texttt{remain} is 0. 187 */ 188 189 //\Ignore{ 190 191 /* 192 A simple location stores just a part of information stored in a suffix tree. 193 */ 194 195 typedef struct 196 { 197 Uint remain, 198 textpos; // these last two items are redundant and can be computed 199 Reference nextnode; 200 } Simpleloc; // \Typedef{Simpleloc} 201 202 DECLAREARRAYSTRUCT(Simpleloc); 203 204 /* 205 A path in the suffix tree is stored as an array of \texttt{Pathinfo}-records. 206 */ 207 208 typedef struct 209 { 210 Uint depth, headposition; 211 Bref ref; 212 } Pathinfo; // \Typedef{Pathinfo} 213 214 DECLAREARRAYSTRUCT(Pathinfo); 215 216 typedef struct 217 { 218 BOOL secondtime; 219 ArrayBref stack; 220 } DFSstate; // \Typedef{DFSstate} 221 222 #endif 223 224 //} 225