1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_INDEX_WOTD_H
36 #define SEQAN_HEADER_INDEX_WOTD_H
37 
38 namespace SEQAN_NAMESPACE_MAIN
39 {
40 
41 
42 //////////////////////////////////////////////////////////////////////////////
43 // wotd tree index fibres
44 
45 /*!
46  * @defgroup WOTDIndexFibres WOTD Index Fibres
47  * @brief Tag to select a specific fibre (e.g. table, object, ...) of an @link
48  *        IndexWotd @endlink index.
49  *
50  * These tags can be used to get @link Fibre @endlink of an @link IndexWotd @endlink.
51  *
52  * @see Fibre
53  * @see Index#getFibre
54  * @see IndexWotd
55  *
56  * @tag WOTDIndexFibres#WotdDir
57  * @brief The child table.
58  *
59  * @tag WOTDIndexFibres#WotdRawSA
60  * @brief The raw suffix array.
61  *
62  * @tag WOTDIndexFibres#WotdText
63  * @brief The original text the index should be based on.
64  *
65  * @tag WOTDIndexFibres#WotdRawText
66  * @brief The raw text the index is really based on.
67  *
68  * @tag WOTDIndexFibres#WotdSA
69  * @brief The suffix array.
70  */
71 
72 //////////////////////////////////////////////////////////////////////////////
73 /*!
74  * @fn IndexWotd#indexSA
75  * @headerfile <seqan/index.h>
76  * @brief Shortcut for <tt>getFibre(.., WotdSA)</tt>.
77  *
78  * @signature TSa indexSA(index);
79  *
80  * @param[in] index The @link IndexWotd @endlink object holding the fibre.
81  *
82  * @return TSa A reference to the @link WOTDIndexFibres#WotdSA @endlink fibre (partially sorted suffix array).
83  */
84 
85 /*!
86  * @fn IndexWotd#indexDir
87  * @headerfile <seqan/index.h>
88  * @brief Shortcut for <tt>getFibre(.., WotdDir())</tt>.
89  * @signature TFibre indexDir(index);
90  *
91  * @param[in] index The @link IndexWotd @endlink object holding the fibre.
92  *
93  * @return TFibre A reference to the @link WOTDIndexFibres#WotdDir @endlink fibre (tree structure).
94  */
95 
96 /*!
97  * @fn IndexWotd#saAt
98  * @headerfile <seqan/index.h>
99  * @note Advanced functionality, not commonly used.
100  * @brief Shortcut for <tt>value(indexSA(..), ..)</tt>.
101  *
102  * @signature TValue saAt(position, index);
103  *
104  * @param[in] index The @link IndexWotd @endlink object holding the fibre.
105  * @param[in] position A position in the array on which the value should be accessed.
106  *
107  * @return TValue A reference or proxy to the value in the @link WOTDIndexFibres#WotdSA @endlink fibre.
108  *                To be more precise, a reference to a position containing a value of type
109  *                @link SAValue @endlink is returned (or a proxy).
110  */
111 
112 /*!
113  * @fn IndexWotd#dirAt
114  * @headerfile <seqan/index.h>
115  * @brief Shortcut for <tt>value(indexDir(index), position)</tt>.
116  *
117  * @signature TFibre dirAt(position, index);
118  *
119  * @param[in] index    The @link IndexWotd @endlink object holding the fibre.
120  * @param[in] position A position in the array on which the value should be accessed.
121  *
122  * @return TFibre A reference to the @link WOTDIndexFibres#WotdDir @endlink fibre.
123  */
124 
125     typedef FibreText        WotdText;
126     typedef FibreRawText    WotdRawText;
127     typedef FibreSA         WotdSA;
128     typedef FibreRawSA        WotdRawSA;
129     typedef FibreDir        WotdDir;
130 
131 //////////////////////////////////////////////////////////////////////////////
132 // wotd tree index
133 
134 /*!
135  * @class IndexWotd
136  * @extends Index
137  * @implements StringTreeConcept
138  * @headerfile <seqan/index.h>
139  * @brief An index based on a lazy suffix tree (see Giegerich et al., "Efficient implementation of lazy suffix
140  *        trees").
141  *
142  * @signature template <typename TText, typename TSpec>
143  *            class Index<TText, IndexWotd<TSpec> >;
144  *
145  * @tparam TText The @link TextConcept @endlink text type.
146  * @tparam TSpec The type for further specialization of the Index type.
147  *
148  * The fibres (see @link Index @endlink and @link Fibre @endlink) of this index are a partially sorted suffix array
149  * (see @link WOTDIndexFibres#WotdSA @endlink) and the wotd tree (see @link WOTDIndexFibres#WotdDir @endlink).
150  *
151  * Demo: Demo.Constraint Iterator
152  *
153  * @see WOTDIndexFibres
154  */
155 
156     struct WotdOriginal_;
157     typedef Tag<WotdOriginal_> const WotdOriginal;
158 
159     template < typename TSpec = void >
160     struct IndexWotd {};
161 
162 /*
163     template < typename TObject, typename TSpec >
164     struct Fibre< Index<TObject, IndexWotd<TSpec> >, FibreDir>
165     {
166         typedef Index<TObject, IndexWotd<TSpec> > TIndex;
167         typedef String<
168             typename typename Size<TIndex>::Type,
169             Alloc<>
170         > Type;
171     };
172 */
173 
174     template < typename TObject, typename TSpec >
175     class Index<TObject, IndexWotd<TSpec> > {
176     public:
177         typedef typename Fibre<Index, WotdText>::Type        TText;
178         typedef typename Fibre<Index, WotdSA>::Type         TSA;
179         typedef typename Fibre<Index, WotdDir>::Type        TDir;
180 
181         typedef typename Value<Index>::Type                    TValue;
182         typedef typename Value<TDir>::Type                    TDirValue;
183         typedef typename Size<Index>::Type                    TSize;
184         typedef String<TSize, Alloc<> >                        TCounter;
185         typedef String<typename Value<TSA>::Type, Alloc<> >    TTempSA;
186         typedef typename Cargo<Index>::Type                    TCargo;
187 
188         // 1st word flags
189         static TDirValue const LEAF          = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 1); // this node is a leaf
190         static TDirValue const LAST_CHILD    = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 2); // this node is the last child
191         // 2nd word flag
192         static TDirValue const UNEVALUATED   = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 1); // this node is partially evalutated and has no evaluated children
193         static TDirValue const SENTINELS     = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 2); // the children of this node have solely $-edges
194 
195         static TDirValue const BITMASK0      = ~(LEAF | LAST_CHILD);
196         static TDirValue const BITMASK1      = ~(UNEVALUATED | SENTINELS);
197 
198 
199         Holder<TText>    text;    // underlying text
200         TSA                sa;        // suffix array sorted by the first q chars
201         TDir            dir;    // bucket directory
202         TCargo            cargo;    // user-defined cargo
203 
204 
205         TTempSA            tempSA;
206         TCounter        tempOcc;
207         TCounter        tempBound;
208 
209         TSize            sentinelOcc;
210         TSize            sentinelBound;
211         bool            interSentinelNodes;    // should virtually one (true) $-sign or many (false) $_i-signs be appended to the strings in text
212 
Index()213         Index():
214             interSentinelNodes(false) {}
215 
Index(Index & other)216         Index(Index &other) :
217             text(other.text),
218             sa(other.sa),
219             dir(other.dir),
220             cargo(other.cargo),
221             tempSA(other.tempSA),
222             tempBound(other.tempBound),
223             sentinelOcc(other.sentinelOcc),
224             sentinelBound(other.sentinelBound),
225             interSentinelNodes(other.interSentinelNodes) {}
226 
Index(Index const & other)227         Index(Index const &other) :
228             text(other.text),
229             sa(other.sa),
230             dir(other.dir),
231             cargo(other.cargo),
232             tempSA(other.tempSA),
233             tempBound(other.tempBound),
234             sentinelOcc(other.sentinelOcc),
235             sentinelBound(other.sentinelBound),
236             interSentinelNodes(other.interSentinelNodes) {}
237 
238         template <typename TText_>
Index(TText_ & _text)239         Index(TText_ &_text) :
240             text(_text),
241             sentinelOcc(0),
242             sentinelBound(0),
243             interSentinelNodes(false) {}
244 
245         template <typename TText_>
Index(TText_ const & _text)246         Index(TText_ const &_text):
247             text(_text),
248             sentinelOcc(0),
249             sentinelBound(0),
250             interSentinelNodes(false) {}
251     };
252 /*
253     template < typename TText, typename TSpec >
254     struct Value< Index<TText, IndexWotd<TSpec> > > {
255         typedef typename Value< typename Fibre< Index<TText, IndexWotd<TSpec> >, WotdRawText >::Type >::Type Type;
256     };
257 
258     template < typename TText, typename TSpec >
259     struct Size< Index<TText, IndexWotd<TSpec> > > {
260         typedef typename Size< typename Fibre< Index<TText, IndexWotd<TSpec> >, WotdRawText >::Type >::Type Type;
261     };
262 */
263 
264 template <typename TText, typename TSpec>
265 SEQAN_CONCEPT_IMPL((Index<TText, IndexWotd<TSpec> >), (StringTreeConcept));
266 
267 template <typename TText, typename TSpec>
268 SEQAN_CONCEPT_IMPL((Index<TText, IndexWotd<TSpec> > const), (StringTreeConcept));
269 
270 //////////////////////////////////////////////////////////////////////////////
271 // default fibre creators
272 
273     template < typename TText, typename TSpec >
274     struct DefaultIndexCreator<Index<TText, IndexWotd<TSpec> >, FibreDir> {
275         typedef Default Type;
276     };
277 
278     template < typename TText, typename TSSetSpec, typename TSpec >
279     struct DefaultIndexCreator<Index<StringSet<TText, TSSetSpec>, IndexWotd<TSpec> >, FibreDir> {
280         typedef Default Type;
281     };
282 
283 //////////////////////////////////////////////////////////////////////////////
284 // default finder
285 
286     template < typename TText, typename TSpec >
287     struct DefaultFinder< Index<TText, IndexWotd<TSpec> > >
288     {
289         typedef FinderSTree Type;    // standard wotd finder is tree based search
290     };
291 
292 //////////////////////////////////////////////////////////////////////////////
293 
294 
295     template <typename TSize>
296     struct VertexWotdOriginal_ {
297         TSize        node;            // position of current node entry in directory
298         TSize        parentRepLen;    // representative length of parent node
299         TSize        edgeLen;        // length of edge above current node
300 
301         VertexWotdOriginal_() : node(0), parentRepLen(0), edgeLen(0) {}
302         VertexWotdOriginal_(MinimalCtor) : node(0), parentRepLen(0), edgeLen(0)
303         {
304             _setSizeInval(node);
305         }
306     };
307 
308     template <typename TSize>
309     struct VertexWotdModified_ {
310         TSize        node;            // position of current node entry in directory
311         TSize        parentRepLen;    // representative length of parent node
312         TSize        edgeLen;        // length of edge above current node
313         Pair<TSize> range;            // current SA interval of hits
314         TSize        parentRight;    // right boundary of parent node's range (allows to go right)
315 
316         VertexWotdModified_() :
317             node(0),
318             parentRepLen(0),
319             edgeLen(0),
320             range(0,0),
321             parentRight(0)
322         {}
323         VertexWotdModified_(MinimalCtor) :
324             node(0),
325             parentRepLen(0),
326             edgeLen(0),
327             range(0,0),
328             parentRight(0)
329         {}
330         VertexWotdModified_(Pair<TSize> const &otherRange, TSize otherParentRight) :
331             node(0),
332             parentRepLen(0),
333             edgeLen(0),
334             range(otherRange),
335             parentRight(otherParentRight)
336         {}
337     };
338 
339 //////////////////////////////////////////////////////////////////////////////
340     template < typename TText >
341     struct VertexDescriptor< Index<TText, IndexWotd<WotdOriginal> > > {
342         typedef typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type TSize;
343         typedef VertexWotdOriginal_<TSize> Type;
344     };
345 
346     template < typename TText, typename TSpec >
347     struct VertexDescriptor< Index<TText, IndexWotd<TSpec> > > {
348         typedef typename Size< Index<TText, IndexWotd<TSpec> > >::Type TSize;
349         typedef VertexWotdModified_<TSize> Type;
350     };
351 
352     template < typename TText, typename TSpec >
353     void _indexRequireTopDownIteration(Index<TText, IndexWotd<TSpec> > &index)
354     {
355         indexRequire(index, WotdDir());
356     }
357 
358 //////////////////////////////////////////////////////////////////////////////
359 // history stack functions
360 
361     template <typename TSize>
362     struct HistoryStackWotdOriginal_
363     {
364         TSize        node;        // position of current node entry in directory
365         TSize        edgeLen;    // length of edge above current node
366     };
367 
368     template <typename TSize>
369     struct HistoryStackWotdModified_
370     {
371         TSize        node;        // position of current node entry in directory
372         TSize        edgeLen;    // length of edge above current node
373         Pair<TSize> range;        // current SA interval of hits
374     };
375 
376     template < typename TText, typename TSpec >
377     struct HistoryStackEntry_< Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > >
378     {
379         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
380         typedef typename Size<TIndex>::Type                TSize;
381         typedef HistoryStackWotdOriginal_<TSize>        Type;
382     };
383 
384     template < typename TText, typename TIndexSpec, typename TSpec >
385     struct HistoryStackEntry_< Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > >
386     {
387         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
388         typedef typename Size<TIndex>::Type                TSize;
389         typedef HistoryStackWotdModified_<TSize>        Type;
390     };
391 
392 
393     template < typename TText, typename TSpec >
394     inline void
395     _historyPush(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it)
396     {
397         it._parentDesc = value(it);
398         value(it).parentRepLen += parentEdgeLength(it);
399     }
400 
401     template < typename TText, typename TIndexSpec, typename TSpec >
402     inline void
403     _historyPush(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it)
404     {
405         it._parentDesc = value(it);
406         value(it).parentRepLen += parentEdgeLength(it);
407         value(it).parentRight = value(it).range.i2;
408     }
409 
410     template < typename TText, typename TSpec >
411     inline void
412     _historyPush(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
413     {
414         typedef typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type TSize;
415         TSize edgeLen = parentEdgeLength(it);
416         HistoryStackWotdOriginal_<TSize> entry = { value(it).node, edgeLen };
417         appendValue(it.history, entry);
418         value(it).parentRepLen += edgeLen;
419     }
420 
421     template < typename TText, typename TIndexSpec, typename TSpec >
422     inline void
423     _historyPush(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
424     {
425         typedef typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type TSize;
426         TSize edgeLen = parentEdgeLength(it);
427         HistoryStackWotdModified_<TSize> entry = { value(it).node, edgeLen, value(it).range };
428         appendValue(it.history, entry);
429         value(it).parentRepLen += edgeLen;
430         value(it).parentRight = value(it).range.i2;
431     }
432 
433 //////////////////////////////////////////////////////////////////////////////
434     template < typename TText, typename TIndexSpec, typename TPropertyMap >
435     inline void
436     resizeVertexMap(
437         TPropertyMap & pm,
438         Index<TText, IndexWotd<TIndexSpec> > const& index)
439     {
440         resize(pm, length(indexDir(index)), Generous());
441     }
442 
443 /* // different interface compared to resizeVertexMap(graph, ...)
444     template < typename TText, typename TIndexSpec, typename TPropertyMap, typename TProperty >
445     inline void
446     resizeVertexMap(
447         Index<TText, IndexWotd<TIndexSpec> > const& index,
448         TPropertyMap & pm,
449         TProperty const & prop)
450     {
451         resize(pm, length(indexDir(index)), prop, Generous());
452     }
453 */
454     template < typename TSize >
455     inline typename Id< VertexWotdOriginal_<TSize> const >::Type
456     _getId(VertexWotdOriginal_<TSize> const &desc)
457     {
458         return desc.node;
459     }
460 
461     template < typename TSize >
462     inline typename Id< VertexWotdOriginal_<TSize> >::Type
463     _getId(VertexWotdOriginal_<TSize> &desc)
464     {
465         return _getId(const_cast<VertexWotdOriginal_<TSize> const &>(desc));
466     }
467 
468     template < typename TSize >
469     inline typename Id< VertexWotdModified_<TSize> const >::Type
470     _getId(VertexWotdModified_<TSize> const &desc)
471     {
472         return desc.node;
473     }
474 
475     template < typename TSize >
476     inline typename Id< VertexWotdModified_<TSize> >::Type
477     _getId(VertexWotdModified_<TSize> &desc)
478     {
479         return _getId(const_cast<VertexWotdModified_<TSize> const &>(desc));
480     }
481 
482 //////////////////////////////////////////////////////////////////////////////
483 
484     template < typename TSize >
485     inline bool _isRoot(VertexWotdOriginal_<TSize> const &value)
486     {
487         return value.node == 0;
488     }
489 
490     template < typename TSize >
491     inline bool _isRoot(VertexWotdModified_<TSize> const &value)
492     {
493         return value.node == 0;
494     }
495 
496     // is this a leaf? (including empty $-edges)
497     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder >
498     inline bool _isLeaf(
499         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TSpec> > const &it,
500         VSTreeIteratorTraits<TDfsOrder, False> const)
501     {
502         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
503         TIndex const &index = container(it);
504         return (dirAt(value(it).node, index) & index.LEAF) != 0;
505     }
506 
507     // is this a leaf? (excluding empty $-edges)
508     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder >
509     inline bool _isLeaf(
510         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TSpec> > const &it,
511         VSTreeIteratorTraits<TDfsOrder, True> const)
512     {
513         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
514 
515         TIndex const &index = container(it);
516         if (dirAt(value(it).node, index) & index.LEAF)
517             return true;
518 
519         // ensure node evaluation and test for sentinel child edges?
520         return _wotdEvaluate(it) & index.SENTINELS;
521     }
522 
523 
524     // parentEdgeLength - ORIGINAL VERSION
525     template < typename TIndex, typename TSize >
526     inline typename Size<TIndex>::Type
527     parentEdgeLength(TIndex const &index, VertexWotdOriginal_<TSize> &vDesc)
528     {
529         TSize edgeLen = vDesc.edgeLen;
530         if (edgeLen != (TSize)-1)
531             return edgeLen;
532 
533         TSize pos = vDesc.node;
534         TSize w0 = dirAt(pos, index);
535         if (w0 & index.LEAF)
536             return vDesc.edgeLen = suffixLength(w0 & index.BITMASK0, index);
537 
538         TSize w1 = dirAt(pos + 1, index);
539         if (w1 & index.UNEVALUATED)
540             return vDesc.edgeLen = _bucketLcp(
541                 infix(indexSA(index), w0 & index.BITMASK0, w1 & index.BITMASK1),
542                 indexText(index));
543         else
544             return vDesc.edgeLen = _getNodeLP(index, w1) - (w0 & index.BITMASK0);
545     }
546 
547     // parentEdgeLength - MODIFIED VERSION
548     template < typename TIndex, typename TSize >
549     inline typename Size<TIndex>::Type
550     parentEdgeLength(TIndex const &index, VertexWotdModified_<TSize> &vDesc)
551     {
552         TSize edgeLen = vDesc.edgeLen;
553         if (edgeLen != (TSize)-1)
554             return edgeLen;
555 
556         TSize pos = vDesc.node;
557         TSize w0 = dirAt(pos, index);
558         if (w0 & index.LEAF)
559             return vDesc.edgeLen =
560                 suffixLength(saAt(vDesc.range.i1, index), index) - vDesc.parentRepLen;
561 
562         TSize w1 = dirAt(pos + 1, index);
563         if (w1 & index.UNEVALUATED)
564             if (_isSizeInval(vDesc.range.i2))
565                 return vDesc.edgeLen = _bucketLcp(
566                     suffix(indexSA(index), vDesc.range.i1),
567                     indexText(index),
568                     vDesc.parentRepLen) - vDesc.parentRepLen;
569             else
570                 return vDesc.edgeLen = _bucketLcp(
571                     infix(indexSA(index), vDesc.range.i1, vDesc.range.i2),
572                     indexText(index),
573                     vDesc.parentRepLen) - vDesc.parentRepLen;
574         else
575             return (dirAt(w1 & index.BITMASK1, index) & index.BITMASK0) - vDesc.parentRepLen;
576     }
577 
578     template < typename TText, typename TIndexSpec, typename TSpec >
579     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
580     parentEdgeLength(Iter<
581         Index<TText, IndexWotd<TIndexSpec> >,
582         VSTree< TopDown<TSpec> > > const &it)
583     {
584         typedef Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > TIter;
585         return parentEdgeLength(container(it), value(const_cast<TIter&>(it)));
586     }
587 
588     template < typename TText, typename TIndexSpec, typename TSpec >
589     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
590     parentRepLength(Iter<
591         Index<TText, IndexWotd<TIndexSpec> >,
592         VSTree< TopDown<TSpec> > > const &it)
593     {
594         return value(it).parentRepLen;
595     }
596 
597     template < typename TText, typename TIndexSpec, typename TSpec >
598     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
599     parentRepLength(Iter<
600         Index<TText, IndexWotd<TIndexSpec> >,
601         VSTree< TopDown< ParentLinks<TSpec> > > > const &it)
602     {
603         return value(it).parentRepLen;
604     }
605 
606     template < typename TText, typename TIndexSpec, typename TSpec >
607     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
608     repLength(Iter<
609         Index<TText, IndexWotd<TIndexSpec> >,
610         VSTree< TopDown<TSpec> > > const &it)
611     {
612         return parentRepLength(it) + parentEdgeLength(it);
613     }
614 
615 
616     // parentEdgeLabel - ORIGINAL VERSION (doesn't work if TText is a StringSet)
617     template < typename TText, typename TSpec >
618     inline typename Infix< typename Fibre<Index<TText, IndexWotd<WotdOriginal> >, EsaText>::Type const >::Type
619     parentEdgeLabel(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > const &it)
620     {
621         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
622         typedef typename Size<TIndex>::Type                TSize;
623 
624         TIndex const &index = container(it);
625 
626         if (isRoot(it))
627             return infix(indexText(index), 0, 0);
628         else {
629             TSize occ = _getNodeLP(index, value(it).node);
630             return infix(indexText(index), occ, occ + parentEdgeLength(it));
631         }
632     }
633 
634     // getOccurrence - ORIGINAL VERSION
635     template < typename TText, typename TSpec >
636     inline typename SAValue<Index<TText, IndexWotd<WotdOriginal> > >::Type
637     getOccurrence(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > const &it) {
638         return _getNodeLP(container(it), value(it).node) - value(it).parentRepLen;
639     }
640 
641     template < typename TText, typename TIndexSpec, typename TSpec >
642     inline bool
643     emptyParentEdge(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TopDown<TSpec> > > const &it)
644     {
645         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
646 
647         TIndex const &index = container(it);
648         typename SAValue<TIndex>::Type pos = getOccurrence(it);
649         return getSeqOffset(pos, stringSetLimits(index)) + value(it).parentRepLen
650             == sequenceLength(getSeqNo(pos, stringSetLimits(index)), index);
651     }
652 
653     // to avoid ambiguity
654     template < typename TText, typename TIndexSpec, typename TSpec >
655     inline bool
656     emptyParentEdge(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TopDown<ParentLinks<TSpec> > > > const &it)
657     {
658         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
659 
660         TIndex const &index = container(it);
661         typename SAValue<TIndex>::Type pos = getOccurrence(it);
662         return getSeqOffset(pos, stringSetLimits(index)) + value(it).parentRepLen
663             == sequenceLength(getSeqNo(pos, stringSetLimits(index)), index);
664     }
665 
666 
667 
668     template < typename TText, typename TSpec >
669     inline void
670     goRoot(Iter<
671         Index<TText, IndexWotd<WotdOriginal> >,
672         VSTree<TSpec> > &it)
673     {
674         _historyClear(it);
675         value(it).node = 0;            // start in root node (first entry in dir)
676         value(it).parentRepLen = 0;    // parent prefix length is 0
677         value(it).edgeLen = 0;        // edge length is 0
678     }
679 
680     template < typename TText, typename TIndexSpec, typename TSpec >
681     inline void
682     goRoot(Iter<
683         Index<TText, IndexWotd<TIndexSpec> >,
684         VSTree<TSpec> > &it)
685     {
686         _historyClear(it);
687         value(it).range.i1 = 0;        // start in root node with range (0,infty)
688         _setSizeInval(value(it).range.i2);    // infty is equivalent to length(index) and faster to compare
689         value(it).node = 0;            // start in root node (first entry in dir)
690         value(it).parentRepLen = 0;    // parent prefix length is 0
691         value(it).edgeLen = 0;        // edge length is 0
692     }
693 
694     template < typename TText, typename TSpec >
695     inline bool atEnd(Iter<Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > &it)
696     {
697         return _isSizeInval(value(it).node);
698     }
699 
700     template < typename TText, typename TSpec >
701     inline bool atEnd(Iter<Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > const &it)
702     {
703         return _isSizeInval(value(it).node);
704     }
705 
706 
707     // adjust iterator's right border of SA range
708     template < typename TText, typename TSpec >
709     inline void
710     _adjustRightBorder(
711         Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &)
712     {}
713 
714     template < typename TText, typename TIndexSpec, typename TSpec >
715     inline void
716     _adjustRightBorder(
717         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it)
718     {
719         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
720         typedef typename Size<TIndex>::Type                TSize;
721 
722         TIndex    const &index = container(it);
723         TSize    pos = value(it).node;
724 
725         TSize w0 = dirAt(pos, index);
726         if (w0 & index.LEAF)
727             value(it).range.i2 = value(it).range.i1 + 1;
728         else
729             if (w0 & index.LAST_CHILD)
730                 value(it).range.i2 = value(it).parentRight;
731             else {
732                 w0 = dirAt(pos + 2, index);
733                 value(it).range.i2 = w0 & index.BITMASK0;
734             }
735     }
736 
737     // go down the leftmost edge (including empty $-edges)
738     template < typename TText, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
739     inline bool
740     _goDown(
741         Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it,
742         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
743     {
744         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
745         typedef typename Size<TIndex>::Type                TSize;
746 
747         if (_isLeaf(it, EmptyEdges())) return false;
748 
749         TIndex &index = container(it);
750         _historyPush(it);
751 
752         // ensure node evaluation
753         TSize childNode = _wotdEvaluate(it);
754 
755         if (THideEmptyEdges::VALUE && (childNode & index.SENTINELS) != 0)
756             return false;
757 
758         // go down
759         value(it).node = childNode & index.BITMASK1;
760         value(it).edgeLen = -1;
761 
762         // go right if parent edge is empty
763         // or hull predicate is false
764         if ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it))
765             if (!goRight(it)) {
766                 _goUp(it);
767                 return false;
768             }
769 
770         return true;
771     }
772 
773     // go down the leftmost edge (excluding empty $-edges)
774     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
775     inline bool
776     _goDown(
777         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it,
778         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
779     {
780         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
781         typedef typename Size<TIndex>::Type                TSize;
782 
783         if (_isLeaf(it, EmptyEdges())) return false;
784         TIndex const &index = container(it);
785 
786         // ensure node evaluation
787         TSize childNode = _wotdEvaluate(it);
788 
789         if (THideEmptyEdges::VALUE && (childNode & index.SENTINELS) != 0)
790             return false;
791 
792         // go down
793         _historyPush(it);
794         value(it).node = childNode & index.BITMASK1;
795         value(it).edgeLen = -1;
796         _adjustRightBorder(it);
797 
798         // go right if parent edge is empty
799         // or hull predicate is false
800         if ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it))
801             if (!goRight(it)) {
802                 _goUp(it);
803                 return false;
804             }
805 
806         return true;
807     }
808 
809     // go right to the lexic. next sibling
810     template < typename TText, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
811     inline bool
812     _goRight(
813         Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it,
814         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
815     {
816         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
817         typedef typename Size<TIndex>::Type                TSize;
818 
819         TIndex const &index = container(it);
820 
821         do {
822             TSize w0 = dirAt(value(it).node, index);
823             if (w0 & index.LAST_CHILD)
824                 return false;
825 
826             value(it).node += (w0 & index.LEAF)? 1: 2;
827             value(it).edgeLen = -1;
828 
829             _adjustRightBorder(it);
830         } while ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it));
831         return true;
832     }
833 
834     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
835     inline bool
836     _goRight(
837         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it,
838         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
839     {
840         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
841         typedef typename Size<TIndex>::Type                TSize;
842 
843         TIndex const &index = container(it);
844 
845         do {
846             TSize w0 = dirAt(value(it).node, index);
847             if (w0 & index.LAST_CHILD)
848                 return false;
849 
850             value(it).node += (w0 & index.LEAF)? 1: 2;
851             value(it).edgeLen = -1;
852 
853             value(it).range.i1 = value(it).range.i2;
854             _adjustRightBorder(it);
855         } while ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it));
856         return true;
857     }
858 
859     // go up one edge (returns false if in root node)
860     // can be used at most once, as no history stack is available
861     template < typename TText, typename TWotdSpec, typename TSpec >
862     inline bool
863     _goUp(Iter< Index<TText, IndexWotd<TWotdSpec> >, VSTree< TopDown<TSpec> > > &it)
864     {
865         if (!isRoot(it)) {
866             value(it) = it._parentDesc;
867             return true;
868         }
869         return false;
870     }
871 
872     // go up one edge (returns false if in root node)
873     template < typename TText, typename TSpec >
874     inline bool
875     _goUp(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
876     {
877         typedef typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type TSize;
878 
879         if (!empty(it.history)) {
880             HistoryStackWotdOriginal_<TSize> const &entry = back(it.history);
881             value(it).node = entry.node;
882             value(it).parentRepLen -= entry.edgeLen;
883             value(it).edgeLen = entry.edgeLen;
884             eraseBack(it.history);
885             return true;
886         }
887         return false;
888     }
889 
890     template < typename TText, typename TIndexSpec, typename TSpec >
891     inline bool
892     _goUp(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
893     {
894         typedef typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type TSize;
895 
896         if (!empty(it.history))
897         {
898             HistoryStackWotdModified_<TSize> const &entry = back(it.history);
899             value(it).node = entry.node;
900             value(it).parentRepLen -= entry.edgeLen;
901             value(it).edgeLen = entry.edgeLen;
902             value(it).range = entry.range;
903             eraseBack(it.history);
904             if (!empty(it.history))
905                 value(it).parentRight = back(it.history).range.i2;    // copy right boundary of parent's range
906             return true;
907         }
908         return false;
909     }
910 
911     // return vertex descriptor of parent's node
912     template < typename TText, typename TSpec >
913     inline typename VertexDescriptor< Index<TText, IndexWotd<WotdOriginal> > >::Type
914     nodeUp(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > const &it)
915     {
916         typedef Index<TText, IndexWotd<WotdOriginal> > TIndex;
917         typedef typename Size<TIndex>::Type TSize;
918 
919         if (!empty(it.history))
920         {
921             HistoryStackWotdOriginal_<TSize> const &entry = back(it.history);
922             typename VertexDescriptor<TIndex>::Type desc;
923 
924             desc.node = entry.node;
925             desc.parentRepLen = value(it).parentRepLen - entry.edgeLen;
926             desc.edgeLen = entry.edgeLen;
927             TSize h = length(it.history) - 1;
928             if (h != 0) --h;
929             return desc;
930         } else
931             return value(it);
932     }
933 
934     // return vertex descriptor of parent's node
935     template < typename TText, typename TIndexSpec, typename TSpec >
936     inline typename VertexDescriptor< Index<TText, IndexWotd<TIndexSpec> > >::Type
937     nodeUp(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > const &it)
938     {
939         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
940         typedef typename Size<TIndex>::Type TSize;
941 
942         if (!empty(it.history))
943         {
944             HistoryStackWotdModified_<TSize> const &entry = back(it.history);
945             typename VertexDescriptor<TIndex>::Type desc;
946 
947             desc.node = entry.node;
948             desc.parentRepLen = value(it).parentRepLen - entry.edgeLen;
949             desc.edgeLen = entry.edgeLen;
950             return desc;
951         } else
952             return value(it);
953     }
954 
955 //////////////////////////////////////////////////////////////////////////////
956 
957 
958     // Counting sort - Step 2a: Count characters
959     template < typename TBuckets, typename TText >
960     inline void
961     _wotdCountChars(TBuckets &buckets, TText const &text)
962     {
963         typedef typename Iterator<TText const, Standard>::Type TTextIterator;
964 
965         TTextIterator itText = begin(text, Standard());
966         TTextIterator itTextEnd = end(text, Standard());
967         for (; itText != itTextEnd; ++itText)
968             ++buckets[ordValue(getValue(itText))];
969     }
970 
971 
972     template < typename TBuckets, typename TText, typename TSpec >
973     inline void
974     _wotdCountChars(TBuckets &buckets, StringSet<TText, TSpec> const &stringSet)
975     {
976         typedef typename Iterator<TText const, Standard>::Type TTextIterator;
977 
978         for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
979         {
980             TText const &text = value(stringSet, seqNo);
981             TTextIterator itText = begin(text, Standard());
982             TTextIterator itTextEnd = end(text, Standard());
983             for (; itText != itTextEnd; ++itText)
984                 ++buckets[ordValue(getValue(itText))];
985         }
986     }
987 
988     // Counting sort - Step 2b: Count the prefixLen'th characters of suffices
989     template < typename TBuckets, typename TText, typename TSA, typename TSize >
990     inline typename Size<TText>::Type
991     _wotdCountCharsWotdOriginal(
992         TBuckets &buckets,
993         TText const &text,
994         TSA &sa,
995         TSize prefixLen)
996     {
997         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
998         typedef typename Iterator<TSA, Standard>::Type          TSAIterator;
999         typedef typename Size<TText>::Type                        TTextSize;
1000 
1001         TTextIterator itText = begin(text, Standard());
1002         TSAIterator itSA = begin(sa, Standard());
1003         TSAIterator itSAEnd = end(sa, Standard());
1004 
1005         TTextSize sentinels = 0;
1006         TTextSize textLength = length(text);
1007         for (; itSA != itSAEnd; ++itSA)
1008         {
1009             // add prefix on entries in sa
1010             TTextSize saValue = (*itSA += prefixLen);
1011             if (textLength > saValue)
1012                 ++buckets[ordValue(*(itText + saValue))];
1013             else
1014                 if (textLength == saValue) ++sentinels;
1015         }
1016         return sentinels;
1017     }
1018 
1019     template < typename TBuckets, typename TText, typename TSA, typename TSize >
1020     inline typename Size<TText>::Type
1021     _wotdCountChars(
1022         TBuckets &buckets,
1023         TText const &text,
1024         TSA const &sa,
1025         TSize prefixLen)
1026     {
1027         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1028         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1029         typedef typename Size<TText>::Type                        TTextSize;
1030 
1031         TTextIterator itText = begin(text, Standard()) + prefixLen;
1032         TSAIterator itSA = begin(sa, Standard());
1033         TSAIterator itSAEnd = end(sa, Standard());
1034 
1035         TTextSize sentinels = 0;
1036         TTextSize textLength = length(text) - prefixLen;
1037         for (; itSA != itSAEnd; ++itSA)
1038         {
1039             TTextSize saValue = *itSA;
1040             if (textLength > saValue)
1041                 ++buckets[ordValue(*(itText + saValue))];
1042             else
1043                 if (textLength == saValue) ++sentinels;
1044         }
1045         return sentinels;
1046     }
1047 
1048     template <
1049         typename TBuckets,
1050         typename TText,
1051         typename TSpec,
1052         typename TSA,
1053         typename TSize
1054     >
1055     inline typename Size<TText>::Type
1056     _wotdCountChars(
1057         TBuckets &buckets,
1058         StringSet<TText, TSpec> const &stringSet,
1059         TSA const &sa,
1060         TSize prefixLen)
1061     {
1062         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1063         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1064         typedef typename Size<TText>::Type                        TTextSize;
1065 
1066         if (empty(stringSet))
1067             return 0;
1068 
1069         TTextIterator itText = begin(front(stringSet), Standard());
1070         TSAIterator itSA = begin(sa, Standard());
1071         TSAIterator itSAEnd = end(sa, Standard());
1072 
1073         TTextSize sentinels = 0;
1074         TTextSize textLength = 0;
1075         unsigned lastSeqSeen = (unsigned)-1;
1076         Pair<unsigned, TTextSize> lPos;
1077         for (; itSA != itSAEnd; ++itSA)
1078         {
1079             posLocalize(lPos, *itSA, stringSetLimits(stringSet));
1080             if (lastSeqSeen != getSeqNo(lPos))
1081             {
1082                 lastSeqSeen = getSeqNo(lPos);
1083 
1084                 // shift textBegin and textLength by prefixLen
1085                 textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1086                 itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1087             }
1088             if (textLength > getSeqOffset(lPos))
1089                 ++buckets[ordValue(*(itText + getSeqOffset(lPos)))];
1090             else
1091                 if (textLength == getSeqOffset(lPos)) ++sentinels;
1092         }
1093         return sentinels;
1094     }
1095 
1096 
1097 //////////////////////////////////////////////////////////////////////////////
1098 
1099 
1100     // Counting sort - Step 3: Cumulative sum
1101     template < typename TBounds, typename TBuckets, typename TSize >
1102     inline typename Size<TBuckets>::Type
1103     _wotdCummulativeSum(TBounds &bounds, TBuckets const &buckets, TSize offset)
1104     {
1105         typedef typename Iterator<TBounds, Standard>::Type          TBoundIterator;
1106         typedef typename Iterator<TBuckets const, Standard>::Type   TBucketsIterator;
1107 
1108         TBucketsIterator it = begin(buckets, Standard());
1109         TBucketsIterator itEnd = end(buckets, Standard());
1110         TBoundIterator bit = begin(bounds, Standard());
1111 
1112         typename Value<TBounds>::Type    sum = offset;
1113         typename Size<TBuckets>::Type    requiredSize = 0;
1114         typename Value<TBuckets>::Type    diff;
1115 
1116         for (; it != itEnd; ++it, ++bit)
1117             if ((diff = *it) != 0) {
1118                 requiredSize += (diff > 1)? 2: 1;
1119                 *bit = sum;
1120                 sum += diff;
1121             }
1122 
1123         return requiredSize;
1124     }
1125 
1126     template < typename TBounds, typename TBuckets >
1127     inline typename Size<TBuckets>::Type
1128     _wotdCummulativeSum(TBounds &bounds, TBuckets const &buckets)
1129     {
1130         return _wotdCummulativeSum(bounds, buckets, 0);
1131     }
1132 
1133 //////////////////////////////////////////////////////////////////////////////
1134 //TODO(singer): The function createWotdIndex in never defined!
1135 /*!
1136  * @fn IndexWotd#createWotdIndex
1137  * @headerfile <seqan/index.h>
1138  * @brief Builds a the WOTD index.
1139  *
1140  * @signature void createWotdIndex(sa, dir, text);
1141  *
1142  * @param[out] sa  The resulting list in which all <i>q</i>-grams are sorted alphabetically.
1143  * @param[out] dir The resulting array that indicates at which position in index the corresponding <i>q</i>-grams
1144  *                 can be found.
1145  * @param[in] text The sequence. Types: @link ContainerConcept @endlink
1146  *
1147  * The resulting <tt>index</tt> contains the sorted list of qgrams.  For each possible <i>q</i>-gram pos contains
1148  * the first position in index that corresponds to this <i>q</i>-gram.
1149  */
1150 
1151     // single sequence
1152     template < typename TIndex >
1153     typename Size<TIndex>::Type
1154     _sortFirstWotdBucket(TIndex &index)
1155     {
1156         typedef typename Fibre<TIndex, WotdText >::Type        TText;
1157         typedef typename Fibre<TIndex, WotdSA >::Type            TSA;
1158         typedef typename TIndex::TCounter                        TCounter;
1159 
1160         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1161         typedef typename Iterator<TSA, Standard>::Type            TSAIterator;
1162         typedef typename Iterator<TCounter, Standard>::Type        TCntIterator;
1163         typedef typename Size<TText>::Type                        TSize;
1164 
1165         TText const &text = indexText(index);
1166         TCounter &occ = index.tempOcc;
1167         TCounter &bound = index.tempBound;
1168 
1169         // 1. clear counters and copy SA to temporary SA
1170         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1171 
1172         // 2. count characters
1173         _wotdCountChars(occ, text);
1174 
1175         // 3. cumulative sum
1176         TSize requiredSize = _wotdCummulativeSum(bound, occ);
1177 
1178         // 4. fill suffix array
1179         {
1180             TSA &sa = indexSA(index);
1181             TSAIterator saBeg = begin(sa, Standard());
1182             TCntIterator boundBeg = begin(bound, Standard());
1183 
1184             TTextIterator itText = begin(text, Standard());
1185             TTextIterator itTextEnd = end(text, Standard());
1186             for(TSize i = 0; itText != itTextEnd; ++itText, ++i)
1187                 *(saBeg + (*(boundBeg + ordValue(getValue(itText))))++) = i;
1188         }
1189         index.sentinelOcc = 0;
1190         index.sentinelBound = 0;
1191 
1192         return requiredSize;
1193     }
1194 
1195     // multiple sequences
1196     template < typename TText, typename TSpec, typename TIndexSpec >
1197     typename Size< Index<StringSet<TText, TSpec>, TIndexSpec> >::Type
1198     _sortFirstWotdBucket(Index<StringSet<TText, TSpec>, TIndexSpec> &index)
1199     {
1200         typedef Index<StringSet<TText, TSpec>, TIndexSpec>        TIndex;
1201         typedef typename Fibre<TIndex, WotdSA >::Type            TSA;
1202         typedef typename TIndex::TCounter                        TCounter;
1203 
1204         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1205         typedef typename Iterator<TSA, Standard>::Type            TSAIterator;
1206         typedef typename Iterator<TCounter, Standard>::Type        TCntIterator;
1207         typedef typename Size<TText>::Type                        TSize;
1208 
1209         StringSet<TText, TSpec> const &stringSet = indexText(index);
1210         TCounter &occ = index.tempOcc;
1211         TCounter &bound = index.tempBound;
1212 
1213         // 1. clear counters and copy SA to temporary SA
1214         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1215 
1216         // 2. count characters
1217         _wotdCountChars(occ, stringSet);
1218 
1219         // 3. cummulative sum
1220         TSize requiredSize = _wotdCummulativeSum(bound, occ);
1221 
1222         // 4. fill suffix array
1223         for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1224         {
1225             TSA &sa = indexSA(index);
1226             TSAIterator saBeg = begin(sa, Standard());
1227             TCntIterator boundBeg = begin(bound, Standard());
1228 
1229             typename Value<TSA>::Type localPos;
1230             assignValueI1(localPos, seqNo);
1231             assignValueI2(localPos, 0);
1232 
1233             TText const &text = value(stringSet, seqNo);
1234             TTextIterator itText = begin(text, Standard());
1235             TTextIterator itTextEnd = end(text, Standard());
1236             for(; itText != itTextEnd; ++itText)
1237             {
1238                 *(saBeg + (*(boundBeg + ordValue(getValue(itText))))++) = localPos;
1239                 assignValueI2(localPos, getValueI2(localPos) + 1);
1240             }
1241         }
1242         index.sentinelOcc = 0;
1243         index.sentinelBound = 0;
1244 
1245         return requiredSize;
1246     }
1247 
1248 
1249 
1250     // sort bucket using counting sort
1251     // (nearly) ORIGINAL VERSION:
1252     // - brings the bucket with the longest suffix (lpBucket) to front
1253     // - all other buckets are in lexicographical order
1254     // - adds prefixLen to all SA entries in SA[left,right)
1255     template < typename TText, typename TBeginPos, typename TEndPos, typename TSize >
1256     TSize _sortWotdBucket(
1257         Index<TText, IndexWotd<WotdOriginal> > &index,
1258         TBeginPos left,
1259         TEndPos right,
1260         TSize prefixLen)
1261     {
1262         typedef Index<TText, IndexWotd<WotdOriginal> >              TIndex;
1263         typedef typename Fibre<TIndex, WotdSA >::Type               TSA;
1264         typedef typename TIndex::TCounter                           TCounter;
1265 
1266         typedef typename Iterator<TText const, Standard>::Type      TTextIterator;
1267         typedef typename Iterator<TSA, Standard>::Type              TSAIterator;
1268         typedef typename Iterator<TCounter, Standard>::Type         TCntIterator;
1269         typedef typename Iterator<TCounter const, Standard>::Type   TConstCntIterator;
1270         typedef typename Size<TText>::Type                          TTextSize;
1271 
1272         TText const &text = indexText(index);
1273         TCounter const &tempSA = index.tempSA;
1274         TCounter &occ = index.tempOcc;
1275         TCounter &bound = index.tempBound;
1276 
1277         // 1. clear counters and copy SA to temporary SA
1278         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1279 
1280         // 2. count characters
1281         index.tempSA = infix(indexSA(index), left, right);
1282         index.sentinelBound = 0;
1283         index.sentinelOcc =
1284             _wotdCountCharsWotdOriginal(occ, text, index.tempSA, prefixLen);
1285 
1286         // 3. cumulative sum
1287         TSize requiredSize = 0;
1288 
1289         // actually, here sentinelOcc<=1 holds (this is the original wotd)
1290         if (index.interSentinelNodes) {
1291             if (index.sentinelOcc != 0)
1292                 requiredSize = (index.sentinelOcc > 1)? 2: 1;    // insert *one* $-edge for all $_i suffices
1293         } else
1294             requiredSize = index.sentinelOcc;                    // insert each $_i suffix one-by-one
1295 
1296         requiredSize += _wotdCummulativeSum(bound, occ, left + index.sentinelOcc);
1297         index.sentinelBound = left;
1298 
1299         // 4. fill suffix array
1300         {
1301             TSA &sa = indexSA(index);
1302             TSAIterator saBeg = begin(sa, Standard());
1303             TCntIterator boundBeg = begin(bound, Standard());
1304 
1305             TTextIterator itText = begin(text, Standard());
1306             TConstCntIterator itSA = begin(tempSA, Standard());
1307             TConstCntIterator itSAEnd = end(tempSA, Standard());
1308             TTextSize textLength = length(text);
1309             for(; itSA != itSAEnd; ++itSA)
1310             {
1311                 TTextSize saValue = *itSA;
1312                 if (textLength > saValue)
1313                     *(saBeg + (*(boundBeg + ordValue(*(itText + saValue))))++) = saValue;
1314                 else
1315                     if (textLength == saValue)
1316                         *(saBeg + index.sentinelBound++) = saValue;
1317             }
1318         }
1319 
1320         // 5. move lpBucket to front and add saOffset to hash directory entries
1321         {
1322             TSize lpBucket = ordValue(text[tempSA[0]]);
1323             if (lpBucket != 0) {
1324                 TSize lpBucketOcc = occ[lpBucket];
1325                 TSize lpBucketBound = bound[lpBucket];
1326 
1327                 TCntIterator itOcc = begin(occ, Standard()) + lpBucket;
1328                 TCntIterator itBound = begin(bound, Standard()) + lpBucket;
1329                 TCntIterator itBeg = begin(bound, Standard());
1330                 for(; itBound != itBeg; --itBound, --itOcc) {
1331                     *itOcc = *(itOcc - 1);
1332                     *itBound = *(itBound - 1);
1333                 }
1334                 if (index.sentinelOcc != 0) {
1335                     // bring first bucket before sentinel bucket
1336                     *itOcc = index.sentinelOcc;
1337                     *itBound = index.sentinelBound;
1338                     index.sentinelOcc = lpBucketOcc;
1339                     index.sentinelBound = lpBucketBound;
1340                 } else {
1341                     *itOcc = lpBucketOcc;
1342                     *itBound = lpBucketBound;
1343                 }
1344             } else
1345                 if (index.sentinelOcc != 0) {
1346                     // bring first bucket before sentinel bucket
1347                     TSize swap = index.sentinelOcc;
1348                     index.sentinelOcc = occ[0];
1349                     occ[0] = swap;
1350                     swap = index.sentinelBound;
1351                     index.sentinelBound = bound[0];
1352                     bound[0] = swap;
1353                 }
1354         }
1355 
1356         return requiredSize;
1357     }
1358 
1359 
1360 
1361 
1362 
1363     // sort bucket using counting sort
1364     // MODIFIED VERSION:
1365     // - all buckets are in lexicographical order
1366     // - SA[left,right) contains real SA entries (the beginning positions of the suffices)
1367 
1368     // single sequence
1369     template < typename TIndex, typename TBeginPos, typename TEndPos, typename TSize >
1370     TSize _sortWotdBucket(
1371         TIndex &index,
1372         TBeginPos left,
1373         TEndPos right,
1374         TSize prefixLen)
1375     {
1376         typedef typename Fibre<TIndex, WotdText >::Type             TText;
1377         typedef typename Fibre<TIndex, WotdSA >::Type               TSA;
1378         typedef typename TIndex::TCounter                           TCounter;
1379 
1380         typedef typename Iterator<TText const, Standard>::Type      TTextIterator;
1381         typedef typename Iterator<TSA, Standard>::Type              TSAIterator;
1382         typedef typename Iterator<TCounter, Standard>::Type         TCntIterator;
1383         typedef typename Iterator<TCounter const, Standard>::Type   TConstCntIterator;
1384         typedef typename Size<TText>::Type                          TTextSize;
1385 
1386         TText const &text = indexText(index);
1387         TCounter const &tempSA = index.tempSA;
1388         TCounter &occ = index.tempOcc;
1389         TCounter &bound = index.tempBound;
1390 
1391         // 1. clear counters and copy SA to temporary SA
1392         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1393         index.tempSA = infix(indexSA(index), left, right);
1394 
1395         // 2. count characters
1396         index.sentinelBound = 0;
1397         index.sentinelOcc = _wotdCountChars(occ, text, tempSA, prefixLen);
1398 
1399         // 3. cumulative sum
1400         TSize requiredSize = 0;
1401         if (index.interSentinelNodes) {
1402             if (index.sentinelOcc != 0)
1403                 requiredSize = (index.sentinelOcc > 1)? 2: 1;    // insert *one* $-edge for all $_i suffices
1404         } else
1405             requiredSize = index.sentinelOcc;                    // insert each $_i suffix one-by-one
1406 
1407         requiredSize += _wotdCummulativeSum(bound, occ, left + index.sentinelOcc);
1408         index.sentinelBound = left;
1409 
1410         // 4. fill suffix array
1411         {
1412             TSA &sa = indexSA(index);
1413             TSAIterator saBeg = begin(sa, Standard());
1414             TCntIterator boundBeg = begin(bound, Standard());
1415 
1416             TTextIterator itText = begin(text, Standard()) + prefixLen;
1417             TConstCntIterator itSA = begin(tempSA, Standard());
1418             TConstCntIterator itSAEnd = end(tempSA, Standard());
1419             TTextSize textLength = length(text) - prefixLen;
1420             for(; itSA != itSAEnd; ++itSA)
1421             {
1422                 TTextSize saValue = *itSA;
1423                 if (textLength > saValue)
1424                     *(saBeg + (*(boundBeg + ordValue(*(itText + saValue))))++) = saValue;
1425                 else
1426                     if (textLength == saValue)
1427                         *(saBeg + index.sentinelBound++) = saValue;
1428             }
1429         }
1430 
1431         return requiredSize;
1432     }
1433 
1434     // multiple sequences
1435     template < typename TText, typename TSpec, typename TIndexSpec, typename TBeginPos, typename TEndPos, typename TSize >
1436     TSize _sortWotdBucket(
1437         Index<StringSet<TText, TSpec>, TIndexSpec> &index,
1438         TBeginPos left,
1439         TEndPos right,
1440         TSize prefixLen)
1441     {
1442         typedef Index<StringSet<TText, TSpec>, TIndexSpec>            TIndex;
1443         typedef typename Fibre<TIndex, WotdSA >::Type                TSA;
1444         typedef typename TIndex::TCounter                            TCounter;
1445         typedef typename TIndex::TTempSA                            TTempSA;
1446 
1447         typedef typename Iterator<TText const, Standard>::Type        TTextIterator;
1448         typedef typename Iterator<TSA, Standard>::Type                TSAIterator;
1449         typedef typename Iterator<TTempSA const, Standard>::Type    TTempSAIterator;
1450         typedef typename Iterator<TCounter, Standard>::Type            TCntIterator;
1451         typedef typename Size<TText>::Type                            TTextSize;
1452 
1453         StringSet<TText, TSpec> const &stringSet = indexText(index);
1454         TTempSA const &tempSA = index.tempSA;
1455         TCounter &occ = index.tempOcc;
1456         TCounter &bound = index.tempBound;
1457 
1458         // 1. clear counters and copy SA to temporary SA
1459         TCntIterator occBeg = begin(occ, Standard());
1460 
1461         arrayFill(occBeg, end(occ, Standard()), 0);
1462         index.tempSA = infix(indexSA(index), left, right);
1463 
1464         // 2. count characters
1465         index.sentinelBound = 0;
1466         index.sentinelOcc = _wotdCountChars(occ, stringSet, tempSA, prefixLen);
1467 
1468         // 3. cumulative sum
1469         TSize requiredSize = 0;
1470         if (index.interSentinelNodes) {
1471             if (index.sentinelOcc != 0)
1472                 requiredSize = (index.sentinelOcc > 1)? 2: 1;    // insert *one* $-edge for all $_i suffices
1473         } else
1474             requiredSize = index.sentinelOcc;                    // insert each $_i suffix one-by-one
1475 
1476         requiredSize += _wotdCummulativeSum(bound, occ, left + index.sentinelOcc);
1477         index.sentinelBound = left;
1478 
1479         // 4. fill suffix array
1480         {
1481             if (empty(stringSet))
1482                 return requiredSize;
1483 
1484             TSA &sa = indexSA(index);
1485             TSAIterator saBeg = begin(sa, Standard());
1486             TCntIterator boundBeg = begin(bound, Standard());
1487 
1488             TTextIterator itText = begin(front(stringSet), Standard());
1489             TTempSAIterator itSA = begin(tempSA, Standard());
1490             TTempSAIterator itSAEnd = end(tempSA, Standard());
1491             TTextSize textLength = 0;
1492             unsigned lastSeqSeen = (unsigned)-1;
1493             Pair<unsigned, TTextSize> lPos;
1494             for(; itSA != itSAEnd; ++itSA)
1495             {
1496                 posLocalize(lPos, *itSA, stringSetLimits(index));
1497                 if (lastSeqSeen != getSeqNo(lPos))
1498                 {
1499                     lastSeqSeen = getSeqNo(lPos);
1500 
1501                     // shift textBegin and textLength by prefixLen
1502                     textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1503                     itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1504                 }
1505                 if (textLength > getSeqOffset(lPos))
1506                     *(saBeg + (*(boundBeg + ordValue(*(itText + getSeqOffset(lPos)))))++) = *itSA;
1507                 else
1508                     if (textLength == getSeqOffset(lPos))
1509                         *(saBeg + index.sentinelBound++) = *itSA;
1510             }
1511         }
1512 
1513         return requiredSize;
1514     }
1515 
1516 
1517 
1518 
1519 
1520     template < typename TSA, typename TText >
1521     typename Size<TText>::Type
1522     _bucketLcp(TSA const &sa, TText const &text)
1523     {
1524         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1525         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1526         typedef typename Value<TText>::Type                        TValue;
1527         typedef typename Size<TText>::Type                        TTextSize;
1528 
1529         TTextSize prefixLen = 0;
1530 
1531         if (length(sa) < 2) return prefixLen;
1532 
1533         TTextIterator itText = begin(text, Standard());
1534         TSAIterator itSAEnd = end(sa, Standard());
1535         TTextSize textLength = length(text);
1536 
1537         do {
1538             TSAIterator itSA = begin(sa, Standard());
1539             TTextSize sa = *itSA;
1540             if (textLength == sa) return prefixLen;
1541 
1542             TValue c = *(itText + sa);
1543             for(++itSA; itSA != itSAEnd; ++itSA) {
1544                 sa = *itSA;
1545                 if (textLength == sa || c != *(itText + sa))
1546                     return prefixLen;
1547             }
1548             ++prefixLen; --textLength;
1549             ++itText;
1550         } while (true);
1551     }
1552 
1553     template < typename TSA, typename TText, typename TSize >
1554     typename Size<TText>::Type
1555     _bucketLcp(TSA const &sa, TText const &text, TSize prefixLen)
1556     {
1557         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1558         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1559         typedef typename Value<TText>::Type                        TValue;
1560         typedef typename Size<TText>::Type                        TTextSize;
1561 
1562         if (length(sa) < 2) return prefixLen;
1563 
1564         TTextIterator itText = begin(text, Standard()) + prefixLen;
1565         TSAIterator itSAEnd = end(sa, Standard());
1566         TTextSize textLength = length(text) - prefixLen;
1567 
1568         do {
1569             TSAIterator itSA = begin(sa, Standard());
1570             TTextSize sa = *itSA;
1571             if (textLength == sa) return prefixLen;
1572 
1573             TValue c = *(itText + sa);
1574             for(++itSA; itSA != itSAEnd; ++itSA) {
1575                 sa = *itSA;
1576                 if (textLength == sa || *(itText + sa) != c)
1577                     return prefixLen;
1578             }
1579             ++prefixLen; --textLength;
1580             ++itText;
1581         } while (true);
1582     }
1583 
1584     template < typename TSA, typename TText, typename TSpec, typename TSize >
1585     typename Size<TText>::Type
1586     _bucketLcp(TSA const &sa, StringSet<TText, TSpec> const &stringSet, TSize prefixLen)
1587     {
1588         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1589         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1590         typedef typename Value<TText>::Type                        TValue;
1591         typedef typename Size<TText>::Type                        TTextSize;
1592 
1593         if (length(sa) < 2) return prefixLen;
1594 
1595         TSAIterator itSAEnd = end(sa, Standard());
1596         TTextIterator itText;
1597         TTextSize textLength;
1598 
1599         Pair<unsigned, TTextSize> lPos;
1600         do {
1601             TSAIterator itSA = begin(sa, Standard());
1602             posLocalize(lPos, *itSA, stringSetLimits(stringSet));
1603 
1604             unsigned lastSeqSeen = getSeqNo(*itSA);
1605             textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1606             if (textLength == getSeqOffset(lPos)) return prefixLen;
1607 
1608             itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1609             TValue c = *(itText + getSeqOffset(*itSA));
1610             for(++itSA; itSA != itSAEnd; ++itSA)
1611             {
1612                 posLocalize(lPos, *itSA, stringSetLimits(stringSet));
1613 
1614                 if (lastSeqSeen != getSeqNo(lPos))
1615                 {
1616                     lastSeqSeen = getSeqNo(lPos);
1617 
1618                     // shift textBegin and textLength by prefixLen
1619                     textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1620                     itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1621                 }
1622 
1623                 if (textLength == getSeqOffset(lPos) || c != *(itText + getSeqOffset(lPos)))
1624                     return prefixLen;
1625             }
1626             ++prefixLen; --textLength;
1627             ++itText;
1628         } while (true);
1629     }
1630 
1631 
1632     template <typename TText, typename TSpec, typename TPos>
1633     inline TPos
1634     _getNodeLP(
1635         Index<TText, IndexWotd<TSpec> > const &index,
1636         TPos pos)
1637     {
1638         TPos w0 = dirAt(pos, index);
1639         if (w0 & index.LEAF)
1640             return w0 & index.BITMASK0;
1641 
1642         TPos w1 = dirAt(pos + 1, index);
1643         if (w1 & index.UNEVALUATED)
1644             return saAt(w0 & index.BITMASK0, index);
1645         else
1646             return w0 & index.BITMASK0;
1647     }
1648 
1649     // store buckets into directory
1650     // ORIGINAL VERSION: storing SA entries and topology links in Dir
1651     template <typename TText, typename TPos>
1652     inline void
1653     _storeWotdChildren(
1654         Index<TText, IndexWotd<WotdOriginal> > &index,
1655         TPos dirOfs)
1656     {
1657         typedef Index<TText, IndexWotd<WotdOriginal> >        TIndex;
1658         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1659         typedef typename Iterator<TDir, Standard>::Type        TDirIterator;
1660         typedef typename Size<TDir>::Type                    TDirSize;
1661         typedef typename TIndex::TCounter                    TCounter;
1662         typedef typename Iterator<TCounter, Standard>::Type    TCntIterator;
1663 
1664         typedef typename Value<TCounter>::Type                TValue;
1665 
1666         TDirIterator itDir = begin(indexDir(index), Standard()) + dirOfs;
1667         TDirIterator itDirEnd = end(indexDir(index), Standard());
1668         TDirIterator itPrev = itDirEnd;
1669 
1670         TCntIterator it = begin(index.tempOcc, Standard());
1671         TCntIterator bit = begin(index.tempBound, Standard());
1672         TCntIterator itEnd = end(index.tempOcc, Standard());
1673 
1674         TValue occ;
1675         if (index.sentinelOcc != 0)
1676         {
1677             if (index.sentinelOcc > 1 && index.interSentinelNodes)    // occurs on multiseqs
1678             {
1679                 itPrev = itDir;
1680                 *itDir = index.sentinelBound - index.sentinelOcc;    ++itDir;
1681                 *itDir = index.sentinelBound | index.UNEVALUATED;    ++itDir;
1682             } else
1683                 for (TDirSize d = index.sentinelBound - index.sentinelOcc; d != index.sentinelBound; ++d)
1684                 {
1685                     itPrev = itDir;
1686                     *itDir = saAt(d, index) | index.LEAF;            ++itDir;
1687                 }
1688         }
1689         for (; it != itEnd; ++it, ++bit)
1690         {
1691             if ((occ = *it) == 0) continue;
1692             if (occ > 1) {
1693                 itPrev = itDir;
1694                 *itDir = *bit - occ;                             ++itDir;
1695                 *itDir = *bit | index.UNEVALUATED;                ++itDir;
1696             } else {
1697                 itPrev = itDir;
1698                 *itDir = saAt(*bit - occ, index) | index.LEAF;    ++itDir;
1699             }
1700         }
1701 
1702         // mark the last child
1703         if (itPrev != itDirEnd)
1704             *itPrev |= index.LAST_CHILD;
1705     }
1706 
1707     // store buckets into directory
1708     // MODIFIED VERSION: storing SA links and topology links in Dir
1709     template <typename TText, typename TSpec, typename TSize>
1710     inline void
1711     _storeWotdChildren(
1712         Index<TText, IndexWotd<TSpec> > &index,
1713         TSize dirOfs,
1714         TSize lcp)
1715     {
1716         typedef Index<TText, IndexWotd<TSpec> >            TIndex;
1717         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1718         typedef typename Iterator<TDir, Standard>::Type        TDirIterator;
1719         typedef typename Size<TDir>::Type                    TDirSize;
1720         typedef typename TIndex::TCounter                    TCounter;
1721         typedef typename Iterator<TCounter, Standard>::Type    TCntIterator;
1722 
1723         typedef typename Value<TCounter>::Type                TValue;
1724 
1725         TDirIterator itDirBegin = begin(indexDir(index), Standard()) + dirOfs;
1726         TDirIterator itDirEnd = end(indexDir(index), Standard());
1727         TDirIterator itDir = itDirBegin;
1728         TDirIterator itPrev = itDirEnd;
1729 
1730         TCntIterator it = begin(index.tempOcc, Standard());
1731         TCntIterator bit = begin(index.tempBound, Standard());
1732         TCntIterator itEnd = end(index.tempOcc, Standard());
1733 
1734         TValue occ;
1735         if (index.sentinelOcc != 0)
1736         {
1737             if (index.sentinelOcc > 1 && index.interSentinelNodes)    // occurs on multiseqs
1738             {
1739                 itPrev = itDir;
1740                 *itDir = index.sentinelBound - index.sentinelOcc;    ++itDir;
1741                 *itDir = index.sentinelBound | index.UNEVALUATED;    ++itDir;
1742             } else
1743                 for (TDirSize d = index.sentinelBound - index.sentinelOcc; d != index.sentinelBound; ++d)
1744                 {
1745                     itPrev = itDir;
1746                     *itDir = d | index.LEAF;                        ++itDir;
1747                 }
1748         }
1749         for (; it != itEnd; ++it, ++bit)
1750         {
1751             if ((occ = *it) == 0) continue;
1752             if (occ > 1) {
1753                 itPrev = itDir;
1754                 *itDir = *bit - occ;                 ++itDir;
1755                 *itDir = *bit | index.UNEVALUATED;    ++itDir;
1756             } else {
1757                 itPrev = itDir;
1758                 *itDir = (*bit - occ) | index.LEAF;    ++itDir;
1759             }
1760         }
1761 
1762         // first child gets the mutual lcp value of the children (== parent repLength)
1763         *itDirBegin = ((*itDirBegin) & ~index.BITMASK0) | lcp;
1764 
1765         // mark the last child
1766         if (itPrev != itDirEnd)
1767             *itPrev |= index.LAST_CHILD;
1768     }
1769 
1770 
1771     template < typename TText, typename TSpec >
1772     inline typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type
1773     _wotdEvaluate(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > const &it)
1774     {
1775         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
1776         typedef typename Size<TIndex>::Type                TSize;
1777 
1778         TIndex &index = const_cast<TIndex&>(container(it));
1779         TSize pos = value(it).node;
1780         TSize w1 = dirAt(pos + 1, index);
1781 
1782         // test for evaluation
1783         if (w1 & index.UNEVALUATED)
1784         {
1785             TSize w0 = dirAt(pos, index);
1786             TSize lp = saAt(w0 & index.BITMASK0, index);
1787             TSize dst = length(indexDir(index));
1788 
1789             TSize size = _sortWotdBucket(
1790                 index,
1791                 w0 & index.BITMASK0,
1792                 w1 & index.BITMASK1,
1793                 parentEdgeLength(it));
1794 
1795             resize(indexDir(index), dst + size, Generous());
1796             _storeWotdChildren(index, dst);
1797 
1798             // mark nodes with solely empty child edges
1799             w1 = dst;
1800             if (index.sentinelOcc > 0)
1801             {
1802                 TSize sentinelSize = index.sentinelOcc;
1803                 if (index.interSentinelNodes && sentinelSize > 2)
1804                     sentinelSize = 2;
1805                 if (size == sentinelSize) w1 |= index.SENTINELS;
1806             }
1807 
1808             assert(!(index.sentinelOcc == 1 && size == 1));
1809 
1810             dirAt(pos, index)     = (w0 & ~index.BITMASK0) | lp;
1811             dirAt(pos + 1, index) = w1;
1812         }
1813 
1814         return w1;
1815     }
1816 
1817     template < typename TText, typename TIndexSpec, typename TSpec >
1818     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
1819     _wotdEvaluate(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TSpec> > const &it)
1820     {
1821         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
1822         typedef typename Size<TIndex>::Type                TSize;
1823 
1824         TIndex &index = const_cast<TIndex&>(container(it));
1825         TSize pos = value(it).node;
1826         TSize w1 = dirAt(pos + 1, index);
1827 
1828         // test for evaluation
1829         if (w1 & index.UNEVALUATED)
1830         {
1831             TSize dst = length(indexDir(index));
1832 
1833             TSize size = _sortWotdBucket(
1834                 index,
1835                 value(it).range.i1,
1836                 w1 & index.BITMASK1,
1837                 repLength(it));
1838 /*
1839             if (globalDumpFlag)
1840             {
1841                 std::cerr << '"' << representative(it) << '"' << std::endl;
1842                 for (int i=0;i<length(getOccurrences(it));++i)
1843                     std::cerr << getOccurrences(it)[i]<<'\t'<<suffix(indexText(index),getOccurrences(it)[i])<<std::endl;
1844 //                _dumpFreq(index);
1845             }
1846 */
1847             resize(indexDir(index), dst + size, Generous());
1848             _storeWotdChildren(index, dst, repLength(it));
1849 
1850             // mark nodes with solely empty child edges
1851             w1 = dst;
1852             if (index.sentinelOcc > 0)
1853             {
1854                 TSize sentinelSize = index.sentinelOcc;
1855                 if (index.interSentinelNodes && sentinelSize > 2)
1856                     sentinelSize = 2;
1857                 if (size == sentinelSize) w1 |= index.SENTINELS;
1858             }
1859 
1860             dirAt(pos + 1, index) = w1;
1861         }
1862 
1863         return w1;
1864     }
1865 
1866 
1867     template <typename TText, typename TSpec>
1868     inline void
1869     _dump(Index<TText, IndexWotd<TSpec> > &index)
1870     {
1871         typedef Index<TText, IndexWotd<TSpec> >            TIndex;
1872         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1873         typedef typename Value<TDir>::Type                    TDirValue;
1874 
1875         std::cout << "  Dir (wotd)" << std::endl;
1876         for(unsigned i=0; i < length(indexDir(index)); ++i) {
1877             TDirValue d = indexDir(index)[i];
1878             std::cout << i << ":  " << (d & index.BITMASK0);
1879             if (d & index.LEAF)            std::cout << "  (Leaf/Uneval)";
1880             if (d & index.LAST_CHILD)    std::cout << "  (LastChild/SENTINELS)";
1881             std::cout << std::endl;
1882         }
1883 
1884         std::cout << std::endl << "  SA" << std::endl;
1885         for(unsigned i=0; i < length(indexSA(index)); ++i)
1886             std::cout << i << ":  " << indexSA(index)[i] << "  " << suffix(indexText(index), indexSA(index)[i]) << std::endl;
1887 
1888         std::cout << std::endl;
1889 
1890     }
1891 
1892 //////////////////////////////////////////////////////////////////////////////
1893 // _goDownChar
1894 
1895     template < typename TText, class TSpec, typename TValue >
1896     inline bool _goDownChar(
1897         Iter<Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it,
1898         TValue c)
1899     {
1900         typedef Index<TText, IndexWotd<TSpec> >    TIndex;
1901         typedef typename Value<TIndex>::Type        TIndexValue;
1902 
1903         bool sorted = false;
1904         if (!goDown(it)) return false;
1905         do {
1906             if (parentEdgeLength(it) != 0) {
1907                 TIndexValue edgeChar = parentEdgeLabel(it)[0];
1908                 if (edgeChar == c) return true;        // the edge is found
1909                 if (sorted && edgeChar > c) break;    // too far (except the first one,
1910             }                                        // child edges are sorted)
1911             sorted = true;
1912         } while (goRight(it));
1913         _goUp(it);
1914         return false;
1915     }
1916 
1917     template < typename TText, class TIndexSpec, class TSpec, typename TValue >
1918     inline bool _goDownChar(
1919         Iter<Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it,
1920         TValue c)
1921     {
1922         typedef Index<TText, IndexWotd<TSpec> >    TIndex;
1923         typedef typename Value<TIndex>::Type        TIndexValue;
1924 
1925         if (!goDown(it)) return false;
1926         do {
1927             if (parentEdgeLength(it) != 0) {
1928                 TIndexValue edgeChar = parentEdgeLabel(it)[0];
1929                 if (edgeChar == c) return true;    // the edge is found
1930                 if (edgeChar > c) break;        // too far (child edges are sorted)
1931             }
1932         } while (goRight(it));
1933         _goUp(it);
1934         return false;
1935     }
1936 
1937 /*
1938     template < typename TText, typename TSpec, typename TValue >
1939     inline bool
1940     _getNodeByChar(
1941         Iter< Index<TText, IndexWotd<TSpec> >, VSTree<TSpec> > const &it,
1942         TValue c,
1943         typename VertexDescriptor< Index<TText, IndexWotd<TSpec> > >::Type &childDesc)
1944     {
1945         typedef Index<TText, IndexWotd<TSpec> >            TIndex;
1946         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1947         typedef typename Fibre<TIndex, WotdSA>::Type        TSA;
1948 
1949         typedef typename Value<TSA>::Type                    TSAValue;
1950         typedef typename Value<TDir>::Type                    TDirValue;
1951 
1952         typename Size<TIndex>::Type len = length(index);
1953         typename VertexDescriptor<TIndex>::Type    desc;
1954 
1955         TSAValue pos = _firstSuffixOfBucket(index, value(it).node);
1956         while (pos == len || value < (c = textAt(index, pos + value.i2))) {
1957             value.node += (dirAt(value.node, index) & index.LEAF)? 1: 2;
1958             pos = _firstSuffixOfBucket(index, value.node);
1959         }
1960         assert(pos != len);
1961 
1962         return c == value;
1963     }
1964 */
1965 
1966 //////////////////////////////////////////////////////////////////////////////
1967 // interface for automatic index creation
1968 
1969     template <typename TText, typename TSpec>
1970     inline void _wotdCreateFirstLevel(Index<TText, IndexWotd<TSpec> > &index)
1971     {
1972         typedef Index<TText, IndexWotd<TSpec> > TIndex;
1973         typedef typename Value<TIndex>::Type    TValue;
1974         typedef typename Size<TIndex>::Type     TSize;
1975 
1976         resize(index.tempOcc, ValueSize<TValue>::VALUE + 1, Exact());
1977         resize(index.tempBound, ValueSize<TValue>::VALUE + 1, Exact());
1978 
1979         TSize size;
1980         if (empty(indexSA(index)))
1981         {
1982             resize(indexSA(index), length(indexRawText(index)), Exact());
1983             size = _sortFirstWotdBucket(index);
1984         } else
1985         {
1986             size = _sortWotdBucket(index, 0, length(indexSA(index)), 0);
1987         }
1988 
1989         if (size > 0)
1990         {
1991             resize(indexDir(index), size + 2, Generous());
1992             _storeWotdChildren(index, 2, 0);
1993 
1994             // mark nodes with solely empty child edges
1995             TSize w1 = 2;
1996             if (index.sentinelOcc > 0)
1997             {
1998                 TSize sentinelSize = index.sentinelOcc;
1999                 if (index.interSentinelNodes && sentinelSize > 2)
2000                     sentinelSize = 2;
2001                 if (size == sentinelSize) w1 |= index.SENTINELS;
2002             }
2003 
2004 
2005             dirAt(0, index) = 0 | index.LAST_CHILD;
2006             dirAt(1, index) = w1;
2007 
2008         } else {
2009             resize(indexDir(index), 1);
2010             dirAt(0, index) = 0 | index.LAST_CHILD | index.LEAF;
2011         }
2012     }
2013 
2014     template <typename TText, typename TSpec>
2015     inline bool indexCreate(Index<TText, IndexWotd<TSpec> > &index, WotdDir const, Default const)
2016     {
2017         _wotdCreateFirstLevel(index);
2018         return true;
2019     }
2020 
2021 //////////////////////////////////////////////////////////////////////////////
2022 // clear
2023 
2024     template < typename TText, typename TSpec >
2025     inline void clear(Index<TText, IndexWotd<TSpec> > &index)
2026     {
2027         clear(getFibre(index, WotdSA()));
2028         clear(getFibre(index, WotdDir()));
2029     }
2030 
2031 //////////////////////////////////////////////////////////////////////////////
2032 // open
2033 
2034     template < typename TText, typename TSpec >
2035     inline bool open(
2036         Index< TText, IndexWotd<TSpec> > &index,
2037         const char *fileName,
2038         int openMode)
2039     {
2040         typedef Index<TText, IndexWotd<TSpec> > TIndex;
2041         typedef typename Value<TIndex>::Type    TValue;
2042 
2043         String<char> name;
2044 
2045         name = fileName;    append(name, ".txt");
2046         if ((!open(getFibre(index, WotdText()), toCString(name), openMode)) &&
2047             (!open(getFibre(index, WotdText()), fileName, openMode))) return false;
2048 
2049         name = fileName;    append(name, ".sa");
2050         if (!open(getFibre(index, WotdSA()), toCString(name), openMode)) return false;
2051         name = fileName;    append(name, ".dir");
2052 
2053         if (!open(getFibre(index, WotdDir()), toCString(name), openMode)) return false;
2054 
2055         if (!empty(getFibre(index, WotdDir())))
2056         {
2057             resize(index.tempOcc, ValueSize<TValue>::VALUE + 1);
2058             resize(index.tempBound, ValueSize<TValue>::VALUE + 1);
2059         }
2060 
2061         return true;
2062     }
2063     template < typename TText, typename TSpec >
2064     inline bool open(
2065         Index< TText, IndexWotd<TSpec> > &index,
2066         const char *fileName)
2067     {
2068         return open(index, fileName, OPEN_RDONLY);
2069     }
2070 
2071 
2072 //////////////////////////////////////////////////////////////////////////////
2073 // save
2074 
2075     template < typename TText, typename TSpec >
2076     inline bool save(
2077         Index< TText, IndexWotd<TSpec> > &index,
2078         const char *fileName,
2079         int openMode)
2080     {
2081         String<char> name;
2082 
2083         name = fileName;    append(name, ".txt");
2084         if ((!save(getFibre(index, WotdText()), toCString(name), openMode)) &&
2085             (!save(getFibre(index, WotdText()), fileName, openMode))) return false;
2086 
2087         name = fileName;    append(name, ".sa");
2088         if (!save(getFibre(index, WotdSA()), toCString(name), openMode)) return false;
2089         name = fileName;    append(name, ".dir");
2090 
2091         if (!save(getFibre(index, WotdDir()), toCString(name), openMode)) return false;
2092 
2093         return true;
2094     }
2095     template < typename TText, typename TSpec >
2096     inline bool save(
2097         Index< TText, IndexWotd<TSpec> > &index,
2098         const char *fileName)
2099     {
2100         return save(index, fileName, OPEN_WRONLY | OPEN_CREATE);
2101     }
2102 }
2103 
2104 #endif //#ifndef SEQAN_HEADER_...
2105