1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_INDEX_WOTD_H
36 #define SEQAN_HEADER_INDEX_WOTD_H
37 
38 namespace seqan
39 {
40 
41 
42 //////////////////////////////////////////////////////////////////////////////
43 // wotd tree index fibres
44 
45 /*!
46  * @defgroup WOTDIndexFibres WOTD Index Fibres
47  * @brief Tag to select a specific fibre (e.g. table, object, ...) of an @link
48  *        IndexWotd @endlink index.
49  *
50  * These tags can be used to get @link Fibre @endlink of an @link IndexWotd @endlink.
51  *
52  * @see Fibre
53  * @see Index#getFibre
54  * @see IndexWotd
55  *
56  * @tag WOTDIndexFibres#WotdDir
57  * @brief The child table.
58  *
59  * @tag WOTDIndexFibres#WotdRawSA
60  * @brief The raw suffix array.
61  *
62  * @tag WOTDIndexFibres#WotdText
63  * @brief The original text the index should be based on.
64  *
65  * @tag WOTDIndexFibres#WotdRawText
66  * @brief The raw text the index is really based on.
67  *
68  * @tag WOTDIndexFibres#WotdSA
69  * @brief The suffix array.
70  */
71 
72 //////////////////////////////////////////////////////////////////////////////
73 /*!
74  * @fn IndexWotd#indexSA
75  * @headerfile <seqan/index.h>
76  * @brief Shortcut for <tt>getFibre(.., WotdSA)</tt>.
77  *
78  * @signature TSa indexSA(index);
79  *
80  * @param[in] index The @link IndexWotd @endlink object holding the fibre.
81  *
82  * @return TSa A reference to the @link WOTDIndexFibres#WotdSA @endlink fibre (partially sorted suffix array).
83  */
84 
85 /*!
86  * @fn IndexWotd#indexDir
87  * @headerfile <seqan/index.h>
88  * @brief Shortcut for <tt>getFibre(.., WotdDir())</tt>.
89  * @signature TFibre indexDir(index);
90  *
91  * @param[in] index The @link IndexWotd @endlink object holding the fibre.
92  *
93  * @return TFibre A reference to the @link WOTDIndexFibres#WotdDir @endlink fibre (tree structure).
94  */
95 
96 /*!
97  * @fn IndexWotd#saAt
98  * @headerfile <seqan/index.h>
99  * @note Advanced functionality, not commonly used.
100  * @brief Shortcut for <tt>value(indexSA(..), ..)</tt>.
101  *
102  * @signature TValue saAt(position, index);
103  *
104  * @param[in] index The @link IndexWotd @endlink object holding the fibre.
105  * @param[in] position A position in the array on which the value should be accessed.
106  *
107  * @return TValue A reference or proxy to the value in the @link WOTDIndexFibres#WotdSA @endlink fibre.
108  *                To be more precise, a reference to a position containing a value of type
109  *                @link SAValue @endlink is returned (or a proxy).
110  */
111 
112 /*!
113  * @fn IndexWotd#dirAt
114  * @headerfile <seqan/index.h>
115  * @brief Shortcut for <tt>value(indexDir(index), position)</tt>.
116  *
117  * @signature TFibre dirAt(position, index);
118  *
119  * @param[in] index    The @link IndexWotd @endlink object holding the fibre.
120  * @param[in] position A position in the array on which the value should be accessed.
121  *
122  * @return TFibre A reference to the @link WOTDIndexFibres#WotdDir @endlink fibre.
123  */
124 
125     typedef FibreText        WotdText;
126     typedef FibreRawText    WotdRawText;
127     typedef FibreSA         WotdSA;
128     typedef FibreRawSA        WotdRawSA;
129     typedef FibreDir        WotdDir;
130 
131 //////////////////////////////////////////////////////////////////////////////
132 // wotd tree index
133 
134 /*!
135  * @class IndexWotd
136  * @extends Index
137  * @implements StringTreeConcept
138  * @headerfile <seqan/index.h>
139  * @brief An index based on a lazy suffix tree (see Giegerich et al., "Efficient implementation of lazy suffix
140  *        trees").
141  *
142  * @signature template <typename TText, typename TSpec>
143  *            class Index<TText, IndexWotd<TSpec> >;
144  *
145  * @tparam TText The @link TextConcept @endlink text type.
146  * @tparam TSpec The type for further specialization of the Index type.
147  *
148  * The fibres (see @link Index @endlink and @link Fibre @endlink) of this index are a partially sorted suffix array
149  * (see @link WOTDIndexFibres#WotdSA @endlink) and the wotd tree (see @link WOTDIndexFibres#WotdDir @endlink).
150  *
151  * Demo: Demo.Constraint Iterator
152  *
153  * @see WOTDIndexFibres
154  */
155 
156     struct WotdOriginal_;
157     typedef Tag<WotdOriginal_> const WotdOriginal;
158 
159     template < typename TSpec = void >
160     struct IndexWotd {};
161 
162 /*
163     template < typename TObject, typename TSpec >
164     struct Fibre< Index<TObject, IndexWotd<TSpec> >, FibreDir>
165     {
166         typedef Index<TObject, IndexWotd<TSpec> > TIndex;
167         typedef String<
168             typename typename Size<TIndex>::Type,
169             Alloc<>
170         > Type;
171     };
172 */
173 
174     template < typename TObject, typename TSpec >
175     class Index<TObject, IndexWotd<TSpec> > {
176     public:
177         typedef typename Fibre<Index, WotdText>::Type        TText;
178         typedef typename Fibre<Index, WotdSA>::Type         TSA;
179         typedef typename Fibre<Index, WotdDir>::Type        TDir;
180 
181         typedef typename Value<Index>::Type                    TValue;
182         typedef typename Value<TDir>::Type                    TDirValue;
183         typedef typename Size<Index>::Type                    TSize;
184         typedef String<TSize, Alloc<> >                        TCounter;
185         typedef String<typename Value<TSA>::Type, Alloc<> >    TTempSA;
186         typedef typename Cargo<Index>::Type                    TCargo;
187 
188         // 1st word flags
189         static TDirValue const LEAF          = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 1); // this node is a leaf
190         static TDirValue const LAST_CHILD    = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 2); // this node is the last child
191         // 2nd word flag
192         static TDirValue const UNEVALUATED   = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 1); // this node is partially evalutated and has no evaluated children
193         static TDirValue const SENTINELS     = (TDirValue)1 << (BitsPerValue<TDirValue>::VALUE - 2); // the children of this node have solely $-edges
194 
195         static TDirValue const BITMASK0      = ~(LEAF | LAST_CHILD);
196         static TDirValue const BITMASK1      = ~(UNEVALUATED | SENTINELS);
197 
198 
199         Holder<TText>    text;    // underlying text
200         TSA                sa;        // suffix array sorted by the first q chars
201         TDir            dir;    // bucket directory
202         TCargo            cargo;    // user-defined cargo
203 
204 
205         TTempSA            tempSA;
206         TCounter        tempOcc;
207         TCounter        tempBound;
208 
209         TSize            sentinelOcc;
210         TSize            sentinelBound;
211         bool            interSentinelNodes;    // should virtually one (true) $-sign or many (false) $_i-signs be appended to the strings in text
212 
213         /*!
214          * @fn IndexWotd::Index
215          * @brief Constructor
216          *
217          * @signature Index::Index();
218          * @signature Index::Index(index);
219          * @signature Index::Index(text);
220          *
221          * @param[in] index    Other Index object to copy from.
222          * @param[in] text     The text to be indexed.
223          */
224 
Index()225         Index():
226             interSentinelNodes(false) {}
227 
Index(Index & other)228         Index(Index &other) :
229             text(other.text),
230             sa(other.sa),
231             dir(other.dir),
232             cargo(other.cargo),
233             tempSA(other.tempSA),
234             tempBound(other.tempBound),
235             sentinelOcc(other.sentinelOcc),
236             sentinelBound(other.sentinelBound),
237             interSentinelNodes(other.interSentinelNodes) {}
238 
Index(Index const & other)239         Index(Index const &other) :
240             text(other.text),
241             sa(other.sa),
242             dir(other.dir),
243             cargo(other.cargo),
244             tempSA(other.tempSA),
245             tempBound(other.tempBound),
246             sentinelOcc(other.sentinelOcc),
247             sentinelBound(other.sentinelBound),
248             interSentinelNodes(other.interSentinelNodes) {}
249 
250         template <typename TText_>
Index(TText_ & _text)251         Index(TText_ &_text) :
252             text(_text),
253             sentinelOcc(0),
254             sentinelBound(0),
255             interSentinelNodes(false) {}
256 
257         template <typename TText_>
Index(TText_ const & _text)258         Index(TText_ const &_text):
259             text(_text),
260             sentinelOcc(0),
261             sentinelBound(0),
262             interSentinelNodes(false) {}
263     };
264 /*
265     template < typename TText, typename TSpec >
266     struct Value< Index<TText, IndexWotd<TSpec> > > {
267         typedef typename Value< typename Fibre< Index<TText, IndexWotd<TSpec> >, WotdRawText >::Type >::Type Type;
268     };
269 
270     template < typename TText, typename TSpec >
271     struct Size< Index<TText, IndexWotd<TSpec> > > {
272         typedef typename Size< typename Fibre< Index<TText, IndexWotd<TSpec> >, WotdRawText >::Type >::Type Type;
273     };
274 */
275 
276 template <typename TText, typename TSpec>
277 SEQAN_CONCEPT_IMPL((Index<TText, IndexWotd<TSpec> >), (StringTreeConcept));
278 
279 template <typename TText, typename TSpec>
280 SEQAN_CONCEPT_IMPL((Index<TText, IndexWotd<TSpec> > const), (StringTreeConcept));
281 
282 //////////////////////////////////////////////////////////////////////////////
283 // default fibre creators
284 
285     template < typename TText, typename TSpec >
286     struct DefaultIndexCreator<Index<TText, IndexWotd<TSpec> >, FibreDir> {
287         typedef Default Type;
288     };
289 
290     template < typename TText, typename TSSetSpec, typename TSpec >
291     struct DefaultIndexCreator<Index<StringSet<TText, TSSetSpec>, IndexWotd<TSpec> >, FibreDir> {
292         typedef Default Type;
293     };
294 
295 //////////////////////////////////////////////////////////////////////////////
296 // default finder
297 
298     template < typename TText, typename TSpec >
299     struct DefaultFinder< Index<TText, IndexWotd<TSpec> > >
300     {
301         typedef FinderSTree Type;    // standard wotd finder is tree based search
302     };
303 
304 //////////////////////////////////////////////////////////////////////////////
305 
306 
307     template <typename TSize>
308     struct VertexWotdOriginal_ {
309         TSize        node;            // position of current node entry in directory
310         TSize        parentRepLen;    // representative length of parent node
311         TSize        edgeLen;        // length of edge above current node
312 
313         VertexWotdOriginal_() : node(0), parentRepLen(0), edgeLen(0) {}
314         VertexWotdOriginal_(MinimalCtor) : node(0), parentRepLen(0), edgeLen(0)
315         {
316             _setSizeInval(node);
317         }
318     };
319 
320     template <typename TSize>
321     struct VertexWotdModified_ {
322         TSize        node;            // position of current node entry in directory
323         TSize        parentRepLen;    // representative length of parent node
324         TSize        edgeLen;        // length of edge above current node
325         Pair<TSize> range;            // current SA interval of hits
326         TSize        parentRight;    // right boundary of parent node's range (allows one to go right)
327 
328         VertexWotdModified_() :
329             node(0),
330             parentRepLen(0),
331             edgeLen(0),
332             range(0,0),
333             parentRight(0)
334         {}
335         VertexWotdModified_(MinimalCtor) :
336             node(0),
337             parentRepLen(0),
338             edgeLen(0),
339             range(0,0),
340             parentRight(0)
341         {}
342         VertexWotdModified_(Pair<TSize> const &otherRange, TSize otherParentRight) :
343             node(0),
344             parentRepLen(0),
345             edgeLen(0),
346             range(otherRange),
347             parentRight(otherParentRight)
348         {}
349     };
350 
351 //////////////////////////////////////////////////////////////////////////////
352     template < typename TText >
353     struct VertexDescriptor< Index<TText, IndexWotd<WotdOriginal> > > {
354         typedef typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type TSize;
355         typedef VertexWotdOriginal_<TSize> Type;
356     };
357 
358     template < typename TText, typename TSpec >
359     struct VertexDescriptor< Index<TText, IndexWotd<TSpec> > > {
360         typedef typename Size< Index<TText, IndexWotd<TSpec> > >::Type TSize;
361         typedef VertexWotdModified_<TSize> Type;
362     };
363 
364     template < typename TText, typename TSpec >
365     void _indexRequireTopDownIteration(Index<TText, IndexWotd<TSpec> > &index)
366     {
367         indexRequire(index, WotdDir());
368     }
369 
370 //////////////////////////////////////////////////////////////////////////////
371 // history stack functions
372 
373     template <typename TSize>
374     struct HistoryStackWotdOriginal_
375     {
376         TSize        node;        // position of current node entry in directory
377         TSize        edgeLen;    // length of edge above current node
378     };
379 
380     template <typename TSize>
381     struct HistoryStackWotdModified_
382     {
383         TSize        node;        // position of current node entry in directory
384         TSize        edgeLen;    // length of edge above current node
385         Pair<TSize> range;        // current SA interval of hits
386     };
387 
388     template < typename TText, typename TSpec >
389     struct HistoryStackEntry_< Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > >
390     {
391         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
392         typedef typename Size<TIndex>::Type                TSize;
393         typedef HistoryStackWotdOriginal_<TSize>        Type;
394     };
395 
396     template < typename TText, typename TIndexSpec, typename TSpec >
397     struct HistoryStackEntry_< Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > >
398     {
399         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
400         typedef typename Size<TIndex>::Type                TSize;
401         typedef HistoryStackWotdModified_<TSize>        Type;
402     };
403 
404 
405     template < typename TText, typename TSpec >
406     inline void
407     _historyPush(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it)
408     {
409         it._parentDesc = value(it);
410         value(it).parentRepLen += parentEdgeLength(it);
411     }
412 
413     template < typename TText, typename TIndexSpec, typename TSpec >
414     inline void
415     _historyPush(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it)
416     {
417         it._parentDesc = value(it);
418         value(it).parentRepLen += parentEdgeLength(it);
419         value(it).parentRight = value(it).range.i2;
420     }
421 
422     template < typename TText, typename TSpec >
423     inline void
424     _historyPush(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
425     {
426         typedef typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type TSize;
427         TSize edgeLen = parentEdgeLength(it);
428         HistoryStackWotdOriginal_<TSize> entry = { value(it).node, edgeLen };
429         appendValue(it.history, entry);
430         value(it).parentRepLen += edgeLen;
431     }
432 
433     template < typename TText, typename TIndexSpec, typename TSpec >
434     inline void
435     _historyPush(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
436     {
437         typedef typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type TSize;
438         TSize edgeLen = parentEdgeLength(it);
439         HistoryStackWotdModified_<TSize> entry = { value(it).node, edgeLen, value(it).range };
440         appendValue(it.history, entry);
441         value(it).parentRepLen += edgeLen;
442         value(it).parentRight = value(it).range.i2;
443     }
444 
445 //////////////////////////////////////////////////////////////////////////////
446     template < typename TText, typename TIndexSpec, typename TPropertyMap >
447     inline void
448     resizeVertexMap(
449         TPropertyMap & pm,
450         Index<TText, IndexWotd<TIndexSpec> > const& index)
451     {
452         resize(pm, length(indexDir(index)), Generous());
453     }
454 
455 /* // different interface compared to resizeVertexMap(graph, ...)
456     template < typename TText, typename TIndexSpec, typename TPropertyMap, typename TProperty >
457     inline void
458     resizeVertexMap(
459         Index<TText, IndexWotd<TIndexSpec> > const& index,
460         TPropertyMap & pm,
461         TProperty const & prop)
462     {
463         resize(pm, length(indexDir(index)), prop, Generous());
464     }
465 */
466     template < typename TSize >
467     inline typename Id< VertexWotdOriginal_<TSize> const >::Type
468     _getId(VertexWotdOriginal_<TSize> const &desc)
469     {
470         return desc.node;
471     }
472 
473     template < typename TSize >
474     inline typename Id< VertexWotdOriginal_<TSize> >::Type
475     _getId(VertexWotdOriginal_<TSize> &desc)
476     {
477         return _getId(const_cast<VertexWotdOriginal_<TSize> const &>(desc));
478     }
479 
480     template < typename TSize >
481     inline typename Id< VertexWotdModified_<TSize> const >::Type
482     _getId(VertexWotdModified_<TSize> const &desc)
483     {
484         return desc.node;
485     }
486 
487     template < typename TSize >
488     inline typename Id< VertexWotdModified_<TSize> >::Type
489     _getId(VertexWotdModified_<TSize> &desc)
490     {
491         return _getId(const_cast<VertexWotdModified_<TSize> const &>(desc));
492     }
493 
494 //////////////////////////////////////////////////////////////////////////////
495 
496     template < typename TSize >
497     inline bool _isRoot(VertexWotdOriginal_<TSize> const &value)
498     {
499         return value.node == 0;
500     }
501 
502     template < typename TSize >
503     inline bool _isRoot(VertexWotdModified_<TSize> const &value)
504     {
505         return value.node == 0;
506     }
507 
508     // is this a leaf? (including empty $-edges)
509     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder >
510     inline bool _isLeaf(
511         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TSpec> > const &it,
512         VSTreeIteratorTraits<TDfsOrder, False> const)
513     {
514         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
515         TIndex const &index = container(it);
516         return (dirAt(value(it).node, index) & index.LEAF) != 0;
517     }
518 
519     // is this a leaf? (excluding empty $-edges)
520     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder >
521     inline bool _isLeaf(
522         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TSpec> > const &it,
523         VSTreeIteratorTraits<TDfsOrder, True> const)
524     {
525         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
526 
527         TIndex const &index = container(it);
528         if (dirAt(value(it).node, index) & index.LEAF)
529             return true;
530 
531         // ensure node evaluation and test for sentinel child edges?
532         return _wotdEvaluate(it) & index.SENTINELS;
533     }
534 
535 
536     // parentEdgeLength - ORIGINAL VERSION
537     template < typename TIndex, typename TSize >
538     inline typename Size<TIndex>::Type
539     parentEdgeLength(TIndex const &index, VertexWotdOriginal_<TSize> &vDesc)
540     {
541         TSize edgeLen = vDesc.edgeLen;
542         if (edgeLen != (TSize)-1)
543             return edgeLen;
544 
545         TSize pos = vDesc.node;
546         TSize w0 = dirAt(pos, index);
547         if (w0 & index.LEAF)
548             return vDesc.edgeLen = suffixLength(w0 & index.BITMASK0, index);
549 
550         TSize w1 = dirAt(pos + 1, index);
551         if (w1 & index.UNEVALUATED)
552             return vDesc.edgeLen = _bucketLcp(
553                 infix(indexSA(index), w0 & index.BITMASK0, w1 & index.BITMASK1),
554                 indexText(index));
555         else
556             return vDesc.edgeLen = _getNodeLP(index, w1) - (w0 & index.BITMASK0);
557     }
558 
559     // parentEdgeLength - MODIFIED VERSION
560     template < typename TIndex, typename TSize >
561     inline typename Size<TIndex>::Type
562     parentEdgeLength(TIndex const &index, VertexWotdModified_<TSize> &vDesc)
563     {
564         TSize edgeLen = vDesc.edgeLen;
565         if (edgeLen != (TSize)-1)
566             return edgeLen;
567 
568         TSize pos = vDesc.node;
569         TSize w0 = dirAt(pos, index);
570         if (w0 & index.LEAF)
571             return vDesc.edgeLen =
572                 suffixLength(saAt(vDesc.range.i1, index), index) - vDesc.parentRepLen;
573 
574         TSize w1 = dirAt(pos + 1, index);
575         if (w1 & index.UNEVALUATED)
576             if (_isSizeInval(vDesc.range.i2))
577                 return vDesc.edgeLen = _bucketLcp(
578                     suffix(indexSA(index), vDesc.range.i1),
579                     indexText(index),
580                     vDesc.parentRepLen) - vDesc.parentRepLen;
581             else
582                 return vDesc.edgeLen = _bucketLcp(
583                     infix(indexSA(index), vDesc.range.i1, vDesc.range.i2),
584                     indexText(index),
585                     vDesc.parentRepLen) - vDesc.parentRepLen;
586         else
587             return (dirAt(w1 & index.BITMASK1, index) & index.BITMASK0) - vDesc.parentRepLen;
588     }
589 
590     template < typename TText, typename TIndexSpec, typename TSpec >
591     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
592     parentEdgeLength(Iter<
593         Index<TText, IndexWotd<TIndexSpec> >,
594         VSTree< TopDown<TSpec> > > const &it)
595     {
596         typedef Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > TIter;
597         return parentEdgeLength(container(it), value(const_cast<TIter&>(it)));
598     }
599 
600     template < typename TText, typename TIndexSpec, typename TSpec >
601     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
602     parentRepLength(Iter<
603         Index<TText, IndexWotd<TIndexSpec> >,
604         VSTree< TopDown<TSpec> > > const &it)
605     {
606         return value(it).parentRepLen;
607     }
608 
609     template < typename TText, typename TIndexSpec, typename TSpec >
610     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
611     parentRepLength(Iter<
612         Index<TText, IndexWotd<TIndexSpec> >,
613         VSTree< TopDown< ParentLinks<TSpec> > > > const &it)
614     {
615         return value(it).parentRepLen;
616     }
617 
618     template < typename TText, typename TIndexSpec, typename TSpec >
619     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
620     repLength(Iter<
621         Index<TText, IndexWotd<TIndexSpec> >,
622         VSTree< TopDown<TSpec> > > const &it)
623     {
624         return parentRepLength(it) + parentEdgeLength(it);
625     }
626 
627 
628     // parentEdgeLabel - ORIGINAL VERSION (doesn't work if TText is a StringSet)
629     template < typename TText, typename TSpec >
630     inline typename Infix< typename Fibre<Index<TText, IndexWotd<WotdOriginal> >, EsaText>::Type const >::Type
631     parentEdgeLabel(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > const &it)
632     {
633         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
634         typedef typename Size<TIndex>::Type                TSize;
635 
636         TIndex const &index = container(it);
637 
638         if (isRoot(it))
639             return infix(indexText(index), 0, 0);
640         else {
641             TSize occ = _getNodeLP(index, value(it).node);
642             return infix(indexText(index), occ, occ + parentEdgeLength(it));
643         }
644     }
645 
646     // getOccurrence - ORIGINAL VERSION
647     template < typename TText, typename TSpec >
648     inline typename SAValue<Index<TText, IndexWotd<WotdOriginal> > >::Type
649     getOccurrence(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > const &it) {
650         return _getNodeLP(container(it), value(it).node) - value(it).parentRepLen;
651     }
652 
653     template < typename TText, typename TIndexSpec, typename TSpec >
654     inline bool
655     emptyParentEdge(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TopDown<TSpec> > > const &it)
656     {
657         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
658 
659         TIndex const &index = container(it);
660         typename SAValue<TIndex>::Type pos = getOccurrence(it);
661         return getSeqOffset(pos, stringSetLimits(index)) + value(it).parentRepLen
662             == sequenceLength(getSeqNo(pos, stringSetLimits(index)), index);
663     }
664 
665     // to avoid ambiguity
666     template < typename TText, typename TIndexSpec, typename TSpec >
667     inline bool
668     emptyParentEdge(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TopDown<ParentLinks<TSpec> > > > const &it)
669     {
670         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
671 
672         TIndex const &index = container(it);
673         typename SAValue<TIndex>::Type pos = getOccurrence(it);
674         return getSeqOffset(pos, stringSetLimits(index)) + value(it).parentRepLen
675             == sequenceLength(getSeqNo(pos, stringSetLimits(index)), index);
676     }
677 
678 
679 
680     template < typename TText, typename TSpec >
681     inline void
682     goRoot(Iter<
683         Index<TText, IndexWotd<WotdOriginal> >,
684         VSTree<TSpec> > &it)
685     {
686         _historyClear(it);
687         value(it).node = 0;            // start in root node (first entry in dir)
688         value(it).parentRepLen = 0;    // parent prefix length is 0
689         value(it).edgeLen = 0;        // edge length is 0
690     }
691 
692     template < typename TText, typename TIndexSpec, typename TSpec >
693     inline void
694     goRoot(Iter<
695         Index<TText, IndexWotd<TIndexSpec> >,
696         VSTree<TSpec> > &it)
697     {
698         _historyClear(it);
699         value(it).range.i1 = 0;        // start in root node with range (0,infty)
700         _setSizeInval(value(it).range.i2);    // infty is equivalent to length(index) and faster to compare
701         value(it).node = 0;            // start in root node (first entry in dir)
702         value(it).parentRepLen = 0;    // parent prefix length is 0
703         value(it).edgeLen = 0;        // edge length is 0
704     }
705 
706     template < typename TText, typename TSpec >
707     inline bool atEnd(Iter<Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > &it)
708     {
709         return _isSizeInval(value(it).node);
710     }
711 
712     template < typename TText, typename TSpec >
713     inline bool atEnd(Iter<Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > const &it)
714     {
715         return _isSizeInval(value(it).node);
716     }
717 
718 
719     // adjust iterator's right border of SA range
720     template < typename TText, typename TSpec >
721     inline void
722     _adjustRightBorder(
723         Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &)
724     {}
725 
726     template < typename TText, typename TIndexSpec, typename TSpec >
727     inline void
728     _adjustRightBorder(
729         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it)
730     {
731         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
732         typedef typename Size<TIndex>::Type                TSize;
733 
734         TIndex    const &index = container(it);
735         TSize    pos = value(it).node;
736 
737         TSize w0 = dirAt(pos, index);
738         if (w0 & index.LEAF)
739             value(it).range.i2 = value(it).range.i1 + 1;
740         else
741             if (w0 & index.LAST_CHILD)
742                 value(it).range.i2 = value(it).parentRight;
743             else {
744                 w0 = dirAt(pos + 2, index);
745                 value(it).range.i2 = w0 & index.BITMASK0;
746             }
747     }
748 
749     // go down the leftmost edge (including empty $-edges)
750     template < typename TText, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
751     inline bool
752     _goDown(
753         Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it,
754         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
755     {
756         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
757         typedef typename Size<TIndex>::Type                TSize;
758 
759         if (_isLeaf(it, EmptyEdges())) return false;
760 
761         TIndex &index = container(it);
762         _historyPush(it);
763 
764         // ensure node evaluation
765         TSize childNode = _wotdEvaluate(it);
766 
767         if (THideEmptyEdges::VALUE && (childNode & index.SENTINELS) != 0)
768             return false;
769 
770         // go down
771         value(it).node = childNode & index.BITMASK1;
772         value(it).edgeLen = -1;
773 
774         // go right if parent edge is empty
775         // or hull predicate is false
776         if ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it))
777             if (!goRight(it)) {
778                 _goUp(it);
779                 return false;
780             }
781 
782         return true;
783     }
784 
785     // go down the leftmost edge (excluding empty $-edges)
786     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
787     inline bool
788     _goDown(
789         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it,
790         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
791     {
792         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
793         typedef typename Size<TIndex>::Type                TSize;
794 
795         if (_isLeaf(it, EmptyEdges())) return false;
796         TIndex const &index = container(it);
797 
798         // ensure node evaluation
799         TSize childNode = _wotdEvaluate(it);
800 
801         if (THideEmptyEdges::VALUE && (childNode & index.SENTINELS) != 0)
802             return false;
803 
804         // go down
805         _historyPush(it);
806         value(it).node = childNode & index.BITMASK1;
807         value(it).edgeLen = -1;
808         _adjustRightBorder(it);
809 
810         // go right if parent edge is empty
811         // or hull predicate is false
812         if ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it))
813             if (!goRight(it)) {
814                 _goUp(it);
815                 return false;
816             }
817 
818         return true;
819     }
820 
821     // go right to the lexic. next sibling
822     template < typename TText, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
823     inline bool
824     _goRight(
825         Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it,
826         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
827     {
828         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
829         typedef typename Size<TIndex>::Type                TSize;
830 
831         TIndex const &index = container(it);
832 
833         do {
834             TSize w0 = dirAt(value(it).node, index);
835             if (w0 & index.LAST_CHILD)
836                 return false;
837 
838             value(it).node += (w0 & index.LEAF)? 1: 2;
839             value(it).edgeLen = -1;
840 
841             _adjustRightBorder(it);
842         } while ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it));
843         return true;
844     }
845 
846     template < typename TText, typename TIndexSpec, typename TSpec, typename TDfsOrder, typename THideEmptyEdges >
847     inline bool
848     _goRight(
849         Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it,
850         VSTreeIteratorTraits<TDfsOrder, THideEmptyEdges> const)
851     {
852         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
853         typedef typename Size<TIndex>::Type                TSize;
854 
855         TIndex const &index = container(it);
856 
857         do {
858             TSize w0 = dirAt(value(it).node, index);
859             if (w0 & index.LAST_CHILD)
860                 return false;
861 
862             value(it).node += (w0 & index.LEAF)? 1: 2;
863             value(it).edgeLen = -1;
864 
865             value(it).range.i1 = value(it).range.i2;
866             _adjustRightBorder(it);
867         } while ((THideEmptyEdges::VALUE && emptyParentEdge(it)) || !nodeHullPredicate(it));
868         return true;
869     }
870 
871     // go up one edge (returns false if in root node)
872     // can be used at most once, as no history stack is available
873     template < typename TText, typename TWotdSpec, typename TSpec >
874     inline bool
875     _goUp(Iter< Index<TText, IndexWotd<TWotdSpec> >, VSTree< TopDown<TSpec> > > &it)
876     {
877         if (!isRoot(it)) {
878             value(it) = it._parentDesc;
879             return true;
880         }
881         return false;
882     }
883 
884     // go up one edge (returns false if in root node)
885     template < typename TText, typename TSpec >
886     inline bool
887     _goUp(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
888     {
889         typedef typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type TSize;
890 
891         if (!empty(it.history)) {
892             HistoryStackWotdOriginal_<TSize> const &entry = back(it.history);
893             value(it).node = entry.node;
894             value(it).parentRepLen -= entry.edgeLen;
895             value(it).edgeLen = entry.edgeLen;
896             eraseBack(it.history);
897             return true;
898         }
899         return false;
900     }
901 
902     template < typename TText, typename TIndexSpec, typename TSpec >
903     inline bool
904     _goUp(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > &it)
905     {
906         typedef typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type TSize;
907 
908         if (!empty(it.history))
909         {
910             HistoryStackWotdModified_<TSize> const &entry = back(it.history);
911             value(it).node = entry.node;
912             value(it).parentRepLen -= entry.edgeLen;
913             value(it).edgeLen = entry.edgeLen;
914             value(it).range = entry.range;
915             eraseBack(it.history);
916             if (!empty(it.history))
917                 value(it).parentRight = back(it.history).range.i2;    // copy right boundary of parent's range
918             return true;
919         }
920         return false;
921     }
922 
923     // return vertex descriptor of parent's node
924     template < typename TText, typename TSpec >
925     inline typename VertexDescriptor< Index<TText, IndexWotd<WotdOriginal> > >::Type
926     nodeUp(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown< ParentLinks<TSpec> > > > const &it)
927     {
928         typedef Index<TText, IndexWotd<WotdOriginal> > TIndex;
929         typedef typename Size<TIndex>::Type TSize;
930 
931         if (!empty(it.history))
932         {
933             HistoryStackWotdOriginal_<TSize> const &entry = back(it.history);
934             typename VertexDescriptor<TIndex>::Type desc;
935 
936             desc.node = entry.node;
937             desc.parentRepLen = value(it).parentRepLen - entry.edgeLen;
938             desc.edgeLen = entry.edgeLen;
939             TSize h = length(it.history) - 1;
940             if (h != 0) --h;
941             return desc;
942         } else
943             return value(it);
944     }
945 
946     // return vertex descriptor of parent's node
947     template < typename TText, typename TIndexSpec, typename TSpec >
948     inline typename VertexDescriptor< Index<TText, IndexWotd<TIndexSpec> > >::Type
949     nodeUp(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown< ParentLinks<TSpec> > > > const &it)
950     {
951         typedef Index<TText, IndexWotd<TIndexSpec> > TIndex;
952         typedef typename Size<TIndex>::Type TSize;
953 
954         if (!empty(it.history))
955         {
956             HistoryStackWotdModified_<TSize> const &entry = back(it.history);
957             typename VertexDescriptor<TIndex>::Type desc;
958 
959             desc.node = entry.node;
960             desc.parentRepLen = value(it).parentRepLen - entry.edgeLen;
961             desc.edgeLen = entry.edgeLen;
962             return desc;
963         } else
964             return value(it);
965     }
966 
967 //////////////////////////////////////////////////////////////////////////////
968 
969 
970     // Counting sort - Step 2a: Count characters
971     template < typename TBuckets, typename TText >
972     inline void
973     _wotdCountChars(TBuckets &buckets, TText const &text)
974     {
975         typedef typename Iterator<TText const, Standard>::Type TTextIterator;
976 
977         TTextIterator itText = begin(text, Standard());
978         TTextIterator itTextEnd = end(text, Standard());
979         for (; itText != itTextEnd; ++itText)
980             ++buckets[ordValue(*itText)];
981     }
982 
983 
984     template < typename TBuckets, typename TText, typename TSpec >
985     inline void
986     _wotdCountChars(TBuckets &buckets, StringSet<TText, TSpec> const &stringSet)
987     {
988         typedef typename Iterator<TText const, Standard>::Type TTextIterator;
989 
990         for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
991         {
992             TText const &text = value(stringSet, seqNo);
993             TTextIterator itText = begin(text, Standard());
994             TTextIterator itTextEnd = end(text, Standard());
995             for (; itText != itTextEnd; ++itText)
996                 ++buckets[ordValue(*itText)];
997         }
998     }
999 
1000     // Counting sort - Step 2b: Count the prefixLen'th characters of suffices
1001     template < typename TBuckets, typename TText, typename TSA, typename TSize >
1002     inline typename Size<TText>::Type
1003     _wotdCountCharsWotdOriginal(
1004         TBuckets &buckets,
1005         TText const &text,
1006         TSA &sa,
1007         TSize prefixLen)
1008     {
1009         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1010         typedef typename Iterator<TSA, Standard>::Type          TSAIterator;
1011         typedef typename Size<TText>::Type                        TTextSize;
1012 
1013         TTextIterator itText = begin(text, Standard());
1014         TSAIterator itSA = begin(sa, Standard());
1015         TSAIterator itSAEnd = end(sa, Standard());
1016 
1017         TTextSize sentinels = 0;
1018         TTextSize textLength = length(text);
1019         for (; itSA != itSAEnd; ++itSA)
1020         {
1021             // add prefix on entries in sa
1022             TTextSize saValue = (*itSA += prefixLen);
1023             if (textLength > saValue)
1024                 ++buckets[ordValue(*(itText + saValue))];
1025             else
1026                 if (textLength == saValue) ++sentinels;
1027         }
1028         return sentinels;
1029     }
1030 
1031     template < typename TBuckets, typename TText, typename TSA, typename TSize >
1032     inline typename Size<TText>::Type
1033     _wotdCountChars(
1034         TBuckets &buckets,
1035         TText const &text,
1036         TSA const &sa,
1037         TSize prefixLen)
1038     {
1039         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1040         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1041         typedef typename Size<TText>::Type                        TTextSize;
1042 
1043         TTextIterator itText = begin(text, Standard()) + prefixLen;
1044         TSAIterator itSA = begin(sa, Standard());
1045         TSAIterator itSAEnd = end(sa, Standard());
1046 
1047         TTextSize sentinels = 0;
1048         TTextSize textLength = length(text) - prefixLen;
1049         for (; itSA != itSAEnd; ++itSA)
1050         {
1051             TTextSize saValue = *itSA;
1052             if (textLength > saValue)
1053                 ++buckets[ordValue(*(itText + saValue))];
1054             else
1055                 if (textLength == saValue) ++sentinels;
1056         }
1057         return sentinels;
1058     }
1059 
1060     template <
1061         typename TBuckets,
1062         typename TText,
1063         typename TSpec,
1064         typename TSA,
1065         typename TSize
1066     >
1067     inline typename Size<TText>::Type
1068     _wotdCountChars(
1069         TBuckets &buckets,
1070         StringSet<TText, TSpec> const &stringSet,
1071         TSA const &sa,
1072         TSize prefixLen)
1073     {
1074         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1075         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1076         typedef typename Size<TText>::Type                        TTextSize;
1077 
1078         if (empty(stringSet))
1079             return 0;
1080 
1081         TTextIterator itText = begin(front(stringSet), Standard());
1082         TSAIterator itSA = begin(sa, Standard());
1083         TSAIterator itSAEnd = end(sa, Standard());
1084 
1085         TTextSize sentinels = 0;
1086         TTextSize textLength = 0;
1087         unsigned lastSeqSeen = (unsigned)-1;
1088         Pair<unsigned, TTextSize> lPos;
1089         for (; itSA != itSAEnd; ++itSA)
1090         {
1091             posLocalize(lPos, *itSA, stringSetLimits(stringSet));
1092             if (lastSeqSeen != getSeqNo(lPos))
1093             {
1094                 lastSeqSeen = getSeqNo(lPos);
1095 
1096                 // shift textBegin and textLength by prefixLen
1097                 textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1098                 itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1099             }
1100             if (textLength > getSeqOffset(lPos))
1101                 ++buckets[ordValue(*(itText + getSeqOffset(lPos)))];
1102             else
1103                 if (textLength == getSeqOffset(lPos)) ++sentinels;
1104         }
1105         return sentinels;
1106     }
1107 
1108 
1109 //////////////////////////////////////////////////////////////////////////////
1110 
1111 
1112     // Counting sort - Step 3: Cumulative sum
1113     template < typename TBounds, typename TBuckets, typename TSize >
1114     inline typename Size<TBuckets>::Type
1115     _wotdCummulativeSum(TBounds &bounds, TBuckets const &buckets, TSize offset)
1116     {
1117         typedef typename Iterator<TBounds, Standard>::Type          TBoundIterator;
1118         typedef typename Iterator<TBuckets const, Standard>::Type   TBucketsIterator;
1119 
1120         TBucketsIterator it = begin(buckets, Standard());
1121         TBucketsIterator itEnd = end(buckets, Standard());
1122         TBoundIterator bit = begin(bounds, Standard());
1123 
1124         typename Value<TBounds>::Type    sum = offset;
1125         typename Size<TBuckets>::Type    requiredSize = 0;
1126         typename Value<TBuckets>::Type    diff;
1127 
1128         for (; it != itEnd; ++it, ++bit)
1129             if ((diff = *it) != 0) {
1130                 requiredSize += (diff > 1)? 2: 1;
1131                 *bit = sum;
1132                 sum += diff;
1133             }
1134 
1135         return requiredSize;
1136     }
1137 
1138     template < typename TBounds, typename TBuckets >
1139     inline typename Size<TBuckets>::Type
1140     _wotdCummulativeSum(TBounds &bounds, TBuckets const &buckets)
1141     {
1142         return _wotdCummulativeSum(bounds, buckets, 0);
1143     }
1144 
1145 //////////////////////////////////////////////////////////////////////////////
1146 //TODO(singer): The function createWotdIndex in never defined!
1147 /*!
1148  * @fn IndexWotd#createWotdIndex
1149  * @headerfile <seqan/index.h>
1150  * @brief Builds a the WOTD index.
1151  *
1152  * @signature void createWotdIndex(sa, dir, text);
1153  *
1154  * @param[out] sa  The resulting list in which all <i>q</i>-grams are sorted alphabetically.
1155  * @param[out] dir The resulting array that indicates at which position in index the corresponding <i>q</i>-grams
1156  *                 can be found.
1157  * @param[in] text The sequence. Types: @link ContainerConcept @endlink
1158  *
1159  * The resulting <tt>index</tt> contains the sorted list of qgrams.  For each possible <i>q</i>-gram pos contains
1160  * the first position in index that corresponds to this <i>q</i>-gram.
1161  */
1162 
1163     // single sequence
1164     template < typename TIndex >
1165     typename Size<TIndex>::Type
1166     _sortFirstWotdBucket(TIndex &index)
1167     {
1168         typedef typename Fibre<TIndex, WotdText >::Type        TText;
1169         typedef typename Fibre<TIndex, WotdSA >::Type            TSA;
1170         typedef typename TIndex::TCounter                        TCounter;
1171 
1172         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1173         typedef typename Iterator<TSA, Standard>::Type            TSAIterator;
1174         typedef typename Iterator<TCounter, Standard>::Type        TCntIterator;
1175         typedef typename Size<TText>::Type                        TSize;
1176 
1177         TText const &text = indexText(index);
1178         TCounter &occ = index.tempOcc;
1179         TCounter &bound = index.tempBound;
1180 
1181         // 1. clear counters and copy SA to temporary SA
1182         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1183 
1184         // 2. count characters
1185         _wotdCountChars(occ, text);
1186 
1187         // 3. cumulative sum
1188         TSize requiredSize = _wotdCummulativeSum(bound, occ);
1189 
1190         // 4. fill suffix array
1191         {
1192             TSA &sa = indexSA(index);
1193             TSAIterator saBeg = begin(sa, Standard());
1194             TCntIterator boundBeg = begin(bound, Standard());
1195 
1196             TTextIterator itText = begin(text, Standard());
1197             TTextIterator itTextEnd = end(text, Standard());
1198             for(TSize i = 0; itText != itTextEnd; ++itText, ++i)
1199                 *(saBeg + (*(boundBeg + ordValue(*itText)))++) = i;
1200         }
1201         index.sentinelOcc = 0;
1202         index.sentinelBound = 0;
1203 
1204         return requiredSize;
1205     }
1206 
1207     // multiple sequences
1208     template < typename TText, typename TSpec, typename TIndexSpec >
1209     typename Size< Index<StringSet<TText, TSpec>, TIndexSpec> >::Type
1210     _sortFirstWotdBucket(Index<StringSet<TText, TSpec>, TIndexSpec> &index)
1211     {
1212         typedef Index<StringSet<TText, TSpec>, TIndexSpec>        TIndex;
1213         typedef typename Fibre<TIndex, WotdSA >::Type            TSA;
1214         typedef typename TIndex::TCounter                        TCounter;
1215 
1216         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1217         typedef typename Iterator<TSA, Standard>::Type            TSAIterator;
1218         typedef typename Iterator<TCounter, Standard>::Type        TCntIterator;
1219         typedef typename Size<TText>::Type                        TSize;
1220 
1221         StringSet<TText, TSpec> const &stringSet = indexText(index);
1222         TCounter &occ = index.tempOcc;
1223         TCounter &bound = index.tempBound;
1224 
1225         // 1. clear counters and copy SA to temporary SA
1226         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1227 
1228         // 2. count characters
1229         _wotdCountChars(occ, stringSet);
1230 
1231         // 3. cummulative sum
1232         TSize requiredSize = _wotdCummulativeSum(bound, occ);
1233 
1234         // 4. fill suffix array
1235         for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1236         {
1237             TSA &sa = indexSA(index);
1238             TSAIterator saBeg = begin(sa, Standard());
1239             TCntIterator boundBeg = begin(bound, Standard());
1240 
1241             typename Value<TSA>::Type localPos;
1242             assignValueI1(localPos, seqNo);
1243             assignValueI2(localPos, 0);
1244 
1245             TText const &text = value(stringSet, seqNo);
1246             TTextIterator itText = begin(text, Standard());
1247             TTextIterator itTextEnd = end(text, Standard());
1248             for(; itText != itTextEnd; ++itText)
1249             {
1250                 *(saBeg + (*(boundBeg + ordValue(*itText)))++) = localPos;
1251                 assignValueI2(localPos, getValueI2(localPos) + 1);
1252             }
1253         }
1254         index.sentinelOcc = 0;
1255         index.sentinelBound = 0;
1256 
1257         return requiredSize;
1258     }
1259 
1260 
1261 
1262     // sort bucket using counting sort
1263     // (nearly) ORIGINAL VERSION:
1264     // - brings the bucket with the longest suffix (lpBucket) to front
1265     // - all other buckets are in lexicographical order
1266     // - adds prefixLen to all SA entries in SA[left,right)
1267     template < typename TText, typename TBeginPos, typename TEndPos, typename TSize >
1268     TSize _sortWotdBucket(
1269         Index<TText, IndexWotd<WotdOriginal> > &index,
1270         TBeginPos left,
1271         TEndPos right,
1272         TSize prefixLen)
1273     {
1274         typedef Index<TText, IndexWotd<WotdOriginal> >              TIndex;
1275         typedef typename Fibre<TIndex, WotdSA >::Type               TSA;
1276         typedef typename TIndex::TCounter                           TCounter;
1277 
1278         typedef typename Iterator<TText const, Standard>::Type      TTextIterator;
1279         typedef typename Iterator<TSA, Standard>::Type              TSAIterator;
1280         typedef typename Iterator<TCounter, Standard>::Type         TCntIterator;
1281         typedef typename Iterator<TCounter const, Standard>::Type   TConstCntIterator;
1282         typedef typename Size<TText>::Type                          TTextSize;
1283 
1284         TText const &text = indexText(index);
1285         TCounter const &tempSA = index.tempSA;
1286         TCounter &occ = index.tempOcc;
1287         TCounter &bound = index.tempBound;
1288 
1289         // 1. clear counters and copy SA to temporary SA
1290         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1291 
1292         // 2. count characters
1293         index.tempSA = infix(indexSA(index), left, right);
1294         index.sentinelBound = 0;
1295         index.sentinelOcc =
1296             _wotdCountCharsWotdOriginal(occ, text, index.tempSA, prefixLen);
1297 
1298         // 3. cumulative sum
1299         TSize requiredSize = 0;
1300 
1301         // actually, here sentinelOcc<=1 holds (this is the original wotd)
1302         if (index.interSentinelNodes) {
1303             if (index.sentinelOcc != 0)
1304                 requiredSize = (index.sentinelOcc > 1)? 2: 1;    // insert *one* $-edge for all $_i suffices
1305         } else
1306             requiredSize = index.sentinelOcc;                    // insert each $_i suffix one-by-one
1307 
1308         requiredSize += _wotdCummulativeSum(bound, occ, left + index.sentinelOcc);
1309         index.sentinelBound = left;
1310 
1311         // 4. fill suffix array
1312         {
1313             TSA &sa = indexSA(index);
1314             TSAIterator saBeg = begin(sa, Standard());
1315             TCntIterator boundBeg = begin(bound, Standard());
1316 
1317             TTextIterator itText = begin(text, Standard());
1318             TConstCntIterator itSA = begin(tempSA, Standard());
1319             TConstCntIterator itSAEnd = end(tempSA, Standard());
1320             TTextSize textLength = length(text);
1321             for(; itSA != itSAEnd; ++itSA)
1322             {
1323                 TTextSize saValue = *itSA;
1324                 if (textLength > saValue)
1325                     *(saBeg + (*(boundBeg + ordValue(*(itText + saValue))))++) = saValue;
1326                 else
1327                     if (textLength == saValue)
1328                         *(saBeg + index.sentinelBound++) = saValue;
1329             }
1330         }
1331 
1332         // 5. move lpBucket to front and add saOffset to hash directory entries
1333         {
1334             TSize lpBucket = ordValue(text[tempSA[0]]);
1335             if (lpBucket != 0) {
1336                 TSize lpBucketOcc = occ[lpBucket];
1337                 TSize lpBucketBound = bound[lpBucket];
1338 
1339                 TCntIterator itOcc = begin(occ, Standard()) + lpBucket;
1340                 TCntIterator itBound = begin(bound, Standard()) + lpBucket;
1341                 TCntIterator itBeg = begin(bound, Standard());
1342                 for(; itBound != itBeg; --itBound, --itOcc) {
1343                     *itOcc = *(itOcc - 1);
1344                     *itBound = *(itBound - 1);
1345                 }
1346                 if (index.sentinelOcc != 0) {
1347                     // bring first bucket before sentinel bucket
1348                     *itOcc = index.sentinelOcc;
1349                     *itBound = index.sentinelBound;
1350                     index.sentinelOcc = lpBucketOcc;
1351                     index.sentinelBound = lpBucketBound;
1352                 } else {
1353                     *itOcc = lpBucketOcc;
1354                     *itBound = lpBucketBound;
1355                 }
1356             } else
1357                 if (index.sentinelOcc != 0) {
1358                     // bring first bucket before sentinel bucket
1359                     TSize swap = index.sentinelOcc;
1360                     index.sentinelOcc = occ[0];
1361                     occ[0] = swap;
1362                     swap = index.sentinelBound;
1363                     index.sentinelBound = bound[0];
1364                     bound[0] = swap;
1365                 }
1366         }
1367 
1368         return requiredSize;
1369     }
1370 
1371 
1372 
1373 
1374 
1375     // sort bucket using counting sort
1376     // MODIFIED VERSION:
1377     // - all buckets are in lexicographical order
1378     // - SA[left,right) contains real SA entries (the beginning positions of the suffices)
1379 
1380     // single sequence
1381     template < typename TIndex, typename TBeginPos, typename TEndPos, typename TSize >
1382     TSize _sortWotdBucket(
1383         TIndex &index,
1384         TBeginPos left,
1385         TEndPos right,
1386         TSize prefixLen)
1387     {
1388         typedef typename Fibre<TIndex, WotdText >::Type             TText;
1389         typedef typename Fibre<TIndex, WotdSA >::Type               TSA;
1390         typedef typename TIndex::TCounter                           TCounter;
1391 
1392         typedef typename Iterator<TText const, Standard>::Type      TTextIterator;
1393         typedef typename Iterator<TSA, Standard>::Type              TSAIterator;
1394         typedef typename Iterator<TCounter, Standard>::Type         TCntIterator;
1395         typedef typename Iterator<TCounter const, Standard>::Type   TConstCntIterator;
1396         typedef typename Size<TText>::Type                          TTextSize;
1397 
1398         TText const &text = indexText(index);
1399         TCounter const &tempSA = index.tempSA;
1400         TCounter &occ = index.tempOcc;
1401         TCounter &bound = index.tempBound;
1402 
1403         // 1. clear counters and copy SA to temporary SA
1404         arrayFill(begin(occ, Standard()), end(occ, Standard()), 0);
1405         index.tempSA = infix(indexSA(index), left, right);
1406 
1407         // 2. count characters
1408         index.sentinelBound = 0;
1409         index.sentinelOcc = _wotdCountChars(occ, text, tempSA, prefixLen);
1410 
1411         // 3. cumulative sum
1412         TSize requiredSize = 0;
1413         if (index.interSentinelNodes) {
1414             if (index.sentinelOcc != 0)
1415                 requiredSize = (index.sentinelOcc > 1)? 2: 1;    // insert *one* $-edge for all $_i suffices
1416         } else
1417             requiredSize = index.sentinelOcc;                    // insert each $_i suffix one-by-one
1418 
1419         requiredSize += _wotdCummulativeSum(bound, occ, left + index.sentinelOcc);
1420         index.sentinelBound = left;
1421 
1422         // 4. fill suffix array
1423         {
1424             TSA &sa = indexSA(index);
1425             TSAIterator saBeg = begin(sa, Standard());
1426             TCntIterator boundBeg = begin(bound, Standard());
1427 
1428             TTextIterator itText = begin(text, Standard()) + prefixLen;
1429             TConstCntIterator itSA = begin(tempSA, Standard());
1430             TConstCntIterator itSAEnd = end(tempSA, Standard());
1431             TTextSize textLength = length(text) - prefixLen;
1432             for(; itSA != itSAEnd; ++itSA)
1433             {
1434                 TTextSize saValue = *itSA;
1435                 if (textLength > saValue)
1436                     *(saBeg + (*(boundBeg + ordValue(*(itText + saValue))))++) = saValue;
1437                 else
1438                     if (textLength == saValue)
1439                         *(saBeg + index.sentinelBound++) = saValue;
1440             }
1441         }
1442 
1443         return requiredSize;
1444     }
1445 
1446     // multiple sequences
1447     template < typename TText, typename TSpec, typename TIndexSpec, typename TBeginPos, typename TEndPos, typename TSize >
1448     TSize _sortWotdBucket(
1449         Index<StringSet<TText, TSpec>, TIndexSpec> &index,
1450         TBeginPos left,
1451         TEndPos right,
1452         TSize prefixLen)
1453     {
1454         typedef Index<StringSet<TText, TSpec>, TIndexSpec>            TIndex;
1455         typedef typename Fibre<TIndex, WotdSA >::Type                TSA;
1456         typedef typename TIndex::TCounter                            TCounter;
1457         typedef typename TIndex::TTempSA                            TTempSA;
1458 
1459         typedef typename Iterator<TText const, Standard>::Type        TTextIterator;
1460         typedef typename Iterator<TSA, Standard>::Type                TSAIterator;
1461         typedef typename Iterator<TTempSA const, Standard>::Type    TTempSAIterator;
1462         typedef typename Iterator<TCounter, Standard>::Type            TCntIterator;
1463         typedef typename Size<TText>::Type                            TTextSize;
1464 
1465         StringSet<TText, TSpec> const &stringSet = indexText(index);
1466         TTempSA const &tempSA = index.tempSA;
1467         TCounter &occ = index.tempOcc;
1468         TCounter &bound = index.tempBound;
1469 
1470         // 1. clear counters and copy SA to temporary SA
1471         TCntIterator occBeg = begin(occ, Standard());
1472 
1473         arrayFill(occBeg, end(occ, Standard()), 0);
1474         index.tempSA = infix(indexSA(index), left, right);
1475 
1476         // 2. count characters
1477         index.sentinelBound = 0;
1478         index.sentinelOcc = _wotdCountChars(occ, stringSet, tempSA, prefixLen);
1479 
1480         // 3. cumulative sum
1481         TSize requiredSize = 0;
1482         if (index.interSentinelNodes) {
1483             if (index.sentinelOcc != 0)
1484                 requiredSize = (index.sentinelOcc > 1)? 2: 1;    // insert *one* $-edge for all $_i suffices
1485         } else
1486             requiredSize = index.sentinelOcc;                    // insert each $_i suffix one-by-one
1487 
1488         requiredSize += _wotdCummulativeSum(bound, occ, left + index.sentinelOcc);
1489         index.sentinelBound = left;
1490 
1491         // 4. fill suffix array
1492         {
1493             if (empty(stringSet))
1494                 return requiredSize;
1495 
1496             TSA &sa = indexSA(index);
1497             TSAIterator saBeg = begin(sa, Standard());
1498             TCntIterator boundBeg = begin(bound, Standard());
1499 
1500             TTextIterator itText = begin(front(stringSet), Standard());
1501             TTempSAIterator itSA = begin(tempSA, Standard());
1502             TTempSAIterator itSAEnd = end(tempSA, Standard());
1503             TTextSize textLength = 0;
1504             unsigned lastSeqSeen = (unsigned)-1;
1505             Pair<unsigned, TTextSize> lPos;
1506             for(; itSA != itSAEnd; ++itSA)
1507             {
1508                 posLocalize(lPos, *itSA, stringSetLimits(index));
1509                 if (lastSeqSeen != getSeqNo(lPos))
1510                 {
1511                     lastSeqSeen = getSeqNo(lPos);
1512 
1513                     // shift textBegin and textLength by prefixLen
1514                     textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1515                     itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1516                 }
1517                 if (textLength > getSeqOffset(lPos))
1518                     *(saBeg + (*(boundBeg + ordValue(*(itText + getSeqOffset(lPos)))))++) = *itSA;
1519                 else
1520                     if (textLength == getSeqOffset(lPos))
1521                         *(saBeg + index.sentinelBound++) = *itSA;
1522             }
1523         }
1524 
1525         return requiredSize;
1526     }
1527 
1528 
1529 
1530 
1531 
1532     template < typename TSA, typename TText >
1533     typename Size<TText>::Type
1534     _bucketLcp(TSA const &sa, TText const &text)
1535     {
1536         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1537         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1538         typedef typename Value<TText>::Type                        TValue;
1539         typedef typename Size<TText>::Type                        TTextSize;
1540 
1541         TTextSize prefixLen = 0;
1542 
1543         if (length(sa) < 2) return prefixLen;
1544 
1545         TTextIterator itText = begin(text, Standard());
1546         TSAIterator itSAEnd = end(sa, Standard());
1547         TTextSize textLength = length(text);
1548 
1549         do {
1550             TSAIterator itSA = begin(sa, Standard());
1551             TTextSize sa = *itSA;
1552             if (textLength == sa) return prefixLen;
1553 
1554             TValue c = *(itText + sa);
1555             for(++itSA; itSA != itSAEnd; ++itSA) {
1556                 sa = *itSA;
1557                 if (textLength == sa || c != *(itText + sa))
1558                     return prefixLen;
1559             }
1560             ++prefixLen; --textLength;
1561             ++itText;
1562         } while (true);
1563     }
1564 
1565     template < typename TSA, typename TText, typename TSize >
1566     typename Size<TText>::Type
1567     _bucketLcp(TSA const &sa, TText const &text, TSize prefixLen)
1568     {
1569         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1570         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1571         typedef typename Value<TText>::Type                        TValue;
1572         typedef typename Size<TText>::Type                        TTextSize;
1573 
1574         if (length(sa) < 2) return prefixLen;
1575 
1576         TTextIterator itText = begin(text, Standard()) + prefixLen;
1577         TSAIterator itSAEnd = end(sa, Standard());
1578         TTextSize textLength = length(text) - prefixLen;
1579 
1580         do {
1581             TSAIterator itSA = begin(sa, Standard());
1582             TTextSize sa = *itSA;
1583             if (textLength == sa) return prefixLen;
1584 
1585             TValue c = *(itText + sa);
1586             for(++itSA; itSA != itSAEnd; ++itSA) {
1587                 sa = *itSA;
1588                 if (textLength == sa || *(itText + sa) != c)
1589                     return prefixLen;
1590             }
1591             ++prefixLen; --textLength;
1592             ++itText;
1593         } while (true);
1594     }
1595 
1596     template < typename TSA, typename TText, typename TSpec, typename TSize >
1597     typename Size<TText>::Type
1598     _bucketLcp(TSA const &sa, StringSet<TText, TSpec> const &stringSet, TSize prefixLen)
1599     {
1600         typedef typename Iterator<TText const, Standard>::Type    TTextIterator;
1601         typedef typename Iterator<TSA const, Standard>::Type    TSAIterator;
1602         typedef typename Value<TText>::Type                        TValue;
1603         typedef typename Size<TText>::Type                        TTextSize;
1604 
1605         if (length(sa) < 2) return prefixLen;
1606 
1607         TSAIterator itSAEnd = end(sa, Standard());
1608         TTextIterator itText;
1609         TTextSize textLength;
1610 
1611         Pair<unsigned, TTextSize> lPos;
1612         do {
1613             TSAIterator itSA = begin(sa, Standard());
1614             posLocalize(lPos, *itSA, stringSetLimits(stringSet));
1615 
1616             unsigned lastSeqSeen = getSeqNo(*itSA);
1617             textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1618             if (textLength == getSeqOffset(lPos)) return prefixLen;
1619 
1620             itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1621             TValue c = *(itText + getSeqOffset(*itSA));
1622             for(++itSA; itSA != itSAEnd; ++itSA)
1623             {
1624                 posLocalize(lPos, *itSA, stringSetLimits(stringSet));
1625 
1626                 if (lastSeqSeen != getSeqNo(lPos))
1627                 {
1628                     lastSeqSeen = getSeqNo(lPos);
1629 
1630                     // shift textBegin and textLength by prefixLen
1631                     textLength = length(stringSet[lastSeqSeen]) - prefixLen;
1632                     itText = begin(stringSet[lastSeqSeen], Standard()) + prefixLen;
1633                 }
1634 
1635                 if (textLength == getSeqOffset(lPos) || c != *(itText + getSeqOffset(lPos)))
1636                     return prefixLen;
1637             }
1638             ++prefixLen; --textLength;
1639             ++itText;
1640         } while (true);
1641     }
1642 
1643 
1644     template <typename TText, typename TSpec, typename TPos>
1645     inline TPos
1646     _getNodeLP(
1647         Index<TText, IndexWotd<TSpec> > const &index,
1648         TPos pos)
1649     {
1650         TPos w0 = dirAt(pos, index);
1651         if (w0 & index.LEAF)
1652             return w0 & index.BITMASK0;
1653 
1654         TPos w1 = dirAt(pos + 1, index);
1655         if (w1 & index.UNEVALUATED)
1656             return saAt(w0 & index.BITMASK0, index);
1657         else
1658             return w0 & index.BITMASK0;
1659     }
1660 
1661     // store buckets into directory
1662     // ORIGINAL VERSION: storing SA entries and topology links in Dir
1663     template <typename TText, typename TPos>
1664     inline void
1665     _storeWotdChildren(
1666         Index<TText, IndexWotd<WotdOriginal> > &index,
1667         TPos dirOfs)
1668     {
1669         typedef Index<TText, IndexWotd<WotdOriginal> >        TIndex;
1670         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1671         typedef typename Iterator<TDir, Standard>::Type        TDirIterator;
1672         typedef typename Size<TDir>::Type                    TDirSize;
1673         typedef typename TIndex::TCounter                    TCounter;
1674         typedef typename Iterator<TCounter, Standard>::Type    TCntIterator;
1675 
1676         typedef typename Value<TCounter>::Type                TValue;
1677 
1678         TDirIterator itDir = begin(indexDir(index), Standard()) + dirOfs;
1679         TDirIterator itDirEnd = end(indexDir(index), Standard());
1680         TDirIterator itPrev = itDirEnd;
1681 
1682         TCntIterator it = begin(index.tempOcc, Standard());
1683         TCntIterator bit = begin(index.tempBound, Standard());
1684         TCntIterator itEnd = end(index.tempOcc, Standard());
1685 
1686         TValue occ;
1687         if (index.sentinelOcc != 0)
1688         {
1689             if (index.sentinelOcc > 1 && index.interSentinelNodes)    // occurs on multiseqs
1690             {
1691                 itPrev = itDir;
1692                 *itDir = index.sentinelBound - index.sentinelOcc;    ++itDir;
1693                 *itDir = index.sentinelBound | index.UNEVALUATED;    ++itDir;
1694             } else
1695                 for (TDirSize d = index.sentinelBound - index.sentinelOcc; d != index.sentinelBound; ++d)
1696                 {
1697                     itPrev = itDir;
1698                     *itDir = saAt(d, index) | index.LEAF;            ++itDir;
1699                 }
1700         }
1701         for (; it != itEnd; ++it, ++bit)
1702         {
1703             if ((occ = *it) == 0) continue;
1704             if (occ > 1) {
1705                 itPrev = itDir;
1706                 *itDir = *bit - occ;                             ++itDir;
1707                 *itDir = *bit | index.UNEVALUATED;                ++itDir;
1708             } else {
1709                 itPrev = itDir;
1710                 *itDir = saAt(*bit - occ, index) | index.LEAF;    ++itDir;
1711             }
1712         }
1713 
1714         // mark the last child
1715         if (itPrev != itDirEnd)
1716             *itPrev |= index.LAST_CHILD;
1717     }
1718 
1719     // store buckets into directory
1720     // MODIFIED VERSION: storing SA links and topology links in Dir
1721     template <typename TText, typename TSpec, typename TSize>
1722     inline void
1723     _storeWotdChildren(
1724         Index<TText, IndexWotd<TSpec> > &index,
1725         TSize dirOfs,
1726         TSize lcp)
1727     {
1728         typedef Index<TText, IndexWotd<TSpec> >            TIndex;
1729         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1730         typedef typename Iterator<TDir, Standard>::Type        TDirIterator;
1731         typedef typename Size<TDir>::Type                    TDirSize;
1732         typedef typename TIndex::TCounter                    TCounter;
1733         typedef typename Iterator<TCounter, Standard>::Type    TCntIterator;
1734 
1735         typedef typename Value<TCounter>::Type                TValue;
1736 
1737         TDirIterator itDirBegin = begin(indexDir(index), Standard()) + dirOfs;
1738         TDirIterator itDirEnd = end(indexDir(index), Standard());
1739         TDirIterator itDir = itDirBegin;
1740         TDirIterator itPrev = itDirEnd;
1741 
1742         TCntIterator it = begin(index.tempOcc, Standard());
1743         TCntIterator bit = begin(index.tempBound, Standard());
1744         TCntIterator itEnd = end(index.tempOcc, Standard());
1745 
1746         TValue occ;
1747         if (index.sentinelOcc != 0)
1748         {
1749             if (index.sentinelOcc > 1 && index.interSentinelNodes)    // occurs on multiseqs
1750             {
1751                 itPrev = itDir;
1752                 *itDir = index.sentinelBound - index.sentinelOcc;    ++itDir;
1753                 *itDir = index.sentinelBound | index.UNEVALUATED;    ++itDir;
1754             } else
1755                 for (TDirSize d = index.sentinelBound - index.sentinelOcc; d != index.sentinelBound; ++d)
1756                 {
1757                     itPrev = itDir;
1758                     *itDir = d | index.LEAF;                        ++itDir;
1759                 }
1760         }
1761         for (; it != itEnd; ++it, ++bit)
1762         {
1763             if ((occ = *it) == 0) continue;
1764             if (occ > 1) {
1765                 itPrev = itDir;
1766                 *itDir = *bit - occ;                 ++itDir;
1767                 *itDir = *bit | index.UNEVALUATED;    ++itDir;
1768             } else {
1769                 itPrev = itDir;
1770                 *itDir = (*bit - occ) | index.LEAF;    ++itDir;
1771             }
1772         }
1773 
1774         // first child gets the mutual lcp value of the children (== parent repLength)
1775         *itDirBegin = ((*itDirBegin) & ~index.BITMASK0) | lcp;
1776 
1777         // mark the last child
1778         if (itPrev != itDirEnd)
1779             *itPrev |= index.LAST_CHILD;
1780     }
1781 
1782 
1783     template < typename TText, typename TSpec >
1784     inline typename Size< Index<TText, IndexWotd<WotdOriginal> > >::Type
1785     _wotdEvaluate(Iter< Index<TText, IndexWotd<WotdOriginal> >, VSTree<TSpec> > const &it)
1786     {
1787         typedef Index<TText, IndexWotd<WotdOriginal> >    TIndex;
1788         typedef typename Size<TIndex>::Type                TSize;
1789 
1790         TIndex &index = const_cast<TIndex&>(container(it));
1791         TSize pos = value(it).node;
1792         TSize w1 = dirAt(pos + 1, index);
1793 
1794         // test for evaluation
1795         if (w1 & index.UNEVALUATED)
1796         {
1797             TSize w0 = dirAt(pos, index);
1798             TSize lp = saAt(w0 & index.BITMASK0, index);
1799             TSize dst = length(indexDir(index));
1800 
1801             TSize size = _sortWotdBucket(
1802                 index,
1803                 w0 & index.BITMASK0,
1804                 w1 & index.BITMASK1,
1805                 parentEdgeLength(it));
1806 
1807             resize(indexDir(index), dst + size, Generous());
1808             _storeWotdChildren(index, dst);
1809 
1810             // mark nodes with solely empty child edges
1811             w1 = dst;
1812             if (index.sentinelOcc > 0)
1813             {
1814                 TSize sentinelSize = index.sentinelOcc;
1815                 if (index.interSentinelNodes && sentinelSize > 2)
1816                     sentinelSize = 2;
1817                 if (size == sentinelSize) w1 |= index.SENTINELS;
1818             }
1819 
1820             assert(!(index.sentinelOcc == 1 && size == 1));
1821 
1822             dirAt(pos, index)     = (w0 & ~index.BITMASK0) | lp;
1823             dirAt(pos + 1, index) = w1;
1824         }
1825 
1826         return w1;
1827     }
1828 
1829     template < typename TText, typename TIndexSpec, typename TSpec >
1830     inline typename Size< Index<TText, IndexWotd<TIndexSpec> > >::Type
1831     _wotdEvaluate(Iter< Index<TText, IndexWotd<TIndexSpec> >, VSTree<TSpec> > const &it)
1832     {
1833         typedef Index<TText, IndexWotd<TIndexSpec> >    TIndex;
1834         typedef typename Size<TIndex>::Type                TSize;
1835 
1836         TIndex &index = const_cast<TIndex&>(container(it));
1837         TSize pos = value(it).node;
1838         TSize w1 = dirAt(pos + 1, index);
1839 
1840         // test for evaluation
1841         if (w1 & index.UNEVALUATED)
1842         {
1843             TSize dst = length(indexDir(index));
1844 
1845             TSize size = _sortWotdBucket(
1846                 index,
1847                 value(it).range.i1,
1848                 w1 & index.BITMASK1,
1849                 repLength(it));
1850 /*
1851             if (globalDumpFlag)
1852             {
1853                 std::cerr << '"' << representative(it) << '"' << std::endl;
1854                 for (int i=0;i<length(getOccurrences(it));++i)
1855                     std::cerr << getOccurrences(it)[i]<<'\t'<<suffix(indexText(index),getOccurrences(it)[i])<<std::endl;
1856 //                _dumpFreq(index);
1857             }
1858 */
1859             resize(indexDir(index), dst + size, Generous());
1860             _storeWotdChildren(index, dst, repLength(it));
1861 
1862             // mark nodes with solely empty child edges
1863             w1 = dst;
1864             if (index.sentinelOcc > 0)
1865             {
1866                 TSize sentinelSize = index.sentinelOcc;
1867                 if (index.interSentinelNodes && sentinelSize > 2)
1868                     sentinelSize = 2;
1869                 if (size == sentinelSize) w1 |= index.SENTINELS;
1870             }
1871 
1872             dirAt(pos + 1, index) = w1;
1873         }
1874 
1875         return w1;
1876     }
1877 
1878 
1879     template <typename TText, typename TSpec>
1880     inline void
1881     _dump(Index<TText, IndexWotd<TSpec> > &index)
1882     {
1883         typedef Index<TText, IndexWotd<TSpec> >            TIndex;
1884         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1885         typedef typename Value<TDir>::Type                    TDirValue;
1886 
1887         std::cout << "  Dir (wotd)" << std::endl;
1888         for(unsigned i=0; i < length(indexDir(index)); ++i) {
1889             TDirValue d = indexDir(index)[i];
1890             std::cout << i << ":  " << (d & index.BITMASK0);
1891             if (d & index.LEAF)            std::cout << "  (Leaf/Uneval)";
1892             if (d & index.LAST_CHILD)    std::cout << "  (LastChild/SENTINELS)";
1893             std::cout << std::endl;
1894         }
1895 
1896         std::cout << std::endl << "  SA" << std::endl;
1897         for(unsigned i=0; i < length(indexSA(index)); ++i)
1898             std::cout << i << ":  " << indexSA(index)[i] << "  " << suffix(indexText(index), indexSA(index)[i]) << std::endl;
1899 
1900         std::cout << std::endl;
1901 
1902     }
1903 
1904 //////////////////////////////////////////////////////////////////////////////
1905 // _goDownChar
1906 
1907     template < typename TText, class TSpec, typename TValue >
1908     inline bool _goDownChar(
1909         Iter<Index<TText, IndexWotd<WotdOriginal> >, VSTree< TopDown<TSpec> > > &it,
1910         TValue c)
1911     {
1912         typedef Index<TText, IndexWotd<TSpec> >    TIndex;
1913         typedef typename Value<TIndex>::Type        TIndexValue;
1914 
1915         bool sorted = false;
1916         if (!goDown(it)) return false;
1917         do {
1918             if (parentEdgeLength(it) != 0) {
1919                 TIndexValue edgeChar = parentEdgeLabel(it)[0];
1920                 if (edgeChar == c) return true;        // the edge is found
1921                 if (sorted && edgeChar > c) break;    // too far (except the first one,
1922             }                                        // child edges are sorted)
1923             sorted = true;
1924         } while (goRight(it));
1925         _goUp(it);
1926         return false;
1927     }
1928 
1929     template < typename TText, class TIndexSpec, class TSpec, typename TValue >
1930     inline bool _goDownChar(
1931         Iter<Index<TText, IndexWotd<TIndexSpec> >, VSTree< TopDown<TSpec> > > &it,
1932         TValue c)
1933     {
1934         typedef Index<TText, IndexWotd<TSpec> >    TIndex;
1935         typedef typename Value<TIndex>::Type        TIndexValue;
1936 
1937         if (!goDown(it)) return false;
1938         do {
1939             if (parentEdgeLength(it) != 0) {
1940                 TIndexValue edgeChar = parentEdgeLabel(it)[0];
1941                 if (edgeChar == c) return true;    // the edge is found
1942                 if (edgeChar > c) break;        // too far (child edges are sorted)
1943             }
1944         } while (goRight(it));
1945         _goUp(it);
1946         return false;
1947     }
1948 
1949 /*
1950     template < typename TText, typename TSpec, typename TValue >
1951     inline bool
1952     _getNodeByChar(
1953         Iter< Index<TText, IndexWotd<TSpec> >, VSTree<TSpec> > const &it,
1954         TValue c,
1955         typename VertexDescriptor< Index<TText, IndexWotd<TSpec> > >::Type &childDesc)
1956     {
1957         typedef Index<TText, IndexWotd<TSpec> >            TIndex;
1958         typedef typename Fibre<TIndex, WotdDir>::Type        TDir;
1959         typedef typename Fibre<TIndex, WotdSA>::Type        TSA;
1960 
1961         typedef typename Value<TSA>::Type                    TSAValue;
1962         typedef typename Value<TDir>::Type                    TDirValue;
1963 
1964         typename Size<TIndex>::Type len = length(index);
1965         typename VertexDescriptor<TIndex>::Type    desc;
1966 
1967         TSAValue pos = _firstSuffixOfBucket(index, value(it).node);
1968         while (pos == len || value < (c = textAt(index, pos + value.i2))) {
1969             value.node += (dirAt(value.node, index) & index.LEAF)? 1: 2;
1970             pos = _firstSuffixOfBucket(index, value.node);
1971         }
1972         assert(pos != len);
1973 
1974         return c == value;
1975     }
1976 */
1977 
1978 //////////////////////////////////////////////////////////////////////////////
1979 // interface for automatic index creation
1980 
1981     template <typename TText, typename TSpec>
1982     inline void _wotdCreateFirstLevel(Index<TText, IndexWotd<TSpec> > &index)
1983     {
1984         typedef Index<TText, IndexWotd<TSpec> > TIndex;
1985         typedef typename Value<TIndex>::Type    TValue;
1986         typedef typename Size<TIndex>::Type     TSize;
1987 
1988         resize(index.tempOcc, ValueSize<TValue>::VALUE + 1, Exact());
1989         resize(index.tempBound, ValueSize<TValue>::VALUE + 1, Exact());
1990 
1991         TSize size;
1992         if (empty(indexSA(index)))
1993         {
1994             resize(indexSA(index), length(indexRawText(index)), Exact());
1995             size = _sortFirstWotdBucket(index);
1996         } else
1997         {
1998             size = _sortWotdBucket(index, 0, length(indexSA(index)), 0);
1999         }
2000 
2001         if (size > 0)
2002         {
2003             resize(indexDir(index), size + 2, Generous());
2004             _storeWotdChildren(index, 2, 0);
2005 
2006             // mark nodes with solely empty child edges
2007             TSize w1 = 2;
2008             if (index.sentinelOcc > 0)
2009             {
2010                 TSize sentinelSize = index.sentinelOcc;
2011                 if (index.interSentinelNodes && sentinelSize > 2)
2012                     sentinelSize = 2;
2013                 if (size == sentinelSize) w1 |= index.SENTINELS;
2014             }
2015 
2016 
2017             dirAt(0, index) = 0 | index.LAST_CHILD;
2018             dirAt(1, index) = w1;
2019 
2020         } else {
2021             resize(indexDir(index), 1);
2022             dirAt(0, index) = 0 | index.LAST_CHILD | index.LEAF;
2023         }
2024     }
2025 
2026     template <typename TText, typename TSpec>
2027     inline bool indexCreate(Index<TText, IndexWotd<TSpec> > &index, WotdDir const, Default const)
2028     {
2029         _wotdCreateFirstLevel(index);
2030         return true;
2031     }
2032 
2033     template <typename TText, typename TSpec>
2034     inline bool indexCreate(Index<TText, IndexWotd<TSpec> > &index, WotdDir const, Trie const)
2035     {
2036         resize(indexSA(index), length(indexText(index)), Exact());
2037         fillSuffixArray(indexSA(index), indexText(index), Trie());
2038         _wotdCreateFirstLevel(index);
2039         return true;
2040     }
2041 
2042 
2043 //////////////////////////////////////////////////////////////////////////////
2044 // clear
2045 
2046     template < typename TText, typename TSpec >
2047     inline void clear(Index<TText, IndexWotd<TSpec> > &index)
2048     {
2049         clear(getFibre(index, WotdSA()));
2050         clear(getFibre(index, WotdDir()));
2051     }
2052 
2053 //////////////////////////////////////////////////////////////////////////////
2054 // open
2055 
2056     template < typename TText, typename TSpec >
2057     inline bool open(
2058         Index< TText, IndexWotd<TSpec> > &index,
2059         const char *fileName,
2060         int openMode)
2061     {
2062         typedef Index<TText, IndexWotd<TSpec> > TIndex;
2063         typedef typename Value<TIndex>::Type    TValue;
2064 
2065         String<char> name;
2066 
2067         name = fileName;    append(name, ".txt");
2068         if ((!open(getFibre(index, WotdText()), toCString(name), openMode)) &&
2069             (!open(getFibre(index, WotdText()), fileName, openMode))) return false;
2070 
2071         name = fileName;    append(name, ".sa");
2072         if (!open(getFibre(index, WotdSA()), toCString(name), openMode)) return false;
2073         name = fileName;    append(name, ".dir");
2074 
2075         if (!open(getFibre(index, WotdDir()), toCString(name), openMode)) return false;
2076 
2077         if (!empty(getFibre(index, WotdDir())))
2078         {
2079             resize(index.tempOcc, ValueSize<TValue>::VALUE + 1);
2080             resize(index.tempBound, ValueSize<TValue>::VALUE + 1);
2081         }
2082 
2083         return true;
2084     }
2085     template < typename TText, typename TSpec >
2086     inline bool open(
2087         Index< TText, IndexWotd<TSpec> > &index,
2088         const char *fileName)
2089     {
2090         return open(index, fileName, OPEN_RDONLY);
2091     }
2092 
2093 
2094 //////////////////////////////////////////////////////////////////////////////
2095 // save
2096 
2097     template < typename TText, typename TSpec >
2098     inline bool save(
2099         Index< TText, IndexWotd<TSpec> > &index,
2100         const char *fileName,
2101         int openMode)
2102     {
2103         String<char> name;
2104 
2105         name = fileName;    append(name, ".txt");
2106         if ((!save(getFibre(index, WotdText()), toCString(name), openMode)) &&
2107             (!save(getFibre(index, WotdText()), fileName, openMode))) return false;
2108 
2109         name = fileName;    append(name, ".sa");
2110         if (!save(getFibre(index, WotdSA()), toCString(name), openMode)) return false;
2111         name = fileName;    append(name, ".dir");
2112 
2113         if (!save(getFibre(index, WotdDir()), toCString(name), openMode)) return false;
2114 
2115         return true;
2116     }
2117     template < typename TText, typename TSpec >
2118     inline bool save(
2119         Index< TText, IndexWotd<TSpec> > &index,
2120         const char *fileName)
2121     {
2122         return save(index, fileName, OPEN_WRONLY | OPEN_CREATE);
2123     }
2124 }
2125 
2126 #endif //#ifndef SEQAN_HEADER_...
2127