1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_INDEX_QGRAM_H
36 #define SEQAN_HEADER_INDEX_QGRAM_H
37 
38 namespace SEQAN_NAMESPACE_MAIN
39 {
40 
41 
42 //////////////////////////////////////////////////////////////////////////////
43 // q-gram index fibres
44 
45 /**
46 .Tag.QGram Index Fibres
47 ..summary:Tag to select a specific fibre (e.g. table, object, ...) of a @Spec.IndexQGram.q-gram Index@.
48 ..remarks:These tags can be used to get @Metafunction.Fibre.Fibres@ of a @Spec.IndexQGram.q-gram Index@.
49 ..cat:Index
50 
51 ..tag.QGramText:The original text the index should be based on.
52 
53 ..tag.QGram_RawText:The concatenation of all text sequences.
54 ...remarks:$QGramText$ and $QGram_RawText$ fibres are equal by default.
55 They differ if the index text is a set of strings. Then, raw text is the concatenation of all strings in this set.
56 
57 ..tag.QGramSA:The suffix array.
58 ...remarks:Contains all occurrences of q-grams, s.t. the occurrences of a single q-gram are stored in a contiguous block (q-gram bucket).
59 q-grams exceeding the end of the text are ignored.
60 The beginning of each bucket can be determined by the q-gram directory ($QGramDir$, see below).
61 ...remarks:It corresponds to a suffix array which is sorted by the first q-gram.
62 ...remarks:@Metafunction.Fibre@ returns a @Class.String@ over the alphabet of the @Metafunction.SAValue@ of $TIndex$.
63 
64 ..tag.QGramDir:The directory/hash table.
65 ...remarks:The directory contains for every possible q-gram hash value the start index of the q-gram bucket.
66 A q-gram bucket is a contiguous interval in the suffix array ($QGramSA$, see above).
67 Each suffix in this interval begins with the same q-gram.
68 The end index is the start index of the next bucket.
69 ...remarks:@Metafunction.Fibre@ returns a @Class.String@ over the alphabet of a size type.
70 
71 ..tag.QGramBucketMap:Maps q-gram hashes to buckets.
72 This fibre is used by the @Spec.OpenAddressing@ index and stores all parameters of the open addressing hash function and hash value occupancy in the QGramDir fibre.
73 In contrast to @Spec.OpenAddressing@, @Spec.IndexQGram@ uses a trivial 1-to-1 mapping from q-gram hash values to buckets.
74 For that index the fibre is of type @Tag.Nothing@.
75 
76 ..tag.QGramCounts:The counts array.
77 ...remarks:Contains the numbers of occurrences per sequence of each q-gram, s.t. the numbers of the same q-gram are stored in a contiguous block (q-gram count bucket).
78 A bucket contains entries (seqNo,count) of sequences with at least one q-gram occurrence. q-grams exceeding the end of the text are ignored.
79 The beginning of each count bucket can be determined by the q-gram counts directory ($QGramCountsDir$, see below).
80 ...remarks:@Metafunction.Fibre@ returns a @Class.String@ over the alphabet of the @Metafunction.SAValue@ of $TIndex$.
81 
82 ..tag.QGramCountsDir:The counts directory.
83 ...remarks:The counts directory contains for every possible q-gram hash value the start index of the q-gram count bucket.
84 A q-gram count bucket is a contiguous interval in the counts array ($QGramCounts$, see above).
85 The end index is the start index of the next bucket.
86 ...remarks:@Metafunction.Fibre@ returns a @Class.String@ over the alphabet of a size type.
87 
88 ..tag.QGramShape:The shape the index is based on.
89 ...remarks:The q-gram index needs an underlying @Class.Shape@. This shape can be gapped or ungapped.
90 The number of '1's (relevant positions) in the shape determines $q$ and the size of the directory table.
91 ...remarks:Dynamic shapes (@Spec.SimpleShape@, @Spec.GenericShape@, ...) must be initialized before the index can be used.
92 
93 ..tag.QGramSADir:The union of suffix array and directory.
94 ...remarks:In most applications a q-gram index consisting of both of these table is required.
95 To efficiently create them at once use this tag for @Function.indexRequire@ or @Function.indexCreate@.
96 
97 ..see:Metafunction.Fibre
98 ..see:Function.getFibre
99 ..see:Spec.IndexQGram
100 ..include:seqan/index.h
101 */
102 
103 	struct FibreDir_;			// directory/hash table, contains start indices of buckets
104 	struct FibreSADir_;		// identifies algorithm to construct both SA and directory at once
105 	struct Fibre_Shape_;		// underlying shape
106 	struct FibreCounts_;		// counts each q-gram
107 	struct FibreCountsDir_;	// directory for counts buckets
108 	struct FibreBucketMap_;	// stores a q-gram hash value for each directory entry (-1 if empty)
109 
110 	typedef Tag<FibreDir_> const		FibreDir;
111 	typedef Tag<FibreSADir_> const		FibreSADir;
112 	typedef Tag<Fibre_Shape_> const		FibreShape;
113 	typedef Tag<FibreCounts_> const	FibreCounts;
114 	typedef Tag<FibreCountsDir_> const	FibreCountsDir;
115 	typedef Tag<FibreBucketMap_> const	FibreBucketMap;
116 
117 //////////////////////////////////////////////////////////////////////////////
118 
119 	typedef FibreText		QGramText;
120 	typedef FibreRawText	QGram_RawText;
121 	typedef FibreSA		QGramSA;
122 	typedef FibreRawSA		QGramRawSA;
123 	typedef FibreDir		QGramDir;
124 	typedef FibreSADir		QGramSADir;
125 	typedef FibreShape		QGramShape;
126 	typedef FibreCounts	QGramCounts;
127 	typedef FibreCountsDir	QGramCountsDir;
128 	typedef FibreBucketMap	QGramBucketMap;
129 
130 //////////////////////////////////////////////////////////////////////////////
131 // q-gram index
132 
133 /**
134 .Spec.IndexQGram:
135 ..summary:An index based on an array of sorted q-grams.
136 ..cat:Index
137 ..general:Class.Index
138 ..signature:Index<TText, IndexQGram<TShapeSpec[, TSpec]> >
139 ..param.TText:The text type.
140 ...type:Class.String
141 ..param.TShapeSpec:The @Class.Shape@ specialization type.
142 ...note:This can be either a $TSpec$ argument (e.g. $SimpleShape$) or a complete @Class.Shape@ class (e.g. Shape<Dna, SimpleShape>).
143 ..param.TSpec:The specializing type.
144 ...default:Default
145 ...type:Spec.OpenAddressing
146 ..remarks:The fibres (see @Class.Index@ and @Metafunction.Fibre@) of this index are a suffix array sorted by the first q characters (see @Tag.QGram Index Fibres.QGramSA@) and a q-gram directory (see @Tag.QGram Index Fibres.QGramDir@).
147 ..include:seqan/index.h
148 */
149 
150 	template < typename TShapeSpec, typename TSpec = Default >
151 	struct IndexQGram {};
152 
153 	// use the index value type as shape value type
154 	template < typename TObject, typename TShapeSpec, typename TSpec >
155 	struct Fibre< Index<TObject, IndexQGram<TShapeSpec, TSpec> >, FibreShape>
156 	{
157 		typedef Index< TObject, IndexQGram<TShapeSpec, TSpec> >	TIndex;
158 		typedef Shape< typename Value<TIndex>::Type, TShapeSpec >	Type;
159 	};
160 
161 	// allow different value types for the shape
162 	template < typename TObject, typename TShapeValue, typename TShapeSpec, typename TSpec >
163 	struct Fibre< Index<TObject, IndexQGram<Shape<TShapeValue, TShapeSpec>, TSpec> >, FibreShape>
164 	{
165 		typedef Shape<TShapeValue, TShapeSpec>	Type;
166 	};
167 
168 	template < typename TObject, typename TShapeSpec, typename TSpec >
169 	struct Fibre< Index<TObject, IndexQGram<TShapeSpec, TSpec> >, FibreBucketMap>
170 	{
171 		typedef Nothing Type;
172 	};
173 
174 #ifdef PLATFORM_WINDOWS_VS
175 #pragma warning( push )
176 // Disable warning C4521 locally (multiple copy constructors).
177 #pragma warning( disable: 4521 )
178 // Disable warning C4522 locally (multiple assignment operators).
179 #pragma warning( disable: 4522 )
180 #endif  // PLATFORM_WINDOWS_VS
181 
182 	template < typename TObject, typename TShapeSpec, typename TSpec >
183 	class Index<TObject, IndexQGram<TShapeSpec, TSpec> > {
184 	public:
185 		typedef typename Fibre<Index, QGramText>::Type			TText;
186 		typedef typename Fibre<Index, QGramSA>::Type			TSA;
187 		typedef typename Fibre<Index, QGramDir>::Type			TDir;
188 		typedef typename Fibre<Index, QGramCounts>::Type		TCounts;
189 		typedef typename Fibre<Index, QGramCountsDir>::Type	TCountsDir;
190 		typedef typename Fibre<Index, QGramShape>::Type		TShape;
191 		typedef typename Fibre<Index, QGramBucketMap>::Type	TBucketMap;
192 		typedef typename Cargo<Index>::Type						TCargo;
193 		typedef typename Size<Index>::Type						TSize;
194 
195 		Holder<TText>	text;		// underlying text
196 		TSA				sa;			// suffix array sorted by the first q chars
197 		TDir			dir;		// bucket directory
198 		TCounts			counts;		// counts each q-gram per sequence
199 		TCountsDir		countsDir;	// directory for count buckets
200 		TShape			shape;		// underlying shape
201 		TCargo			cargo;		// user-defined cargo
202 		TBucketMap		bucketMap;	// bucketMap table (used by open-addressing index)
203 		TSize			stepSize;	// store every <stepSize>'th q-gram in the index
204 
205 		Index():
206 			stepSize(1) {}
207 
208 		Index(Index &other):
209 			text(other.text),
210 			sa(other.sa),
211 			dir(other.dir),
212 			counts(other.counts),
213 			countsDir(other.countsDir),
214 			shape(other.shape),
215 			cargo(other.cargo),
216 			stepSize(1) {}
217 
218 		Index(Index const &other):
219 			text(other.text),
220 			sa(other.sa),
221 			dir(other.dir),
222 			counts(other.counts),
223 			countsDir(other.countsDir),
224 			shape(other.shape),
225 			cargo(other.cargo),
226 			stepSize(1) {}
227 
228 		template <typename TText_>
229 		Index(TText_ &_text):
230 			text(_text),
231 			stepSize(1) {}
232 
233 		template <typename TText_>
234 		Index(TText_ const &_text):
235 			text(_text),
236 			stepSize(1) {}
237 
238 		template <typename TText_, typename TShape_>
239 		Index(TText_ &_text, TShape_ const &_shape):
240 			text(_text),
241 			shape(_shape),
242 			stepSize(1) {}
243 
244 		template <typename TText_, typename TShape_>
245 		Index(TText_ const &_text, TShape_ const &_shape):
246 			text(_text),
247 			shape(_shape),
248 			stepSize(1) {}
249 	};
250 
251 #ifdef PLATFORM_WINDOWS_VS
252 // Reset warning state to previous values for C4521, C4522.
253 #pragma warning( pop )
254 #endif  // PLATFORM_WINDOWS_VS
255 
256     template < typename TText, typename TShapeSpec, typename TSpec >
257     struct Value< Index<TText, IndexQGram<TShapeSpec, TSpec> > > {
258 		typedef typename Value< typename Fibre< Index<TText, IndexQGram<TShapeSpec, TSpec> >, QGram_RawText >::Type >::Type Type;
259     };
260 
261 	template < typename TText, typename TShapeSpec, typename TSpec >
262     struct Size< Index<TText, IndexQGram<TShapeSpec, TSpec> > > {
263 		typedef typename Size< typename Fibre< Index<TText, IndexQGram<TShapeSpec, TSpec> >, QGram_RawText >::Type >::Type Type;
264     };
265 
266 
267 //////////////////////////////////////////////////////////////////////////////
268 // default fibre creators
269 
270 	template < typename TText, typename TShapeSpec, typename TSpec >
271 	struct DefaultIndexCreator<Index<TText, IndexQGram<TShapeSpec, TSpec> >, FibreSA> {
272         typedef Default Type;
273     };
274 
275 //////////////////////////////////////////////////////////////////////////////
276 // counts array type
277 
278 	template < typename TText, typename TSpec >
279 	struct Fibre< Index<TText, TSpec>, FibreCounts> {
280 		typedef String<
281 				Pair<
282 					typename Size< TText >::Type,
283 					typename Size< Index<TText, TSpec> >::Type
284 				>,
285 				typename DefaultIndexStringSpec< Index<TText, TSpec> >::Type
286 		> Type;
287 	};
288 
289 
290 //////////////////////////////////////////////////////////////////////////////
291 
292 	template <typename TText, typename TSpec>
293 	inline typename Fibre<Index<TText, TSpec>, FibreDir>::Type &
294 	getFibre(Index<TText, TSpec> &index, FibreDir) {
295 		return index.dir;
296 	}
297 	template <typename TText, typename TSpec>
298 	inline typename Fibre<Index<TText, TSpec> const, FibreDir>::Type &
299 	getFibre(Index<TText, TSpec> const &index, FibreDir) {
300 		return index.dir;
301 	}
302 
303 	template <typename TText, typename TSpec>
304 	inline typename Fibre<Index<TText, TSpec>, FibreCounts>::Type &
305 	getFibre(Index<TText, TSpec> &index, FibreCounts) {
306 		return index.counts;
307 	}
308 	template <typename TText, typename TSpec>
309 	inline typename Fibre<Index<TText, TSpec> const, FibreCounts>::Type &
310 	getFibre(Index<TText, TSpec> const &index, FibreCounts) {
311 		return index.counts;
312 	}
313 
314 	template <typename TText, typename TSpec>
315 	inline typename Fibre<Index<TText, TSpec>, FibreCountsDir>::Type &
316 	getFibre(Index<TText, TSpec> &index, FibreCountsDir) {
317 		return index.countsDir;
318 	}
319 	template <typename TText, typename TSpec>
320 	inline typename Fibre<Index<TText, TSpec> const, FibreCountsDir>::Type &
321 	getFibre(Index<TText, TSpec> const &index, FibreCountsDir) {
322 		return index.countsDir;
323 	}
324 
325 	template <typename TText, typename TSpec>
326 	inline typename Fibre<Index<TText, TSpec>, FibreBucketMap>::Type &
327 	getFibre(Index<TText, TSpec> &index, FibreBucketMap) {
328 		return index.bucketMap;
329 	}
330 	template <typename TText, typename TSpec>
331 	inline typename Fibre<Index<TText, TSpec> const, FibreBucketMap>::Type &
332 	getFibre(Index<TText, TSpec> const &index, FibreBucketMap) {
333 		return index.bucketMap;
334 	}
335 
336 	template <typename TText, typename TSpec>
337 	inline typename Fibre<Index<TText, TSpec>, FibreShape>::Type &
338 	getFibre(Index<TText, TSpec> &index, FibreShape) {
339 		return index.shape;
340 	}
341 	template <typename TText, typename TSpec>
342 	inline typename Fibre<Index<TText, TSpec> const, FibreShape>::Type &
343 	getFibre(Index<TText, TSpec> const &index, FibreShape) {
344 		return index.shape;
345 	}
346 
347 //////////////////////////////////////////////////////////////////////////////
348 /**
349 .Function.indexDir
350 ..summary:Shortcut for $getFibre(.., QGramDir)$.
351 ..cat:Index
352 ..signature:indexDir(index)
353 ..param.index:The @Class.Index@ object holding the fibre.
354 ...type:Spec.IndexQGram
355 ..returns:A reference to the @Tag.QGram Index Fibres.QGramDir@ fibre (q-gram directory).
356 ..include:seqan/index.h
357 */
358 
359 	template <typename TText, typename TSpec>
360 	inline typename Fibre<Index<TText, TSpec>, FibreDir>::Type &
361 	indexDir(Index<TText, TSpec> &index) {
362 		return getFibre(index, FibreDir());
363 	}
364 	template <typename TText, typename TSpec>
365 	inline typename Fibre<Index<TText, TSpec> const, FibreDir>::Type &
366 	indexDir(Index<TText, TSpec> const &index) {
367 		return getFibre(index, FibreDir());
368 	}
369 
370 //////////////////////////////////////////////////////////////////////////////
371 /**
372 .Function.dirAt
373 ..summary:Shortcut for $value(indexDir(..), ..)$.
374 ..cat:Index
375 ..signature:dirAt(position, index)
376 ..param.position:A position in the array on which the value should be accessed.
377 ..param.index:The @Class.Index@ object holding the fibre.
378 ...type:Spec.IndexQGram
379 ..returns:A reference or proxy to the value.
380 ..include:seqan/index.h
381 */
382 
383 	template <typename TPos, typename TIndex>
384 	inline typename Reference<typename Fibre<TIndex, FibreDir>::Type>::Type
385 	dirAt(TPos i, TIndex &index) {
386 		return value(getFibre(index, FibreDir()), i);
387 	}
388 	template <typename TPos, typename TIndex>
389 	inline typename Reference<typename Fibre<TIndex const, FibreDir>::Type>::Type
390 	dirAt(TPos i, TIndex const &index) {
391 		return value(getFibre(index, FibreDir()), i);
392 	}
393 
394 
395 //////////////////////////////////////////////////////////////////////////////
396 /**
397 .Function.indexCounts
398 ..summary:Shortcut for $getFibre(.., QGramCounts)$.
399 ..cat:Index
400 ..signature:indexCounts(index)
401 ..param.index:The @Class.Index@ object holding the fibre.
402 ...type:Spec.IndexQGram
403 ..returns:A reference to the @Tag.QGram Index Fibres.QGramCounts@ fibre (counts array).
404 ..include:seqan/index.h
405 */
406 
407 	template <typename TText, typename TSpec>
408 	inline typename Fibre<Index<TText, TSpec>, FibreCounts>::Type &
409 	indexCounts(Index<TText, TSpec> &index) {
410 		return getFibre(index, FibreCounts());
411 	}
412 	template <typename TText, typename TSpec>
413 	inline typename Fibre<Index<TText, TSpec> const, FibreCounts>::Type &
414 	indexCounts(Index<TText, TSpec> const &index) {
415 		return getFibre(index, FibreCounts());
416 	}
417 
418 //////////////////////////////////////////////////////////////////////////////
419 /**
420 .Function.indexCountsDir
421 ..summary:Shortcut for $getFibre(.., QGramCountsDir)$.
422 ..cat:Index
423 ..signature:indexCountsDir(index)
424 ..param.index:The @Class.Index@ object holding the fibre.
425 ...type:Spec.IndexQGram
426 ..returns:A reference to the @Tag.QGram Index Fibres.QGramCountsDir@ fibre (counts directory).
427 ..include:seqan/index.h
428 */
429 
430 	template <typename TText, typename TSpec>
431 	inline typename Fibre<Index<TText, TSpec>, FibreCountsDir>::Type &
432 	indexCountsDir(Index<TText, TSpec> &index) {
433 		return getFibre(index, FibreCountsDir());
434 	}
435 	template <typename TText, typename TSpec>
436 	inline typename Fibre<Index<TText, TSpec> const, FibreCountsDir>::Type &
437 	indexCountsDir(Index<TText, TSpec> const &index) {
438 		return getFibre(index, FibreCountsDir());
439 	}
440 
441 //////////////////////////////////////////////////////////////////////////////
442 /**
443 .Function.indexBucketMap
444 ..summary:Shortcut for $getFibre(.., QGramBucketMap)$.
445 ..cat:Index
446 ..signature:indexBucketMap(index)
447 ..param.index:The @Class.Index@ object holding the fibre.
448 ...type:Spec.IndexQGram
449 ..returns:A reference to the @Tag.QGram Index Fibres.QGramBucketMap@ fibre (maps q-gram hashes to buckets).
450 ..include:seqan/index.h
451 */
452 
453 	template <typename TText, typename TSpec>
454 	inline typename Fibre<Index<TText, TSpec>, FibreBucketMap>::Type &
455 	indexBucketMap(Index<TText, TSpec> &index) {
456 		return getFibre(index, FibreBucketMap());
457 	}
458 	template <typename TText, typename TSpec>
459 	inline typename Fibre<Index<TText, TSpec> const, FibreBucketMap>::Type &
460 	indexBucketMap(Index<TText, TSpec> const &index) {
461 		return getFibre(index, FibreBucketMap());
462 	}
463 
464 //////////////////////////////////////////////////////////////////////////////
465 /**
466 .Function.indexShape
467 ..summary:Shortcut for $getFibre(.., QGramShape)$.
468 ..cat:Index
469 ..signature:indexShape(index)
470 ..param.index:The @Class.Index@ object holding the fibre.
471 ...type:Spec.IndexQGram
472 ..returns:Returns a reference to the @Class.Shape@ object of a q-gram index.
473 Formally, this is a reference to the @Tag.QGram Index Fibres.QGramShape@ fibre.
474 ...type:Class.Shape
475 ..include:seqan/index.h
476 */
477 
478 	template <typename TText, typename TSpec>
479 	inline typename Fibre<Index<TText, TSpec>, FibreShape>::Type &
480 	indexShape(Index<TText, TSpec> &index) {
481 		return getFibre(index, FibreShape());
482 	}
483 	template <typename TText, typename TSpec>
484 	inline typename Fibre<Index<TText, TSpec> const, FibreShape>::Type &
485 	indexShape(Index<TText, TSpec> const &index) {
486 		return getFibre(index, FibreShape());
487 	}
488 
489 //////////////////////////////////////////////////////////////////////////////
490 /**
491 .Function.getStepSize
492 ..summary:Return the q-gram step size used for index creation.
493 ..cat:Index
494 ..signature:getStepSize(index)
495 ..param.index:A q-gram index.
496 ...type:Spec.IndexQGram
497 ..returns:The step size. If $x$ is returned every $x$'th q-gram is stored in the index.
498 ..include:seqan/index.h
499 */
500 
501 	template <typename TText, typename TShapeSpec, typename TSpec>
502 	inline typename Size<TText>::Type
503 	getStepSize(Index<TText, IndexQGram<TShapeSpec, TSpec> > const &index)
504 	{
505 		return (index.stepSize != 0)? index.stepSize: length(indexShape(index));
506 	}
507 
508 //////////////////////////////////////////////////////////////////////////////
509 /**
510 .Function.setStepSize
511 ..summary:Change the q-gram step size used for index creation.
512 ..cat:Index
513 ..signature:setStepSize(index, stepSize)
514 ..param.index:A q-gram index.
515 ...type:Spec.IndexQGram
516 ..param.stepSize:Store every $stepSize$'th q-gram in the index.
517 ..remarks:The default step size of a q-gram index is 1, which corresponds to all overlapping q-grams.
518 To take effect of changing the $stepSize$ the q-gram index should be empty or recreated.
519 ..remarks:A $stepSize$ of 0 corresponds to $stepSize=length(indexShape(index))$, i.e. all non-overlapping q-grams.
520 ..see:Function.getStepSize
521 ..include:seqan/index.h
522 */
523 
524 	template <typename TText, typename TShapeSpec, typename TSpec, typename TSize>
525 	inline void
526 	setStepSize(Index<TText, IndexQGram<TShapeSpec, TSpec> > &index, TSize stepSize)
527 	{
528 		index.stepSize = stepSize;
529 	}
530 
531 //////////////////////////////////////////////////////////////////////////////
532 
533 	template <typename TIndex>
534 	inline __int64 _fullDirLength(TIndex const &index)
535 	{
536 		typedef typename Fibre<TIndex, FibreShape>::Type	TShape;
537 		typedef typename Host<TShape>::Type					TTextValue;
538 		return _intPow((__int64)ValueSize<TTextValue>::VALUE, weight(indexShape(index))) + 1;
539 	}
540 
541 	template <typename TIndex>
542 	inline __int64 _fullDir2Length(TIndex const &index)
543 	{
544 		typedef typename Fibre<TIndex, FibreShape>::Type	TShape;
545 		typedef typename Host<TShape>::Type					TTextValue;
546 		return (_intPow(
547 					(__int64)ValueSize<TTextValue>::VALUE,
548 					weight(indexShape(index)) + 1) - 1)
549 				/ ((unsigned)ValueSize<TTextValue>::VALUE - 1) + 1;
550 	}
551 
552 
553     //////////////////////////////////////////////////////////////////////////////
554     // QGramLess
555 	//
556 	// compare two q-grams of a given text (q-grams can be smaller than q)
557     template < typename TSAValue, typename TText >
558 	struct QGramLess_ :
559 		public ::std::binary_function < TSAValue, TSAValue, bool >
560     {
561 		typedef typename Iterator<TText, Standard>::Type TIter;
562 		TIter _begin, _end;
563 		typename Size<TText>::Type _q;
564 
565 		template <typename TSize>
566 		QGramLess_(TText &text, TSize q):
567 			_begin(begin(text, Standard())),
568 			_end(end(text, Standard())),
569 			_q(q) {}
570 
571 		// skip the first <offset> characters
572 		template <typename TSize1, typename TSize2>
573 		QGramLess_(TText &text, TSize1 q, TSize2 offset):
574 			_begin(begin(text, Standard()) + offset),
575 			_end(end(text, Standard())),
576 			_q(q) {}
577 
578 		inline bool operator() (TSAValue const a, TSAValue const b) const
579 		{
580 			if (a == b) return false;
581 			TIter itA = _begin + a;
582 			TIter itB = _begin + b;
583 			if (a <= b) {
584 				TIter itEnd = itB + _q;
585 				if (itEnd > _end)
586 					itEnd = _end;
587 				for(; itB != itEnd; ++itB, ++itA) {
588 					if (lexLess(*itA, *itB)) return true;
589 					if (lexLess(*itB, *itA)) return false;
590 				}
591 				return false;
592 			} else {
593 				TIter itEnd = itA + _q;
594 				if (itEnd > _end)
595 					itEnd = _end;
596 				for(; itA != itEnd; ++itA, ++itB) {
597 					if (lexLess(*itA, *itB)) return true;
598 					if (lexLess(*itB, *itA)) return false;
599 				}
600 				return true;
601 			}
602 		}
603 	};
604 
605 	template < typename TSAValue, typename TString, typename TSpec >
606 	struct QGramLess_<TSAValue, StringSet<TString, TSpec> const> :
607 		public ::std::binary_function < TSAValue, TSAValue, bool >
608     {
609 		typedef typename Iterator<TString, Standard>::Type TIter;
610 		StringSet<TString, TSpec> const &_stringSet;
611 		typename Size<TString>::Type _q;
612 
613 		template <typename TSize>
614 		QGramLess_(StringSet<TString, TSpec> const &text, TSize q):
615 			_stringSet(text),
616 			_q(q) {}
617 
618 		inline bool operator() (TSAValue const &a, TSAValue const &b) const
619 		{
620 			if (a == b) return false;
621 			TIter itA = begin(_stringSet[getValueI1(a)], Standard()) + getValueI2(a);
622 			TIter itB = begin(_stringSet[getValueI1(b)], Standard()) + getValueI2(b);
623 			if (suffixLength(a, _stringSet) > suffixLength(b, _stringSet)) {
624 				TIter _end = end(_stringSet[getValueI1(b)], Standard());
625 				TIter itEnd = itB + _q;
626 				if (itEnd > _end)
627 					itEnd = _end;
628 				for(; itB != itEnd; ++itB, ++itA) {
629 					if (lexLess(*itA, *itB)) return true;
630 					if (lexLess(*itB, *itA)) return false;
631 				}
632 				return false;
633 			} else {
634 				TIter _end = end(_stringSet[getValueI1(a)], Standard());
635 				TIter itEnd = itA + _q;
636 				if (itEnd > _end)
637 					itEnd = _end;
638 				for(; itA != itEnd; ++itA, ++itB) {
639 					if (lexLess(*itA, *itB)) return true;
640 					if (lexLess(*itB, *itA)) return false;
641 				}
642 				return true;
643 			}
644 		}
645 	};
646 
647 
648     //////////////////////////////////////////////////////////////////////////////
649     // QGramLessOffset
650 	//
651 	// compare two q-grams of a given text and skip the first <offset> characters
652     template < typename TSAValue, typename TText >
653 	struct QGramLessOffset_ :
654 		QGramLess_<TSAValue, TText>
655 	{
656 		template <typename TSize1, typename TSize2>
657 		QGramLessOffset_(TText &text, TSize1 q, TSize2 offset):
658 			QGramLess_<TSAValue, TText> (text, q, offset) {}
659 	};
660 
661 	template < typename TSAValue, typename TString, typename TSpec >
662 	struct QGramLessOffset_<TSAValue, StringSet<TString, TSpec> const> :
663 		public ::std::binary_function < TSAValue, TSAValue, bool >
664     {
665 		typedef typename Iterator<TString, Standard>::Type	TIter;
666 		typedef typename Size<TString>::Type				TSize;
667 		StringSet<TString, TSpec> const &_stringSet;
668 		typename Size<TString>::Type _q, _offset;
669 
670 		template <typename TSize1, typename TSize2>
671 		QGramLessOffset_(StringSet<TString, TSpec> const &text, TSize1 q, TSize2 offset):
672 			_stringSet(text),
673 			_q(q),
674 			_offset(offset) {}
675 
676 		inline bool operator() (TSAValue const &a, TSAValue const &b) const
677 		{
678 			if (a == b) return false;
679 			TString const &sA = _stringSet[getValueI1(a)];
680 			TString const &sB = _stringSet[getValueI1(b)];
681 			TIter itA = begin(sA, Standard()) + getValueI2(a) + _offset;
682 			TIter itB = begin(sB, Standard()) + getValueI2(b) + _offset;
683 			TSize restA = length(sA) - getValueI2(a);
684 			TSize restB = length(sB) - getValueI2(b);
685 			if (restA > restB) {
686 				TIter itEnd;
687 				if (restB >= _q)
688 					itEnd = itB + _q;
689 				else
690 					itEnd = end(sB, Standard());
691 				for(; itB != itEnd; ++itB, ++itA) {
692 					if (lexLess(*itA, *itB)) return true;
693 					if (lexLess(*itB, *itA)) return false;
694 				}
695 				return false;
696 			} else {
697 				TIter itEnd;
698 				if (restA >= _q)
699 					itEnd = itA + _q;
700 				else
701 					itEnd = end(sA, Standard());
702 				for(; itA != itEnd; ++itA, ++itB) {
703 					if (lexLess(*itA, *itB)) return true;
704 					if (lexLess(*itB, *itA)) return false;
705 				}
706 				return true;
707 			}
708 		}
709 	};
710 
711 
712     //////////////////////////////////////////////////////////////////////////////
713     // QGramLessNoCheck
714 	//
715 	// compare two q-grams of a given text (no check for q-grams smaller than q)
716     template < typename TSAValue, typename TText >
717 	struct QGramLessNoCheck_ :
718 		public ::std::binary_function < TSAValue, TSAValue, bool >
719     {
720 		typedef typename Iterator<TText, Standard>::Type TIter;
721 		TIter _begin;
722 		typename Size<TText>::Type _q;
723 
724 		template <typename TSize>
725 		QGramLessNoCheck_(TText &text, TSize q):
726 			_begin(begin(text, Standard())),
727 			_q(q) {}
728 
729 		// skip the first <offset> characters
730 		template <typename TSize>
731 		QGramLessNoCheck_(TText &text, TSize q, TSize offset):
732 			_begin(begin(text, Standard()) + offset),
733 			_q(q) {}
734 
735 		inline bool operator() (TSAValue const a, TSAValue const b) const
736 		{
737 			if (a == b) return false;
738 			TIter itA = _begin + a;
739 			TIter itB = _begin + b;
740 			if (a <= b) {
741 				TIter itEnd = itB + _q;
742 				for(; itB != itEnd; ++itB, ++itA) {
743 					if (lexLess(*itA, *itB)) return true;
744 					if (lexLess(*itB, *itA)) return false;
745 				}
746 				return false;
747 			} else {
748 				TIter itEnd = itA + _q;
749 				for(; itA != itEnd; ++itA, ++itB) {
750 					if (lexLess(*itA, *itB)) return true;
751 					if (lexLess(*itB, *itA)) return false;
752 				}
753 				return true;
754 			}
755 		}
756 	};
757 
758     template < typename TSAValue, typename TString, typename TSpec >
759 	struct QGramLessNoCheck_<TSAValue, StringSet<TString, TSpec> const> :
760 		public ::std::binary_function < TSAValue, TSAValue, bool >
761     {
762 		typedef typename Iterator<TString, Standard>::Type TIter;
763 		StringSet<TString, TSpec> const &_stringSet;
764 		typename Size<TString>::Type _q;
765 
766 		template <typename TSize>
767 		QGramLessNoCheck_(StringSet<TString, TSpec> const &text, TSize q):
768 			_stringSet(text),
769 			_q(q) {}
770 
771 		inline bool operator() (TSAValue const &a, TSAValue const &b) const
772 		{
773 			if (a == b) return false;
774 			TIter itA = begin(_stringSet[getValueI1(a)], Standard()) + getValueI2(a);
775 			TIter itB = begin(_stringSet[getValueI1(b)], Standard()) + getValueI2(b);
776 			if (suffixLength(a, _stringSet) > suffixLength(b, _stringSet)) {
777 				TIter itEnd = itB + _q;
778 				for(; itB != itEnd; ++itB, ++itA) {
779 					if (lexLess(*itA, *itB)) return true;
780 					if (lexLess(*itB, *itA)) return false;
781 				}
782 				return false;
783 			} else {
784 				TIter itEnd = itA + _q;
785 				for(; itA != itEnd; ++itA, ++itB) {
786 					if (lexLess(*itA, *itB)) return true;
787 					if (lexLess(*itB, *itA)) return false;
788 				}
789 				return true;
790 			}
791 		}
792 	};
793 
794 
795     //////////////////////////////////////////////////////////////////////////////
796     // QGramLessNoCheckOffset_
797 	//
798 	// compare two q-grams of a given text and skip the first <offset> characters
799     template < typename TSAValue, typename TText >
800 	struct QGramLessNoCheckOffset_: QGramLessNoCheck_<TSAValue, TText>
801 	{
802 		template <typename TSize1, typename TSize2>
803 		QGramLessNoCheckOffset_(TText &text, TSize1 q, TSize2 offset):
804 			QGramLessNoCheck_<TSAValue, TText> (text, q, offset) {}
805 	};
806 
807 	template < typename TSAValue, typename TString, typename TSpec >
808 	struct QGramLessNoCheckOffset_<TSAValue, StringSet<TString, TSpec> const> :
809 		public ::std::binary_function < TSAValue, TSAValue, bool >
810     {
811 		typedef typename Iterator<TString, Standard>::Type TIter;
812 		StringSet<TString, TSpec> const &_stringSet;
813 		typename Size<TString>::Type _q, _offset;
814 
815 		template <typename TSize1, typename TSize2>
816 		QGramLessNoCheckOffset_(StringSet<TString, TSpec> const &text, TSize1 q, TSize2 offset):
817 			_stringSet(text),
818 			_q(q),
819 			_offset(offset) {}
820 
821 		inline bool operator() (TSAValue const &a, TSAValue const &b) const
822 		{
823 			if (a == b) return false;
824 			TIter itA = begin(_stringSet[getValueI1(a)], Standard()) + getValueI2(a) + _offset;
825 			TIter itB = begin(_stringSet[getValueI1(b)], Standard()) + getValueI2(b) + _offset;
826 			if (a <= b) {
827 				TIter itEnd = itB + _q;
828 				for(; itB != itEnd; ++itB, ++itA) {
829 					if (lexLess(*itA, *itB)) return true;
830 					if (lexLess(*itB, *itA)) return false;
831 				}
832 				return false;
833 			} else {
834 				TIter itEnd = itA + _q;
835 				for(; itA != itEnd; ++itA, ++itB) {
836 					if (lexLess(*itA, *itB)) return true;
837 					if (lexLess(*itB, *itA)) return false;
838 				}
839 				return true;
840 			}
841 		}
842 	};
843 
844 	//////////////////////////////////////////////////////////////////////////////
845 	// Counting sort - little helpers
846 	//
847 
848 	// map hashes 1:1 to directory
849 	template < typename THashValue >
850 	inline THashValue
851 	requestBucket(Nothing &, THashValue hash)
852 	{
853 		return hash;
854 	}
855 
856 	template < typename THashValue >
857 	inline THashValue
858 	getBucket(Nothing const &, THashValue hash)
859 	{
860 		return hash;
861 	}
862 
863 	//////////////////////////////////////////////////////////////////////////////
864 	// Counting sort - Step 1: Clear directory
865 	template < typename TDir >
866 	inline void
867 	_qgramClearDir(TDir &dir, Nothing &)
868 	{
869 		arrayFill(begin(dir, Standard()), end(dir, Standard()), 0);
870 	}
871 
872 	//////////////////////////////////////////////////////////////////////////////
873 	// Counting sort - Step 2: Count q-grams
874 	template < typename TDir, typename TBucketMap, typename TText, typename TShape, typename TStepSize >
875 	inline void
876 	_qgramCountQGrams(TDir &dir, TBucketMap &bucketMap, TText const &text, TShape shape, TStepSize stepSize)
877 	{
878 	SEQAN_CHECKPOINT
879 		typedef typename Iterator<TText const, Standard>::Type	TIterator;
880 		typedef typename Value<TDir>::Type						TSize;
881 
882 		if (length(text) < length(shape)) return;
883 		TSize num_qgrams = (length(text) - length(shape)) / stepSize + 1;
884 
885 		TIterator itText = begin(text, Standard());
886 		++dir[requestBucket(bucketMap, hash(shape, itText))];
887 		if (stepSize == 1)
888 			for(TSize i = 1; i < num_qgrams; ++i)
889 			{
890 				++itText;
891 				++dir[requestBucket(bucketMap, hashNext(shape, itText))];
892 			}
893 		else
894 			for(TSize i = 1; i < num_qgrams; ++i)
895 			{
896 				itText += stepSize;
897 				++dir[requestBucket(bucketMap, hash(shape, itText))];
898 			}
899 	}
900 
901 	template < typename TDir, typename TBucketMap, typename TString, typename TSpec, typename TShape, typename TStepSize >
902 	inline void
903 	_qgramCountQGrams(TDir &dir, TBucketMap &bucketMap, StringSet<TString, TSpec> const &stringSet, TShape shape, TStepSize stepSize)
904 	{
905 	SEQAN_CHECKPOINT
906 		typedef typename Iterator<TString const, Standard>::Type	TIterator;
907 		typedef typename Value<TDir>::Type							TSize;
908 
909 		if (stepSize == 1)
910 			for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
911 			{
912 				TString const &sequence = value(stringSet, seqNo);
913 				if (length(sequence) < length(shape)) continue;
914 				TSize num_qgrams = length(sequence) - length(shape) + 1;
915 
916 				TIterator itText = begin(sequence, Standard());
917 				++dir[requestBucket(bucketMap, hash(shape, itText))];
918 				for(TSize i = 1; i < num_qgrams; ++i)
919 				{
920 					++itText;
921 					++dir[requestBucket(bucketMap, hashNext(shape, itText))];
922 				}
923 			}
924 		else
925 			for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
926 			{
927 				TString const &sequence = value(stringSet, seqNo);
928 				if (length(sequence) < length(shape)) continue;
929 				TSize num_qgrams = (length(sequence) - length(shape)) / stepSize + 1;
930 
931 				TIterator itText = begin(sequence, Standard());
932 				for(TSize i = 0; i < num_qgrams; ++i)
933 				{
934 					++dir[requestBucket(bucketMap, hash(shape, itText))];
935 					itText += stepSize;
936 				}
937 			}
938 	}
939 
940 	//////////////////////////////////////////////////////////////////////////////
941 	// Counting sort - Step 3: Cumulative sum
942 
943 	// First two entries are 0.
944 	// Step 4 increments the entries hash(qgram)+1 on-the-fly while filling the SA table.
945 	// After step 4 each entry (0..n-1) is the beginning of a qgram bucket.
946 	template < typename TDir, typename TWithConstraints >
947 	inline typename Value<TDir>::Type
948 	_qgramCummulativeSum(TDir &dir, TWithConstraints)
949 	{
950 	SEQAN_CHECKPOINT
951 		typedef typename Iterator<TDir, Standard>::Type			TDirIterator;
952 		typedef typename Value<TDir>::Type						TSize;
953 
954 		TDirIterator it = begin(dir, Standard());
955 		TDirIterator itEnd = end(dir, Standard());
956 		TSize diff = 0, diff_prev = 0, sum = 0;
957 		while (it != itEnd)
958 		{
959 			if (TWithConstraints::VALUE && diff == (TSize)-1)
960 			{
961 				sum += diff_prev;
962 				diff_prev = 0;
963 				diff = *it;
964 				*it = (TSize)-1;								// disable bucket
965 			} else {
966 				sum += diff_prev;
967 				diff_prev = diff;
968 				diff = *it;
969 				*it = sum;
970 			}
971 			++it;
972 		}
973 		return sum + diff_prev;
974 	}
975 
976 	// The first entry is 0.
977 	// This function is used when Step 4 is ommited.
978 	template < typename TDir, typename TWithConstraints >
979 	inline typename Value<TDir>::Type
980 	_qgramCummulativeSumAlt(TDir &dir, TWithConstraints const)
981 	{
982 	SEQAN_CHECKPOINT
983 		typedef typename Iterator<TDir, Standard>::Type			TDirIterator;
984 		typedef typename Value<TDir>::Type						TSize;
985 
986 		TDirIterator it = begin(dir, Standard());
987 		TDirIterator itEnd = end(dir, Standard());
988 		TSize diff = 0, sum = 0;
989 		while (it != itEnd)
990 		{
991 			sum += diff;
992 			diff = *it;
993 			if (TWithConstraints::VALUE && diff == (TSize)-1)
994 				diff = 0;										// ignore disabled buckets
995 			*it = sum;
996 			++it;
997 		}
998 		return sum + diff;
999 	}
1000 
1001 	//////////////////////////////////////////////////////////////////////////////
1002 	// Counting sort - Step 4: Fill suffix array
1003 	// w/o constraints
1004 	template <
1005 		typename TSA,
1006 		typename TText,
1007 		typename TShape,
1008 		typename TDir,
1009 		typename TBucketMap,
1010 		typename TWithConstraints,
1011 		typename TStepSize >
1012 	inline void
1013 	_qgramFillSuffixArray(
1014 		TSA &sa,
1015 		TText const &text,
1016 		TShape shape,
1017 		TDir &dir,
1018 		TBucketMap &bucketMap,
1019 		TStepSize stepSize,
1020 		TWithConstraints const)
1021 	{
1022 	SEQAN_CHECKPOINT
1023 		typedef typename Iterator<TText const, Standard>::Type	TIterator;
1024 		typedef typename Value<TDir>::Type						TSize;
1025 
1026 		TSize num_qgrams = length(text) - length(shape) + 1;
1027 		TIterator itText = begin(text, Standard());
1028 
1029 		if (TWithConstraints::VALUE) {
1030 			TSize bktNo = getBucket(bucketMap, hash(shape, itText)) + 1;			// first hash
1031 			if (dir[bktNo] != (TSize)-1) sa[dir[bktNo]++] = 0;						// if bucket is enabled
1032 		} else
1033 			sa[dir[getBucket(bucketMap, hash(shape, itText)) + 1]++] = 0;			// first hash
1034 
1035 		if (stepSize == 1)
1036 			for(TSize i = 1; i < num_qgrams; ++i)
1037 			{
1038 				++itText;
1039 				if (TWithConstraints::VALUE) {
1040 					TSize bktNo = getBucket(bucketMap, hashNext(shape, itText)) + 1;	// next hash
1041 					if (dir[bktNo] != (TSize)-1) sa[dir[bktNo]++] = i;					// if bucket is enabled
1042 				} else
1043 					sa[dir[getBucket(bucketMap, hashNext(shape, itText)) + 1]++] = i;	// next hash
1044 			}
1045 		else
1046 			for(TSize i = 1; i < num_qgrams; i += stepSize)
1047 			{
1048 				itText += stepSize;
1049 				if (TWithConstraints::VALUE) {
1050 					TSize bktNo = getBucket(bucketMap, hash(shape, itText)) + 1;		// next hash (we mustn't use hashNext here)
1051 					if (dir[bktNo] != (TSize)-1) sa[dir[bktNo]++] = i;					// if bucket is enabled
1052 				} else
1053 					sa[dir[getBucket(bucketMap, hash(shape, itText)) + 1]++] = i;		// next hash
1054 			}
1055 	}
1056 
1057 	// multiple sequences
1058 	template <
1059 		typename TSA,
1060 		typename TString,
1061 		typename TSpec,
1062 		typename TShape,
1063 		typename TDir,
1064 		typename TBucketMap,
1065 		typename TStepSize,
1066 		typename TWithConstraints >
1067 	inline void
1068 	_qgramFillSuffixArray(
1069 		TSA &sa,
1070 		StringSet<TString, TSpec> const &stringSet,
1071 		TShape shape,
1072 		TDir &dir,
1073 		TBucketMap &bucketMap,
1074 		TStepSize stepSize,
1075 		TWithConstraints const)
1076 	{
1077 	SEQAN_CHECKPOINT
1078 		typedef typename Iterator<TString const, Standard>::Type	TIterator;
1079 		typedef typename Value<TDir>::Type							TSize;
1080 
1081 		if (stepSize == 1)
1082 			for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1083 			{
1084 				TString const &sequence = value(stringSet, seqNo);
1085 				if (length(sequence) < length(shape)) continue;
1086 				TSize num_qgrams = length(sequence) - length(shape) + 1;
1087 
1088 				typename Value<TSA>::Type localPos;
1089 				assignValueI1(localPos, seqNo);
1090 				assignValueI2(localPos, 0);
1091 
1092 				TIterator itText = begin(sequence, Standard());
1093 				if (TWithConstraints::VALUE) {
1094 					TSize bktNo = getBucket(bucketMap, hash(shape, itText)) + 1;					// first hash
1095 					if (dir[bktNo] != (TSize)-1) sa[dir[bktNo]++] = localPos;						// if bucket is enabled
1096 				} else
1097 					sa[dir[getBucket(bucketMap, hash(shape, itText)) + 1]++] = localPos;			// first hash
1098 
1099 				for(TSize i = 1; i < num_qgrams; ++i)
1100 				{
1101 					++itText;
1102 					assignValueI2(localPos, i);
1103 					if (TWithConstraints::VALUE) {
1104 						TSize bktNo = getBucket(bucketMap, hashNext(shape, itText)) + 1;			// next hash
1105 						if (dir[bktNo] != (TSize)-1) sa[dir[bktNo]++] = localPos;					// if bucket is enabled
1106 					} else
1107 						sa[dir[getBucket(bucketMap, hashNext(shape, itText)) + 1]++] = localPos;	// next hash
1108 				}
1109 			}
1110 		else
1111 			for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1112 			{
1113 				TString const &sequence = value(stringSet, seqNo);
1114 				if (length(sequence) < length(shape)) continue;
1115 				TSize num_qgrams = length(sequence) - length(shape) + 1;
1116 
1117 				typename Value<TSA>::Type localPos;
1118 				assignValueI1(localPos, seqNo);
1119 				assignValueI2(localPos, 0);
1120 
1121 				TIterator itText = begin(sequence, Standard());
1122 				for(TSize i = 1; i < num_qgrams; i += stepSize)
1123 				{
1124 					assignValueI2(localPos, i);
1125 					if (TWithConstraints::VALUE) {
1126 						TSize bktNo = getBucket(bucketMap, hash(shape, itText)) + 1;				// hash
1127 						if (dir[bktNo] != (TSize)-1) sa[dir[bktNo]++] = localPos;					// if bucket is enabled
1128 					} else
1129 						sa[dir[getBucket(bucketMap, hash(shape, itText)) + 1]++] = localPos;		// hash
1130 					itText += stepSize;
1131 				}
1132 			}
1133 	}
1134 
1135 	//////////////////////////////////////////////////////////////////////////////
1136 	// Step 5: Correct disabled buckets
1137 	template < typename TDir >
1138 	inline void
1139 	_qgramPostprocessBuckets(TDir &dir)
1140 	{
1141 	SEQAN_CHECKPOINT
1142 		typedef typename Iterator<TDir, Standard>::Type			TDirIterator;
1143 		typedef typename Value<TDir>::Type						TSize;
1144 
1145 		TDirIterator it = begin(dir, Standard());
1146 		TDirIterator itEnd = end(dir, Standard());
1147 		TSize prev = 0;
1148 		for (; it != itEnd; ++it)
1149 			if (*it == (TSize)-1)
1150 				*it = prev;
1151 			else
1152 				prev = *it;
1153 	}
1154 
1155 
1156 //////////////////////////////////////////////////////////////////////////////
1157 /**
1158 .Function.createQGramIndex:
1159 ..summary:Builds a q-gram index on a sequence.
1160 ..cat:Index
1161 ..signature:createQGramIndex(index)
1162 ..signature:createQGramIndex(sa, dir, bucketMap, text, shape, stepSize) [DEPRECATED]
1163 ..param.index:The q-gram index.
1164 ...type:Spec.IndexQGram
1165 ..param.sa:The resulting list in which all q-grams are sorted alphabetically.
1166 ..param.dir:The resulting array that indicates at which position in index the corresponding q-grams can be found.
1167 ..param.bucketMap:Stores the q-gram hashes for the openaddressing hash maps, see @Function.indexBucketMap@.
1168 If bucketMap is of the type @Tag.Nothing@ the q-gram hash determines the bucket address in the index.
1169 ..param.text:The sequence.
1170 ..param.shape:The shape to be used.
1171 ...type:Class.Shape
1172 ..param.stepSize:Store every $stepSize$'th q-gram in the index.
1173 ..returns:Index contains the sorted list of qgrams. For each q-gram $dir$ contains the first position in index that corresponds to this q-gram.
1174 ..remarks:This function should not be called directly. Please use @Function.indexCreate@ or @Function.indexRequire@.
1175 The resulting tables must have appropriate size before calling this function.
1176 ..include:seqan/index.h
1177 */
1178 
1179 	template < typename TIndex >
1180 	inline bool _qgramDisableBuckets(TIndex &)
1181 	{
1182 		return false;	// we disable no buckets by default
1183 	}
1184 
1185 	template < typename TIndex >
1186 	void createQGramIndex(TIndex &index)
1187 	{
1188 	SEQAN_CHECKPOINT
1189 		typename Fibre<TIndex, QGramText>::Type const &text      = indexText(index);
1190 		typename Fibre<TIndex, QGramSA>::Type         &sa        = indexSA(index);
1191 		typename Fibre<TIndex, QGramDir>::Type        &dir       = indexDir(index);
1192 		typename Fibre<TIndex, QGramShape>::Type      &shape     = indexShape(index);
1193 		typename Fibre<TIndex, QGramBucketMap>::Type  &bucketMap = index.bucketMap;
1194 
1195 		// 1. clear counters
1196 		_qgramClearDir(dir, bucketMap);
1197 
1198 		// 2. count q-grams
1199 		_qgramCountQGrams(dir, bucketMap, text, shape, getStepSize(index));
1200 
1201 		if (_qgramDisableBuckets(index))
1202 		{
1203 			// 3. cumulative sum
1204 			_qgramCummulativeSum(dir, True());
1205 
1206 			// 4. fill suffix array
1207 			_qgramFillSuffixArray(sa, text, shape, dir, bucketMap, getStepSize(index), True());
1208 
1209 			// 5. correct disabled buckets
1210 			_qgramPostprocessBuckets(dir);
1211 		}
1212 		else
1213 		{
1214 			// 3. cumulative sum
1215 			_qgramCummulativeSum(dir, False());
1216 
1217 			// 4. fill suffix array
1218 			_qgramFillSuffixArray(sa, text, shape, dir, bucketMap, getStepSize(index), False());
1219 		}
1220 	}
1221 
1222 	// DEPRECATED
1223 	// better use createQGramIndex(index) (above)
1224 	template <
1225         typename TSA,
1226 		typename TDir,
1227 		typename TBucketMap,
1228 		typename TText,
1229 		typename TShape,
1230 		typename TStepSize >
1231 	void createQGramIndex(
1232 		TSA &sa,
1233 		TDir &dir,
1234 		TBucketMap &bucketMap,
1235 		TText const &text,
1236 		TShape &shape,
1237 		TStepSize stepSize)
1238 	{
1239 	SEQAN_CHECKPOINT
1240 
1241 		// 1. clear counters
1242 		_qgramClearDir(dir, bucketMap);
1243 
1244 		// 2. count q-grams
1245 		_qgramCountQGrams(dir, bucketMap, text, shape, stepSize);
1246 
1247 		// 3. cumulative sum
1248 		SEQAN_DO(_qgramCummulativeSum(dir, False()) == length(sa));
1249 
1250 		// 4. fill suffix array
1251 		_qgramFillSuffixArray(sa, text, shape, dir, bucketMap, stepSize, False());
1252 	}
1253 
1254 	// DEPRECATED
1255 	// better use createQGramIndex(index) (above)
1256 	template <
1257         typename TSA,
1258 		typename TDir,
1259 		typename TBucketMap,
1260 		typename TText,
1261 		typename TShape,
1262 		typename TStepSize >
1263 	void createQGramIndex(
1264 		TSA &sa,
1265 		TDir &dir,
1266 		TBucketMap &bucketMap,
1267 		TText const &text,
1268 		TShape &shape)
1269 	{
1270 		createQGramIndex(sa, dir, bucketMap, text, shape, 1);
1271 	}
1272 
1273 //////////////////////////////////////////////////////////////////////////////
1274 /**
1275 .Function.createQGramIndexSAOnly:
1276 ..summary:Builds the suffix array of a q-gram index on a sequence.
1277 ..cat:Index
1278 ..signature:createQGramIndexSAOnly(sa, text, shape, stepSize)
1279 ..param.sa:The resulting list in which all q-grams are sorted alphabetically.
1280 ..param.text:The sequence.
1281 ..param.shape:The shape to be used. q is the length of this shape
1282 ...type:Class.Shape
1283 ..param.stepSize:Store every $stepSize$'th q-gram in the index.
1284 ..remarks:This function should not be called directly. Please use @Function.indexCreate@ or @Function.indexRequire@.
1285 The resulting tables must have appropriate size before calling this function.
1286 ..include:seqan/index.h
1287 */
1288 
1289 	template <
1290 		typename TSA,
1291 		typename TText,
1292 		typename TShape,
1293 		typename TStepSize >
1294 	void createQGramIndexSAOnly(
1295 		TSA &sa,
1296 		TText const &text,
1297 		TShape &shape,
1298 		TStepSize stepSize)
1299 	{
1300 	SEQAN_CHECKPOINT
1301 		typedef typename Size<TSA>::Type TSize;
1302 		typedef typename Iterator<TSA, Standard>::Type TIter;
1303 
1304 		// 1. Fill suffix array with a permutation (the identity)
1305 		TIter it = begin(sa, Standard());
1306 		TIter itEnd = end(sa, Standard());
1307 		TSize i = 0;
1308 		for(; it != itEnd; ++it, i += stepSize)
1309 			*it = i;
1310 
1311 		// 2. Sort suffix array with quicksort
1312 		TSize span = length(shape);
1313 		if (i + span > length(text) + 1)
1314 			::std::sort(
1315 				begin(sa, Standard()),
1316 				end(sa, Standard()),
1317 				QGramLess_<typename Value<TSA>::Type, TText const>(text, span));
1318 		else
1319 			::std::sort(
1320 				begin(sa, Standard()),
1321 				end(sa, Standard()),
1322 				QGramLessNoCheck_<typename Value<TSA>::Type, TText const>(text, span));
1323 	}
1324 
1325 	template <
1326 		typename TSA,
1327 		typename TString,
1328 		typename TSpec,
1329 		typename TShape,
1330 		typename TStepSize >
1331 	void createQGramIndexSAOnly(
1332 		TSA &sa,
1333 		StringSet<TString, TSpec> const &stringSet,
1334 		TShape &shape,
1335 		TStepSize stepSize)
1336 	{
1337 	SEQAN_CHECKPOINT
1338 		typedef typename Iterator<TSA, Standard>::Type	TIter;
1339 		typedef typename Value<TSA>::Type				TValue;
1340 		typedef typename Size<TString>::Type			TSize;
1341 		typedef StringSet<TString, TSpec>				TStringSet;
1342 
1343 		// 1. Fill suffix array with a permutation (the identity)
1344 		TIter it = begin(sa, Standard());
1345 		TValue pair;
1346 		unsigned int const q1 = length(shape) - 1;
1347 		for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1348 		{
1349 			unsigned int const strlen = length(stringSet[seqNo]);
1350 			if (strlen > q1) {
1351 				assignValueI1(pair, seqNo);
1352 				for (TSize i = 0; i < strlen - q1; ++it, i += stepSize) {
1353 					assignValueI2(pair, i);
1354 					*it = pair;
1355 				}
1356 			}
1357 		}
1358 
1359 		// 2. Sort suffix array with quicksort
1360 		TSize q = length(shape);
1361 		if (lengthSum(stringSet) == length(sa))
1362 			::std::sort(
1363 				begin(sa, Standard()),
1364 				end(sa, Standard()),
1365 				QGramLess_<typename Value<TSA>::Type, TStringSet const>(stringSet, q));
1366 		else
1367 			::std::sort(
1368 				begin(sa, Standard()),
1369 				end(sa, Standard()),
1370 				QGramLessNoCheck_<typename Value<TSA>::Type, TStringSet const>(stringSet, q));
1371 	}
1372 
1373 	template <
1374 		typename TSA,
1375 		typename TDir,
1376 		typename TText,
1377 		typename TSize1,
1378 		typename TSize2 >
1379 	void _refineQGramIndex(
1380 		TSA &sa,
1381 		TDir &dir,
1382 		TText const &text,
1383 		TSize1 oldQ,
1384 		TSize2 newQ)
1385 	{
1386 	SEQAN_CHECKPOINT
1387 		typedef typename Size<TSA>::Type TSize;
1388 		typedef typename Iterator<TSA, Standard>::Type		TIter;
1389 		typedef typename Iterator<TDir, Standard>::Type		TDirIter;
1390 
1391 		if (newQ <= (TSize2)oldQ) return;
1392 
1393 		if (length(dir) < 2) {
1394 			::std::sort(
1395 				begin(sa, Standard()),
1396 				end(sa, Standard()),
1397 				QGramLess_<typename Value<TSA>::Type, TText const>(text, newQ));
1398 			return;
1399 		}
1400 
1401 		// 1. Sort each bucket with quicksort and compare substrings s[i+oldQ..i+newQ)
1402 		TDirIter dirIt = begin(dir, Standard());
1403 		TDirIter dirItEnd = end(dir, Standard());
1404 		TIter itBegin = begin(sa, Standard());
1405 		TIter itBktBegin = itBegin + *dirIt;
1406 		TIter itBktEnd;
1407 		++dirIt;
1408 		for(; dirIt != dirItEnd; ++dirIt, itBktBegin = itBktEnd) {
1409 			itBktEnd = itBegin + *dirIt;
1410 			if (itBktEnd - itBktBegin < 2) continue;
1411 			::std::sort(
1412 				itBktBegin,
1413 				itBktEnd,
1414 				QGramLessOffset_<typename Value<TSA>::Type, TText const>(text, newQ - oldQ, oldQ));
1415 		}
1416 	}
1417 
1418 
1419 //////////////////////////////////////////////////////////////////////////////
1420 /**
1421 .Function.createQGramIndexDirOnly:
1422 ..summary:Builds the directory of a q-gram index on a sequence.
1423 ..cat:Index
1424 ..signature:createQGramIndexDirOnly(dir, bucketMap, text, shape, stepSize)
1425 ..param.dir:The resulting array that indicates at which position in index the corresponding q-grams can be found.
1426 ..param.bucketMap:Stores the q-gram hashes for the openaddressing hash maps, see @Function.indexBucketMap@.
1427 If bucketMap is of the type @Tag.Nothing@ the q-gram hash determines the bucket address in the index.
1428 ..param.text:The sequence.
1429 ..param.shape:The shape to be used.
1430 ...type:Class.Shape
1431 ..param.stepSize:Store every $stepSize$'th q-gram in the index.
1432 ..returns:Index contains the sorted list of qgrams. For each possible q-gram pos contains the first position in index that corresponds to this q-gram.
1433 ..remarks:This function should not be called directly. Please use @Function.indexCreate@ or @Function.indexRequire@.
1434 The resulting tables must have appropriate size before calling this function.
1435 ..include:seqan/index.h
1436 */
1437 
1438 	template <
1439 		typename TDir,
1440 		typename TBucketMap,
1441 		typename TText,
1442 		typename TShape,
1443 		typename TStepSize >
1444 	void createQGramIndexDirOnly(
1445 		TDir &dir,
1446 		TBucketMap &bucketMap,
1447 		TText const &text,
1448 		TShape &shape,
1449 		TStepSize stepSize)
1450 	{
1451 	SEQAN_CHECKPOINT
1452 
1453 		// 1. clear counters
1454 		_qgramClearDir(dir, bucketMap);
1455 
1456 		// 2. count q-grams
1457 		_qgramCountQGrams(dir, bucketMap, text, shape, stepSize);
1458 
1459 		// 3. cumulative sum (Step 4 is ommited)
1460 		_qgramCummulativeSumAlt(dir, False());
1461 	}
1462 
1463 	template <
1464 		typename TDir,
1465 		typename TBucketMap,
1466 		typename TString,
1467 		typename TSpec,
1468 		typename TShape,
1469 		typename TStepSize >
1470 	void createQGramIndexDirOnly(
1471 		TDir &dir,
1472 		TBucketMap &bucketMap,
1473 		StringSet<TString, TSpec> const &stringSet,
1474 		TShape &shape,
1475 		TStepSize stepSize)
1476 	{
1477 	SEQAN_CHECKPOINT
1478 
1479 		// 1. clear counters
1480 		_qgramClearDir(dir, bucketMap);
1481 
1482 		// 2. count q-grams
1483 		_qgramCountQGrams(dir, bucketMap, stringSet, shape, stepSize);
1484 
1485 		// 3. cumulative sum (Step 4 is ommited)
1486 		_qgramCummulativeSumAlt(dir, False());
1487 	}
1488 
1489 
1490 //////////////////////////////////////////////////////////////////////////////
1491 /**
1492 .Function.createCountArray:
1493 ..summary:Builds an index on a StringSet storing how often a q-gram occurs in each sequence.
1494 ..cat:Index
1495 ..signature:createCountsArray(counts, dir, bucketMap, stringSet, shape, stepSize)
1496 ..param.counts:The resulting list of pairs (seqNo,count).
1497 ..param.dir:The resulting array that indicates at which position in the count table the corresponding a certain q-gram can be found.
1498 ..param.bucketMap:Stores the q-gram hashes for the openaddressing hash maps, see @Function.indexBucketMap@.
1499 If bucketMap is of the type @Tag.Nothing@ the q-gram hash determines the bucket address in the index.
1500 ..param.stringSet:The StringSet.
1501 ...type:Class.StringSet
1502 ..param.shape:The shape to be used.
1503 ...type:Class.Shape
1504 ..param.stepSize:Store every $stepSize$'th q-gram in the index.
1505 ..remarks:This function should not be called directly. Please use @Function.indexCreate@ or @Function.indexRequire@.
1506 The resulting tables must have appropriate size before calling this function.
1507 ..include:seqan/index.h
1508 */
1509 
1510 	template <
1511 		typename TCounts,
1512 		typename TDir,
1513 		typename TBucketMap,
1514 		typename TString,
1515 		typename TSpec,
1516 		typename TShape,
1517 		typename TStepSize >
1518 	void createCountsArray(
1519 		TCounts &counts,
1520 		TDir &dir,
1521 		TBucketMap &bucketMap,
1522 		StringSet<TString, TSpec> const &stringSet,
1523 		TShape shape,
1524 		TStepSize stepSize)
1525 	{
1526 	SEQAN_CHECKPOINT
1527 		typedef typename Iterator<TString const, Standard>::Type	TIterator;
1528 		typedef typename Iterator<TDir, Standard>::Type				TDirIterator;
1529 		typedef typename Value<TShape>::Type						TValue;
1530 		typedef typename Size<TString>::Type						TSize;
1531 
1532 		TDir lastSeqSeen;
1533 		resize(lastSeqSeen, length(dir));
1534 
1535 		// 1. clear counters
1536 		_qgramClearDir(dir, bucketMap);
1537 		arrayFill(begin(lastSeqSeen, Standard()), end(lastSeqSeen, Standard()), -1);
1538 
1539 		// 2. count distinct sequences for each q-gram
1540 		for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1541 		{
1542 			TString const &sequence = value(stringSet, seqNo);
1543 			if (length(sequence) < length(shape)) continue;
1544 			TSize num_qgrams = (length(sequence) - length(shape)) / stepSize + 1;
1545 
1546 			TIterator itText = begin(sequence, Standard());
1547 			TValue bktNo = requestBucket(bucketMap, hash(shape, itText));
1548 			lastSeqSeen[bktNo] = seqNo;
1549 			++dir[bktNo];
1550 			if (stepSize == 1)
1551 				for(TSize i = 1; i < num_qgrams; ++i)
1552 				{
1553 					++itText;
1554 					bktNo = requestBucket(bucketMap, hashNext(shape, itText));
1555 					if (seqNo != lastSeqSeen[bktNo]) {
1556 						lastSeqSeen[bktNo] = seqNo;
1557 						++dir[bktNo];
1558 					}
1559 				}
1560 			else
1561 				for(TSize i = 1; i < num_qgrams; ++i)
1562 				{
1563 					itText += stepSize;
1564 					bktNo = requestBucket(bucketMap, hash(shape, itText));
1565 					if (seqNo != lastSeqSeen[bktNo]) {
1566 						lastSeqSeen[bktNo] = seqNo;
1567 						++dir[bktNo];
1568 					}
1569 				}
1570 		}
1571 
1572 		// 3. cumulative sum
1573 		resize(counts, _qgramCummulativeSum(dir, False()));
1574 
1575 		// 4. fill count array
1576 		arrayFill(begin(lastSeqSeen, Standard()), end(lastSeqSeen, Standard()), -1);
1577 		for(unsigned seqNo = 0; seqNo < length(stringSet); ++seqNo)
1578 		{
1579 			TString const &sequence = value(stringSet, seqNo);
1580 			if (length(sequence) < length(shape)) continue;
1581 			TSize num_qgrams = (length(sequence) - length(shape)) / stepSize + 1;
1582 
1583 			TIterator itText = begin(sequence, Standard());
1584 			TValue bktNo = getBucket(bucketMap, hash(shape, itText));
1585 			lastSeqSeen[bktNo] = seqNo;
1586 			TSize pos = dir[bktNo + 1]++;
1587 			counts[pos].i1 = seqNo;				// first hash
1588 			counts[pos].i2 = 1;
1589 
1590 			if (stepSize == 1)
1591 				for(TSize i = 1; i < num_qgrams; ++i)
1592 				{
1593 					++itText;
1594 					bktNo = getBucket(bucketMap, hashNext(shape, itText));
1595 					if (seqNo == lastSeqSeen[bktNo])
1596 						++(counts[dir[bktNo + 1] - 1].i2);
1597 					else {
1598 						lastSeqSeen[bktNo] = seqNo;
1599 						pos = dir[bktNo + 1]++;
1600 						counts[pos].i1 = seqNo;				// next hash
1601 						counts[pos].i2 = 1;
1602 					}
1603 				}
1604 			else
1605 				for(TSize i = 1; i < num_qgrams; ++i)
1606 				{
1607 					itText += stepSize;
1608 					bktNo = getBucket(bucketMap, hash(shape, itText));
1609 					if (seqNo == lastSeqSeen[bktNo])
1610 						++(counts[dir[bktNo + 1] - 1].i2);
1611 					else {
1612 						lastSeqSeen[bktNo] = seqNo;
1613 						pos = dir[bktNo + 1]++;
1614 						counts[pos].i1 = seqNo;				// next hash
1615 						counts[pos].i2 = 1;
1616 					}
1617 				}
1618 		}
1619 	}
1620 
1621 
1622 //////////////////////////////////////////////////////////////////////////////
1623 // create q-gram index of *one* sequence in external memory
1624 
1625 	// *** COMPARATORS & MAPS ***
1626 
1627     template <typename InType, typename Result = int>
1628     struct _qgramComp : public ::std::binary_function<InType,InType,Result> {
1629         inline Result operator()(InType const &a, InType const &b) const
1630         {
1631 			typedef typename Value<InType, 2>::Type TQGram;
1632 			typename Value<TQGram>::Type const *sa = a.i2.i;
1633             typename Value<TQGram>::Type const *sb = b.i2.i;
1634 			typename Value<TQGram>::Type const *saEnd = sa + length(a.i2);
1635 
1636             for(; sa != saEnd; ++sa, ++sb) {
1637                 if (*sa == *sb) continue;
1638                 return (*sa < *sb)? -1 : 1;
1639             }
1640 			return posCompare(a.i1, b.i1);
1641         }
1642     };
1643 
1644     // optimized for bitvectors
1645     template <typename T1, typename TValue, unsigned _size, typename Result>
1646     struct _qgramComp< Pair<T1, Tuple<TValue, _size, Compressed>, Compressed >, Result > :
1647         public ::std::binary_function<
1648             Pair<T1, Tuple<TValue, _size, Compressed>, Compressed >,
1649             Pair<T1, Tuple<TValue, _size, Compressed>, Compressed >,
1650             Result> {
1651         inline Result operator()(
1652             const Pair<T1, Tuple<TValue, _size, Compressed>, Compressed > &a,
1653             const Pair<T1, Tuple<TValue, _size, Compressed>, Compressed > &b) const
1654         {
1655             if (a.i2 < b.i2) return -1;
1656             if (a.i2 > b.i2) return 1;
1657 			return posCompare(a.i1, b.i1);
1658         }
1659     };
1660 
1661 
1662     template <typename TValue, typename TResult = unsigned>
1663     struct _qgramHash : public ::std::unary_function<TValue, TResult> {
1664         inline TResult operator()(TValue const &a) const
1665         {
1666 			typedef typename Value<TValue, 2>::Type	TQGram;
1667 			TResult hash = 0;
1668 			unsigned len = length(a.i2);
1669             for (unsigned i = 0; i < len; ++i) {
1670 				hash *= ValueSize< typename Value<TQGram>::Type >::VALUE;
1671 				hash += ordValue(a.i2[i]);
1672             }
1673             return hash;
1674         }
1675     };
1676 
1677 	// TODO: replace fixed tuple size of 6 with q and add q to Shape template arguments
1678 	template <
1679 		typename TSA,
1680 		typename TDir,
1681 		typename TText,
1682 		typename TValue,
1683 		unsigned q >
1684 	void createQGramIndexExt(
1685 		TSA &suffixArray,
1686 		TDir &dir,
1687 		TText &text,
1688 		Shape<TValue, UngappedShape<q> >)
1689 	{
1690         // signed characters behave different than unsigned when compared
1691         // to get the same index with signed or unsigned chars we simply cast them to unsigned
1692         // before feeding them into the pipeline
1693 		typedef Shape<TValue, UngappedShape<q> >					TShape;
1694         typedef typename MakeUnsigned_<TValue>::Type				TUValue;
1695 
1696         // *** SPECIALIZATION ***
1697 
1698 		typedef Pipe< TText, Source<> >								TSource;
1699         typedef Pipe< TSource, Caster<TUValue, CasterConvert> >		TUnsigner;
1700 		typedef Pipe< TUnsigner, Tupler<q> >						TTupler;
1701 						                typedef _qgramComp<TypeOf_(TTupler)> qcomp_t;
1702         typedef Pool<
1703 					TypeOf_(TTupler),
1704 					SorterSpec< SorterConfigSize<qcomp_t, TSizeOf_(TTupler) > >
1705 				> TSortTuples;
1706 										typedef _qgramHash<TypeOf_(TTupler), typename Size<TDir>::Type> qhash_t;
1707 
1708         // *** INSTANTIATION ***
1709 
1710 		TSource			src(text);
1711         TUnsigner		unsigner(src);
1712 		TTupler			tupler(unsigner);
1713 		TSortTuples		sorter;
1714 
1715 		// sort q-grams
1716 		sorter << tupler;
1717 
1718 		// fill sa and dir
1719 		if (!beginRead(sorter)) return;
1720 
1721 		typename Iterator<TSA>::Type itSA = begin(suffixArray);
1722 		typename Iterator<TDir>::Type itDir = begin(dir);
1723 
1724 		qcomp_t	qcomp;
1725 		qhash_t qhash;
1726 
1727 		typename Value<TSortTuples>::Type	old_qgram;
1728 		typename Size<TDir>::Type			hash, old_hash = 0;
1729         typename Size<TSortTuples>::Type	leftToRead = length(sorter);
1730 		bool first = true;
1731 
1732 		for (leftToRead = length(sorter); leftToRead > 0; --leftToRead, ++sorter, ++itSA)
1733 		{
1734 			// copy occurence position
1735 			*itSA = (*sorter).i1;
1736 			if (first || qcomp(old_qgram, *sorter) != 0)
1737 			{
1738 				old_qgram = *sorter;
1739 				hash = qhash(old_qgram);
1740 
1741 				SEQAN_ASSERT(old_hash < hash);
1742 
1743 				// copy bucket begin
1744 				typename Size<TSortTuples>::Type i = length(sorter) - leftToRead;
1745 				for(; old_hash < hash; ++old_hash, ++itDir)
1746 					*itDir = i;
1747 				first = false;
1748 			}
1749 		}
1750 
1751 		// fill bucket table
1752 		typename Size<TSortTuples>::Type i = length(sorter);
1753 		hash = length(dir);
1754 		for(; old_hash < hash; ++old_hash, ++itDir)
1755 			*itDir = i;
1756 
1757 		endRead(sorter);
1758 	}
1759 
1760 
1761 //////////////////////////////////////////////////////////////////////////////
1762 // create q-gram index of *multiple* sequences in external memory
1763 
1764 	template <
1765 		typename TSA,
1766 		typename TDir,
1767 		typename TString,
1768 		typename TSpec,
1769 		typename TValue,
1770 		unsigned q,
1771 		typename TLimitsString >
1772 	void createQGramIndexExt(
1773 		TSA &suffixArray,
1774 		TDir &dir,
1775 		StringSet<TString, TSpec> const &stringSet,
1776 		Shape<TValue, UngappedShape<q> >,
1777 		TLimitsString &limits)
1778 	{
1779         // signed characters behave different than unsigned when compared
1780         // to get the same index with signed or unsigned chars we simply cast them to unsigned
1781         // before feeding them into the pipeline
1782 		typedef typename Concatenator<StringSet<TString, TSpec> >::Type	TConcat;
1783 		typedef Shape<TValue, UngappedShape<q> >						TShape;
1784         typedef typename MakeUnsigned_<TValue>::Type					TUValue;
1785 		typedef Multi<
1786 			Tupler<q, true, Compressed>,
1787 			typename Value<TSA>::Type,
1788 			typename StringSetLimits< StringSet<TString, TSpec> >::Type >		TTuplerSpec;
1789 
1790         // *** SPECIALIZATION ***
1791 
1792 		typedef Pipe< TConcat, Source<> >							TSource;
1793         typedef Pipe< TSource, Caster<TUValue, CasterConvert> >		TUnsigner;
1794 		typedef Pipe< TUnsigner, TTuplerSpec >						TTupler;
1795 						                typedef _qgramComp<TypeOf_(TTupler)> qcomp_t;
1796         typedef Pool<
1797 					TypeOf_(TTupler),
1798 					SorterSpec< SorterConfigSize<qcomp_t, TSizeOf_(TTupler) > >
1799 				> TSortTuples;
1800 										typedef _qgramHash<TypeOf_(TTupler), typename Size<TDir>::Type> qhash_t;
1801 
1802         // *** INSTANTIATION ***
1803 
1804 		TSource			src(concat(stringSet));
1805         TUnsigner		unsigner(src);
1806 		TTupler			tupler(unsigner, limits);
1807 		TSortTuples		sorter;
1808 
1809 		// sort q-grams
1810 		sorter << tupler;
1811 
1812 		// fill sa and dir
1813 		if (!beginRead(sorter)) return;
1814 
1815 		typename Iterator<TSA>::Type itSA = begin(suffixArray);
1816 		typename Iterator<TDir>::Type itDir = begin(dir);
1817 
1818 		qcomp_t	qcomp;
1819 		qhash_t qhash;
1820 
1821 		typename Value<TSortTuples>::Type	old_qgram = *sorter;
1822 		typename Size<TDir>::Type			hash, old_hash = 0;
1823         typename Size<TSortTuples>::Type	leftToRead;
1824 		bool first = true;
1825 
1826 		for (leftToRead = length(sorter); leftToRead > 0; --leftToRead, ++sorter, ++itSA)
1827 		{
1828 			// copy occurence position
1829 			*itSA = (*sorter).i1;
1830 
1831 			if (first || qcomp(old_qgram, *sorter) != 0)
1832 			{
1833 				old_qgram = *sorter;
1834 				hash = qhash(old_qgram);
1835 
1836 				SEQAN_ASSERT_LEQ(old_hash, hash);
1837 
1838 				// copy bucket begin
1839 				typename Size<TSortTuples>::Type i = length(sorter) - leftToRead;
1840 				for(; old_hash < hash; ++old_hash, ++itDir)
1841 					*itDir = i;
1842 				first = false;
1843 			}
1844 		}
1845 
1846 		// fill bucket table
1847 		typename Size<TSortTuples>::Type i = length(sorter);
1848 		hash = length(dir);
1849 		for(; old_hash < hash; ++old_hash, ++itDir)
1850 			*itDir = i;
1851 
1852 		endRead(sorter);
1853 	}
1854 
1855 
1856 
1857 //////////////////////////////////////////////////////////////////////////////
1858 // interface for automatic index creation
1859 
1860 	template <typename TText, typename TShapeSpec, typename TSpec>
1861 	inline typename Size<Index<TText, IndexQGram<TShapeSpec, TSpec> > >::Type
1862 	_qgramQGramCount(Index<TText, IndexQGram<TShapeSpec, TSpec> > const &index)
1863 	{
1864 		typedef Index<TText, IndexQGram<TShapeSpec, TSpec> >	TIndex;
1865 		typedef typename Fibre<TIndex, QGramShape>::Type		TShape;
1866 		typedef typename Size<TIndex>::Type						TSize;
1867 
1868 		TShape const &shape = indexShape(index);
1869 
1870 		// count all overlapping q-grams
1871 		TSize stepSize = getStepSize(index);
1872 		TSize qgramCount = 0;
1873 		for(unsigned i = 0; i < countSequences(index); ++i)
1874 			if (sequenceLength(i, index) >= length(shape))
1875 				qgramCount += (sequenceLength(i, index) - length(shape)) / stepSize + 1;
1876 		return qgramCount;
1877 	}
1878 
1879 	template <typename TText, typename TShapeSpec, typename TSpec>
1880 	inline bool indexCreate(
1881 		Index<TText, IndexQGram<TShapeSpec, TSpec> > &index,
1882 		FibreSADir,
1883 		Default const)
1884 	{
1885 		resize(indexSA(index), _qgramQGramCount(index), Exact());
1886 		resize(indexDir(index), _fullDirLength(index), Exact());
1887 		createQGramIndex(index);
1888 		return true;
1889 	}
1890 
1891 	template <typename TText, typename TSpec>
1892 	inline bool indexSupplied(Index<TText, TSpec> &index, FibreSADir) {
1893 		return !(empty(getFibre(index, FibreSA())) || empty(getFibre(index, FibreDir())));
1894 	}
1895 
1896 	template <typename TText, typename TShapeSpec, typename TSpec>
1897 	inline bool indexCreate(
1898 		Index<TText, IndexQGram<TShapeSpec, TSpec> > &index,
1899 		FibreSA,
1900 		Default const)
1901 	{
1902 		resize(indexSA(index), _qgramQGramCount(index), Exact());
1903 		createQGramIndexSAOnly(indexSA(index), indexText(index), indexShape(index), getStepSize(index));
1904 		return true;
1905 	}
1906 
1907 	template <typename TText, typename TShapeSpec, typename TSpec>
1908 	inline bool indexCreate(
1909 		Index<TText, IndexQGram<TShapeSpec, TSpec> > &index,
1910 		FibreCounts,
1911 		Default const)
1912 	{
1913 		resize(indexCountsDir(index), _fullDirLength(index), Exact());
1914 		createCountsArray(indexCounts(index), indexCountsDir(index), indexBucketMap(index), indexText(index), indexShape(index), getStepSize(index));
1915 		return true;
1916 	}
1917 
1918 	template <typename TText, typename TShapeSpec, typename TSpec>
1919 	inline bool indexCreate(
1920 		Index<TText, IndexQGram<TShapeSpec, TSpec> > &index,
1921 		FibreDir,
1922 		Default const)
1923 	{
1924 		typedef Index<TText, IndexQGram<TShapeSpec, TSpec> > TIndex;
1925 
1926 		resize(indexDir(index), _fullDirLength(index), Exact());
1927 		createQGramIndexDirOnly(indexDir(index), indexBucketMap(index), indexText(index), indexShape(index), getStepSize(index));
1928 		return true;
1929 	}
1930 
1931 
1932 //////////////////////////////////////////////////////////////////////////////
1933 /**
1934 .Function.getKmerSimilarityMatrix:
1935 ..summary:Creates a matrix storing the number of common q-grams between all pairs of sequences.
1936 ..cat:Index
1937 ..signature:getKmerSimilarityMatrix(index, distMat[, seqSet])
1938 ..param.index:A q-gram index.
1939 ...type:Spec.IndexQGram
1940 ..param.distMat:The resulting q-gram similarity matrix.
1941 ...type:Concept.Container
1942 ..param.seqSet:Contains sequence numbers if only a subset of sequences should be compared.
1943 ...type:Concept.Container
1944 ..remarks:$distMat$ will be resized to $seqCount*seqCount$, where $seqCount$ is the number of sequences in the index/in $seqSet$.
1945 The number of common q-grams between sequence $i$ and $j$ is stored at position $i*seqCount + j$.
1946 It sums up the minimum number of q-gram occurrences between both sequences for each q-gram.
1947 ..include:seqan/index.h
1948 */
1949 
1950 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TDistMatrix >
1951 	inline void getKmerSimilarityMatrix(
1952 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
1953 		TDistMatrix &distMat)
1954 	{
1955 		typedef Index< TObject, IndexQGram<TShapeSpec,TSpec> >	TIndex;
1956 		typedef typename Size<TIndex>::Type						TSize;
1957 		typedef typename Size<TDistMatrix>::Type				TSizeMat;
1958 		typedef typename Value<TDistMatrix>::Type				TValueMat;
1959 
1960 		typedef typename Fibre<TIndex, QGramCountsDir>::Type	TCountsDir;
1961 		typedef typename Iterator<TCountsDir, Standard>::Type	TIterCountsDir;
1962 		typedef typename Fibre<TIndex, QGramCounts>::Type		TCounts;
1963 		typedef typename Iterator<TCounts, Standard>::Type		TIterCounts;
1964 
1965 		// declare requirements
1966 		indexRequire(index, QGramCounts());
1967 
1968 		// initialize distance matrix
1969 		TSizeMat seqNoLength = countSequences(index);
1970 		clear(distMat);
1971 		resize(distMat, seqNoLength * seqNoLength);
1972 		arrayFill(begin(distMat, Standard()), end(distMat, Standard()), 0);
1973 
1974 		TIterCountsDir itCountsDir = begin(indexCountsDir(index), Standard());
1975 		TIterCountsDir itCountsDirEnd = end(indexCountsDir(index), Standard());
1976 		TIterCounts itCountsBegin = begin(indexCounts(index), Standard());
1977 
1978 		// for each bucket count common q-grams for each sequence pair
1979 		TSize bucketBegin = *itCountsDir;
1980 		for(++itCountsDir; itCountsDir != itCountsDirEnd; ++itCountsDir)
1981 		{
1982 			TSize bucketEnd = *itCountsDir;
1983 
1984 			// q-gram must occur in at least 2 different sequences
1985 			if (bucketBegin != bucketEnd)
1986 			{
1987 				TIterCounts itA = itCountsBegin + bucketBegin;
1988 				TIterCounts itEnd = itCountsBegin + bucketEnd;
1989 				for(; itA != itEnd; ++itA)
1990 				{
1991 					TSizeMat ofs = (*itA).i1 * seqNoLength;
1992 					TSize countA = (*itA).i2;
1993 					TIterCounts itB = itA;
1994 
1995 					for(; itB != itEnd; ++itB)
1996 					{
1997 						TSize countB = (*itB).i2;
1998 						if (countA < countB)
1999 							distMat[ofs + (*itB).i1] += countA;
2000 						else
2001 							distMat[ofs + (*itB).i1] += countB;
2002 					}
2003 				}
2004 			}
2005 			bucketBegin = bucketEnd;
2006 		}
2007 
2008 		// copy upper triangle to lower triangle and scale
2009 		for(TSizeMat row = 0; row < seqNoLength; ++row)
2010 		{
2011 			TValueMat maxValRow = distMat[row * (seqNoLength + 1)];
2012 			for(TSizeMat col = row + 1; col < seqNoLength; ++col)
2013 			{
2014 				// fractional common kmer count
2015 				TValueMat maxValCol = distMat[col * (seqNoLength + 1)];
2016 				TValueMat val = distMat[row * seqNoLength + col];
2017 
2018 				// number of common q-grams / Number of possible common q-grams
2019 				if (maxValRow < maxValCol) {
2020 					if (maxValRow != 0)
2021 						val /= maxValRow;
2022 					distMat[col * seqNoLength + row] = val;
2023 					distMat[row * seqNoLength + col] = val;
2024 				} else {
2025 					if (maxValCol != 0)
2026 						val /= maxValCol;
2027 					distMat[col * seqNoLength + row] = val;
2028 					distMat[row * seqNoLength + col] = val;
2029 				}
2030 			}
2031 		}
2032 
2033 		// set diagonal to 1
2034 		for(TSizeMat i = 0; i < seqNoLength; ++i)
2035 			distMat[i * (seqNoLength + 1)] = 1;
2036 	}
2037 
2038 
2039 //////////////////////////////////////////////////////////////////////////////
2040 // getKmerSimilarityMatrix
2041 //     for a subset of the StringSet given as a sorted string of sequence numbers
2042 
2043 	template <
2044 		typename TObject,
2045 		typename TShapeSpec,
2046 		typename TSpec,
2047 		typename TDistMatrix,
2048 		typename TSeqNoString
2049 	>
2050 	inline void getKmerSimilarityMatrix(
2051 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2052 		TDistMatrix &distMat,
2053 		TSeqNoString const &seqNo)
2054 	{
2055 		typedef Index< TObject, IndexQGram<TShapeSpec,TSpec> >	TIndex;
2056 		typedef typename Size<TIndex>::Type						TSize;
2057 		typedef typename Size<TDistMatrix>::Type				TSizeMat;
2058 		typedef typename Value<TDistMatrix>::Type				TValueMat;
2059 
2060 		typedef typename Fibre<TIndex, QGramCountsDir>::Type	TCountsDir;
2061 		typedef typename Iterator<TCountsDir, Standard>::Type	TIterCountsDir;
2062 		typedef typename Fibre<TIndex, QGramCounts>::Type		TCounts;
2063 		typedef typename Iterator<TCounts, Standard>::Type		TIterCounts;
2064 		typedef typename Iterator<TSeqNoString, Standard>::Type	TIterSeqNo;
2065 
2066 		// declare requirements
2067 		indexRequire(index, QGramCounts());
2068 
2069 		// initialize distance matrix
2070 		TSizeMat seqNoLength = length(seqNo);
2071 		clear(distMat);
2072 		resize(distMat, seqNoLength * seqNoLength);
2073 		arrayFill(begin(distMat, Standard()), end(distMat, Standard()), 0);
2074 
2075 		TIterCountsDir itCountsDir = begin(indexCountsDir(index), Standard());
2076 		TIterCountsDir itCountsDirEnd = end(indexCountsDir(index), Standard());
2077 		TIterCounts itCountsBegin = begin(indexCounts(index), Standard());
2078 		TIterSeqNo itSetBegin = begin(seqNo, Standard());
2079 		TIterSeqNo itSetEnd = end(seqNo, Standard());
2080 
2081 		// for each bucket count common q-grams for each sequence pair
2082 		TSize bucketBegin = *itCountsDir;
2083 		for(++itCountsDir; itCountsDir != itCountsDirEnd; ++itCountsDir)
2084 		{
2085 			TSize bucketEnd = *itCountsDir;
2086 
2087 			// q-gram must occur in at least 2 different sequences
2088 			if (bucketBegin != bucketEnd)
2089 			{
2090 				TIterCounts itA = itCountsBegin + bucketBegin;
2091 				TIterCounts itEnd = itCountsBegin + bucketEnd;
2092 				TIterSeqNo itSetA = itSetBegin;
2093 
2094 				while (itA != itEnd && itSetA != itSetEnd)
2095 				{
2096 					if ((*itA).i1 < *itSetA)
2097 						++itA;
2098 					else if ((*itA).i1 > *itSetA)
2099 						++itSetA;
2100 					else
2101 					{
2102 						TSizeMat ofs = (itSetA - itSetBegin) * seqNoLength;
2103 						TSize countA = (*itA).i2;
2104 						TIterCounts itB = itA;
2105 						TIterSeqNo itSetB = itSetA;
2106 
2107 						while (itB != itEnd && itSetB != itSetEnd)
2108 						{
2109 							if ((*itB).i1 < *itSetB)
2110 								++itB;
2111 							else if ((*itB).i1 > *itSetB)
2112 								++itSetB;
2113 							else
2114 							{
2115 								TSize countB = (*itB).i2;
2116 								if (countA < countB)
2117 									distMat[ofs + (itSetB - itSetBegin)] += countA;
2118 								else
2119 									distMat[ofs + (itSetB - itSetBegin)] += countB;
2120 								++itB;
2121 								++itSetB;
2122 							}
2123 						}
2124 						++itA;
2125 						++itSetA;
2126 					}
2127 				}
2128 			}
2129 			bucketBegin = bucketEnd;
2130 		}
2131 
2132 		// copy upper triangle to lower triangle and scale
2133 		for(TSizeMat row = 0; row < seqNoLength; ++row)
2134 		{
2135 			TValueMat maxValRow = distMat[row * (seqNoLength + 1)];
2136 			for(TSizeMat col = row + 1; col < seqNoLength; ++col)
2137 			{
2138 				// fractional common kmer count
2139 				TValueMat maxValCol = distMat[col * (seqNoLength + 1)];
2140 				TValueMat val = distMat[row * seqNoLength + col];
2141 
2142 				// number of common q-grams / Number of possible common q-grams
2143 				if (maxValRow < maxValCol) {
2144 					if (maxValRow != 0)
2145 						val /= maxValRow;
2146 					distMat[col * seqNoLength + row] = val;
2147 					distMat[row * seqNoLength + col] = val;
2148 				} else {
2149 					if (maxValCol != 0)
2150 						val /= maxValCol;
2151 					distMat[col * seqNoLength + row] = val;
2152 					distMat[row * seqNoLength + col] = val;
2153 				}
2154 			}
2155 		}
2156 
2157 		// set diagonal to 1
2158 		for(TSizeMat i = 0; i < seqNoLength; ++i)
2159 			distMat[i * (seqNoLength + 1)] = 1;
2160 	}
2161 
2162 
2163 //////////////////////////////////////////////////////////////////////////////
2164 /**
2165 .Function.range:
2166 ..signature:range(index, shape)
2167 ..param.index:A q-gram index.
2168 ...type:Spec.IndexQGram
2169 ..param.shape:A shape object.
2170 ...note:The shape stores the q-gram of the last call to @Function.hash@ or @Function.hashNext@.
2171 ...type:Class.Shape
2172 ..returns:All positions where the q-gram stored in $shape$ occurs in the text (see @Tag.QGram Index Fibres.QGramText@)
2173 are stored in a contiguous range of the suffix array.
2174 $range$ returns begin and end position of this range.
2175 If the type of $index$ is $TIndex$ the return type is $Pair<Size<TIndex>::Type>.
2176 ..note:The necessary index tables are built on-demand via @Function.indexRequire@ if index is not $const$.
2177 ..include:seqan/index.h
2178 */
2179 
2180 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2181 	inline Pair<typename Size< Index< TObject, IndexQGram<TShapeSpec, TSpec> > >::Type>
2182 	range(
2183 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &index,
2184 		Shape< TValue, TShapeSpec2 > const &shape)
2185 	{
2186 		typedef typename Size< Index< TObject, IndexQGram<TShapeSpec, TSpec> > >::Type TSize;
2187 		typedef typename Size< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreDir>::Type>::Type TDirSize;
2188 		TDirSize bucket = getBucket(indexBucketMap(index), value(shape));
2189 		return Pair<TSize>(indexDir(index)[bucket], indexDir(index)[bucket + 1]);
2190 	}
2191 
2192 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2193 	inline Pair<typename Size< Index< TObject, IndexQGram<TShapeSpec, TSpec> > >::Type>
2194 	range(
2195 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2196 		Shape< TValue, TShapeSpec2 > const &shape)
2197 	{
2198 		indexRequire(index, QGramDir());
2199 		return getOccurrences(const_cast<Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &>(index), shape);
2200 	}
2201 
2202 //////////////////////////////////////////////////////////////////////////////
2203 /**
2204 .Function.getOccurrence:
2205 ..signature:getOccurrence(index, shape)
2206 ..param.index:A q-gram index.
2207 ...type:Spec.IndexQGram
2208 ..param.shape:A shape object.
2209 ...note:The shape stores the q-gram of the last call to @Function.hash@ or @Function.hashNext@.
2210 ...type:Class.Shape
2211 ..returns:A position where the q-gram stored in $shape$ occurs in the text (see @Tag.QGram Index Fibres.QGramText@).
2212 If the type of $index$ is $TIndex$ the return type is $SAValue<TIndex>::Type$.
2213 ..include:seqan/index.h
2214 */
2215 
2216 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2217 	inline typename SAValue< Index< TObject, IndexQGram<TShapeSpec, TSpec> > >::Type
2218 	getOccurrence(
2219 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &index,
2220 		Shape< TValue, TShapeSpec2 > const &shape)
2221 	{
2222 		return saAt(indexDir(index)[getBucket(indexBucketMap(index), value(shape))], index);
2223 	}
2224 
2225 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2226 	inline typename SAValue< Index< TObject, IndexQGram<TShapeSpec, TSpec> > >::Type
2227 	getOccurrence(
2228 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2229 		Shape< TValue, TShapeSpec2 > const &shape)
2230 	{
2231 		indexRequire(index, QGramSADir());
2232 		return getOccurrence(const_cast<Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &>(index), shape);
2233 	}
2234 
2235 //////////////////////////////////////////////////////////////////////////////
2236 /**
2237 .Function.getOccurrences:
2238 ..signature:getOccurrences(index, shape)
2239 ..param.index:A q-gram index.
2240 ...type:Spec.IndexQGram
2241 ..param.shape:A shape object.
2242 ...note:The shape stores the q-gram of the last call to @Function.hash@ or @Function.hashNext@.
2243 ...type:Class.Shape
2244 ..returns:All positions where the q-gram stored in $shape$ occurs in the text (see @Tag.QGram Index Fibres.QGramText@).
2245 If the type of $index$ is $TIndex$ the return type is $Infix<Fibre<TIndex, QGramSA>::Type const>::Type$.
2246 ..remarks:The necessary index tables are built on-demand via @Function.indexRequire@ if index is not $const$.
2247 ..include:seqan/index.h
2248 */
2249 
2250 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2251 	inline typename Infix< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreSA>::Type const >::Type
2252 	getOccurrences(
2253 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &index,
2254 		Shape< TValue, TShapeSpec2 > const &shape)
2255 	{
2256 		typedef typename Size<typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreDir>::Type>::Type TDirSize;
2257 		TDirSize bucket = getBucket(indexBucketMap(index), value(shape));
2258 		return infix(indexSA(index), indexDir(index)[bucket], indexDir(index)[bucket + 1]);
2259 	}
2260 
2261 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2262 	inline typename Infix< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreSA>::Type const >::Type
2263 	getOccurrences(
2264 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2265 		Shape< TValue, TShapeSpec2 > const &shape)
2266 	{
2267 		indexRequire(index, QGramSADir());
2268 		return getOccurrences(const_cast<Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &>(index), shape);
2269 	}
2270 
2271 //////////////////////////////////////////////////////////////////////////////
2272 /**
2273 .Function.countOccurrences:
2274 ..signature:countOccurrences(index, shape)
2275 ..param.index:A q-gram index.
2276 ...type:Spec.IndexQGram
2277 ..param.shape:A shape object.
2278 ...note:The shape stores the q-gram of the last call to @Function.hash@ or @Function.hashNext@.
2279 ...type:Class.Shape
2280 ..returns:The number of positions where the q-gram stored in $shape$ occurs in the text (see @Tag.QGram Index Fibres.QGramText@).
2281 If the type of $index$ is $TIndex$ the return type is $Size<TIndex>::Type$.
2282 ..note:The necessary index tables are built on-demand via @Function.indexRequire@ if index is not $const$.
2283 ..include:seqan/index.h
2284 */
2285 
2286 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2287 	inline typename Size< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreSA>::Type const >::Type
2288 	countOccurrences(
2289 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &index,
2290 		Shape< TValue, TShapeSpec2 > const &shape)
2291 	{
2292 		typedef typename Size<typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreDir>::Type>::Type TDirSize;
2293 		TDirSize bucket = getBucket(indexBucketMap(index), value(shape));
2294 		return indexDir(index)[bucket + 1] - indexDir(index)[bucket];
2295 	}
2296 
2297 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2298 	inline typename Size< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreSA>::Type const >::Type
2299 	countOccurrences(
2300 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2301 		Shape< TValue, TShapeSpec2 > const &shape)
2302 	{
2303 		indexRequire(index, QGramDir());
2304 		return countOccurrences(const_cast<Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &>(index), shape);
2305 	}
2306 
2307 //////////////////////////////////////////////////////////////////////////////
2308 /**
2309 .Function.countOccurrencesMultiple:
2310 ..summary:Returns the number of occurences of a q-gram for every sequence of a @Class.StringSet@ .
2311 ..signature:countOccurrencesMultiple(index, shape)
2312 ..param.index:A q-gram index of a @Class.StringSet@.
2313 ...type:Spec.IndexQGram
2314 ..param.shape:A shape object.
2315 ...note:The shape stores the q-gram of the last call to @Function.hash@ or @Function.hashNext@.
2316 ...type:Class.Shape
2317 ..returns:A sequence of @Class.Pair.pairs@ (seqNo,count), count>0.
2318 For every @Class.StringSet@ sequence the q-gram occurs in, seqNo is the sequence number and count the number of occurrences.
2319 If the type of $index$ is $TIndex$ the return type is $Infix<Fibre<TIndex, QGramCounts>::Type const>::Type$.
2320 ..remarks:The necessary index tables are built on-demand via @Function.indexRequire@ if index is not $const$.
2321 ..see:Function.countOccurrences
2322 ..include:seqan/index.h
2323 */
2324 
2325 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2326 	inline typename Infix< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreCounts>::Type const >::Type
2327 	countOccurrencesMultiple(
2328 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &index,
2329 		Shape< TValue, TShapeSpec2 > const &shape)
2330 	{
2331 		typedef typename Size<typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreCountsDir>::Type>::Type TDirSize;
2332 		TDirSize bucket = getBucket(indexBucketMap(index), value(shape));
2333 		return infix(indexCounts(index), indexCountsDir(index)[bucket], indexCountsDir(index)[bucket + 1]);
2334 	}
2335 
2336 	template < typename TObject, typename TShapeSpec, typename TSpec, typename TShapeSpec2, typename TValue >
2337 	inline typename Infix< typename Fibre< Index< TObject, IndexQGram<TShapeSpec, TSpec> >, FibreCounts>::Type const >::Type
2338 	countOccurrencesMultiple(
2339 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2340 		Shape< TValue, TShapeSpec2 > const &shape)
2341 	{
2342 		indexRequire(index, QGramCounts());
2343 		return countOccurrencesMultiple(const_cast<Index< TObject, IndexQGram<TShapeSpec, TSpec> > const &>(index), shape);
2344 	}
2345 
2346 
2347 //////////////////////////////////////////////////////////////////////////////
2348 // clear
2349 
2350 	template < typename TText, typename TShapeSpec, typename TSpec >
2351 	inline void clear(Index<TText, IndexQGram<TShapeSpec, TSpec> > &index)
2352 	{
2353 		clear(getFibre(index, QGramSA()));
2354 		clear(getFibre(index, QGramDir()));
2355 		clear(getFibre(index, QGramCounts()));
2356 		clear(getFibre(index, QGramCountsDir()));
2357 	}
2358 
2359 //////////////////////////////////////////////////////////////////////////////
2360 // open
2361 
2362 	template < typename TObject, typename TShapeSpec, typename TSpec >
2363 	inline bool open(
2364 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2365 		const char *fileName,
2366 		int openMode)
2367 	{
2368 		String<char> name;
2369 		name = fileName;	append(name, ".txt");
2370 		bool result = true;
2371 		if ((!open(getFibre(index, QGramText()), toCString(name), openMode)) &&
2372 			(!open(getFibre(index, QGramText()), fileName, openMode)))
2373 			result = false;
2374 
2375 		name = fileName;	append(name, ".sa");	open(getFibre(index, QGramSA()), toCString(name), openMode);
2376 		name = fileName;	append(name, ".dir");	open(getFibre(index, QGramDir()), toCString(name), openMode);
2377 		return result;
2378 	}
2379 	template < typename TObject, typename TShapeSpec, typename TSpec >
2380 	inline bool open(
2381 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2382 		const char *fileName)
2383 	{
2384 		return open(index, fileName, OPEN_RDONLY);
2385 	}
2386 
2387 
2388 //////////////////////////////////////////////////////////////////////////////
2389 // save
2390 
2391 	template < typename TObject, typename TShapeSpec, typename TSpec >
2392 	inline bool save(
2393 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2394 		const char *fileName,
2395 		int openMode)
2396 	{
2397 		String<char> name;
2398 		name = fileName;	append(name, ".txt");
2399 		if ((!save(getFibre(index, QGramText()), toCString(name), openMode)) &&
2400 			(!save(getFibre(index, QGramText()), fileName, openMode))) return false;
2401 
2402 		name = fileName;	append(name, ".sa");	save(getFibre(index, QGramSA()), toCString(name), openMode);
2403 		name = fileName;	append(name, ".dir");	save(getFibre(index, QGramDir()), toCString(name), openMode);
2404 		return true;
2405 	}
2406 	template < typename TObject, typename TShapeSpec, typename TSpec >
2407 	inline bool save(
2408 		Index< TObject, IndexQGram<TShapeSpec, TSpec> > &index,
2409 		const char *fileName)
2410 	{
2411 		return save(index, fileName, OPEN_WRONLY | OPEN_CREATE);
2412 	}
2413 
2414 }
2415 
2416 #endif //#ifndef SEQAN_HEADER_...
2417