1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_ALIGN_BASE_H
34 #define SEQAN_HEADER_ALIGN_BASE_H
35 
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38 
39 //////////////////////////////////////////////////////////////////////////////
40 // Alignment Specific Metafunctions
41 //////////////////////////////////////////////////////////////////////////////
42 
43 /**
44 .Metafunction.Cols:
45 ..summary:Type of column container of an alignment.
46 ..signature:Cols<T>::Type
47 ..param.T:An alignment type.
48 ...type:Class.Align
49 ..returns.param.Type:The type of the container that allows access to the columns of $T$.
50 ..include:seqan/align.h
51 */
52 template <typename T> struct Cols;
53 
54 //////////////////////////////////////////////////////////////////////////////
55 
56 /**
57 .Metafunction.Col:
58 ..summary:Type of a column in an alignment.
59 ..signature:Col<T>::Type
60 ..param.T:An alignment type.
61 ...type:Class.Align
62 ..returns.param.Type:The column type of $T$.
63 ..remarks:The returned type is equivalent to $Value<Cols<T>::Type>::Type$.
64 ..see:Metafunction.Cols
65 ..see:Metafunction.Value
66 ..include:seqan/align.h
67 */
68 template <typename T>
69 struct Col:
70 	Value<typename Cols<T>::Type>
71 {
72 };
73 
74 //////////////////////////////////////////////////////////////////////////////
75 
76 /**
77 .Metafunction.Rows:
78 ..summary:Type of row container of an alignment.
79 ..signature:Rows<T>::Type
80 ..param.T:An alignment type.
81 ...type:Class.Align
82 ..returns.param.Type:The type of the container that allows access to the rows of $T$.
83 ..see:Metafunction.Cols
84 ..include:seqan/align.h
85 */
86 template <typename T> struct Rows;
87 
88 //////////////////////////////////////////////////////////////////////////////
89 
90 /**
91 .Metafunction.Row:
92 ..summary:Type of a row in an alignment.
93 ..signature:Row<T>::Type
94 ..param.T:An alignment type.
95 ...type:Class.Align
96 ..returns.param.Type:The row type of $T$.
97 ..remarks:The returned type is equivalent to $Value<Rows<T>::Type>::Type$.
98 ..see:Metafunction.Rows
99 ..see:Metafunction.Value
100 ..include:seqan/align.h
101 */
102 template <typename T>
103 struct Row:
104 	Value<typename Rows<T>::Type>
105 {
106 };
107 
108 template <typename T>
109 struct Row<T const> {
110 	typedef typename Row<T>::Type const Type;
111 };
112 
113 
114 
115 //////////////////////////////////////////////////////////////////////////////
116 //////////////////////////////////////////////////////////////////////////////
117 // Align
118 //////////////////////////////////////////////////////////////////////////////
119 //Default implementation: array of Gaps objects
120 
121 /**
122 .Class.Align:
123 ..cat:Alignments
124 ..summary:An alignment of sequences.
125 ..signature:Align<TSource, TSpec>
126 ..param.TSource:Type of the ungapped sequences.
127 ...metafunction:Metafunction.Source
128 ..param.TSpec:The specializing type.
129 ...metafunction:Metafunction.Spec
130 ...XXdefault:@Spec.ArrayGaps@
131 ..remarks:The default implementation of $Align$ stores the alignment in a set of @Class.Gaps.Gaps<TSource.TSpec>@ objects.
132  Hence, the default implementation is row-based, so it will be faster to access the alignment row-wise than column-wise.
133 ..include:seqan/align.h
134 */
135 
136 template <typename TSource, typename TSpec = ArrayGaps>
137 class Align
138 {
139 public:
140 	typedef Gaps<TSource, TSpec> TGaps;
141 	typedef String<TGaps> TRows;
142 	typedef typename Size<TRows>::Type TRowsSize;
143 
144 	TRows data_rows;
145 
146 //____________________________________________________________________________
147 public:
148 	Align()
149 	{
150 SEQAN_CHECKPOINT
151 	}
152 	Align(Align const & _other):
153 		data_rows(_other.data_rows)
154 	{
155 SEQAN_CHECKPOINT
156 	}
157 	template <typename TString, typename TStringsetSpec>
158 	Align(StringSet<TString, TStringsetSpec> & stringset)
159 	{
160 SEQAN_CHECKPOINT
161 		setStrings(*this, stringset);
162 	}
163 	~Align()
164 	{
165 SEQAN_CHECKPOINT
166 	}
167 
168 	Align const &
169 	operator = (Align const & _other)
170 	{
171 SEQAN_CHECKPOINT
172 		data_rows = _other.data_rows;
173 		return *this;
174 	}
175 
176 //____________________________________________________________________________
177 };
178 
179 template <typename TSource, typename TSpec>
180 inline
181 void move(Align<TSource, TSpec> & target, Align<TSource, TSpec> & source)
182 {
183     SEQAN_CHECKPOINT;
184     move(target.data_rows, source.data_rows);
185 }
186 
187 //////////////////////////////////////////////////////////////////////////////
188 //ALIGN INTERFACE
189 //////////////////////////////////////////////////////////////////////////////
190 
191 //////////////////////////////////////////////////////////////////////////////
192 //Metafunctions
193 //////////////////////////////////////////////////////////////////////////////
194 
195 ///.Metafunction.Value.param.T.type:Class.Align
196 
197 template <typename TSource, typename TSpec>
198 struct Value<Align<TSource, TSpec> >:
199 	Value<Gaps<TSource, TSpec> >
200 {
201 };
202 template <typename TSource, typename TSpec>
203 struct Value<Align<TSource, TSpec> const>:
204 	Value<Gaps<TSource, TSpec> const>
205 {
206 };
207 
208 //////////////////////////////////////////////////////////////////////////////
209 
210 ///.Metafunction.GetValue.param.T.type:Class.Align
211 
212 template <typename TSource, typename TSpec>
213 struct GetValue<Align<TSource, TSpec> >:
214 	GetValue<Gaps<TSource, TSpec> >
215 {
216 };
217 template <typename TSource, typename TSpec>
218 struct GetValue<Align<TSource, TSpec> const>:
219 	GetValue<Gaps<TSource, TSpec> const>
220 {
221 };
222 
223 //////////////////////////////////////////////////////////////////////////////
224 
225 ///.Metafunction.Reference.param.T.type:Class.Align
226 
227 template <typename TSource, typename TSpec>
228 struct Reference<Align<TSource, TSpec> >:
229 	Reference<Gaps<TSource, TSpec> >
230 {
231 };
232 template <typename TSource, typename TSpec>
233 struct Reference<Align<TSource, TSpec> const>:
234 	Reference<Gaps<TSource, TSpec> const>
235 {
236 };
237 
238 //////////////////////////////////////////////////////////////////////////////
239 
240 // struct Cols<Align>: see below (in align_cols_base)
241 
242 //////////////////////////////////////////////////////////////////////////////
243 
244 ///.Metafunction.Rows.param.T.type:Class.Align
245 
246 template <typename TSource, typename TSpec>
247 struct Rows<Align<TSource, TSpec> >
248 {
249 	typedef String<Gaps<TSource, TSpec> > Type;
250 };
251 template <typename TSource, typename TSpec>
252 struct Rows<Align<TSource, TSpec> const>
253 {
254 	typedef String<Gaps<TSource, TSpec> > const Type;
255 };
256 
257 //////////////////////////////////////////////////////////////////////////////
258 
259 ///.Metafunction.Source.param.T.type:Class.Align
260 
261 template <typename TSource, typename TSpec>
262 struct Source<Align<TSource, TSpec> >
263 {
264 	typedef TSource Type;
265 };
266 template <typename TSource, typename TSpec>
267 struct Source<Align<TSource, TSpec> const >
268 {
269 	typedef TSource Type;
270 };
271 
272 
273 //////////////////////////////////////////////////////////////////////////////
274 
275 /**
276 .Metafunction.StringSetType:
277 ..summary:Return type of @Function.stringSet@ function.
278 ..signature:StringSetType<T>::Type
279 ..param.T:Alignment data structure.
280 ..param.T.Type:Spec.Alignment Graph
281 ..param.T.Type:Class.Align
282 ..returns.param.Type:A @Class.StringSet.string set@ type of a reference to a string set type.
283 ..see:Function.stringSet
284 ..include:seqan/align.h
285 */
286 template <typename T>
287 struct StringSetType;
288 
289 template <typename TSource, typename TSpec>
290 struct StringSetType<Align<TSource, TSpec> >
291 {
292 	typedef StringSet<TSource, Dependent<> > Type;
293 };
294 template <typename TSource, typename TSpec>
295 struct StringSetType<Align<TSource, TSpec> const >
296 {
297 	typedef StringSet<TSource, Dependent<> > Type;
298 };
299 
300 
301 //////////////////////////////////////////////////////////////////////////////
302 // Functions
303 //////////////////////////////////////////////////////////////////////////////
304 
305 /**
306 .Function.rows:
307 ..cat:Alignments
308 ..summary:The container of rows in an alignment.
309 ..signature:Rows rows(align)
310 ..param.align:An alignment.
311 ...type:Class.Align
312 ..returns:The container of rows in $align$.
313 ...metafunction:Metafunction.Rows
314 ..see:Function.cols
315 ..see:Metafunction.Rows
316 ..include:seqan/align.h
317 */
318 template <typename TSource, typename TSpec>
319 inline typename Rows< Align<TSource, TSpec> >::Type &
320 rows(Align<TSource, TSpec> & me)
321 {
322 SEQAN_CHECKPOINT
323 	return me.data_rows;
324 }
325 template <typename TSource, typename TSpec>
326 inline typename Rows< Align<TSource, TSpec> const >::Type &
327 rows(Align<TSource, TSpec> const & me)
328 {
329 SEQAN_CHECKPOINT
330 	return me.data_rows;
331 }
332 
333 //////////////////////////////////////////////////////////////////////////////
334 
335 /**
336 .Function.row:
337 ..cat:Alignments
338 ..summary:A row in an alignment.
339 ..signature:Row & row(align, position)
340 ..param.align:An alignment.
341 ...type:Class.Align
342 ..param.position:A position in the @Function.rows@ container of $align$.
343 ..returns:The row in @Function.rows@ container of $align$ at the given $position$.
344 ...metafunction:Metafunction.Row
345 ..remarks:This function is equivalent to $value(rows(align), position)$.
346 ..see:Function.rows
347 ..see:Function.col
348 ..see:Metafunction.Row
349 ..include:seqan/align.h
350 */
351 template <typename TSource, typename TSpec, typename TPosition>
352 inline typename Row< Align<TSource, TSpec> >::Type &
353 row(Align<TSource, TSpec> & me,
354 	TPosition _pos)
355 {
356 SEQAN_CHECKPOINT
357 	return value(rows(me), _pos);
358 }
359 template <typename TSource, typename TSpec, typename TPosition>
360 inline typename Row< Align<TSource, TSpec> const>::Type &
361 row(Align<TSource, TSpec> const & me,
362 	TPosition _pos)
363 {
364 SEQAN_CHECKPOINT
365 	return value(rows(me), _pos);
366 }
367 
368 //////////////////////////////////////////////////////////////////////////////
369 
370 /**
371 .Function.cols:
372 ..cat:Alignments
373 ..summary:The container of columns in an alignment.
374 ..signature:Cols cols(align)
375 ..param.align:An alignment.
376 ...type:Class.Align
377 ..returns:The container of columns in $align$.
378 ...metafunction:Metafunction.Cols
379 ..see:Metafunction.Cols
380 ..include:seqan/align.h
381 */
382 template <typename TSource, typename TSpec>
383 inline typename Cols< Align<TSource, TSpec> >::Type
384 cols(Align<TSource, TSpec> & me)
385 {
386 SEQAN_CHECKPOINT
387 	return typename Cols< Align<TSource, TSpec> >::Type(me);
388 }
389 template <typename TSource, typename TSpec>
390 inline typename Cols< Align<TSource, TSpec> const>::Type
391 cols(Align<TSource, TSpec> const & me)
392 {
393 SEQAN_CHECKPOINT
394 	return typename Cols< Align<TSource, TSpec> const>::Type(me);
395 }
396 
397 //////////////////////////////////////////////////////////////////////////////
398 
399 /**
400 .Function.col:
401 ..cat:Alignments
402 ..summary:A column in an alignment.
403 ..signature:Col & col(align, position)
404 ..param.align:An alignment.
405 ...type:Class.Align
406 ..param.position:A position in the @Function.cols@ container of $align$.
407 ..returns:The column in @Function.cols@ container of $align$ at the given $position$.
408 ...metafunction:Metafunction.Col
409 ..remarks:This function is equivalent to $value(cols(align), position)$.
410 ..see:Function.cols
411 ..see:Metafunction.Col
412 ..include:seqan/align.h
413 */
414 template <typename TSource, typename TSpec, typename TPosition>
415 inline typename Col< Align<TSource, TSpec> >::Type
416 col(Align<TSource, TSpec> & me,
417 	TPosition _pos)
418 {
419 SEQAN_CHECKPOINT
420 	return value(cols(me), _pos);
421 }
422 template <typename TSource, typename TSpec, typename TPosition>
423 inline typename Col< Align<TSource, TSpec> const>::Type
424 col(Align<TSource, TSpec> const & me,
425 	TPosition _pos)
426 {
427 SEQAN_CHECKPOINT
428 	return value(cols(me), _pos);
429 }
430 
431 //////////////////////////////////////////////////////////////////////////////
432 
433 ///.Function.detach.param.object.type:Class.Align
434 
435 template <typename TSource, typename TSpec>
436 inline void
437 detach(Align<TSource, TSpec> & me)
438 {
439 SEQAN_CHECKPOINT
440 	typedef Align<TSource, TSpec> TAlign;
441 	typedef typename Rows<TAlign>::Type TRows;
442 	typedef typename Iterator<TRows, Standard>::Type TRowsIterator;
443 
444 	TRowsIterator it = begin(rows(me));
445 	TRowsIterator it_end = end(rows(me));
446 
447 	while (it != it_end)
448 	{
449 		detach(*it);
450 		++it;
451 	}
452 }
453 
454 
455 //////////////////////////////////////////////////////////////////////////////
456 
457 template <typename TFile, typename TSource, typename TSpec, typename TIDString>
458 inline void
459 write(TFile & target,
460 	  Align<TSource, TSpec> const & source,
461 	  TIDString const &,
462 	  Raw)
463 {
464 SEQAN_CHECKPOINT
465 	typedef Align<TSource, TSpec> const TAlign;
466 	typedef typename Row<TAlign>::Type TRow;
467 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
468 	typedef typename Position<TAlign>::Type TPosition;
469 
470 	TRowsPosition row_count = length(rows(source));
471 	TPosition begin_ = beginPosition(cols(source));
472 	TPosition end_ = endPosition(cols(source));
473 
474 	unsigned int baseCount=0;
475 	unsigned int leftSpace=6;
476 	while(begin_ < end_) {
477 		unsigned int windowSize_ = 50;
478 		if ((begin_ + windowSize_)>end_) windowSize_=end_ - begin_;
479 
480 		// Print header line
481 		unsigned int offset=0;
482 		if (baseCount != 0) offset = (unsigned int) floor(log((double)baseCount) / log((double)10));
483 		for(unsigned int j = 0;j<leftSpace-offset;++j) {
484 			_streamPut(target, ' ');
485 		}
486 		_streamPutInt(target, baseCount);
487 		baseCount+=windowSize_;
488 		_streamPut(target, ' ');
489 		for(TPosition i = 1;i<=windowSize_;++i) {
490 			if ((i % 10)==0) _streamPut(target, ':');
491 			else if ((i % 5)==0) _streamPut(target, '.');
492 			else _streamPut(target, ' ');
493 		}
494 		_streamPut(target, ' ');
495 		_streamPut(target, '\n');
496 
497 		// Print sequences
498 		for(TRowsPosition i=0;i<2*row_count-1;++i) {
499 			for(unsigned int j = 0;j<leftSpace+2;++j) _streamPut(target, ' ');
500 			if ((i % 2)==0) {
501 				TRow& row_ = row(source, i/2);
502 				typedef typename Iterator<typename Row<TAlign>::Type const, Standard>::Type TIter;
503 				TIter begin1_ = iter(row_, begin_);
504 				TIter end1_ = iter(row_, begin_ + windowSize_);
505 				for (; begin1_ != end1_; ++begin1_) {
506 					if (isGap(begin1_)) _streamPut(target, gapValue<char>());
507 					else _streamPut(target, *begin1_);
508 				}
509 				//_streamWriteRange(target, iter(row_, begin_), iter(row_, begin_ + windowSize_));
510 			} else {
511 				for(unsigned int j = 0;j<windowSize_;++j) {
512 					if ((!isGap(row(source, (i-1)/2), begin_+j)) &&
513 						(!isGap(row(source, (i+1)/2), begin_+j)) &&
514 						(row(source, (i-1)/2)[begin_+j]==row(source, (i+1)/2)[begin_+j]))
515 					{
516 						_streamPut(target, '|');
517 					} else {
518 						_streamPut(target, ' ');
519 					}
520 				}
521 			}
522 			_streamPut(target, '\n');
523 		}
524 		_streamPut(target, '\n');
525 		begin_+=50;
526 	}
527 	_streamPut(target, '\n');
528 }
529 
530 template <typename TSource, typename TSpec>
531 inline void
532 clearClipping(Align<TSource, TSpec> & align_)
533 {
534 SEQAN_CHECKPOINT
535 	typedef typename Rows<Align<TSource, TSpec> >::Type TRows;
536 	typedef typename Iterator<TRows>::Type TRowsIterator;
537 
538 	for (TRowsIterator it = begin(rows(align_)); it != end(rows(align_)); goNext(it))
539 	{
540 		clearClipping(*it);
541 	}
542 }
543 
544 //////////////////////////////////////////////////////////////////////////////
545 // stream operators
546 //////////////////////////////////////////////////////////////////////////////
547 
548 template <typename TStream, typename TSource, typename TSpec>
549 inline TStream &
550 operator << (TStream & target,
551 			 Align<TSource, TSpec> const & source)
552 {
553 SEQAN_CHECKPOINT
554 	write(target, source);
555 	return target;
556 }
557 
558 //////////////////////////////////////////////////////////////////////////////
559 /*
560 template <typename TStream, typename TSource, typename TSpec>
561 inline TStream &
562 operator >> (TStream & source,
563 			 Align<TSource, TSpec> & target)
564 {
565 SEQAN_CHECKPOINT
566 	read(source, target);
567 	return source;
568 }
569 */
570 //////////////////////////////////////////////////////////////////////////////
571 
572 /**
573 .Function.setStrings:
574 ..cat:Alignments
575 ..summary:Loads the sequences of a stringset into an alignment.
576 ..signature:setStrings(align, stringset)
577 ..param.align:An alignment.
578 ...type:Class.Align
579 ..param.stringset:A string set.
580 ...type:Class.StringSet
581 ..remarks:The function clears $align$ and creates an new global alignment between strings in $stringset$ that contains only trainling gaps.
582 The alignment will be dependent from the strings in the stringset; use @Function.detach@ to make $align$ the owner of its strings.
583 ..include:seqan/align.h
584 */
585 
586 template <typename TSource, typename TSpec, typename TSpec2>
587 inline void
588 setStrings(Align<TSource, TSpec> & me,
589 		   StringSet<TSource, TSpec2> & stringset)
590 {
591 SEQAN_CHECKPOINT
592 	typedef Align<TSource, TSpec> TAlign;
593 	typedef StringSet<TSource, TSpec2> TStringset;
594 
595 	typedef typename Rows<TAlign>::Type TRows;
596 	typedef typename Iterator<TRows>::Type TRowsIterator;
597 	typedef typename Size<TStringset>::Type TStringsetSize;
598 
599 	clear(me.data_rows);
600 	resize(me.data_rows, length(stringset));
601 	TRowsIterator it = begin(rows(me));
602 	TStringsetSize stringset_length = length(stringset);
603 	for (TStringsetSize i = 0; i < stringset_length; ++i)
604 	{
605 		setSource(*it, value(stringset, i));
606 		++it;
607 	}
608 }
609 
610 //////////////////////////////////////////////////////////////////////////////
611 
612 template <typename TSource, typename TSpec>
613 inline void
614 clearGaps(Align<TSource, TSpec> & me)
615 {
616 	typedef Align<TSource, TSpec> TAlign;
617 	typedef typename Rows<TAlign>::Type TRows;
618 	typedef typename Iterator<TRows>::Type TRowsIterator;
619 
620 	for (TRowsIterator it = begin(rows(me)); it != end(rows(me)); goNext(it))
621 	{
622 		clearGaps(*it);
623 	}
624 }
625 
626 //////////////////////////////////////////////////////////////////////////////
627 /**
628 .Function.stringSet:
629 ..param.g.type:Class.Align
630 ..include:seqan/align.h
631 */
632 template <typename TSource, typename TSpec>
633 inline typename StringSetType<Align<TSource, TSpec> >::Type
634 stringSet(Align<TSource, TSpec> & me)
635 {
636 SEQAN_CHECKPOINT
637 	typedef Align<TSource, TSpec> TAlign;
638 	typedef typename StringSetType<TAlign>::Type TStringSet;
639 
640 	typedef typename Rows<TAlign>::Type TRows;
641 	typedef typename Iterator<TRows>::Type TRowsIterator;
642 
643 	TStringSet ss;
644 
645 	for (TRowsIterator it = begin(rows(me)); it != end(rows(me)); goNext(it))
646 	{
647 		appendValue(ss, source(*it));
648 	}
649 	return ss;
650 }
651 
652 //////////////////////////////////////////////////////////////////////////////
653 
654 }// namespace SEQAN_NAMESPACE_MAIN
655 
656 #endif //#ifndef SEQAN_HEADER_...
657