1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_GRAPH_REFINE_EXACT_H
34 #define SEQAN_HEADER_GRAPH_REFINE_EXACT_H
35 
36 
37 namespace SEQAN_NAMESPACE_MAIN
38 {
39 
40 
41 
42 struct TagExactRefinement_;
43 typedef Tag<TagExactRefinement_> const ExactRefinement;
44 
45 //exact method, every cut is made (unless it already exists)
46 template<typename TValue>
47 inline bool
_cutIsValid(String<std::set<TValue>> & all_nodes,TValue seq_i_pos,TValue,typename std::set<TValue>::iterator iter,TValue,Tag<TagExactRefinement_> const)48 _cutIsValid(String<std::set<TValue> > & all_nodes,
49 		TValue seq_i_pos,
50 		TValue,
51 		typename std::set<TValue>::iterator iter,
52 		TValue,
53 		Tag<TagExactRefinement_> const)
54 {
55 SEQAN_CHECKPOINT
56 	//cut already exists
57 	if(iter != all_nodes[seq_i_pos].end())
58 		return false;
59 	return true;
60 }
61 
62 
63 template<typename TSize, typename TSpec,typename TPos>
64 inline void
_updateCutPosition(Fragment<TSize,ExactReversableFragment<TSpec>> & f,TPos & pos_j)65 _updateCutPosition(Fragment<TSize, ExactReversableFragment<TSpec> > & f, TPos & pos_j)
66 {
67 	if(f.reversed)
68 		++pos_j;
69 }
70 //template<typename TSize, typename TSpec,typename TPos>
71 //inline void
72 //_updateCutPosition(Fragment<TSize, ExactReversableFragment<TSpec> > const& f, TPos & pos_j)
73 //{
74 //	if(f.reversed)
75 //		++pos_j;
76 //}
77 
78 
79 template<typename TFrag,typename TPos>
80 inline void
_updateCutPosition(TFrag &,TPos &)81 _updateCutPosition(TFrag &, TPos &)
82 {
83 	return;
84 }
85 
86 ///////////////////////////////////////////////////////////////////////////////////////////////////////
87 //Recursive Refinement
88 //refine position node_i on sequence seq_i
89 template<typename TValue, typename TAlignmentString, typename TStringSet,typename TGraph, typename TPropertyMap,typename TSeqMap, typename TTagSpec>
90 inline void
_refine(TValue node_i,TValue seq_i_id,TStringSet & seqs,TSeqMap & seq_map,TAlignmentString & alis,String<TGraph> & gs,String<TPropertyMap> & pms,String<std::set<TValue>> & all_nodes,TValue min_len,Tag<TTagSpec> tag)91 _refine(TValue node_i,
92 	 TValue seq_i_id,
93 	 TStringSet & seqs,
94 	 TSeqMap & seq_map,
95 	 TAlignmentString & alis,
96 	 String<TGraph> & gs,
97 	 String<TPropertyMap> & pms,
98      String<std::set<TValue> > & all_nodes,
99 	 TValue min_len,
100 	 Tag<TTagSpec> tag)
101 {
102 SEQAN_CHECKPOINT
103 	typedef typename Cargo<typename Value<TPropertyMap>::Type>::Type TAlignmentPointer;
104 	typedef typename Iterator<String<TAlignmentPointer>, Rooted>::Type TSegmentIterator;
105 	//find all segment matches that contain the current position (node_i)
106 	String<TAlignmentPointer> relevant_segments;
107 	TValue seq_i_pos = idToPosition(seqs,seq_i_id);
108 	findIntervalsExcludeTouching(gs[seq_i_pos],pms[seq_i_pos],node_i,relevant_segments);
109 
110 
111 	TSegmentIterator segment_it = begin(relevant_segments);
112 	TSegmentIterator segment_end = end(relevant_segments);
113 	//foreach of those segments
114 	while(segment_it != segment_end)
115 	{
116 		TValue match_id = (*segment_it).i1; // segment match
117 		TValue seg_num = (*segment_it).i2; // first or second segment in segment match?
118 
119 		//get the sequence that node_i needs to be projected onto (seq_j)
120 		//and get the projected position (pos_j)
121 		TValue seq_j_id, node_j;
122 		_getOtherSequenceAndProject(alis[match_id],seg_num,seq_map,seq_i_id,node_i,seq_j_id,node_j);
123 		TValue seq_j_pos = idToPosition(seqs,seq_j_id);
124 		_updateCutPosition(alis[match_id],node_j);
125 
126 		typename std::set<TValue>::iterator iter;
127 		iter = all_nodes[seq_j_pos].find(node_j);
128 
129 		//if node does not exist yet ---> insert and continue cutting
130 		if(_cutIsValid(all_nodes,seq_j_pos,node_j,iter,min_len,tag))
131 		{
132 			all_nodes[seq_j_pos].insert(node_j);
133 			_refine(node_j,seq_j_id,seqs,seq_map,alis,gs,pms,all_nodes,min_len,tag);
134 			//TODO: else //verschmelzen, abschneiden und �bergehen, erst sp�ter...
135 			//do nothing or resolve problems
136 		}
137 
138 		++segment_it;
139 	}
140 }
141 
142 //template<typename TFragSize, typename TFragSpec>
143 //void
144 //printMatch(Fragment<TFragSize,TFragSpec> & f)
145 //{
146 //	::std::cout << "FRAGMENT:" << " f.len = "<< f.len <<std::endl;
147 //	::std::cout << "f.seqId1 = "<< f.seqId1 << " f.begin1 = " << f.begin1 << std::endl;
148 //	::std::cout << "f.seqId2 = "<< f.seqId2 << " f.begin2 = " << f.begin2 << std::endl;
149 //}
150 
151 //template<typename TAlign>
152 //void
153 //printMatch(TAlign & f)
154 //{
155 //	::std::cout << f;
156 //}
157 
158 ///////////////////////////////////////////////////////////////////////////////////////////////////////
159 //Construct interval trees
160 ////////////////////////////////////////////////////////////////////////////////////////////////////
161 //construct intervals from allignments for each sequence (other Alignment types)
162 template<typename TInterval, typename TStringSet, typename TAlignmentString, typename TSeqMap>
163 void
_buildIntervalsForAllSequences(TAlignmentString & alis,String<String<TInterval>> & intervals,TStringSet & seqs,TSeqMap & seq_map)164 _buildIntervalsForAllSequences(TAlignmentString & alis,
165 							   String<String<TInterval> > & intervals,
166 	   						   TStringSet & seqs,
167 							   TSeqMap & seq_map)
168 {
169 SEQAN_CHECKPOINT
170 
171 	typedef typename Value<TInterval>::Type TValue;
172 	typedef typename Cargo<TInterval>::Type TCargo;
173 	typedef typename Iterator<TAlignmentString,Standard>::Type TAliIterator;
174 	TAliIterator ali_it = begin(alis,Standard());
175 	TAliIterator ali_end = end(alis,Standard());
176 	TValue ali_counter = 0;
177 	//foreach alignment
178 	while(ali_it != ali_end)
179 	{
180 		TValue seq_i_id,begin_,end_;
181 		//printMatch(*ali_it);
182 		//get the first sequence (and its begin and end) that takes part in the alignment (seq_i)
183 		_getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_,end_,0);
184 		TValue seq_i_pos = idToPosition(seqs, seq_i_id);
185 		//and append the interval (ali_begin, ali_end) with cargo ali* to the list of intervals of seq_i
186 		appendValue(intervals[seq_i_pos],IntervalAndCargo<TValue,TCargo>(begin_,end_,TCargo(ali_counter,0)));
187 
188 		//get the second sequence (and its begin and end) that takes part in the alignment (seq_i)
189 		_getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_,end_,1);
190 		seq_i_pos = idToPosition(seqs, seq_i_id);
191 		//and again append the interval (ali_begin, ali_end) with cargo ali* to the list of intervals of seq_i
192 		appendValue(intervals[seq_i_pos],IntervalAndCargo<TValue,TCargo>(begin_,end_,TCargo(ali_counter,1)));
193 
194 		++ali_counter;
195 		++ali_it;
196 	}
197 
198 }
199 
200 
201 //get all intervals from the alignments and construct an interval tree for each sequence
202 template<typename TGraph, typename TPropertyMap, typename TAlignmentString, typename TSequence, typename TSetSpec, typename TValue, typename TSeqMap>
203 void
_createTreesForAllSequences(String<TGraph> & gs,String<TPropertyMap> & pms,TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TValue numSequences)204 _createTreesForAllSequences(String<TGraph> & gs,
205 						   String<TPropertyMap> & pms,
206 						   TAlignmentString & alis,
207 						   StringSet<TSequence,TSetSpec> & seqs,
208                            TSeqMap & seq_map,
209 						   TValue numSequences)
210 {
211 SEQAN_CHECKPOINT
212 	typedef typename Value<TAlignmentString>::Type TAlignment;
213 //	typedef TAlignment* TCargo;
214 	typedef Pair<unsigned,unsigned,BitCompressed<31,1> > TCargo;
215 	typedef IntervalAndCargo<int,TCargo> TInterval;
216 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
217 
218 	//std::cout <<"create interval trees...";
219 	clock_t start, finish1;
220 	double duration;
221 	start = clock();
222 	//one tree for each sequence
223 	resize(gs,numSequences);
224 	resize(pms,numSequences);
225 
226 	//and one string of intervals for each sequence
227 	String<String<TInterval> > intervals;
228 	resize(intervals,numSequences);
229 	//fill intervals
230 	_buildIntervalsForAllSequences(alis,intervals,seqs,seq_map);
231 
232 	TValue i = 0;
233 
234 	while(i < numSequences)
235 	{
236 		//std::cout << (numSequences-i) <<" more ("<<length(intervals[i])<<" intervals)... "<<std::flush;
237 		//vllt zum speicher sparen: numSequences mal alle alis durchgehen
238 		//und jedes mal nur buildIntervalsForJustOneSequence();
239 		TValue center = length(seqs[i])/2; // center raus, hat hier nix zu suchen
240 		//create interval tree!
241 		createIntervalTree(gs[i],pms[i],intervals[i],center);
242 
243 		//intervals for sequence i are not needed anymore
244 		clear(intervals[i]);
245 		++i;
246 	}
247 	finish1 = clock();
248 	duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
249 	//std::cout << "\ntook " << duration << " seconds.\n";
250 }
251 
252 
253 //step 1 of constructing the refined alignment graph: create all the nodes
254 template<typename TStringSet,typename TValue,typename TAliGraph>
255 void
_makeRefinedGraphNodes(String<std::set<TValue>> & all_nodes,TStringSet & seqs,TAliGraph & ali_g)256 _makeRefinedGraphNodes(String<std::set<TValue> > & all_nodes,
257 					  TStringSet & seqs,
258 					  TAliGraph & ali_g)
259 {
260 SEQAN_CHECKPOINT
261 	typedef typename std::set<TValue>::iterator TSetIterator;
262 	//for each sequence look at all cut positions and create nodes between them
263 	for(unsigned int seq_i_pos = 0; seq_i_pos < length(seqs); ++seq_i_pos)
264 	{
265 		TValue seq_i_id = positionToId(stringSet(ali_g), seq_i_pos);
266 		TSetIterator it = all_nodes[seq_i_pos].begin();
267 		TSetIterator end_it = all_nodes[seq_i_pos].end();
268 		TSetIterator next_it = it;
269 		if(next_it != end_it)
270 			++next_it;
271 		else
272 			addVertex(ali_g, seq_i_id, 0, length(seqs[seq_i_pos]));
273 
274 		//first unaligned node
275 		if(it != end_it && *it != 0)
276 			addVertex(ali_g, seq_i_id, 0, *it);
277 		//a new node for each interval
278 		while(next_it != end_it)
279 		{
280 			TValue pos_i = *it;
281 			addVertex(ali_g, seq_i_id, pos_i, *next_it - pos_i);
282 			++it;
283 			++next_it;
284 		}
285 		//last unaligned node
286 		if(it !=end_it && *it<length(seqs[seq_i_pos]))
287 			addVertex(ali_g, seq_i_id, *it, (length(seqs[seq_i_pos])) - *it);
288 		all_nodes[seq_i_pos].clear();
289 	}
290 }
291 
292 
293 //step 2 of constructing the refined alignment graph: add all edges
294 //version for exact refinement
295 template<typename TAlignmentString,typename TStringSet,typename TSeqMap, typename TPropertyMap,typename TScore,typename TAliGraph>
296 void
_makeRefinedGraphEdges(TAlignmentString & alis,TPropertyMap & pm,TStringSet & seqs,TSeqMap & seq_map,TScore & score_type,TAliGraph & ali_g,Tag<TagExactRefinement_> const)297 _makeRefinedGraphEdges(TAlignmentString & alis,
298 					   TPropertyMap & pm,
299 					  TStringSet & seqs,
300 				      TSeqMap & seq_map,
301 				      TScore & score_type,
302 					  TAliGraph & ali_g,
303 					  Tag<TagExactRefinement_> const)
304 {
305 SEQAN_CHECKPOINT
306 	typedef typename Value<TAlignmentString>::Type TAlign;
307 	typedef typename Size<TAlign>::Type TValue;
308 	typedef typename Iterator<TAlignmentString, Rooted>::Type TAliIterator;
309 	typedef typename VertexDescriptor<TAliGraph>::Type TVertexDescriptor;
310 	typedef typename EdgeDescriptor<TAliGraph>::Type TEdgeDescriptor;
311 	typedef typename Cargo<TAliGraph>::Type TCargo;
312 	//make edges
313 	TAliIterator ali_it = begin(alis);
314 	TAliIterator ali_end = end(alis);
315 	//for each segment/fragment/alignment
316 	while(ali_it != ali_end)
317 	{
318 		//get sequence, begin position and end position
319 		TValue seq_id,begin_pos,end_pos;
320 		_getSeqBeginAndEnd(*ali_it,seq_map,seq_id,begin_pos,end_pos,(TValue)0);
321 
322 		//get the node represents the current interval (begin_pos until next_cut_pos or end_pos)
323 		TVertexDescriptor act_knot = findVertex(ali_g,seq_id,begin_pos);
324 		TValue act_pos = begin_pos;
325 
326 		//for each interval that lies within the current segment/fragement/alignment
327 		while(act_pos < end_pos)
328 		{
329 			//get other sequence and projected position
330 			TValue seq_j_id,pos_j;
331 			_getOtherSequenceAndProject(*ali_it,(TValue)0,seq_map,seq_id,act_pos,seq_j_id,pos_j);
332 			//find node that contains the projected position (pos_j)
333 			TVertexDescriptor vd = findVertex(ali_g, seq_j_id, pos_j);
334 
335 			SEQAN_ASSERT_TRUE(fragmentBegin(ali_g,vd)==pos_j);
336 			typename Value<TScore>::Type score = _getRefinedMatchScore(score_type,seqs,*ali_it,act_pos,pos_j,fragmentLength(ali_g,act_knot),fragmentLength(ali_g,vd));//,fragmentLength(ali_g,vd));
337 	//		typename Value<TScore>::Type score = fragmentLength(ali_g,vd);
338 			score *= _getRefinedAnnoScore(ali_g,pm,vd,act_knot,score_type);
339 		//this needs to be generalized (makes sense for positive scores only)
340 			if(score <= 0) score = 1;
341 			if(score > 0)
342 			{
343 				if (findEdge(ali_g, act_knot, vd) == 0) addEdge(ali_g,act_knot,vd,(TCargo)score);
344 				else {
345 					TEdgeDescriptor ed = findEdge(ali_g, act_knot, vd);
346 					//if((TCargo)score > getCargo(ed))
347 						//assignCargo(ed, score);
348 					assignCargo(ed, getCargo(ed)+score);
349 				}
350 			}
351 			//prepare for next interval
352 			act_pos += fragmentLength(ali_g,act_knot);
353 			act_knot = findVertex(ali_g,seq_id,act_pos);
354 
355 		}
356 		++ali_it;
357 	}
358 }
359 
360 
361 
362 
363 
364 ////////////////////////////////////////////////////////////////////////////////////////
365 //build refined alignment graph
366 ////////////////////////////////////////////////////////////////////////////////////////
367 //nodes are numbered ascendingly:
368 //seq1   0  1  2  3  4
369 //seq2   5  6  7  8  9 10
370 //seq3  11 12 13 14 15
371 template<typename TValue,typename TAlignmentString,typename TScore,typename TSequence, typename TSetSpec,typename TAliGraph,typename TSeqMap,typename TTagSpec>
372 void
_makeAlignmentGraphFromRefinedSegments(String<std::set<TValue>> & all_nodes,TAlignmentString & alis,TScore & score_type,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TAliGraph & ali_g,Tag<TTagSpec> const tag,bool)373 _makeAlignmentGraphFromRefinedSegments(String<std::set<TValue> > & all_nodes,
374 				   TAlignmentString & alis,
375 				   TScore & score_type,
376 				   StringSet<TSequence, TSetSpec> & seqs,
377 				   TSeqMap & seq_map,
378 				   TAliGraph & ali_g,
379 			   	   Tag<TTagSpec> const tag,
380 				   bool)
381 {
382 SEQAN_CHECKPOINT
383 	//std::cout << "making refined alignment graph...";
384 	//clock_t start, finish1;
385 	//double duration;
386 	//start = clock();
387 
388 	//make nodes (same function for inexact and exact refinement)
389 	_makeRefinedGraphNodes(all_nodes,seqs,ali_g);
390 
391 	bool pm = false;
392 	//add edges (different functions depending on exact/inexact refinement)
393 	_makeRefinedGraphEdges(alis,pm,seqs,seq_map,score_type,ali_g,tag);
394 
395 	//std::cout << "check\n";
396 	//finish1 = clock();
397 	//duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
398 	//std::cout << "\ntook " << duration << " seconds.\n";
399 }
400 
401 
402 
403 template<typename TValue,typename TAlignmentString,typename TScore,typename TSequence, typename TSetSpec,typename TAliGraph,typename TSeqMap,typename TAnnoString,typename TTagSpec>
404 void
_makeAlignmentGraphFromRefinedSegments(String<std::set<TValue>> & all_nodes,TAlignmentString & alis,TScore & score_type,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TAliGraph & ali_g,Tag<TTagSpec> const tag,TAnnoString & annotation)405 _makeAlignmentGraphFromRefinedSegments(String<std::set<TValue> > & all_nodes,
406 				   TAlignmentString & alis,
407 				   TScore & score_type,
408 				   StringSet<TSequence, TSetSpec> & seqs,
409 				   TSeqMap & seq_map,
410 				   TAliGraph & ali_g,
411 			   	   Tag<TTagSpec> const tag,
412 				   TAnnoString & annotation)
413 {
414 SEQAN_CHECKPOINT
415 	//std::cout << "making refined alignment graph...";
416 	//clock_t start, finish1;
417 	//double duration;
418 	//start = clock();
419 
420 	//make nodes (same function for inexact and exact refinement)
421 	_makeRefinedGraphNodes(all_nodes,seqs,ali_g);
422 
423 	//add annotation to nodes
424 	typedef typename Value<TAnnoString>::Type TAnnotation;
425 	//typedef typename Value<TAnnotation>::Type TLabel;
426 	typedef char TLabel;
427 	String<String<TLabel> > pm;
428 	_addNodeAnnotation(seqs,seq_map,annotation,pm,ali_g,tag);
429 
430 	//add edges (different functions depending on exact/inexact refinement)
431 	_makeRefinedGraphEdges(alis,pm,seqs,seq_map,score_type,ali_g,tag);
432 
433 	//std::cout << "check\n";
434 	//finish1 = clock();
435 	//duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
436 	//std::cout << "\ntook " << duration << " seconds.\n";
437 }
438 
439 
440 
441 
442 
443 ////////////////////////////////////////////////////////////////////////////////////////
444 //The big matchRefinement function that does everything: build interval trees, do the
445 //refinement and construct a refined alignment graph
446 ////////////////////////////////////////////////////////////////////////////////////////
447 template<typename TAlignmentString, typename TAnnotation, typename TOutGraph, typename TSequence, typename TSetSpec, typename TScore,typename TTagSpec>
448 void
matchRefinement(TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seq,TScore & score_type,TOutGraph & ali_graph,typename Size<typename Value<TAlignmentString>::Type>::Type min_fragment_len,TAnnotation & annotation,Tag<TTagSpec> const tag)449 matchRefinement(TAlignmentString & alis,
450 				StringSet<TSequence, TSetSpec> & seq,
451 				TScore & score_type,
452 				TOutGraph & ali_graph,
453 				typename Size<typename Value<TAlignmentString>::Type>::Type min_fragment_len,
454 				TAnnotation & annotation,
455 				Tag<TTagSpec> const tag)
456 {
457 SEQAN_CHECKPOINT
458 	////////////////////////////////////////////////////////////////
459 	//typedefs
460 	typedef typename Value<TAlignmentString>::Type TAlign;
461 	typedef typename Iterator<TAlignmentString, Rooted>::Type TAliIterator;
462 	typedef typename Size<TAlign>::Type TValue;
463 //	typedef TValue TCargo;
464 //	typedef Pair<unsigned,unsigned,BitCompressed<31,1> > TCargo;
465 	typedef Pair<unsigned,unsigned,BitCompressed<31,1> > TCargo;
466 	typedef IntervalAndCargo<int,TCargo> TInterval;
467 	typedef Graph<Directed<void,WithoutEdgeId> > TGraph;
468 	typedef IntervalTreeNode<TInterval> TNode;
469 	typedef String<TNode> TPropertyMap;
470 	typedef VertexDescriptor<TGraph>::Type TVertexDescriptor;
471 	typedef String<TCargo> TList;
472 	typedef typename std::set<TValue>::iterator TSetIterator;
473 	typedef typename Cargo<typename Value<TPropertyMap>::Type>::Type TAlignmentPointer;
474 	typedef typename Iterator<String<TAlignmentPointer>, Rooted>::Type TSegmentIterator;
475 
476 
477 	////////////////////////////////////////////////////////////////
478 	TValue numSequences = length(seq);
479 	//weird ID --> good ID map
480 	std::map<const void * ,int> seq_map;
481 	for(int i = 0; i < (int) numSequences; ++i)
482 		seq_map[id(seq[i])] = i;
483 	////////////////////////////////////////////////////////////////
484 	//build interval trees
485 	String<TGraph> gs;
486 	String<TPropertyMap> pms;
487 	_createTreesForAllSequences(gs, pms, alis, seq, seq_map, numSequences);
488 
489 	////////////////////////////////////////////////////////////////
490 	//do refinement
491 	//std::cout <<"refining..."<<std::flush;
492 	clock_t start, finish1;
493 	double duration;
494 	start = clock();
495 
496 	//all_nodes = set of all cut positions
497 	String<std::set<TValue> > all_nodes;
498 	resize(all_nodes,numSequences);
499 
500 	//all_nodes that need to be processed set of all cut positions
501 	String<std::set<TValue> > all_node_queues;
502 	resize(all_node_queues,numSequences);
503 
504 	//call function _refine for each startknoten
505 	TAliIterator ali_it = begin(alis);
506 	TAliIterator ali_end = end(alis);
507 	//for each segment/fragement/alignment
508 	while(ali_it != ali_end)
509 	{
510 		//for each of the two sequences
511 		for(TValue i = 0; i < 2; ++i)
512 		{
513 			TValue seq_i_id,begin_i,end_i;
514 			_getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_i,end_i,i);
515 			TValue seq_i_pos = idToPosition(seq,seq_i_id);
516 
517 			all_node_queues[seq_i_pos].insert(begin_i);
518 			all_node_queues[seq_i_pos].insert(end_i);
519 		}
520 		++ali_it;
521 	}
522 
523 
524 	TSetIterator queueIt;
525 	bool done = false;
526 	while(!done)
527 	{
528 		for(unsigned seq_i_pos = 0; seq_i_pos < numSequences; ++seq_i_pos)
529 		{
530 			queueIt = all_node_queues[seq_i_pos].begin();
531 			while (queueIt != all_node_queues[seq_i_pos].end())
532 			{
533 				TValue node_i = *queueIt;
534 				TSetIterator iter = all_nodes[seq_i_pos].find(node_i);
535 				if(iter == all_nodes[seq_i_pos].end())
536 				{
537 					TValue seq_i_id = positionToId(seq, seq_i_pos);
538 					all_nodes[seq_i_pos].insert(node_i);
539 					String<TAlignmentPointer> relevant_segments;
540 					findIntervalsExcludeTouching(gs[seq_i_pos],pms[seq_i_pos],node_i,relevant_segments);
541 
542 					TSegmentIterator segment_it = begin(relevant_segments);
543 					TSegmentIterator segment_end = end(relevant_segments);
544 					//foreach of those segments
545 					while(segment_it != segment_end)
546 					{
547 						TValue match_id = (*segment_it).i1;
548 						TValue seg_num = (*segment_it).i2;						//get the sequence that node_i needs to be projected onto (seq_j)
549 						//and get the projected position (pos_j)
550 						TValue seq_j_id, node_j;
551 						_getOtherSequenceAndProject(alis[match_id],seg_num,seq_map,seq_i_id,node_i,seq_j_id,node_j);
552 						TValue seq_j_pos = idToPosition(seq,seq_j_id);
553 						_updateCutPosition(alis[match_id],node_j);
554 
555 						typename std::set<TValue>::iterator iter_j;
556 						iter_j = all_nodes[seq_j_pos].find(node_j);
557 
558 						//if node does not exist yet ---> insert and continue cutting
559 						if(iter_j == all_nodes[seq_j_pos].end())
560 						{
561 							all_node_queues[seq_j_pos].insert(node_j);
562 						}
563 
564 						++segment_it;
565 					}
566 
567 				}
568 				++queueIt;
569 			}
570 			all_node_queues[seq_i_pos].clear();
571 		}
572 		unsigned i;
573 		for(i = 0; i < numSequences; ++i)
574 		{
575 			queueIt = all_node_queues[i].begin();
576 			if (queueIt != all_node_queues[i].end())
577 				break;
578 		}
579 		if(i==numSequences)
580 			done=true;
581 	}
582 	_addAnnotationCuts(all_nodes,alis,gs,pms,seq,seq_map,annotation,min_fragment_len,tag);
583 
584 	finish1 = clock();
585 	duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
586 	//std::cout << "\ntook " << duration << " seconds.\n";
587 	//for(int seq_i = 0; seq_i < length(seq); ++seq_i)
588 	//{
589 	//	typename std::set<TValue>::iterator it = all_nodes[seq_i].begin();
590 	//	typename std::set<TValue>::iterator end_it = all_nodes[seq_i].end();
591 	//
592 	//	while(it != end_it)
593 	//	{
594 	//		std::cout << *it << ",";
595 	//		++it;
596 	//	}
597 	//	std::cout << "\n";
598 	//}
599 	//std::cout <<"building tree..."<<std::flush;
600 
601 	////////////////////////////////////////////////////////////////
602 	//build refined alignment graph
603 	_makeAlignmentGraphFromRefinedSegments(all_nodes,alis,score_type,seq,seq_map,ali_graph,tag,annotation);
604 }
605 
606 
607 ///////WRAPPERS
608 
609 /**
610 .Function.matchRefinement:
611 ..signature:matchRefinement(matches,stringSet,scoringScheme,refinedGraph)
612 ..param.matches:The set of matches.
613 ...type:Class.Fragment
614 ...type:Class.Align
615 ...type:Spec.Alignment Graph
616 ..param.scoringScheme:The scoring scheme used to score the refined matches (scores are attached to
617 edges in the refined Alignment Graph).
618 ...remarks:If no scoring scheme is given, all edges get weight 1.
619 ...type:Class.Score
620 ..include:seqan/refinement.h
621 */
622 //exact refinement, score type given
623 template<typename TAlignmentString, typename TScoreValue,typename TScoreSpec,typename TOutGraph, typename TSequence, typename TSetSpec>
624 void
matchRefinement(TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seq,Score<TScoreValue,TScoreSpec> & score_type,TOutGraph & ali_graph)625 matchRefinement(TAlignmentString & alis,
626 				StringSet<TSequence, TSetSpec> & seq,
627 				Score<TScoreValue,TScoreSpec> & score_type,
628 				TOutGraph & ali_graph)
629 {
630 SEQAN_CHECKPOINT
631 	//min_fragment_len = 1   ==> Exact cutting
632 	bool anno = false;
633 	matchRefinement(alis,seq,score_type,ali_graph,1,anno,ExactRefinement());
634 }
635 
636 
637 
638 /**
639 .Function.matchRefinement:
640 ..cat:Alignments
641 ..summary:Refines (i.e. cuts into smaller parts) a set of pairwise segment
642 matches in such a way that none of the segments partly overlap. They are either
643 identical (fully overlapping) or non-overlapping.
644 ..signature:matchRefinement(matches,stringSet,refinedGraph)
645 ..param.stringSet:The StringSet containing the sequences which the matches lie on.
646 ...type:Class.StringSet
647 ..param.refinedGraph:The resulting refined set of matches stored in a graph.
648 ...type:Spec.Alignment Graph
649 ..include:seqan/refinement.h
650 */
651 //exact refinement, score type not given
652 template<typename TFragmentString, typename TOutGraph, typename TSequence, typename TSetSpec>
653 void
matchRefinement(TFragmentString & matches,StringSet<TSequence,TSetSpec> & strSet,TOutGraph & ali_graph)654 matchRefinement(TFragmentString & matches,
655 				StringSet<TSequence, TSetSpec> & strSet,
656 				TOutGraph & ali_graph)
657 {
658 	SEQAN_CHECKPOINT
659 	typename Cargo<TOutGraph>::Type fake_score = 1;
660 	bool anno = false;
661 	matchRefinement(matches,strSet,fake_score,ali_graph,1,anno,ExactRefinement());
662 }
663 
664 
665 }
666 #endif //#ifndef SEQAN_HEADER_...
667