1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Tobias Rausch <rausch@embl.de>
33 // Author: Anne-Katrin Emde <anne-katrin.emde@fu-berlin.de>
34 // ==========================================================================
35 // Iterative instead of recursive implementation of segment match refinement.
36 // This is required for some large inputs to circumvent stack overflows.
37 // ==========================================================================
38 
39 #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALGORITHM_REFINE_EXACT_ITERATIVE_H_
40 #define SEQAN_INCLUDE_SEQAN_GRAPH_ALGORITHM_REFINE_EXACT_ITERATIVE_H_
41 
42 namespace seqan {
43 
44 struct TagExactRefinement_;
45 typedef Tag<TagExactRefinement_> const ExactRefinement;
46 
47 // exact method, every cut is made (unless it already exists)
48 template<typename TValue, typename TValue2, typename TSize>
49 inline bool
_cutIsValid(String<std::set<TValue>> & all_nodes,TValue2 seq_i_pos,TSize,typename std::set<TValue>::iterator iter,TSize,Tag<TagExactRefinement_> const)50 _cutIsValid(String<std::set<TValue> > & all_nodes,
51         TValue2 seq_i_pos,
52         TSize,
53         typename std::set<TValue>::iterator iter,
54         TSize,
55         Tag<TagExactRefinement_> const)
56 {
57     //cut already exists
58     if(iter != all_nodes[seq_i_pos].end())
59         return false;
60     return true;
61 }
62 
63 
64 // necessary for reversed fragments: projected position pos_j is shifted one to the left --> ++pos_j if fragement reversed
65 template<typename TSize, typename TSpec,typename TPos>
66 inline void
_updateCutPosition(Fragment<TSize,ExactReversableFragment<TSpec>> & f,TPos & pos_j)67 _updateCutPosition(Fragment<TSize, ExactReversableFragment<TSpec> > & f, TPos & pos_j)
68 {
69     if(f.reversed)
70         ++pos_j;
71 }
72 //template<typename TSize, typename TSpec,typename TPos>
73 //inline void
74 //_updateCutPosition(Fragment<TSize, ExactReversableFragment<TSpec> > const& f, TPos & pos_j)
75 //{
76 //    if(f.reversed)
77 //        ++pos_j;
78 //}
79 
80 // for all other fragment types --> no shifting necessary
81 template<typename TFrag,typename TPos>
82 inline void
_updateCutPosition(TFrag &,TPos &)83 _updateCutPosition(TFrag &, TPos &)
84 {
85     return;
86 }
87 
88 ///////////////////////////////////////////////////////////////////////////////////////////////////////
89 //Recursive Refinement
90 //refine position node_i on sequence seq_i
91 template<typename TValue, typename TAlignmentString, typename TStringSet,typename TGraph, typename TPropertyMap,typename TSeqMap, typename TTagSpec>
92 inline void
_refine(TValue node_i,TValue seq_i_id,TStringSet & seqs,TSeqMap & seq_map,TAlignmentString & alis,String<TGraph> & gs,String<TPropertyMap> & pms,String<std::set<TValue>> & all_nodes,TValue min_len,Tag<TTagSpec> tag)93 _refine(TValue node_i,
94      TValue seq_i_id,
95      TStringSet & seqs,
96      TSeqMap & seq_map,
97      TAlignmentString & alis,
98      String<TGraph> & gs,
99      String<TPropertyMap> & pms,
100      String<std::set<TValue> > & all_nodes,
101      TValue min_len,
102      Tag<TTagSpec> tag)
103 {
104     typedef typename Cargo<typename Value<TPropertyMap>::Type>::Type TAlignmentPointer;
105     typedef typename Iterator<String<TAlignmentPointer>, Rooted>::Type TSegmentIterator;
106     //find all segment matches that contain the current position (node_i)
107     String<TAlignmentPointer> relevant_segments;
108     TValue seq_i_pos = idToPosition(seqs,seq_i_id);
109     findIntervalsExcludeTouching(relevant_segments, gs[seq_i_pos],pms[seq_i_pos],node_i);
110 
111 
112     TSegmentIterator segment_it = begin(relevant_segments);
113     TSegmentIterator segment_end = end(relevant_segments);
114     //foreach of those segments
115     while(segment_it != segment_end)
116     {
117         TValue match_id = (*segment_it).i1; // segment match
118         TValue seg_num = (*segment_it).i2; // first or second segment in segment match?
119 
120         //get the sequence that node_i needs to be projected onto (seq_j)
121         //and get the projected position (pos_j)
122         TValue seq_j_id, node_j;
123         _getOtherSequenceAndProject(alis[match_id],seg_num,seq_map,seq_i_id,node_i,seq_j_id,node_j);
124         TValue seq_j_pos = idToPosition(seqs,seq_j_id);
125         _updateCutPosition(alis[match_id],node_j);
126 
127         typename std::set<TValue>::iterator iter;
128         iter = all_nodes[seq_j_pos].find(node_j);
129 
130         //if node does not exist yet ---> insert and continue cutting
131         if(_cutIsValid(all_nodes,seq_j_pos,node_j,iter,min_len,tag))
132         {
133             all_nodes[seq_j_pos].insert(node_j);
134             _refine(node_j,seq_j_id,seqs,seq_map,alis,gs,pms,all_nodes,min_len,tag);
135         }
136         // TODO: else //verschmelzen, abschneiden und ergehen, erst sp�ter...
137         // do nothing or resolve problems
138 
139         ++segment_it;
140     }
141 }
142 
143 //template<typename TFragSize, typename TFragSpec>
144 //void
145 //printMatch(Fragment<TFragSize,TFragSpec> & f)
146 //{
147 //    std::cout << "FRAGMENT:" << " f.len = "<< f.len <<std::endl;
148 //    std::cout << "f.seqId1 = "<< f.seqId1 << " f.begin1 = " << f.begin1 << std::endl;
149 //    std::cout << "f.seqId2 = "<< f.seqId2 << " f.begin2 = " << f.begin2 << std::endl;
150 //}
151 
152 //template<typename TAlign>
153 //void
154 //printMatch(TAlign & f)
155 //{
156 //    std::cout << f;
157 //}
158 
159 ///////////////////////////////////////////////////////////////////////////////////////////////////////
160 // Construct interval trees
161 ////////////////////////////////////////////////////////////////////////////////////////////////////
162 
163 
164 //construct intervals from allignments for each sequence (other Alignment types)
165 template<typename TInterval, typename TStringSet, typename TAlignmentString, typename TSeqMap>
166 void
_buildIntervalsForAllSequences(TAlignmentString & alis,String<String<TInterval>> & intervals,TStringSet & seqs,TSeqMap & seq_map)167 _buildIntervalsForAllSequences(TAlignmentString & alis,
168                                String<String<TInterval> > & intervals,
169                                   TStringSet & seqs,
170                                TSeqMap & seq_map)
171 {
172 
173     typedef typename Value<TInterval>::Type TValue;
174     typedef typename Cargo<TInterval>::Type TCargo;
175     typedef typename Iterator<TAlignmentString,Standard>::Type TAliIterator;
176     TAliIterator ali_it = begin(alis,Standard());
177     TAliIterator ali_end = end(alis,Standard());
178     TValue ali_counter = 0;
179     //foreach alignment
180     while(ali_it != ali_end)
181     {
182         TValue seq_i_id,begin_,end_;
183         //printMatch(*ali_it);
184         //get the first sequence (and its begin and end) that takes part in the alignment (seq_i)
185         _getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_,end_,0);
186         TValue seq_i_pos = idToPosition(seqs, seq_i_id);
187         //and append the interval (ali_begin, ali_end) with cargo ali* to the list of intervals of seq_i
188         appendValue(intervals[seq_i_pos],IntervalAndCargo<TValue,TCargo>(begin_,end_,TCargo(ali_counter,0)));
189 
190         //get the second sequence (and its begin and end) that takes part in the alignment (seq_i)
191         _getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_,end_,1);
192         seq_i_pos = idToPosition(seqs, seq_i_id);
193         //and again append the interval (ali_begin, ali_end) with cargo ali* to the list of intervals of seq_i
194         appendValue(intervals[seq_i_pos],IntervalAndCargo<TValue,TCargo>(begin_,end_,TCargo(ali_counter,1)));
195 
196         ++ali_counter;
197         ++ali_it;
198     }
199 
200 }
201 
202 
203 //get all intervals from the alignments and construct an interval tree for each sequence
204 template<typename TGraph, typename TPropertyMap, typename TAlignmentString, typename TSequence, typename TSetSpec, typename TValue, typename TSeqMap>
205 void
_createTreesForAllSequences(String<TGraph> & gs,String<TPropertyMap> & pms,TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TValue numSequences)206 _createTreesForAllSequences(String<TGraph> & gs,
207                            String<TPropertyMap> & pms,
208                            TAlignmentString & alis,
209                            StringSet<TSequence,TSetSpec> & seqs,
210                            TSeqMap & seq_map,
211                            TValue numSequences)
212 {
213     //typedef typename Value<TAlignmentString>::Type TAlignment;
214 //    typedef TAlignment* TCargo;
215     typedef Pair<unsigned,unsigned,BitPacked<31,1> > TCargo;
216     typedef IntervalAndCargo<int,TCargo> TInterval;
217     //typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
218 
219     //std::cout <<"create interval trees...";
220     // clock_t start, finish1;
221     // double duration;
222     // start = clock();
223     //one tree for each sequence
224     resize(gs,numSequences);
225     resize(pms,numSequences);
226 
227     // and one string of intervals for each sequence
228     String<String<TInterval> > intervals;
229     resize(intervals,numSequences);
230     // fill intervals
231     _buildIntervalsForAllSequences(alis,intervals,seqs,seq_map);
232 
233     TValue i = 0;
234 
235     while(i < numSequences)
236     {
237         //std::cout << (numSequences-i) <<" more ("<<length(intervals[i])<<" intervals)... "<<std::flush;
238         TValue center = length(seqs[i])/2; // center raus, hat hier nix zu suchen
239         //create interval tree!
240         createIntervalTree(gs[i], pms[i], intervals[i], center);
241 
242         //intervals for sequence i are not needed anymore
243         clear(intervals[i]);
244         ++i;
245     }
246     // finish1 = clock();
247     // duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
248     // std::cout << "\ntook " << duration << " seconds.\n";
249 }
250 
251 
252 ///////////////////////////////////////////////////////////////////////////////////////////////////////
253 // Construct refined alignment graph
254 ////////////////////////////////////////////////////////////////////////////////////////////////////
255 
256 // step 1 of constructing the refined alignment graph: create all nodes
257 template<typename TStringSet,typename TValue,typename TAliGraph>
258 void
_makeRefinedGraphNodes(String<std::set<TValue>> & all_nodes,TStringSet & seqs,TAliGraph & ali_g)259 _makeRefinedGraphNodes(String<std::set<TValue> > & all_nodes,
260                       TStringSet & seqs,
261                       TAliGraph & ali_g)
262 {
263     typedef typename std::set<TValue>::iterator TSetIterator;
264     //for each sequence look at all cut positions and create nodes between them
265     for(unsigned int seq_i_pos = 0; seq_i_pos < length(seqs); ++seq_i_pos)
266     {
267         TValue seq_i_id = positionToId(stringSet(ali_g), seq_i_pos);
268         TSetIterator it = all_nodes[seq_i_pos].begin();
269         TSetIterator end_it = all_nodes[seq_i_pos].end();
270         TSetIterator next_it = it;
271         if(next_it != end_it)
272             ++next_it;
273         else
274             addVertex(ali_g, seq_i_id, 0, length(seqs[seq_i_pos]));
275 
276         //first unaligned node
277         if(it != end_it && *it != 0)
278             addVertex(ali_g, seq_i_id, 0, *it);
279         //a new node for each interval
280         while(next_it != end_it)
281         {
282             TValue pos_i = *it;
283             addVertex(ali_g, seq_i_id, pos_i, *next_it - pos_i);
284             ++it;
285             ++next_it;
286         }
287         //last unaligned node
288         if(it !=end_it && *it<length(seqs[seq_i_pos]))
289             addVertex(ali_g, seq_i_id, *it, (length(seqs[seq_i_pos])) - *it);
290         all_nodes[seq_i_pos].clear();
291     }
292 }
293 
294 
295 // step 2 of constructing the refined alignment graph: add all edges
296 // version for exact refinement
297 template<typename TAlignmentString,typename TStringSet,typename TSeqMap, typename TPropertyMap,typename TScore,typename TAliGraph >
298 void
_makeRefinedGraphEdges(TAlignmentString & alis,TPropertyMap & pm,TStringSet & seqs,TSeqMap & seq_map,TScore & score_type,TAliGraph & ali_g,Tag<TagExactRefinement_> const)299 _makeRefinedGraphEdges(TAlignmentString & alis,
300                        TPropertyMap & pm,
301                       TStringSet & seqs,
302                       TSeqMap & seq_map,
303                       TScore & score_type,
304                       TAliGraph & ali_g,
305                       Tag<TagExactRefinement_> const)
306 {
307     typedef typename Value<TAlignmentString>::Type TAlign;
308     typedef typename Size<TAlign>::Type TValue;
309     typedef typename Iterator<TAlignmentString, Rooted>::Type TAliIterator;
310     typedef typename VertexDescriptor<TAliGraph>::Type TVertexDescriptor;
311     typedef typename EdgeDescriptor<TAliGraph>::Type TEdgeDescriptor;
312     typedef typename Cargo<TAliGraph>::Type TCargo;
313     //make edges
314     TAliIterator ali_it = begin(alis);
315     TAliIterator ali_end = end(alis);
316     //for each segment/fragment/alignment
317     while(ali_it != ali_end)
318     {
319         //get sequence, begin position and end position
320         TValue seq_id,begin_pos,end_pos;
321         _getSeqBeginAndEnd(*ali_it,seq_map,seq_id,begin_pos,end_pos,(TValue)0);
322         SEQAN_ASSERT_LEQ(end_pos, length(seqs[idToPosition(seqs, seq_id)]));
323         SEQAN_ASSERT(ali_it.data_container == ali_end.data_container);
324         SEQAN_ASSERT(ali_it.data_iterator != ali_end.data_iterator);
325 
326         //get the node represents the current interval (begin_pos until next_cut_pos or end_pos)
327         TVertexDescriptor act_knot = findVertex(ali_g,seq_id,begin_pos);
328         TValue act_pos = begin_pos;
329         TValue seq_j_id_temp,pos_j_begin;
330         _getOtherSequenceAndProject(*ali_it,(TValue)0,seq_map,seq_id,act_pos,seq_j_id_temp,pos_j_begin);
331 
332         //for each interval that lies within the current segment/fragement/alignment
333         while(act_pos < end_pos)
334         {
335             //get other sequence and projected position
336             TValue seq_j_id,pos_j;
337             _getOtherSequenceAndProject(*ali_it,(TValue)0,seq_map,seq_id,act_pos,seq_j_id,pos_j);
338             SEQAN_ASSERT_NEQ(pos_j, static_cast<TValue>(-1));
339             //find node that contains the projected position (pos_j)
340             TVertexDescriptor vd = findVertex(ali_g, seq_j_id, pos_j);
341             bool doAddEdge = true;
342 
343 //            if(doAddEdge && fragmentBegin(ali_g,vd)!=pos_j) // check if edge makes sense
344             if (vd == getNil<TVertexDescriptor>())
345                 doAddEdge = false;
346             else
347             {
348                 TValue temp_seq_i_id,temp_act_pos;
349                 _getOtherSequenceAndProject(*ali_it,(TValue)1,seq_map,seq_j_id,static_cast<TValue>(fragmentBegin(ali_g,vd)),temp_seq_i_id,temp_act_pos);
350                 if(temp_act_pos == static_cast<TValue>(-1))
351                     doAddEdge = false;
352                 else
353                 {
354                     TVertexDescriptor temp_act_knot = findVertex(ali_g, temp_seq_i_id, temp_act_pos);
355                     if(act_knot!=temp_act_knot)
356                         doAddEdge = false;
357                 }
358             }
359             if(doAddEdge)
360             {
361                 typename Value<TScore>::Type score = _getRefinedMatchScore(score_type,seqs,*ali_it,act_pos,pos_j,fragmentLength(ali_g,act_knot),fragmentLength(ali_g,vd));//,fragmentLength(ali_g,vd));
362         //        typename Value<TScore>::Type score = fragmentLength(ali_g,vd);
363                 score *= _getRefinedAnnoScore(ali_g,pm,vd,act_knot,score_type);
364             //this needs to be generalized (makes sense for positive scores only)
365                 if(score <= 0) score = 1;
366                 if(score > 0)
367                 {
368                     if (findEdge(ali_g, act_knot, vd) == 0) {
369                         //if(abs((double)fragmentLength(ali_g, act_knot) - (double)fragmentLength(ali_g, vd)) > 20) {
370                         //    std::cerr << "added edge: " << fragmentLength(ali_g, act_knot) << "  " <<  fragmentLength(ali_g, vd) << std::endl;
371                         //    std::cerr << *ali_it;
372                         //    std::cerr << "act_pos=" << act_pos-begin_pos << " pos_j=" << pos_j-pos_j_begin << std::endl;
373                         //} else {
374                             addEdge(ali_g,act_knot,vd,(TCargo)score);
375                         //}
376                     }
377                     else {
378                         TEdgeDescriptor ed = findEdge(ali_g, act_knot, vd);
379                         //if((TCargo)score > getCargo(ed))
380                             //assignCargo(ed, score);
381                         assignCargo(ed, getCargo(ed)+score);
382                     }
383                 }
384             }
385             //prepare for next interval
386             act_pos += fragmentLength(ali_g,act_knot);
387             act_knot = findVertex(ali_g,seq_id,act_pos);
388         }
389         ++ali_it;
390     }
391 }
392 
393 
394 
395 
396 
397 //build refined alignment graph, nodes are numbered ascendingly:
398 //seq1   0  1  2  3  4
399 //seq2   5  6  7  8  9 10
400 //seq3  11 12 13 14 15
401 template<typename TValue,typename TAlignmentString,typename TScore,typename TSequence, typename TSetSpec,typename TAliGraph,typename TSeqMap,typename TTagSpec>
402 void
_makeAlignmentGraphFromRefinedSegments(String<std::set<TValue>> & all_nodes,TAlignmentString & alis,TScore & score_type,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TAliGraph & ali_g,Tag<TTagSpec> const tag,bool)403 _makeAlignmentGraphFromRefinedSegments(String<std::set<TValue> > & all_nodes,
404                    TAlignmentString & alis,
405                    TScore & score_type,
406                    StringSet<TSequence, TSetSpec> & seqs,
407                    TSeqMap & seq_map,
408                    TAliGraph & ali_g,
409                       Tag<TTagSpec> const tag,
410                    bool)
411 {
412     //std::cout << "making refined alignment graph...";
413     //clock_t start, finish1;
414     //double duration;
415     //start = clock();
416 
417     //make nodes (same function for inexact and exact refinement)
418     _makeRefinedGraphNodes(all_nodes,seqs,ali_g);
419 
420     bool pm = false;
421     //add edges (different functions depending on exact/inexact refinement)
422     _makeRefinedGraphEdges(alis,pm,seqs,seq_map,score_type,ali_g,tag);
423 
424     //std::cout << "check\n";
425     //finish1 = clock();
426     //duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
427     //std::cout << "\ntook " << duration << " seconds.\n";
428 }
429 
430 
431 //build refined alignment graph as above, but with additional annotation information
432 template<typename TValue,typename TAlignmentString,typename TScore,typename TSequence, typename TSetSpec,typename TAliGraph,typename TSeqMap,typename TAnnoString,typename TTagSpec>
433 void
_makeAlignmentGraphFromRefinedSegments(String<std::set<TValue>> & all_nodes,TAlignmentString & alis,TScore & score_type,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TAliGraph & ali_g,Tag<TTagSpec> const tag,TAnnoString & annotation)434 _makeAlignmentGraphFromRefinedSegments(String<std::set<TValue> > & all_nodes,
435                    TAlignmentString & alis,
436                    TScore & score_type,
437                    StringSet<TSequence, TSetSpec> & seqs,
438                    TSeqMap & seq_map,
439                    TAliGraph & ali_g,
440                       Tag<TTagSpec> const tag,
441                    TAnnoString & annotation)
442 {
443     //std::cout << "making refined alignment graph...";
444     //clock_t start, finish1;
445     //double duration;
446     //start = clock();
447 
448     //make nodes (same function for inexact and exact refinement)
449     _makeRefinedGraphNodes(all_nodes,seqs,ali_g);
450 
451     //add annotation to nodes
452     //typedef typename Value<TAnnoString>::Type TAnnotation;
453     //typedef typename Value<TAnnotation>::Type TLabel;
454     typedef char TLabel;
455     String<String<TLabel> > pm;
456     _addNodeAnnotation(seqs,seq_map,annotation,pm,ali_g,tag);
457 
458     //add edges (different functions depending on exact/inexact refinement)
459     _makeRefinedGraphEdges(alis,pm,seqs,seq_map,score_type,ali_g,tag);
460 
461     //std::cout << "check\n";
462     //finish1 = clock();
463     //duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
464     //std::cout << "\ntook " << duration << " seconds.\n";
465 }
466 
467 
468 
469 
470 
471 ////////////////////////////////////////////////////////////////////////////////////////
472 //The big matchRefinement function that does everything: build interval trees, do the
473 //refinement and construct a refined alignment graph
474 ////////////////////////////////////////////////////////////////////////////////////////
475 template<typename TAlignmentString, typename TAnnotation, typename TOutGraph, typename TSequence, typename TSetSpec, typename TScore,typename TTagSpec>
476 void
matchRefinement(TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seq,TScore & score_type,TOutGraph & ali_graph,typename Size<typename Value<TAlignmentString>::Type>::Type min_fragment_len,TAnnotation & annotation,Tag<TTagSpec> const tag)477 matchRefinement(TAlignmentString & alis,
478                 StringSet<TSequence, TSetSpec> & seq,
479                 TScore & score_type,
480                 TOutGraph & ali_graph,
481                 typename Size<typename Value<TAlignmentString>::Type>::Type min_fragment_len,
482                 TAnnotation & annotation,
483                 Tag<TTagSpec> const tag)
484 {
485     ////////////////////////////////////////////////////////////////
486     //typedefs
487     typedef typename Value<TAlignmentString>::Type TAlign;
488     typedef typename Iterator<TAlignmentString, Rooted>::Type TAliIterator;
489     typedef typename Size<TAlign>::Type TValue;
490 //    typedef TValue TCargo;
491 //    typedef Pair<unsigned,unsigned,BitPacked<31,1> > TCargo;
492     typedef Pair<unsigned,unsigned,BitPacked<31,1> > TCargo;
493     typedef IntervalAndCargo<int,TCargo> TInterval;
494     typedef Graph<Directed<void,WithoutEdgeId> > TGraph;
495     typedef IntervalTreeNode<TInterval> TNode;
496     typedef String<TNode> TPropertyMap;
497     typedef typename std::set<TValue>::iterator TSetIterator;
498     typedef typename Cargo<typename Value<TPropertyMap>::Type>::Type TAlignmentPointer;
499     typedef typename Iterator<String<TAlignmentPointer>, Rooted>::Type TSegmentIterator;
500 
501 #ifdef SEQAN_TCOFFEE_DEBUG
502     double refinementTime = sysTime();
503 #endif
504 
505     ////////////////////////////////////////////////////////////////
506     TValue numSequences = length(seq);
507     //weird ID --> good ID map
508     std::map<const void * ,int> seq_map;
509     for(int i = 0; i < (int) numSequences; ++i)
510         seq_map[getObjectId(seq[i])] = i;
511     ////////////////////////////////////////////////////////////////
512     //build interval trees
513     String<TGraph> gs;
514     String<TPropertyMap> pms;
515     _createTreesForAllSequences(gs, pms, alis, seq, seq_map, numSequences);
516 
517     ////////////////////////////////////////////////////////////////
518     //do refinement
519     //std::cout <<"refining..."<<std::flush;
520     // clock_t start, finish1;
521     // double duration;
522     // start = clock();
523 
524     //all_nodes = set of all cut positions
525     String<std::set<TValue> > all_nodes;
526     resize(all_nodes,numSequences);
527 
528     //all_nodes that need to be processed set of all cut positions
529     String<std::set<TValue> > all_node_queues;
530     resize(all_node_queues,numSequences);
531 
532     //call function _refine for each startknoten
533     TAliIterator ali_it = begin(alis);
534     TAliIterator ali_end = end(alis);
535     //for each segment/fragement/alignment
536     while(ali_it != ali_end)
537     {
538         //for each of the two sequences
539         for(TValue i = 0; i < 2; ++i)
540         {
541             TValue seq_i_id,begin_i,end_i;
542             _getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_i,end_i,i);
543             TValue seq_i_pos = idToPosition(seq,seq_i_id);
544 
545             all_node_queues[seq_i_pos].insert(begin_i);
546             all_node_queues[seq_i_pos].insert(end_i);
547         }
548         ++ali_it;
549     }
550 
551 
552     TSetIterator queueIt;
553     bool done = false;
554     while(!done)
555     {
556         for(unsigned seq_i_pos = 0; seq_i_pos < numSequences; ++seq_i_pos)
557         {
558             queueIt = all_node_queues[seq_i_pos].begin();
559             while (queueIt != all_node_queues[seq_i_pos].end())
560             {
561                 TValue node_i = *queueIt;
562                 TSetIterator iter = all_nodes[seq_i_pos].find(node_i);
563         //        TSetIterator qiter = all_node_queues[seq_i_pos].find(node_i);
564                 if(_cutIsValid(all_nodes,seq_i_pos,node_i,iter,min_fragment_len,tag))
565                    //&& _cutIsValid(all_node_queues,seq_i_pos,node_i,qiter,min_fragment_len,tag))
566 //                if(iter == all_nodes[seq_i_pos].end())
567                 {
568                     TValue seq_i_id = positionToId(seq, seq_i_pos);
569                     all_nodes[seq_i_pos].insert(node_i);
570                     String<TAlignmentPointer> relevant_segments;
571                     findIntervalsExcludeTouching(relevant_segments, gs[seq_i_pos],pms[seq_i_pos],node_i);
572 
573                     TSegmentIterator segment_it = begin(relevant_segments);
574                     TSegmentIterator segment_end = end(relevant_segments);
575                     //foreach of those segments
576                     while(segment_it != segment_end)
577                     {
578                         TValue match_id = (*segment_it).i1;
579                         TValue seg_num = (*segment_it).i2;                        //get the sequence that node_i needs to be projected onto (seq_j)
580                         //and get the projected position (pos_j)
581                         TValue seq_j_id, node_j;
582                         _getOtherSequenceAndProject(alis[match_id],seg_num,seq_map,seq_i_id,node_i,seq_j_id,node_j);
583                         TValue seq_j_pos = idToPosition(seq,seq_j_id);
584                         _updateCutPosition(alis[match_id],node_j);
585 
586                         typename std::set<TValue>::iterator iter_j, qiter_j;
587                         iter_j = all_nodes[seq_j_pos].find(node_j);
588                         qiter_j = all_node_queues[seq_j_pos].find(node_j);
589 
590                         //if node does not exist yet ---> insert and continue cutting
591                         if(_cutIsValid(all_nodes,seq_j_pos,node_j,iter_j,min_fragment_len,tag)
592                             && _cutIsValid(all_node_queues,seq_j_pos,node_j,qiter_j,min_fragment_len,tag))
593                         //if(iter_j == all_nodes[seq_j_pos].end())
594                         {
595                             all_node_queues[seq_j_pos].insert(node_j);
596                         }
597 
598                         ++segment_it;
599                     }
600 
601                 }
602                 ++queueIt;
603             }
604             all_node_queues[seq_i_pos].clear();
605         }
606         unsigned i;
607         for(i = 0; i < numSequences; ++i)
608         {
609             queueIt = all_node_queues[i].begin();
610             if (queueIt != all_node_queues[i].end())
611                 break;
612         }
613         if(i==numSequences)
614             done=true;
615     }
616     _addAnnotationCuts(all_nodes,alis,gs,pms,seq,seq_map,annotation,min_fragment_len,tag);
617 
618     // finish1 = clock();
619     // duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
620     //std::cout << "\ntook " << duration << " seconds.\n";
621     //for(int seq_i = 0; seq_i < length(seq); ++seq_i)
622     //{
623     //    typename std::set<TValue>::iterator it = all_nodes[seq_i].begin();
624     //    typename std::set<TValue>::iterator end_it = all_nodes[seq_i].end();
625     //
626     //    while(it != end_it)
627     //    {
628     //        std::cout << *it << ",";
629     //        ++it;
630     //    }
631     //    std::cout << "\n";
632     //}
633     //std::cout <<"building tree..."<<std::flush;
634 
635 #ifdef SEQAN_TCOFFEE_DEBUG
636     std::cout << std::setw(30) << std::left << "Segment-match refinement:" << std::setw(10) << std::right << sysTime() - refinementTime << "  s" << std::endl;
637 
638     double buildGraphTime = sysTime();
639 #endif
640 
641     ////////////////////////////////////////////////////////////////
642     //build refined alignment graph
643     _makeAlignmentGraphFromRefinedSegments(all_nodes,alis,score_type,seq,seq_map,ali_graph,tag,annotation);
644 
645 #ifdef SEQAN_TCOFFEE_DEBUG
646     std::cout << std::setw(30) << std::left << "Build alignment graph:" << std::setw(10) << std::right << sysTime() - buildGraphTime << "  s" << std::endl;
647 #endif
648 
649 }
650 
651 
652 ///////WRAPPERS
653 
654 //exact refinement, score type given
655 template<typename TAlignmentString, typename TScoreValue,typename TScoreSpec,typename TOutGraph, typename TSequence, typename TSetSpec>
656 void
matchRefinement(TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seq,Score<TScoreValue,TScoreSpec> & score_type,TOutGraph & ali_graph)657 matchRefinement(TAlignmentString & alis,
658                 StringSet<TSequence, TSetSpec> & seq,
659                 Score<TScoreValue,TScoreSpec> & score_type,
660                 TOutGraph & ali_graph)
661 {
662     //min_fragment_len = 1   ==> Exact cutting
663     bool anno = false;
664     matchRefinement(alis,seq,score_type,ali_graph,1,anno,ExactRefinement());
665 }
666 
667 
668 
669 //exact refinement, score type not given
670 template<typename TFragmentString, typename TOutGraph, typename TSequence, typename TSetSpec>
671 void
matchRefinement(TFragmentString & matches,StringSet<TSequence,TSetSpec> & strSet,TOutGraph & ali_graph)672 matchRefinement(TFragmentString & matches,
673                 StringSet<TSequence, TSetSpec> & strSet,
674                 TOutGraph & ali_graph)
675 {
676     typename Cargo<TOutGraph>::Type fake_score = 1;
677     bool anno = false;
678     matchRefinement(matches,strSet,fake_score,ali_graph,1,anno,ExactRefinement());
679 }
680 
681 }  // namespace seqan
682 
683 #endif  // #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALGORITHM_REFINE_EXACT_ITERATIVE_H_
684