1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 #ifndef SEQAN_HEADER_GRAPH_REFINE_EXACT_H
34 #define SEQAN_HEADER_GRAPH_REFINE_EXACT_H
35
36
37 namespace SEQAN_NAMESPACE_MAIN
38 {
39
40
41
42 struct TagExactRefinement_;
43 typedef Tag<TagExactRefinement_> const ExactRefinement;
44
45 //exact method, every cut is made (unless it already exists)
46 template<typename TValue>
47 inline bool
_cutIsValid(String<std::set<TValue>> & all_nodes,TValue seq_i_pos,TValue,typename std::set<TValue>::iterator iter,TValue,Tag<TagExactRefinement_> const)48 _cutIsValid(String<std::set<TValue> > & all_nodes,
49 TValue seq_i_pos,
50 TValue,
51 typename std::set<TValue>::iterator iter,
52 TValue,
53 Tag<TagExactRefinement_> const)
54 {
55 SEQAN_CHECKPOINT
56 //cut already exists
57 if(iter != all_nodes[seq_i_pos].end())
58 return false;
59 return true;
60 }
61
62
63 template<typename TSize, typename TSpec,typename TPos>
64 inline void
_updateCutPosition(Fragment<TSize,ExactReversableFragment<TSpec>> & f,TPos & pos_j)65 _updateCutPosition(Fragment<TSize, ExactReversableFragment<TSpec> > & f, TPos & pos_j)
66 {
67 if(f.reversed)
68 ++pos_j;
69 }
70 //template<typename TSize, typename TSpec,typename TPos>
71 //inline void
72 //_updateCutPosition(Fragment<TSize, ExactReversableFragment<TSpec> > const& f, TPos & pos_j)
73 //{
74 // if(f.reversed)
75 // ++pos_j;
76 //}
77
78
79 template<typename TFrag,typename TPos>
80 inline void
_updateCutPosition(TFrag &,TPos &)81 _updateCutPosition(TFrag &, TPos &)
82 {
83 return;
84 }
85
86 ///////////////////////////////////////////////////////////////////////////////////////////////////////
87 //Recursive Refinement
88 //refine position node_i on sequence seq_i
89 template<typename TValue, typename TAlignmentString, typename TStringSet,typename TGraph, typename TPropertyMap,typename TSeqMap, typename TTagSpec>
90 inline void
_refine(TValue node_i,TValue seq_i_id,TStringSet & seqs,TSeqMap & seq_map,TAlignmentString & alis,String<TGraph> & gs,String<TPropertyMap> & pms,String<std::set<TValue>> & all_nodes,TValue min_len,Tag<TTagSpec> tag)91 _refine(TValue node_i,
92 TValue seq_i_id,
93 TStringSet & seqs,
94 TSeqMap & seq_map,
95 TAlignmentString & alis,
96 String<TGraph> & gs,
97 String<TPropertyMap> & pms,
98 String<std::set<TValue> > & all_nodes,
99 TValue min_len,
100 Tag<TTagSpec> tag)
101 {
102 SEQAN_CHECKPOINT
103 typedef typename Cargo<typename Value<TPropertyMap>::Type>::Type TAlignmentPointer;
104 typedef typename Iterator<String<TAlignmentPointer>, Rooted>::Type TSegmentIterator;
105 //find all segment matches that contain the current position (node_i)
106 String<TAlignmentPointer> relevant_segments;
107 TValue seq_i_pos = idToPosition(seqs,seq_i_id);
108 findIntervalsExcludeTouching(gs[seq_i_pos],pms[seq_i_pos],node_i,relevant_segments);
109
110
111 TSegmentIterator segment_it = begin(relevant_segments);
112 TSegmentIterator segment_end = end(relevant_segments);
113 //foreach of those segments
114 while(segment_it != segment_end)
115 {
116 TValue match_id = (*segment_it).i1; // segment match
117 TValue seg_num = (*segment_it).i2; // first or second segment in segment match?
118
119 //get the sequence that node_i needs to be projected onto (seq_j)
120 //and get the projected position (pos_j)
121 TValue seq_j_id, node_j;
122 _getOtherSequenceAndProject(alis[match_id],seg_num,seq_map,seq_i_id,node_i,seq_j_id,node_j);
123 TValue seq_j_pos = idToPosition(seqs,seq_j_id);
124 _updateCutPosition(alis[match_id],node_j);
125
126 typename std::set<TValue>::iterator iter;
127 iter = all_nodes[seq_j_pos].find(node_j);
128
129 //if node does not exist yet ---> insert and continue cutting
130 if(_cutIsValid(all_nodes,seq_j_pos,node_j,iter,min_len,tag))
131 {
132 all_nodes[seq_j_pos].insert(node_j);
133 _refine(node_j,seq_j_id,seqs,seq_map,alis,gs,pms,all_nodes,min_len,tag);
134 //TODO: else //verschmelzen, abschneiden und �bergehen, erst sp�ter...
135 //do nothing or resolve problems
136 }
137
138 ++segment_it;
139 }
140 }
141
142 //template<typename TFragSize, typename TFragSpec>
143 //void
144 //printMatch(Fragment<TFragSize,TFragSpec> & f)
145 //{
146 // ::std::cout << "FRAGMENT:" << " f.len = "<< f.len <<std::endl;
147 // ::std::cout << "f.seqId1 = "<< f.seqId1 << " f.begin1 = " << f.begin1 << std::endl;
148 // ::std::cout << "f.seqId2 = "<< f.seqId2 << " f.begin2 = " << f.begin2 << std::endl;
149 //}
150
151 //template<typename TAlign>
152 //void
153 //printMatch(TAlign & f)
154 //{
155 // ::std::cout << f;
156 //}
157
158 ///////////////////////////////////////////////////////////////////////////////////////////////////////
159 //Construct interval trees
160 ////////////////////////////////////////////////////////////////////////////////////////////////////
161 //construct intervals from allignments for each sequence (other Alignment types)
162 template<typename TInterval, typename TStringSet, typename TAlignmentString, typename TSeqMap>
163 void
_buildIntervalsForAllSequences(TAlignmentString & alis,String<String<TInterval>> & intervals,TStringSet & seqs,TSeqMap & seq_map)164 _buildIntervalsForAllSequences(TAlignmentString & alis,
165 String<String<TInterval> > & intervals,
166 TStringSet & seqs,
167 TSeqMap & seq_map)
168 {
169 SEQAN_CHECKPOINT
170
171 typedef typename Value<TInterval>::Type TValue;
172 typedef typename Cargo<TInterval>::Type TCargo;
173 typedef typename Iterator<TAlignmentString,Standard>::Type TAliIterator;
174 TAliIterator ali_it = begin(alis,Standard());
175 TAliIterator ali_end = end(alis,Standard());
176 TValue ali_counter = 0;
177 //foreach alignment
178 while(ali_it != ali_end)
179 {
180 TValue seq_i_id,begin_,end_;
181 //printMatch(*ali_it);
182 //get the first sequence (and its begin and end) that takes part in the alignment (seq_i)
183 _getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_,end_,0);
184 TValue seq_i_pos = idToPosition(seqs, seq_i_id);
185 //and append the interval (ali_begin, ali_end) with cargo ali* to the list of intervals of seq_i
186 appendValue(intervals[seq_i_pos],IntervalAndCargo<TValue,TCargo>(begin_,end_,TCargo(ali_counter,0)));
187
188 //get the second sequence (and its begin and end) that takes part in the alignment (seq_i)
189 _getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_,end_,1);
190 seq_i_pos = idToPosition(seqs, seq_i_id);
191 //and again append the interval (ali_begin, ali_end) with cargo ali* to the list of intervals of seq_i
192 appendValue(intervals[seq_i_pos],IntervalAndCargo<TValue,TCargo>(begin_,end_,TCargo(ali_counter,1)));
193
194 ++ali_counter;
195 ++ali_it;
196 }
197
198 }
199
200
201 //get all intervals from the alignments and construct an interval tree for each sequence
202 template<typename TGraph, typename TPropertyMap, typename TAlignmentString, typename TSequence, typename TSetSpec, typename TValue, typename TSeqMap>
203 void
_createTreesForAllSequences(String<TGraph> & gs,String<TPropertyMap> & pms,TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TValue numSequences)204 _createTreesForAllSequences(String<TGraph> & gs,
205 String<TPropertyMap> & pms,
206 TAlignmentString & alis,
207 StringSet<TSequence,TSetSpec> & seqs,
208 TSeqMap & seq_map,
209 TValue numSequences)
210 {
211 SEQAN_CHECKPOINT
212 typedef typename Value<TAlignmentString>::Type TAlignment;
213 // typedef TAlignment* TCargo;
214 typedef Pair<unsigned,unsigned,BitCompressed<31,1> > TCargo;
215 typedef IntervalAndCargo<int,TCargo> TInterval;
216 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
217
218 //std::cout <<"create interval trees...";
219 clock_t start, finish1;
220 double duration;
221 start = clock();
222 //one tree for each sequence
223 resize(gs,numSequences);
224 resize(pms,numSequences);
225
226 //and one string of intervals for each sequence
227 String<String<TInterval> > intervals;
228 resize(intervals,numSequences);
229 //fill intervals
230 _buildIntervalsForAllSequences(alis,intervals,seqs,seq_map);
231
232 TValue i = 0;
233
234 while(i < numSequences)
235 {
236 //std::cout << (numSequences-i) <<" more ("<<length(intervals[i])<<" intervals)... "<<std::flush;
237 //vllt zum speicher sparen: numSequences mal alle alis durchgehen
238 //und jedes mal nur buildIntervalsForJustOneSequence();
239 TValue center = length(seqs[i])/2; // center raus, hat hier nix zu suchen
240 //create interval tree!
241 createIntervalTree(gs[i],pms[i],intervals[i],center);
242
243 //intervals for sequence i are not needed anymore
244 clear(intervals[i]);
245 ++i;
246 }
247 finish1 = clock();
248 duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
249 //std::cout << "\ntook " << duration << " seconds.\n";
250 }
251
252
253 //step 1 of constructing the refined alignment graph: create all the nodes
254 template<typename TStringSet,typename TValue,typename TAliGraph>
255 void
_makeRefinedGraphNodes(String<std::set<TValue>> & all_nodes,TStringSet & seqs,TAliGraph & ali_g)256 _makeRefinedGraphNodes(String<std::set<TValue> > & all_nodes,
257 TStringSet & seqs,
258 TAliGraph & ali_g)
259 {
260 SEQAN_CHECKPOINT
261 typedef typename std::set<TValue>::iterator TSetIterator;
262 //for each sequence look at all cut positions and create nodes between them
263 for(unsigned int seq_i_pos = 0; seq_i_pos < length(seqs); ++seq_i_pos)
264 {
265 TValue seq_i_id = positionToId(stringSet(ali_g), seq_i_pos);
266 TSetIterator it = all_nodes[seq_i_pos].begin();
267 TSetIterator end_it = all_nodes[seq_i_pos].end();
268 TSetIterator next_it = it;
269 if(next_it != end_it)
270 ++next_it;
271 else
272 addVertex(ali_g, seq_i_id, 0, length(seqs[seq_i_pos]));
273
274 //first unaligned node
275 if(it != end_it && *it != 0)
276 addVertex(ali_g, seq_i_id, 0, *it);
277 //a new node for each interval
278 while(next_it != end_it)
279 {
280 TValue pos_i = *it;
281 addVertex(ali_g, seq_i_id, pos_i, *next_it - pos_i);
282 ++it;
283 ++next_it;
284 }
285 //last unaligned node
286 if(it !=end_it && *it<length(seqs[seq_i_pos]))
287 addVertex(ali_g, seq_i_id, *it, (length(seqs[seq_i_pos])) - *it);
288 all_nodes[seq_i_pos].clear();
289 }
290 }
291
292
293 //step 2 of constructing the refined alignment graph: add all edges
294 //version for exact refinement
295 template<typename TAlignmentString,typename TStringSet,typename TSeqMap, typename TPropertyMap,typename TScore,typename TAliGraph>
296 void
_makeRefinedGraphEdges(TAlignmentString & alis,TPropertyMap & pm,TStringSet & seqs,TSeqMap & seq_map,TScore & score_type,TAliGraph & ali_g,Tag<TagExactRefinement_> const)297 _makeRefinedGraphEdges(TAlignmentString & alis,
298 TPropertyMap & pm,
299 TStringSet & seqs,
300 TSeqMap & seq_map,
301 TScore & score_type,
302 TAliGraph & ali_g,
303 Tag<TagExactRefinement_> const)
304 {
305 SEQAN_CHECKPOINT
306 typedef typename Value<TAlignmentString>::Type TAlign;
307 typedef typename Size<TAlign>::Type TValue;
308 typedef typename Iterator<TAlignmentString, Rooted>::Type TAliIterator;
309 typedef typename VertexDescriptor<TAliGraph>::Type TVertexDescriptor;
310 typedef typename EdgeDescriptor<TAliGraph>::Type TEdgeDescriptor;
311 typedef typename Cargo<TAliGraph>::Type TCargo;
312 //make edges
313 TAliIterator ali_it = begin(alis);
314 TAliIterator ali_end = end(alis);
315 //for each segment/fragment/alignment
316 while(ali_it != ali_end)
317 {
318 //get sequence, begin position and end position
319 TValue seq_id,begin_pos,end_pos;
320 _getSeqBeginAndEnd(*ali_it,seq_map,seq_id,begin_pos,end_pos,(TValue)0);
321
322 //get the node represents the current interval (begin_pos until next_cut_pos or end_pos)
323 TVertexDescriptor act_knot = findVertex(ali_g,seq_id,begin_pos);
324 TValue act_pos = begin_pos;
325
326 //for each interval that lies within the current segment/fragement/alignment
327 while(act_pos < end_pos)
328 {
329 //get other sequence and projected position
330 TValue seq_j_id,pos_j;
331 _getOtherSequenceAndProject(*ali_it,(TValue)0,seq_map,seq_id,act_pos,seq_j_id,pos_j);
332 //find node that contains the projected position (pos_j)
333 TVertexDescriptor vd = findVertex(ali_g, seq_j_id, pos_j);
334
335 SEQAN_ASSERT_TRUE(fragmentBegin(ali_g,vd)==pos_j);
336 typename Value<TScore>::Type score = _getRefinedMatchScore(score_type,seqs,*ali_it,act_pos,pos_j,fragmentLength(ali_g,act_knot),fragmentLength(ali_g,vd));//,fragmentLength(ali_g,vd));
337 // typename Value<TScore>::Type score = fragmentLength(ali_g,vd);
338 score *= _getRefinedAnnoScore(ali_g,pm,vd,act_knot,score_type);
339 //this needs to be generalized (makes sense for positive scores only)
340 if(score <= 0) score = 1;
341 if(score > 0)
342 {
343 if (findEdge(ali_g, act_knot, vd) == 0) addEdge(ali_g,act_knot,vd,(TCargo)score);
344 else {
345 TEdgeDescriptor ed = findEdge(ali_g, act_knot, vd);
346 //if((TCargo)score > getCargo(ed))
347 //assignCargo(ed, score);
348 assignCargo(ed, getCargo(ed)+score);
349 }
350 }
351 //prepare for next interval
352 act_pos += fragmentLength(ali_g,act_knot);
353 act_knot = findVertex(ali_g,seq_id,act_pos);
354
355 }
356 ++ali_it;
357 }
358 }
359
360
361
362
363
364 ////////////////////////////////////////////////////////////////////////////////////////
365 //build refined alignment graph
366 ////////////////////////////////////////////////////////////////////////////////////////
367 //nodes are numbered ascendingly:
368 //seq1 0 1 2 3 4
369 //seq2 5 6 7 8 9 10
370 //seq3 11 12 13 14 15
371 template<typename TValue,typename TAlignmentString,typename TScore,typename TSequence, typename TSetSpec,typename TAliGraph,typename TSeqMap,typename TTagSpec>
372 void
_makeAlignmentGraphFromRefinedSegments(String<std::set<TValue>> & all_nodes,TAlignmentString & alis,TScore & score_type,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TAliGraph & ali_g,Tag<TTagSpec> const tag,bool)373 _makeAlignmentGraphFromRefinedSegments(String<std::set<TValue> > & all_nodes,
374 TAlignmentString & alis,
375 TScore & score_type,
376 StringSet<TSequence, TSetSpec> & seqs,
377 TSeqMap & seq_map,
378 TAliGraph & ali_g,
379 Tag<TTagSpec> const tag,
380 bool)
381 {
382 SEQAN_CHECKPOINT
383 //std::cout << "making refined alignment graph...";
384 //clock_t start, finish1;
385 //double duration;
386 //start = clock();
387
388 //make nodes (same function for inexact and exact refinement)
389 _makeRefinedGraphNodes(all_nodes,seqs,ali_g);
390
391 bool pm = false;
392 //add edges (different functions depending on exact/inexact refinement)
393 _makeRefinedGraphEdges(alis,pm,seqs,seq_map,score_type,ali_g,tag);
394
395 //std::cout << "check\n";
396 //finish1 = clock();
397 //duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
398 //std::cout << "\ntook " << duration << " seconds.\n";
399 }
400
401
402
403 template<typename TValue,typename TAlignmentString,typename TScore,typename TSequence, typename TSetSpec,typename TAliGraph,typename TSeqMap,typename TAnnoString,typename TTagSpec>
404 void
_makeAlignmentGraphFromRefinedSegments(String<std::set<TValue>> & all_nodes,TAlignmentString & alis,TScore & score_type,StringSet<TSequence,TSetSpec> & seqs,TSeqMap & seq_map,TAliGraph & ali_g,Tag<TTagSpec> const tag,TAnnoString & annotation)405 _makeAlignmentGraphFromRefinedSegments(String<std::set<TValue> > & all_nodes,
406 TAlignmentString & alis,
407 TScore & score_type,
408 StringSet<TSequence, TSetSpec> & seqs,
409 TSeqMap & seq_map,
410 TAliGraph & ali_g,
411 Tag<TTagSpec> const tag,
412 TAnnoString & annotation)
413 {
414 SEQAN_CHECKPOINT
415 //std::cout << "making refined alignment graph...";
416 //clock_t start, finish1;
417 //double duration;
418 //start = clock();
419
420 //make nodes (same function for inexact and exact refinement)
421 _makeRefinedGraphNodes(all_nodes,seqs,ali_g);
422
423 //add annotation to nodes
424 typedef typename Value<TAnnoString>::Type TAnnotation;
425 //typedef typename Value<TAnnotation>::Type TLabel;
426 typedef char TLabel;
427 String<String<TLabel> > pm;
428 _addNodeAnnotation(seqs,seq_map,annotation,pm,ali_g,tag);
429
430 //add edges (different functions depending on exact/inexact refinement)
431 _makeRefinedGraphEdges(alis,pm,seqs,seq_map,score_type,ali_g,tag);
432
433 //std::cout << "check\n";
434 //finish1 = clock();
435 //duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
436 //std::cout << "\ntook " << duration << " seconds.\n";
437 }
438
439
440
441
442
443 ////////////////////////////////////////////////////////////////////////////////////////
444 //The big matchRefinement function that does everything: build interval trees, do the
445 //refinement and construct a refined alignment graph
446 ////////////////////////////////////////////////////////////////////////////////////////
447 template<typename TAlignmentString, typename TAnnotation, typename TOutGraph, typename TSequence, typename TSetSpec, typename TScore,typename TTagSpec>
448 void
matchRefinement(TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seq,TScore & score_type,TOutGraph & ali_graph,typename Size<typename Value<TAlignmentString>::Type>::Type min_fragment_len,TAnnotation & annotation,Tag<TTagSpec> const tag)449 matchRefinement(TAlignmentString & alis,
450 StringSet<TSequence, TSetSpec> & seq,
451 TScore & score_type,
452 TOutGraph & ali_graph,
453 typename Size<typename Value<TAlignmentString>::Type>::Type min_fragment_len,
454 TAnnotation & annotation,
455 Tag<TTagSpec> const tag)
456 {
457 SEQAN_CHECKPOINT
458 ////////////////////////////////////////////////////////////////
459 //typedefs
460 typedef typename Value<TAlignmentString>::Type TAlign;
461 typedef typename Iterator<TAlignmentString, Rooted>::Type TAliIterator;
462 typedef typename Size<TAlign>::Type TValue;
463 // typedef TValue TCargo;
464 // typedef Pair<unsigned,unsigned,BitCompressed<31,1> > TCargo;
465 typedef Pair<unsigned,unsigned,BitCompressed<31,1> > TCargo;
466 typedef IntervalAndCargo<int,TCargo> TInterval;
467 typedef Graph<Directed<void,WithoutEdgeId> > TGraph;
468 typedef IntervalTreeNode<TInterval> TNode;
469 typedef String<TNode> TPropertyMap;
470 typedef VertexDescriptor<TGraph>::Type TVertexDescriptor;
471 typedef String<TCargo> TList;
472 typedef typename std::set<TValue>::iterator TSetIterator;
473 typedef typename Cargo<typename Value<TPropertyMap>::Type>::Type TAlignmentPointer;
474 typedef typename Iterator<String<TAlignmentPointer>, Rooted>::Type TSegmentIterator;
475
476
477 ////////////////////////////////////////////////////////////////
478 TValue numSequences = length(seq);
479 //weird ID --> good ID map
480 std::map<const void * ,int> seq_map;
481 for(int i = 0; i < (int) numSequences; ++i)
482 seq_map[id(seq[i])] = i;
483 ////////////////////////////////////////////////////////////////
484 //build interval trees
485 String<TGraph> gs;
486 String<TPropertyMap> pms;
487 _createTreesForAllSequences(gs, pms, alis, seq, seq_map, numSequences);
488
489 ////////////////////////////////////////////////////////////////
490 //do refinement
491 //std::cout <<"refining..."<<std::flush;
492 clock_t start, finish1;
493 double duration;
494 start = clock();
495
496 //all_nodes = set of all cut positions
497 String<std::set<TValue> > all_nodes;
498 resize(all_nodes,numSequences);
499
500 //all_nodes that need to be processed set of all cut positions
501 String<std::set<TValue> > all_node_queues;
502 resize(all_node_queues,numSequences);
503
504 //call function _refine for each startknoten
505 TAliIterator ali_it = begin(alis);
506 TAliIterator ali_end = end(alis);
507 //for each segment/fragement/alignment
508 while(ali_it != ali_end)
509 {
510 //for each of the two sequences
511 for(TValue i = 0; i < 2; ++i)
512 {
513 TValue seq_i_id,begin_i,end_i;
514 _getSeqBeginAndEnd(*ali_it,seq_map,seq_i_id,begin_i,end_i,i);
515 TValue seq_i_pos = idToPosition(seq,seq_i_id);
516
517 all_node_queues[seq_i_pos].insert(begin_i);
518 all_node_queues[seq_i_pos].insert(end_i);
519 }
520 ++ali_it;
521 }
522
523
524 TSetIterator queueIt;
525 bool done = false;
526 while(!done)
527 {
528 for(unsigned seq_i_pos = 0; seq_i_pos < numSequences; ++seq_i_pos)
529 {
530 queueIt = all_node_queues[seq_i_pos].begin();
531 while (queueIt != all_node_queues[seq_i_pos].end())
532 {
533 TValue node_i = *queueIt;
534 TSetIterator iter = all_nodes[seq_i_pos].find(node_i);
535 if(iter == all_nodes[seq_i_pos].end())
536 {
537 TValue seq_i_id = positionToId(seq, seq_i_pos);
538 all_nodes[seq_i_pos].insert(node_i);
539 String<TAlignmentPointer> relevant_segments;
540 findIntervalsExcludeTouching(gs[seq_i_pos],pms[seq_i_pos],node_i,relevant_segments);
541
542 TSegmentIterator segment_it = begin(relevant_segments);
543 TSegmentIterator segment_end = end(relevant_segments);
544 //foreach of those segments
545 while(segment_it != segment_end)
546 {
547 TValue match_id = (*segment_it).i1;
548 TValue seg_num = (*segment_it).i2; //get the sequence that node_i needs to be projected onto (seq_j)
549 //and get the projected position (pos_j)
550 TValue seq_j_id, node_j;
551 _getOtherSequenceAndProject(alis[match_id],seg_num,seq_map,seq_i_id,node_i,seq_j_id,node_j);
552 TValue seq_j_pos = idToPosition(seq,seq_j_id);
553 _updateCutPosition(alis[match_id],node_j);
554
555 typename std::set<TValue>::iterator iter_j;
556 iter_j = all_nodes[seq_j_pos].find(node_j);
557
558 //if node does not exist yet ---> insert and continue cutting
559 if(iter_j == all_nodes[seq_j_pos].end())
560 {
561 all_node_queues[seq_j_pos].insert(node_j);
562 }
563
564 ++segment_it;
565 }
566
567 }
568 ++queueIt;
569 }
570 all_node_queues[seq_i_pos].clear();
571 }
572 unsigned i;
573 for(i = 0; i < numSequences; ++i)
574 {
575 queueIt = all_node_queues[i].begin();
576 if (queueIt != all_node_queues[i].end())
577 break;
578 }
579 if(i==numSequences)
580 done=true;
581 }
582 _addAnnotationCuts(all_nodes,alis,gs,pms,seq,seq_map,annotation,min_fragment_len,tag);
583
584 finish1 = clock();
585 duration = (double)(finish1 - start) / CLOCKS_PER_SEC;
586 //std::cout << "\ntook " << duration << " seconds.\n";
587 //for(int seq_i = 0; seq_i < length(seq); ++seq_i)
588 //{
589 // typename std::set<TValue>::iterator it = all_nodes[seq_i].begin();
590 // typename std::set<TValue>::iterator end_it = all_nodes[seq_i].end();
591 //
592 // while(it != end_it)
593 // {
594 // std::cout << *it << ",";
595 // ++it;
596 // }
597 // std::cout << "\n";
598 //}
599 //std::cout <<"building tree..."<<std::flush;
600
601 ////////////////////////////////////////////////////////////////
602 //build refined alignment graph
603 _makeAlignmentGraphFromRefinedSegments(all_nodes,alis,score_type,seq,seq_map,ali_graph,tag,annotation);
604 }
605
606
607 ///////WRAPPERS
608
609 /**
610 .Function.matchRefinement:
611 ..signature:matchRefinement(matches,stringSet,scoringScheme,refinedGraph)
612 ..param.matches:The set of matches.
613 ...type:Class.Fragment
614 ...type:Class.Align
615 ...type:Spec.Alignment Graph
616 ..param.scoringScheme:The scoring scheme used to score the refined matches (scores are attached to
617 edges in the refined Alignment Graph).
618 ...remarks:If no scoring scheme is given, all edges get weight 1.
619 ...type:Class.Score
620 ..include:seqan/refinement.h
621 */
622 //exact refinement, score type given
623 template<typename TAlignmentString, typename TScoreValue,typename TScoreSpec,typename TOutGraph, typename TSequence, typename TSetSpec>
624 void
matchRefinement(TAlignmentString & alis,StringSet<TSequence,TSetSpec> & seq,Score<TScoreValue,TScoreSpec> & score_type,TOutGraph & ali_graph)625 matchRefinement(TAlignmentString & alis,
626 StringSet<TSequence, TSetSpec> & seq,
627 Score<TScoreValue,TScoreSpec> & score_type,
628 TOutGraph & ali_graph)
629 {
630 SEQAN_CHECKPOINT
631 //min_fragment_len = 1 ==> Exact cutting
632 bool anno = false;
633 matchRefinement(alis,seq,score_type,ali_graph,1,anno,ExactRefinement());
634 }
635
636
637
638 /**
639 .Function.matchRefinement:
640 ..cat:Alignments
641 ..summary:Refines (i.e. cuts into smaller parts) a set of pairwise segment
642 matches in such a way that none of the segments partly overlap. They are either
643 identical (fully overlapping) or non-overlapping.
644 ..signature:matchRefinement(matches,stringSet,refinedGraph)
645 ..param.stringSet:The StringSet containing the sequences which the matches lie on.
646 ...type:Class.StringSet
647 ..param.refinedGraph:The resulting refined set of matches stored in a graph.
648 ...type:Spec.Alignment Graph
649 ..include:seqan/refinement.h
650 */
651 //exact refinement, score type not given
652 template<typename TFragmentString, typename TOutGraph, typename TSequence, typename TSetSpec>
653 void
matchRefinement(TFragmentString & matches,StringSet<TSequence,TSetSpec> & strSet,TOutGraph & ali_graph)654 matchRefinement(TFragmentString & matches,
655 StringSet<TSequence, TSetSpec> & strSet,
656 TOutGraph & ali_graph)
657 {
658 SEQAN_CHECKPOINT
659 typename Cargo<TOutGraph>::Type fake_score = 1;
660 bool anno = false;
661 matchRefinement(matches,strSet,fake_score,ali_graph,1,anno,ExactRefinement());
662 }
663
664
665 }
666 #endif //#ifndef SEQAN_HEADER_...
667