1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Tobias Rausch <rausch@embl.de>
33 // Author: Anne-Katrin Emde <anne-katrin.emde@fu-berlin.de>
34 // ==========================================================================
35
36 #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_ALGORITHM_REFINE_ALIGNGRAPH_H_
37 #define SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_ALGORITHM_REFINE_ALIGNGRAPH_H_
38
39 //SEQAN_NO_DDDOC: do not generate documentation for this file
40
41 namespace seqan {
42
43
44 ///////////////////////////////////////////////////////////////////////////////////////////////////////
45 //Functios for Align Graphs
46 //project onto other sequence for Graph<Alignment>
47 template<typename TAlignment,typename TId1, typename TPos1, typename TId2, typename TPos2, typename TValue,typename TMap>
48 void
_getOtherSequenceAndProject(Graph<TAlignment> & segment,TValue seg_num,TMap &,TId1 seq_i_id,TPos1 pos_i,TId2 & seq_j_id,TPos2 & pos_j)49 _getOtherSequenceAndProject(Graph<TAlignment> & segment,
50 TValue seg_num,
51 TMap &,
52 TId1 seq_i_id,
53 TPos1 pos_i,
54 TId2 & seq_j_id,
55 TPos2 & pos_j)
56 {
57 getProjectedPosition(segment,seg_num,seq_i_id, pos_i,seq_j_id,pos_j);
58
59 }
60
61
62
63
64 //given seq and segment, get the sequenceId (seq_i) and its begin and end
65 //if seq = 0 get first sequence (that takes part in the segment match)
66 //if seq = 1 get second sequence
67 template<typename TAlign, typename TId, typename TPosition, typename TId2>
68 void
_getSeqBeginAndEnd(Graph<TAlign> & segment,std::map<const void *,int> &,TId & seq_i_id,TPosition & begin_i,TPosition & end_i,TId2 seq)69 _getSeqBeginAndEnd(Graph<TAlign> & segment,
70 std::map<const void * ,int> &,
71 TId & seq_i_id,
72 TPosition & begin_i,
73 TPosition & end_i,
74 TId2 seq)
75 {
76 //walk through edges, take first edge, target, source,
77 //define: seq == 0 ==> seq_i_id = id of source of first edge
78 // seq == 1 ==> seq_i_id = id of target of first edge
79 typedef Graph<TAlign> TGraph;
80 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
81 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
82
83 TEdgeIterator ed_it(segment);
84 //goBegin(ed_it);
85 if(seq==0)
86 {
87 TVertexDescriptor src = sourceVertex(ed_it);
88 seq_i_id = sequenceId(segment,src);
89 }
90 else
91 {
92 TVertexDescriptor trg = targetVertex(ed_it);
93 seq_i_id = sequenceId(segment,trg);
94 }
95 begin_i = getFirstCoveredPosition(segment,seq_i_id);
96 end_i = getLastCoveredPosition(segment,seq_i_id);
97
98
99 }
100
101
102
103
104 //////////////////////////
105 //for Graph<TAlign>
106 //vorsichtig! noch nicht richtig, bis jetzt nur ungapped exact matches...
107 //template<typename TScore,typename TStringSet,typename TAlignment,typename TValue>
108 //typename Value<TScore>::Type
109 //_getRefinedMatchScore(TScore & score_type,
110 // TStringSet & seqs,
111 // Graph<TAlignment> & segment,
112 // TValue pos_i,
113 // TValue pos_j,
114 // TValue len)
115 //{
116 // int pseudo_map = 0;
117 // TValue pos_j_check,seq_j_id;
118 // _getOtherSequenceAndProject(segment,pseudo_map,seq_i_id,pos_i,seq_j_id,pos_j_check);
119 // SEQAN_ASSERT(pos_j_check==pos_j);
120 // TValue last_pos_i = pos_i + len;
121 // TValue last_pos_j;
122 // _getOtherSequenceAndProject(segment,pseudo_map,seq_i_id,last_pos_i,seq_j_id,last_pos_j);
123 //
124 //// typename Infix<typename Value<TStringSet>::Type>::Type label0 = infix(getValueById(seqs,seq_i_id),pos_i,last_pos_i);
125 //// typename Infix<typename Value<TStringSet>::Type>::Type label1 = infix(getValueById(seqs,seq_j_id),pos_j,last_pos_j);
126 //
127 // typename Value<TScore>::Type score = 0;
128 // TValue i = 0;
129 // while (i < len)
130 // {
131 // next_pos_j = getProjectedPosition(segment,seq_i_id,pos_i);
132 // //gaps
133 // if(pos_j+1 != next_pos_j)
134 // {
135 // if(pos_j == next_pos_j)
136 // {
137 // score += scoreGapExtend(score_type);
138 // ++pos_i;
139 // ++i;
140 // continue;
141 // }
142 // }
143 // pos_j = next_pos_j;
144 // score += score(score_type,getValueById(seqs,seq_i_id)[pos_i],getValueById(seqs,seq_j_id)[pos_j]);
145 // ++i;
146 // ++pos_i;
147 // }
148 //
149 //
150 //
151 //
152 //
153 //
154 //
155 //
156 // typename Infix<typename Value<TStringSet>::Type>::Type label0 = label(segment,stringSet(segment)[0]);
157 // typename Infix<typename Value<TStringSet>::Type>::Type label1 = label(segment,stringSet(segment)[1]);
158 //
159 //
160 // //typename Infix<typename Value<TStringSet>::Type>::Type label0 = label(segment,0);
161 // //typename Infix<typename Value<TStringSet>::Type>::Type label1 = label(segment,1);
162 // int i = 0;
163 // typename Value<TScore>::Type ret_score = 0;
164 // while(i < len)
165 // {
166 // ret_score += score(score_type,label0[i],label1[i]);
167 // ++i;
168 // }
169 // //ret_score = scoreMatch(score_type);
170 // //ret_score *= len;
171 // return ret_score;
172 //}
173
174
175 //////////////////////////
176 // get score for part of pairwise alignment graph starting in pos_i in first sequence and in
177 // pos_j in second sequence, and with length len and len_j respectively
178 // only for exact refinement
179 template<typename TScoreValue,typename TScoreSpec,typename TStringSet,typename TAlignment,typename TValue>
180 TScoreValue
_getRefinedMatchScore(Score<TScoreValue,TScoreSpec> & score_type,TStringSet & seqs,Graph<TAlignment> & segment,TValue pos_i,TValue pos_j,TValue len,TValue len_j)181 _getRefinedMatchScore(Score<TScoreValue,TScoreSpec> & score_type,
182 TStringSet & seqs,
183 Graph<TAlignment> & segment,
184 TValue pos_i,
185 TValue pos_j,
186 TValue len,
187 TValue len_j)
188 {
189
190 typedef Graph<TAlignment> TGraph;
191 typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
192 typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
193 typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
194 typedef typename TGraph::TPosToVertexMap TPosToVertexMap;
195 typedef typename TPosToVertexMap::const_iterator TVertexMapIter;
196 TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
197
198 TValue seq_i_id;
199 TEdgeIterator ed(segment);
200 //goBegin(ed);
201 seq_i_id = sequenceId(segment,sourceVertex(ed));
202
203 int pseudo_map = 0;
204 TValue pos_j_check,seq_j_id;
205 _getOtherSequenceAndProject(segment,pseudo_map,seq_i_id,pos_i,seq_j_id,pos_j_check);
206 SEQAN_ASSERT(pos_j_check==pos_j);
207 TValue last_pos_j = pos_j + len_j;
208
209 TScoreValue ret_score = 0;
210 bool last_one_was_aligned = false;
211
212 while(len != 0)
213 {
214 TValue rest = 0;
215 TVertexMapIter it = segment.data_pvMap.upper_bound(std::make_pair(seq_i_id, pos_i));
216 // it->second is nilVertex if pos_i lies within gap
217 if(it->second == nilVertex)
218 {
219 ++it;
220 if(it != segment.data_pvMap.end() && it->first.first == seq_i_id)
221 {
222 rest = fragmentBegin(segment,it->second)-pos_i;
223 last_one_was_aligned = false;
224 if(rest < len)//add rest many gaps
225 ret_score += rest * scoreGapExtend(score_type);
226 else
227 {//add len many gaps
228 ret_score += len * scoreGapExtend(score_type);
229 return ret_score; //and done!
230 }
231 }
232 else
233 {//add len many gaps
234 ret_score += len * scoreGapExtend(score_type);
235 return ret_score; //and done!
236 }
237 }
238 else{
239 TVertexDescriptor vd = it->second;
240 rest = fragmentBegin(segment,vd)+fragmentLength(segment,vd)-pos_i;
241 TOutEdgeIterator ed_it(segment,vd);
242 if(!atEnd(ed_it)) //aligned stretch
243 {
244 if(last_one_was_aligned)
245 {
246 TValue next_pos_j,temp;
247 getProjectedPosition(segment,seq_i_id,pos_i,temp,next_pos_j);
248 ret_score += (next_pos_j-pos_j) * scoreGapExtend(score_type);
249 pos_j = next_pos_j;
250 }
251 last_one_was_aligned = true;
252 TValue i = 0;
253 while(i < rest && i < len)
254 {
255 ret_score += score(score_type,getValueById(seqs,seq_i_id)[pos_i++],getValueById(seqs,seq_j_id)[pos_j++]);
256 ++i;
257 }
258 if(rest>len) return ret_score; //done (last_pos_i is somewhere inside the current node)
259 else
260 {//last_pos_i is the first position of the next node
261 if(rest == len) //check if there is an unalgined stretch on seq_j_id that needs to be included
262 {
263 if(pos_j != last_pos_j)
264 ret_score += (last_pos_j-pos_j) * scoreGapExtend(score_type);
265 }
266 }
267 }
268 else //gap
269 {
270 last_one_was_aligned = false;
271 if(rest < len)//add rest many gaps
272 ret_score += rest * scoreGapExtend(score_type);
273 else
274 {//add len many gaps
275 ret_score += len * scoreGapExtend(score_type);
276 return ret_score; //and done!
277 }
278 }
279 }
280 len -= rest;
281 }
282
283
284 return ret_score;
285 }
286
287 } // namespace seqan
288
289 #endif // #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_ALGORITHM_REFINE_ALIGNGRAPH_H_
290