1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Tobias Rausch <rausch@embl.de>
33 // Author: Anne-Katrin Emde <anne-katrin.emde@fu-berlin.de>
34 // ==========================================================================
35 
36 #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_ALGORITHM_REFINE_ALIGNGRAPH_H_
37 #define SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_ALGORITHM_REFINE_ALIGNGRAPH_H_
38 
39 //SEQAN_NO_DDDOC: do not generate documentation for this file
40 
41 namespace seqan {
42 
43 
44 ///////////////////////////////////////////////////////////////////////////////////////////////////////
45 //Functios for Align Graphs
46 //project onto other sequence for Graph<Alignment>
47 template<typename TAlignment,typename TId1, typename TPos1, typename TId2, typename TPos2, typename TValue,typename TMap>
48 void
_getOtherSequenceAndProject(Graph<TAlignment> & segment,TValue seg_num,TMap &,TId1 seq_i_id,TPos1 pos_i,TId2 & seq_j_id,TPos2 & pos_j)49 _getOtherSequenceAndProject(Graph<TAlignment> & segment,
50                 TValue seg_num,
51                             TMap &,
52                              TId1 seq_i_id,
53                             TPos1 pos_i,
54                             TId2 & seq_j_id,
55                             TPos2 & pos_j)
56 {
57     getProjectedPosition(segment,seg_num,seq_i_id, pos_i,seq_j_id,pos_j);
58 
59 }
60 
61 
62 
63 
64 //given seq and segment, get the sequenceId (seq_i) and its begin and end
65 //if seq = 0 get first sequence (that takes part in the segment match)
66 //if seq = 1 get second sequence
67 template<typename TAlign, typename TId, typename TPosition, typename TId2>
68 void
_getSeqBeginAndEnd(Graph<TAlign> & segment,std::map<const void *,int> &,TId & seq_i_id,TPosition & begin_i,TPosition & end_i,TId2 seq)69 _getSeqBeginAndEnd(Graph<TAlign> & segment,
70                   std::map<const void * ,int> &,
71                   TId & seq_i_id,
72                   TPosition & begin_i,
73                   TPosition & end_i,
74                   TId2 seq)
75 {
76     //walk through edges, take first edge, target, source,
77     //define: seq == 0   ==> seq_i_id = id of source of first edge
78     //        seq == 1   ==> seq_i_id = id of target of first edge
79     typedef Graph<TAlign> TGraph;
80     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
81     typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
82 
83     TEdgeIterator ed_it(segment);
84     //goBegin(ed_it);
85     if(seq==0)
86     {
87         TVertexDescriptor src = sourceVertex(ed_it);
88         seq_i_id = sequenceId(segment,src);
89     }
90     else
91     {
92         TVertexDescriptor trg = targetVertex(ed_it);
93         seq_i_id = sequenceId(segment,trg);
94     }
95     begin_i = getFirstCoveredPosition(segment,seq_i_id);
96     end_i = getLastCoveredPosition(segment,seq_i_id);
97 
98 
99 }
100 
101 
102 
103 
104 //////////////////////////
105 //for Graph<TAlign>
106 //vorsichtig! noch nicht richtig, bis jetzt nur ungapped exact matches...
107 //template<typename TScore,typename TStringSet,typename TAlignment,typename TValue>
108 //typename Value<TScore>::Type
109 //_getRefinedMatchScore(TScore & score_type,
110 //         TStringSet & seqs,
111 //         Graph<TAlignment> & segment,
112 //         TValue pos_i,
113 //         TValue pos_j,
114 //         TValue len)
115 //{
116 //    int pseudo_map = 0;
117 //    TValue pos_j_check,seq_j_id;
118 //    _getOtherSequenceAndProject(segment,pseudo_map,seq_i_id,pos_i,seq_j_id,pos_j_check);
119 //    SEQAN_ASSERT(pos_j_check==pos_j);
120 //    TValue last_pos_i = pos_i + len;
121 //    TValue last_pos_j;
122 //    _getOtherSequenceAndProject(segment,pseudo_map,seq_i_id,last_pos_i,seq_j_id,last_pos_j);
123 //
124 ////    typename Infix<typename Value<TStringSet>::Type>::Type label0 = infix(getValueById(seqs,seq_i_id),pos_i,last_pos_i);
125 ////    typename Infix<typename Value<TStringSet>::Type>::Type label1 = infix(getValueById(seqs,seq_j_id),pos_j,last_pos_j);
126 //
127 //    typename Value<TScore>::Type score = 0;
128 //    TValue i = 0;
129 //    while (i < len)
130 //    {
131 //        next_pos_j = getProjectedPosition(segment,seq_i_id,pos_i);
132 //        //gaps
133 //        if(pos_j+1 != next_pos_j)
134 //        {
135 //            if(pos_j == next_pos_j)
136 //            {
137 //                score += scoreGapExtend(score_type);
138 //                ++pos_i;
139 //                ++i;
140 //                continue;
141 //            }
142 //        }
143 //        pos_j = next_pos_j;
144 //        score += score(score_type,getValueById(seqs,seq_i_id)[pos_i],getValueById(seqs,seq_j_id)[pos_j]);
145 //        ++i;
146 //        ++pos_i;
147 //    }
148 //
149 //
150 //
151 //
152 //
153 //
154 //
155 //
156 //    typename Infix<typename Value<TStringSet>::Type>::Type label0 = label(segment,stringSet(segment)[0]);
157 //    typename Infix<typename Value<TStringSet>::Type>::Type label1 = label(segment,stringSet(segment)[1]);
158 //
159 //
160 //    //typename Infix<typename Value<TStringSet>::Type>::Type label0 = label(segment,0);
161 //    //typename Infix<typename Value<TStringSet>::Type>::Type label1 = label(segment,1);
162 //    int i = 0;
163 //    typename Value<TScore>::Type ret_score = 0;
164 //    while(i < len)
165 //    {
166 //        ret_score += score(score_type,label0[i],label1[i]);
167 //        ++i;
168 //    }
169 //    //ret_score = scoreMatch(score_type);
170 //    //ret_score *= len;
171 //    return ret_score;
172 //}
173 
174 
175 //////////////////////////
176 // get score for part of pairwise alignment graph starting in pos_i in first sequence and in
177 // pos_j in second sequence, and with length len and len_j respectively
178 // only for exact refinement
179 template<typename TScoreValue,typename TScoreSpec,typename TStringSet,typename TAlignment,typename TValue>
180 TScoreValue
_getRefinedMatchScore(Score<TScoreValue,TScoreSpec> & score_type,TStringSet & seqs,Graph<TAlignment> & segment,TValue pos_i,TValue pos_j,TValue len,TValue len_j)181 _getRefinedMatchScore(Score<TScoreValue,TScoreSpec> & score_type,
182          TStringSet & seqs,
183          Graph<TAlignment> & segment,
184          TValue pos_i,
185          TValue pos_j,
186          TValue len,
187          TValue len_j)
188 {
189 
190     typedef Graph<TAlignment> TGraph;
191     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
192     typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
193     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
194     typedef typename TGraph::TPosToVertexMap TPosToVertexMap;
195     typedef typename TPosToVertexMap::const_iterator TVertexMapIter;
196     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
197 
198     TValue seq_i_id;
199     TEdgeIterator ed(segment);
200     //goBegin(ed);
201     seq_i_id = sequenceId(segment,sourceVertex(ed));
202 
203     int pseudo_map = 0;
204     TValue pos_j_check,seq_j_id;
205     _getOtherSequenceAndProject(segment,pseudo_map,seq_i_id,pos_i,seq_j_id,pos_j_check);
206     SEQAN_ASSERT(pos_j_check==pos_j);
207     TValue last_pos_j = pos_j + len_j;
208 
209     TScoreValue ret_score = 0;
210     bool last_one_was_aligned = false;
211 
212     while(len != 0)
213     {
214         TValue rest = 0;
215         TVertexMapIter it = segment.data_pvMap.upper_bound(std::make_pair(seq_i_id, pos_i));
216         // it->second is nilVertex if pos_i lies within gap
217         if(it->second == nilVertex)
218         {
219             ++it;
220             if(it != segment.data_pvMap.end() && it->first.first == seq_i_id)
221             {
222                 rest = fragmentBegin(segment,it->second)-pos_i;
223                 last_one_was_aligned = false;
224                 if(rest < len)//add rest many gaps
225                         ret_score += rest * scoreGapExtend(score_type);
226                 else
227                 {//add len many gaps
228                     ret_score += len * scoreGapExtend(score_type);
229                     return ret_score;    //and done!
230                 }
231             }
232             else
233             {//add len many gaps
234                 ret_score += len * scoreGapExtend(score_type);
235                 return ret_score;    //and done!
236             }
237         }
238         else{
239             TVertexDescriptor vd = it->second;
240             rest = fragmentBegin(segment,vd)+fragmentLength(segment,vd)-pos_i;
241             TOutEdgeIterator ed_it(segment,vd);
242             if(!atEnd(ed_it)) //aligned stretch
243             {
244                 if(last_one_was_aligned)
245                 {
246                     TValue next_pos_j,temp;
247                     getProjectedPosition(segment,seq_i_id,pos_i,temp,next_pos_j);
248                     ret_score += (next_pos_j-pos_j) * scoreGapExtend(score_type);
249                     pos_j = next_pos_j;
250                 }
251                 last_one_was_aligned = true;
252                 TValue i = 0;
253                 while(i < rest && i < len)
254                 {
255                     ret_score += score(score_type,getValueById(seqs,seq_i_id)[pos_i++],getValueById(seqs,seq_j_id)[pos_j++]);
256                     ++i;
257                 }
258                 if(rest>len) return ret_score; //done (last_pos_i is somewhere inside the current node)
259                 else
260                 {//last_pos_i is the first position of the next node
261                     if(rest == len) //check if there is an unalgined stretch on seq_j_id that needs to be included
262                     {
263                         if(pos_j != last_pos_j)
264                             ret_score += (last_pos_j-pos_j) * scoreGapExtend(score_type);
265                     }
266                 }
267             }
268             else //gap
269             {
270                 last_one_was_aligned = false;
271                 if(rest < len)//add rest many gaps
272                         ret_score += rest * scoreGapExtend(score_type);
273                 else
274                 {//add len many gaps
275                     ret_score += len * scoreGapExtend(score_type);
276                     return ret_score;    //and done!
277                 }
278             }
279         }
280         len -= rest;
281     }
282 
283 
284     return ret_score;
285 }
286 
287 }  // namespace seqan
288 
289 #endif  // #ifndef SEQAN_INCLUDE_SEQAN_GRAPH_ALIGN_GRAPH_ALGORITHM_REFINE_ALIGNGRAPH_H_
290