1 /*  $Id: align_compare.cpp 522944 2016-12-26 16:08:29Z mozese2 $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <objects/seq/Seq_annot.hpp>
35 #include <objects/seqloc/Seq_loc.hpp>
36 #include <objects/seqalign/Seq_align.hpp>
37 #include <objects/seqalign/Seq_align_set.hpp>
38 #include <objects/seqalign/Std_seg.hpp>
39 #include <objects/seqalign/Dense_seg.hpp>
40 #include <objects/seqalign/Spliced_seg.hpp>
41 #include <objects/seqalign/Spliced_exon.hpp>
42 #include <objects/seqalign/Spliced_exon_chunk.hpp>
43 #include <objects/seqalign/Prot_pos.hpp>
44 #include <objects/seqalign/Product_pos.hpp>
45 #include <objects/seqalign/Score.hpp>
46 #include <objects/seq/seq_id_handle.hpp>
47 #include <objects/general/Object_id.hpp>
48 
49 #include <objtools/alnmgr/pairwise_aln.hpp>
50 #include <objtools/alnmgr/aln_converters.hpp>
51 
52 #include <algo/align/util/align_compare.hpp>
53 #include <algo/align/util/score_lookup.hpp>
54 #include <algo/align/util/algo_align_util_exceptions.hpp>
55 
56 #include <cmath>
57 #include <ctype.h>
58 
59 BEGIN_NCBI_SCOPE
60 USING_SCOPE(objects);
61 
62 
63 // Retrieve a list of interval-by-interval accounting within an alignment
64 //
65 bool s_IsOverlapping(CAlignCompare::SAlignment const* lhs, CAlignCompare::SAlignment const* rhs, CAlignCompare::ERowComparison row);
66 
s_UpdateSpans(const TSeqRange & query_range,const TSeqRange & subject_range,CAlignCompare::SAlignment & align_info,CAlignCompare::ERowComparison row)67 static void s_UpdateSpans(const TSeqRange &query_range,
68                           const TSeqRange &subject_range,
69                           CAlignCompare::SAlignment& align_info,
70                           CAlignCompare::ERowComparison row)
71 {
72      align_info.spans[row == CAlignCompare::e_Query ? query_range : subject_range] =
73          row == CAlignCompare::e_Subject ? subject_range : query_range;
74 }
75 
s_GetAlignmentSpans_Interval(const CSeq_align & align,CAlignCompare::SAlignment & align_info,CAlignCompare::ERowComparison row)76 static void s_GetAlignmentSpans_Interval(const CSeq_align& align,
77                                          CAlignCompare::SAlignment& align_info,
78                                          CAlignCompare::ERowComparison row)
79 {
80     switch (align.GetSegs().Which()) {
81     case CSeq_align::TSegs::e_Disc:
82         ITERATE (CSeq_align::TSegs::TDisc::Tdata, it,
83                  align.GetSegs().GetDisc().Get()) {
84             s_GetAlignmentSpans_Interval(**it, align_info, row);
85         }
86         break;
87 
88     case CSeq_align::TSegs::e_Std:
89         {{
90              ITERATE (CSeq_align::TSegs::TStd, seg_it,
91                       align.GetSegs().GetStd()) {
92                  // our expectation is to find a set of two locs
93                  // each loc is expected to be a Seq-interval
94 
95                  // we expect std-seg alignments to be single intervals
96                  // check this here
97                  if ((*seg_it)->GetLoc().size() != 2) {
98                      NCBI_THROW(CException, eUnknown,
99                                 "Pairwise Std-seg alignments in comparison "
100                                 "should always have two locs");
101                  }
102                  CConstRef<CSeq_loc> loc1 = (*seg_it)->GetLoc()[0];
103                  CConstRef<CSeq_loc> loc2 = (*seg_it)->GetLoc()[1];
104 
105                  if (loc1->IsEmpty()  ||  loc2->IsEmpty()  ||
106                      loc1->IsNull()  ||  loc2->IsNull()) {
107                      // gaps - omit
108                      continue;
109                  }
110 
111                  if (!loc1->IsInt()  ||  !loc2->IsInt()) {
112                      NCBI_THROW(CException, eUnknown,
113                                 "Pairwise Std-set alignments in comparison "
114                                 "should always be intervals");
115                  }
116                  s_UpdateSpans(loc1->GetTotalRange(),
117                                loc2->GetTotalRange(),
118                                align_info, row);
119              }
120          }}
121         break;
122 
123     default:
124         {{
125              CAlnSeqId id0(align.GetSeq_id(0));
126              CAlnSeqId id1(align.GetSeq_id(1));
127 
128              TAlnSeqIdIRef r0(&id0);
129              TAlnSeqIdIRef r1(&id1);
130              CPairwiseAln pw(r0, r1);
131              ConvertSeqAlignToPairwiseAln(pw, align, 0, 1);
132              ITERATE (CPairwiseAln, it, pw) {
133                  const CPairwiseAln::TAlignRange& r = *it;
134                  s_UpdateSpans(TSeqRange(r.GetFirstFrom(), r.GetFirstTo()),
135                                TSeqRange(r.GetSecondFrom(), r.GetSecondTo()),
136                                align_info, row);
137              }
138          }}
139         break;
140     }
141 }
142 
s_GetAlignmentMismatches(const CSeq_align & align,CAlignCompare::SAlignment & align_info,CAlignCompare::ERowComparison row)143 static void s_GetAlignmentMismatches(const CSeq_align& align,
144                                      CAlignCompare::SAlignment& align_info,
145                                      CAlignCompare::ERowComparison row)
146 {
147     if (!align.GetSegs().IsSpliced()) {
148         CScoreLookup lookup;
149         string traceback = lookup.GetTraceback(align, 0);
150         if (traceback.empty()) {
151             NCBI_THROW(CException, eUnknown,
152                        "Comparing mismatches for dense-seg alignments requires "
153                        "traceback information");
154         }
155         int product_dir = align.GetSeqStrand(0) == eNa_strand_minus
156                            ? -1 : 1;
157         TSeqPos product_pos = product_dir == 1 ? align.GetSeqStart(0)
158                                                 : align.GetSeqStop(0);
159         int genomic_dir = align.GetSeqStrand(1) == eNa_strand_minus
160                            ? -1 : 1;
161         TSeqPos genomic_pos = product_dir == 1 ? align.GetSeqStart(1)
162                                                 : align.GetSeqStop(1);
163         unsigned match = 0;
164         ITERATE (string, it, traceback) {
165             if (isdigit(*it)) {
166                 match = match * 10 + (*it - '0');
167                 continue;
168             }
169             product_pos += match * product_dir;
170             genomic_pos += match * genomic_dir;
171             match = 0;
172             bool genomic_ins = *it == '-';
173             bool product_ins = *++it == '-';
174             if (!genomic_ins && !product_ins) {
175                 /// mismatch
176                 if (row != CAlignCompare::e_Subject) {
177                     align_info.query_mismatches += TSeqRange(
178                         product_pos, product_pos);
179 
180                 }
181                 if (row != CAlignCompare::e_Query) {
182                     align_info.subject_mismatches += TSeqRange(
183                         genomic_pos, genomic_pos);
184                 }
185             }
186             if (!genomic_ins) {
187                 product_pos += product_dir;
188             }
189             if (!product_ins) {
190                 genomic_pos += genomic_dir;
191             }
192         }
193         if (product_pos != (product_dir == 1 ? align.GetSeqStop(0)+1
194                                               : align.GetSeqStart(0)-1)
195          || genomic_pos != (genomic_dir == 1 ? align.GetSeqStop(1)+1
196                                               : align.GetSeqStart(1)-1))
197         {
198            NCBI_THROW(CException, eUnknown,
199                       "Inconsistent length of traceback string " + traceback);
200         }
201         return;
202     }
203 
204     bool is_product_minus = align.GetSegs().GetSpliced().IsSetProduct_strand() &&
205                             align.GetSegs().GetSpliced().GetProduct_strand() == eNa_strand_minus;
206     bool is_genomic_minus = align.GetSegs().GetSpliced().IsSetGenomic_strand() &&
207                             align.GetSegs().GetSpliced().GetGenomic_strand() == eNa_strand_minus;
208     ITERATE (CSpliced_seg::TExons, it, align.GetSegs().GetSpliced().GetExons())
209     {
210         const CSpliced_exon& exon = **it;
211         if (!exon.IsSetParts()) {
212             continue;
213         }
214         int product_dir = is_product_minus ||
215                           (exon.IsSetProduct_strand() &&
216                            exon.GetProduct_strand() == eNa_strand_minus)
217                            ? -1 : 1;
218         TSeqPos product_pos = (product_dir == 1 ? exon.GetProduct_start()
219                                                  : exon.GetProduct_end())
220                               . AsSeqPos();
221         int genomic_dir = is_genomic_minus ||
222                           (exon.IsSetGenomic_strand() &&
223                            exon.GetGenomic_strand() == eNa_strand_minus)
224                            ? -1 : 1;
225         TSeqPos genomic_pos = genomic_dir == 1 ? exon.GetGenomic_start()
226                                                 : exon.GetGenomic_end();
227         ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
228             switch ((*part_it)->Which()) {
229                 case CSpliced_exon_chunk::e_Mismatch:
230                     if (row != CAlignCompare::e_Subject) {
231                         TSeqPos product_mismatch_end = product_pos +
232                               product_dir * ((*part_it)->GetMismatch()-1);
233                         align_info.query_mismatches += TSeqRange(
234                             min(product_pos,product_mismatch_end),
235                             max(product_pos,product_mismatch_end));
236 
237                     }
238                     if (row != CAlignCompare::e_Query) {
239                         TSeqPos genomic_mismatch_end = genomic_pos +
240                               genomic_dir * ((*part_it)->GetMismatch()-1);
241                         align_info.subject_mismatches += TSeqRange(
242                             min(genomic_pos,genomic_mismatch_end),
243                             max(genomic_pos,genomic_mismatch_end));
244                     }
245                     product_pos += product_dir * (*part_it)->GetMismatch();
246                     genomic_pos += genomic_dir * (*part_it)->GetMismatch();
247                     break;
248 
249                 case CSpliced_exon_chunk::e_Match:
250                     product_pos += product_dir * (*part_it)->GetMatch();
251                     genomic_pos += genomic_dir * (*part_it)->GetMatch();
252                     break;
253 
254                 case CSpliced_exon_chunk::e_Product_ins:
255                     product_pos += product_dir * (*part_it)->GetProduct_ins();
256                     break;
257 
258                 case CSpliced_exon_chunk::e_Genomic_ins:
259                     genomic_pos += genomic_dir * (*part_it)->GetGenomic_ins();
260                     break;
261 
262                 default:
263                     NCBI_THROW(CException, eUnknown,
264                                "Unsupported exon part");
265             }
266         }
267     }
268 }
269 
270 // Retrieve a list of exon-by-exon accounting within an alignment
271 //
s_GetAlignmentSpans_Exon(const CSeq_align & align,CAlignCompare::SAlignment & align_info,CAlignCompare::ERowComparison row)272 static void s_GetAlignmentSpans_Exon(const CSeq_align& align,
273                                      CAlignCompare::SAlignment &align_info,
274                                      CAlignCompare::ERowComparison row)
275 {
276     switch (align.GetSegs().Which()) {
277     case CSeq_align::TSegs::e_Denseg:
278         s_UpdateSpans(align.GetSeqRange(0), align.GetSeqRange(1), align_info, row);
279         break;
280 
281     case CSeq_align::TSegs::e_Disc:
282         /* UNTESTED */
283         ITERATE (CSeq_align::TSegs::TDisc::Tdata, it,
284                  align.GetSegs().GetDisc().Get()) {
285             s_UpdateSpans((*it)->GetSeqRange(0), (*it)->GetSeqRange(1), align_info, row);
286         }
287         break;
288 
289     case CSeq_align::TSegs::e_Std:
290         /* UNTESTED */
291         ITERATE (CSeq_align::TSegs::TStd, it, align.GetSegs().GetStd()) {
292             const CStd_seg& seg = **it;
293             s_UpdateSpans(seg.GetLoc()[0]->GetTotalRange(),
294                           seg.GetLoc()[1]->GetTotalRange(), align_info, row);
295         }
296         break;
297 
298     case CSeq_align::TSegs::e_Spliced:
299         ITERATE (CSpliced_seg::TExons, it,
300                  align.GetSegs().GetSpliced().GetExons()) {
301             const CSpliced_exon& exon = **it;
302             TSeqRange genomic(exon.GetGenomic_start(), exon.GetGenomic_end());
303             TSeqRange product;
304             product.SetFrom(exon.GetProduct_start().AsSeqPos());
305             product.SetTo(exon.GetProduct_end().AsSeqPos());
306             s_UpdateSpans(product, genomic, align_info, row);
307         }
308         break;
309 
310     default:
311         NCBI_THROW(CException, eUnknown,
312                    "unhandled alignment type");
313     }
314 }
315 
316 
317 // Retrieve a list of intron-by-intron accounting within an alignment;
318 // meaningful only for Spliced-seg alignments
319 //
s_GetAlignmentSpans_Intron(const CSeq_align & align,CAlignCompare::SAlignment & align_info,CAlignCompare::ERowComparison row)320 static void s_GetAlignmentSpans_Intron(const CSeq_align& align,
321                                        CAlignCompare::SAlignment &align_info,
322                                        CAlignCompare::ERowComparison row)
323 {
324     if (!align.GetSegs().IsSpliced() ||
325         !align.GetSegs().GetSpliced().CanGetProduct_strand() ||
326         !align.GetSegs().GetSpliced().CanGetGenomic_strand())
327     {
328         NCBI_THROW(CException, eUnknown,
329                    "intron mode only meaningful for Spliced-seg alignments");
330     }
331 
332     bool is_reverse = align.GetSegs().GetSpliced().GetProduct_strand() !=
333                           align.GetSegs().GetSpliced().GetGenomic_strand();
334 
335     CRef<CSpliced_exon> last_exon;
336     ITERATE (CSpliced_seg::TExons, it,
337              align.GetSegs().GetSpliced().GetExons()) {
338         CRef<CSpliced_exon> exon = *it;
339         if (last_exon) {
340             CRef<CSpliced_exon> first_exon = is_reverse ? exon : last_exon;
341             CRef<CSpliced_exon> second_exon = is_reverse ? last_exon : exon;
342             TSeqRange genomic(first_exon->GetGenomic_end(),
343                               second_exon->GetGenomic_start());
344             TSeqRange product;
345             product.SetFrom(last_exon->GetProduct_end().AsSeqPos());
346             product.SetTo(exon->GetProduct_start().AsSeqPos());
347             s_UpdateSpans(product, genomic, align_info, row);
348         }
349         last_exon = exon;
350     }
351 }
352 
353 
354 // Retrieve a list of total range spans for an alignment
355 //
s_GetAlignmentSpans_Span(const CSeq_align & align,CAlignCompare::SAlignment & align_info,CAlignCompare::ERowComparison row)356 static void s_GetAlignmentSpans_Span(const CSeq_align& align,
357                                      CAlignCompare::SAlignment &align_info,
358                                      CAlignCompare::ERowComparison row)
359 {
360     s_UpdateSpans(align.GetSeqRange(0), align.GetSeqRange(1), align_info, row);
361 }
362 
363 template<typename T>
s_PopulateScores(const CSeq_align & align,const vector<string> & score_list,vector<T> & scores,bool required=true)364 void s_PopulateScores(const CSeq_align &align,
365                       const vector<string> &score_list,
366                       vector<T> &scores,
367                       bool required = true)
368 {
369     CScoreLookup lookup;
370     ITERATE (vector<string>, it, score_list) {
371         T value = 0;
372         try {
373             value = lookup.GetScore(align, *it);
374         } catch(CAlgoAlignUtilException &e) {
375             /// If scores are not required, use value of 0 for scores that were not found
376             if (required || e.GetErrCode() != CAlgoAlignUtilException::eScoreNotFound) {
377                 throw;
378             }
379         }
380         scores.push_back(value);
381     }
382 }
383 
s_PopulateScoreSet(const CSeq_align & align,const set<string> & score_set,bool score_set_as_blacklist,CAlignCompare::TIntegerScoreSet & integer_scores,CAlignCompare::TRealScoreSet & real_scores)384 static void s_PopulateScoreSet(const CSeq_align &align,
385                                const set<string> &score_set,
386                                bool score_set_as_blacklist,
387                                CAlignCompare::TIntegerScoreSet &integer_scores,
388                                CAlignCompare::TRealScoreSet &real_scores)
389 {
390     if (score_set_as_blacklist) {
391         ITERATE (CSeq_align::TScore, score_it, align.GetScore()) {
392             if ((*score_it)->GetId().IsStr() &&
393                 !score_set.count((*score_it)->GetId().GetStr()))
394             {
395                 if ((*score_it)->GetValue().IsInt()) {
396                     integer_scores[(*score_it)->GetId().GetStr()] =
397                         (*score_it)->GetValue().GetInt();
398                 } else {
399                     real_scores[(*score_it)->GetId().GetStr()] =
400                         (*score_it)->GetValue().GetReal();
401                 }
402             }
403         }
404     } else {
405         CScoreLookup lookup;
406         ITERATE (set<string>, score_it, score_set) {
407             double value = lookup.GetScore(align, *score_it);
408             if (lookup.IsIntegerScore(align, *score_it)) {
409                 integer_scores[*score_it] = static_cast<int>(value);
410             } else {
411                 real_scores[*score_it] = value;
412             }
413         }
414     }
415 }
416 
s_EquivalentScores(const CAlignCompare::TRealScoreSet & scores1,const CAlignCompare::TRealScoreSet & scores2,double real_score_tolerance)417 static bool s_EquivalentScores(const CAlignCompare::TRealScoreSet &scores1,
418                                const CAlignCompare::TRealScoreSet &scores2,
419                                double real_score_tolerance)
420 {
421     for (CAlignCompare::TRealScoreSet::const_iterator it1 = scores1.begin(),
422                                                       it2 = scores2.begin();
423          it1 != scores1.end() || it2 != scores2.end(); ++it1, ++it2)
424     {
425         if (it1 == scores1.end() || it2 == scores2.end()
426                                  || it1->first != it2->first)
427         {
428             /// The two don't have the same set of real-value scores
429             return false;
430         }
431         double allowed_diff = max(abs(it1->second), abs(it2->second))
432                             * real_score_tolerance;
433         if (abs(it1->second - it2->second) > allowed_diff) {
434             return false;
435         }
436     }
437     return true;
438 }
439 
440 //////////////////////////////////////////////////////////////////////////////
441 //
442 // SComparison constructor does the hard work of verifying that two sets of alignment
443 // span ranges actually overlap, and determines by how much
444 //
445 struct SComparison
446 {
447     SComparison();
448 
449     SComparison(const CAlignCompare::SAlignment& first,
450                 const CAlignCompare::SAlignment& second,
451                 double real_score_tolerance);
452 
453     size_t spans_in_common;
454     size_t spans_overlap;
455     Int8 spans_unique_first;
456     Int8 spans_unique_second;
457     bool is_equivalent;
458     float overlap;
459 };
460 
SComparison()461 SComparison::SComparison()
462 : spans_in_common(0)
463 , spans_overlap(0)
464 , spans_unique_first(0)
465 , spans_unique_second(0)
466 , is_equivalent(false)
467 , overlap(0)
468 {
469 }
470 
SComparison(const CAlignCompare::SAlignment & first,const CAlignCompare::SAlignment & second,double real_score_tolerance)471 SComparison::SComparison(const CAlignCompare::SAlignment& first,
472                          const CAlignCompare::SAlignment& second,
473                          double real_score_tolerance)
474 : spans_in_common(0)
475 , spans_overlap(0)
476 , spans_unique_first(0)
477 , spans_unique_second(0)
478 , is_equivalent(false)
479 , overlap(0)
480 {
481     if (first.CompareGroup(second, false) != 0) {
482         /// Alignments have different disambiguiting score values, can't be compared
483         return;
484     }
485 
486     float dot = 0;
487     float sum_a = 0;
488     float sum_b = 0;
489 
490     CAlignCompare::TAlignmentSpans::const_iterator first_it = first.spans.begin();
491     CAlignCompare::TAlignmentSpans::const_iterator second_it = second.spans.begin();
492     for ( ;  first_it != first.spans.end()  &&  second_it != second.spans.end();  ) {
493         if (*first_it == *second_it) {
494             TSeqPos intersecting_len = first_it->first.GetLength();
495             dot += float(intersecting_len) * float(intersecting_len);
496             sum_a += first_it->first.GetLength() * first_it->first.GetLength();
497             sum_b += second_it->first.GetLength() * second_it->first.GetLength();
498 
499             spans_in_common += intersecting_len;
500             ++first_it;
501             ++second_it;
502         } else {
503             bool overlap =
504                 first_it->first.IntersectingWith(second_it->first)  &&
505                 first_it->second.IntersectingWith(second_it->second);
506             TSeqPos intersecting_len = 0;
507             if (overlap) {
508                 TSeqRange r = first_it->first;
509                 r.IntersectWith(second_it->first);
510 
511                 intersecting_len = r.GetLength();
512                 dot += float(intersecting_len) * float(intersecting_len);
513 
514                 spans_overlap += intersecting_len;
515                 spans_unique_first -= intersecting_len;
516                 spans_unique_second -= intersecting_len;
517             }
518             if (*first_it < *second_it) {
519                 sum_a += first_it->first.GetLength() * first_it->first.GetLength();
520                 spans_unique_first += first_it->first.GetLength();
521                 ++first_it;
522             } else {
523                 sum_b += second_it->first.GetLength() * second_it->first.GetLength();
524                 spans_unique_second += second_it->first.GetLength();
525                 ++second_it;
526             }
527         }
528     }
529     is_equivalent = spans_in_common == first.length &&
530                     spans_in_common == second.length &&
531                     first.query_mismatches == second.query_mismatches &&
532                     first.subject_mismatches == second.subject_mismatches &&
533                     first.integer_scores == second.integer_scores &&
534                     s_EquivalentScores(first.real_scores, second.real_scores,
535                                        real_score_tolerance);
536     for ( ;  first_it != first.spans.end(); ++first_it) {
537         sum_a += first_it->first.GetLength() * first_it->first.GetLength();
538         spans_unique_first += first_it->first.GetLength();
539     }
540     for ( ;  second_it != second.spans.end(); ++second_it) {
541         sum_b += second_it->first.GetLength() * second_it->first.GetLength();
542         spans_unique_second += second_it->first.GetLength();
543     }
544 
545     overlap = dot == 0 ? 0 : dot / ::sqrt(sum_a * sum_b);
546 }
547 
548 
549 struct SAlignment_PtrLess
550 {
operator ()SAlignment_PtrLess551     bool operator()(const CAlignCompare::SAlignment *ptr1,
552                     const CAlignCompare::SAlignment *ptr2) const
553     {
554         const CAlignCompare::SAlignment& k1 = *ptr1;
555         const CAlignCompare::SAlignment& k2 = *ptr2;
556 
557         if (k1.query < k2.query)                    { return true; }
558         if (k2.query < k1.query)                    { return false; }
559         if (k1.subject < k2.subject)                { return true; }
560         if (k2.subject < k1.subject)                { return false; }
561 
562         if (k1.scores < k2.scores)                  { return true; }
563         if (k2.scores < k1.scores)                  { return false; }
564 
565         if (k1.query_strand < k2.query_strand)      { return true; }
566         if (k2.query_strand < k1.query_strand)      { return false; }
567         if (k1.subject_strand < k2.subject_strand)  { return true; }
568         if (k2.subject_strand < k1.subject_strand)  { return false; }
569 
570         if (k1.subject_range < k2.subject_range)    { return true; }
571         if (k2.subject_range < k1.subject_range)    { return false; }
572         if (k1.query_range < k2.query_range)        { return true; }
573         if (k2.query_range < k1.query_range)        { return false; }
574 
575         return ptr1 < ptr2;
576     }
577 };
578 
579 typedef set<CAlignCompare::SAlignment *, SAlignment_PtrLess> TAlignPtrSet;
580 typedef pair<CAlignCompare::SAlignment *, CAlignCompare::SAlignment *> TPtrPair;
581 typedef pair<TPtrPair, SComparison> TComp;
582 
583 struct SComp_Less
584 {
585     bool strict_compare;
586 
SComp_LessSComp_Less587     SComp_Less(bool strict = false)
588         : strict_compare(strict)
589     {
590     }
591 
operator ()SComp_Less592     bool operator()(const TComp& c1, const TComp& c2) const
593     {
594         // strict comparison amounts to placing all identical pairs either before
595         // or after non-identical ones
596         // putting identical pairs first means that we evaluate the best examples first, and
597         // can establish equality without polluting the comparison with weaker
598         // alignments; non-strict means we combine weaker overlapping
599         // alignments together into equivalence groups with alignments that are
600         // identical
601         if (strict_compare) {
602             if (c1.second.is_equivalent && !c2.second.is_equivalent) {
603                 return true;
604             }
605             if (c2.second.is_equivalent && !c1.second.is_equivalent) {
606                 return false;
607             }
608         }
609         else {
610             if (c1.second.is_equivalent && !c2.second.is_equivalent) {
611                 return false;
612             }
613             if (c2.second.is_equivalent && !c1.second.is_equivalent) {
614                 return true;
615             }
616         }
617 
618         if (c1.first.first->subject_range < c2.first.first->subject_range)
619         {
620             return false;
621         }
622         if (c2.first.first->subject_range < c1.first.first->subject_range)
623         {
624             return true;
625         }
626         return c1.first.second->query_range < c2.first.second->query_range;
627     }
628 };
629 
630 CAlignCompare::SAlignment::
SAlignment(int s,const CRef<CSeq_align> & al,CAlignCompare & compare,bool is_slice)631 SAlignment(int s, const CRef<CSeq_align> &al, CAlignCompare &compare,
632            bool is_slice)
633 : source_set(s)
634 , query_strand(eNa_strand_unknown)
635 , subject_strand(eNa_strand_unknown)
636 , length(0)
637 , align(al)
638 , match_level(CAlignCompare::e_NoMatch)
639 , compare_object(compare)
640 {
641     try {
642         if (compare.m_Row != e_Subject) {
643             query = CSeq_id_Handle::GetHandle(align->GetSeq_id(0));
644             query_strand = align->GetSeqStrand(0);
645             query_range = align->GetSeqRange(0);
646         }
647         if (compare.m_Row != e_Query) {
648             subject = CSeq_id_Handle::GetHandle(align->GetSeq_id(1));
649             subject_strand = align->GetSeqStrand(1);
650             subject_range = align->GetSeqRange(1);
651         }
652         s_PopulateScores(*align, compare.m_DisambiguitingScores.first, scores.first);
653         s_PopulateScores(*align, compare.m_DisambiguitingScores.second, scores.second, false);
654         s_PopulateScores(*align, compare.m_QualityScores, quality_scores);
655         s_PopulateScoreSet(*align, compare.m_ScoreSet, compare.m_ScoreSetAsBlacklist,
656                            integer_scores, real_scores);
657         switch (compare.m_Mode) {
658         case e_Full:
659             /// If this alignment was created by slicing an input alignment,
660             /// it doesn't have traceback data so mismatches can't be calculated
661             if (!is_slice) {
662                 s_GetAlignmentMismatches(*align, *this, compare.m_Row);
663             }
664             // fall through
665 
666         case e_Interval:
667             s_GetAlignmentSpans_Interval(*align, *this, compare.m_Row);
668             break;
669 
670         case e_Exon:
671             s_GetAlignmentSpans_Exon(*align, *this, compare.m_Row);
672             break;
673 
674         case e_Span:
675             s_GetAlignmentSpans_Span(*align, *this, compare.m_Row);
676             break;
677 
678         case e_Intron:
679             s_GetAlignmentSpans_Intron(*align, *this, compare.m_Row);
680             break;
681         }
682     }
683     catch (CException& e) {
684         ERR_POST(Error << "alignment not processed: " << MSerial_AsnText << *align << e);
685         spans.clear();
686     }
687     ITERATE (TAlignmentSpans, it, spans) {
688         length += it->first.GetLength();
689     }
690 }
691 
692 int CAlignCompare::SAlignment::
CompareGroup(const SAlignment & o,bool strict_only) const693 CompareGroup(const SAlignment &o, bool strict_only) const
694 {
695     if (query.AsString() < o.query.AsString()) { return -1; }
696     if (o.query.AsString() < query.AsString()) { return 1; }
697 
698     if (subject.AsString() < o.subject.AsString()) { return -1; }
699     if (o.subject.AsString() < subject.AsString()) { return 1; }
700 
701     if (scores.first < o.scores.first) { return -1; }
702     if (o.scores.first < scores.first) { return 1; }
703 
704     if (strict_only) {
705         return 0;
706     }
707 
708     for (unsigned score_index = 0; score_index < scores.second.size(); ++score_index) {
709         if (scores.second[score_index] && o.scores.second[score_index]) {
710             if (scores.second[score_index] < o.scores.second[score_index]) { return -1; }
711             if (o.scores.second[score_index] < scores.second[score_index]) { return 1; }
712         }
713     }
714 
715     return 0;
716 }
717 
x_DetermineNextGroupSet()718 int CAlignCompare::x_DetermineNextGroupSet()
719 {
720     if (m_NextSet1Group.empty()) {
721         if (m_Set1.EndOfData()) {
722             return 2;
723         } else {
724             m_NextSet1Group.push_back(x_NextAlignment(1));
725         }
726     }
727     if (m_NextSet2Group.empty()) {
728         if (m_Set2.EndOfData()) {
729             return 1;
730         } else {
731             m_NextSet2Group.push_back(x_NextAlignment(2));
732         }
733     }
734     int compare_group = m_NextSet1Group.front()
735                       ->CompareGroup(*m_NextSet2Group.front(), true);
736     if (compare_group < 0) {
737         return 1;
738     } else if (compare_group > 0) {
739         return 2;
740     } else {
741         return 3;
742     }
743 }
744 
x_GetCurrentGroup(int set)745 void CAlignCompare::x_GetCurrentGroup(int set)
746 {
747     IAlignSource &source = set == 1 ? m_Set1 : m_Set2;
748     list< AutoPtr<SAlignment> > &current_group =
749         set == 1 ? m_CurrentSet1Group : m_CurrentSet2Group;
750     list< AutoPtr<SAlignment> > &next_group =
751         set == 1 ? m_NextSet1Group : m_NextSet2Group;
752     current_group.clear();
753     current_group.splice(current_group.end(), next_group);
754     while (!source.EndOfData() && next_group.empty()) {
755         AutoPtr<SAlignment> align = x_NextAlignment(set);
756         if (current_group.empty() || align->CompareGroup(*current_group.front(), true) == 0)
757         {
758             current_group.push_back(align);
759         } else {
760             next_group.push_back(align);
761         }
762     }
763 }
764 
PopulateBoundariesMap() const765 void CAlignCompare::SAlignment::PopulateBoundariesMap() const
766 {
767     ITERATE (TAlignmentSpans, it, spans) {
768         if (query) {
769             compare_object.m_BoundariesMap[query].insert(it->second.GetFrom());
770             compare_object.m_BoundariesMap[query].insert(it->second.GetToOpen());
771         }
772         if (subject) {
773             compare_object.m_BoundariesMap[subject].insert(it->first.GetFrom());
774             compare_object.m_BoundariesMap[subject].insert(it->first.GetToOpen());
775         }
776     }
777 }
778 
779 list< AutoPtr<CAlignCompare::SAlignment> > CAlignCompare::SAlignment::
BreakOnBoundaries(int row) const780 BreakOnBoundaries(int row) const
781 {
782     list< AutoPtr<SAlignment> > align_parts;
783     const set<TSeqPos> &boundaries =
784         compare_object.m_BoundariesMap[row == 0 ? query : subject];
785     TSeqRange range = row == 0 ? query_range : subject_range;
786     TSeqPos last_boundary = range.GetFrom();
787     for (set<TSeqPos>::const_iterator it = boundaries.upper_bound(range.GetFrom());
788          it != boundaries.end() && *it <= range.GetToOpen(); ++it)
789     {
790         /// Extract slice, as long as it's not the the entire alignment
791         if (last_boundary > range.GetFrom() || *it < range.GetToOpen()) {
792             AutoPtr<SAlignment> part = Slice(row, last_boundary, *it-1);
793             if (part.get()) {
794                 align_parts.push_back(part);
795             }
796         }
797         last_boundary = *it;
798     }
799     if (!align_parts.empty() && last_boundary < range.GetToOpen()) {
800         AutoPtr<SAlignment> part = Slice(row, last_boundary, range.GetTo());
801         if (part.get()) {
802             align_parts.push_back(part);
803         }
804     }
805     return align_parts;
806 }
807 
808 AutoPtr<CAlignCompare::SAlignment> CAlignCompare::SAlignment::
Slice(int row,TSeqPos from,TSeqPos to) const809 Slice(int row, TSeqPos from, TSeqPos to) const
810 {
811     if (align->GetSegs().IsDisc()) {
812         vector< AutoPtr<SAlignment> > seg_slices;
813         TSeqRange slice_range(min(from, to), max(from, to));
814         ITERATE (CSeq_align_set::Tdata, seg_it, align->GetSegs().GetDisc().Get())
815         {
816             TSeqRange seg_slice_range =
817                 slice_range & (*seg_it)->GetSeqRange(row);
818             if (seg_slice_range.Empty()) {
819                 continue;
820             }
821             AutoPtr<SAlignment> slice =
822                 SAlignment(source_set, *seg_it, compare_object) .  Slice(
823                      row, seg_slice_range.GetFrom(), seg_slice_range.GetTo());
824             if (slice.get()) {
825                 seg_slices.push_back(slice);
826             }
827         }
828         AutoPtr<SAlignment> complete_slice;
829         if (seg_slices.size() == 1) {
830             complete_slice = seg_slices.front();
831         } else if (seg_slices.size() > 1) {
832             CRef<CSeq_align> complete_align(new CSeq_align);
833             complete_slice.reset(new SAlignment(source_set, complete_align,
834                                                 compare_object, true));
835             ITERATE (vector< AutoPtr<SAlignment> >, seg_it, seg_slices) {
836                 complete_align->SetSegs().SetDisc().Set().push_back(
837                      (*seg_it)->align);
838                 complete_slice->query_mismatches += (*seg_it)->query_mismatches;
839                 complete_slice->subject_mismatches += (*seg_it)->query_mismatches;
840             }
841         }
842         return complete_slice;
843     }
844 
845     if (!align->GetSegs().IsDenseg()) {
846         NCBI_THROW(CException, eUnknown,
847                "Alignment splitting supported only for Dense-seq and "
848                "Disc-seg alignments");
849     }
850     AutoPtr<SAlignment> slice;
851     CRef<CSeq_align> slice_align(new CSeq_align);
852     slice_align->SetType(align->GetType());
853     if (align->IsSetDim()) {
854         slice_align->SetDim(align->GetDim());
855     }
856     CRef<CDense_seg> slice_seg = align->GetSegs().GetDenseg()
857                                       .ExtractSlice(row, from, to);
858     bool all_gaps = true;
859     for (int seg = 0; seg < slice_seg->GetNumseg() && all_gaps; ++seg) {
860         if (slice_seg->GetStarts()[seg*2] >= 0 &&
861             slice_seg->GetStarts()[seg*2+1] >= 0)
862         {
863             all_gaps = false;
864         }
865     }
866     if (all_gaps) {
867         /// Slice is completely within gaps, so no alignment
868         return slice;
869     }
870     slice_align->SetSegs().SetDenseg(*slice_seg);
871     ITERATE (CSeq_align::TScore, score_it, align->GetScore()) {
872         if ((*score_it)->GetId().IsStr() &&
873             compare_object.m_DistributiveScores.count(
874                  (*score_it)->GetId().GetStr()))
875         {
876             slice_align->SetScore().push_back(*score_it);
877         }
878     }
879     slice.reset(new SAlignment(source_set, slice_align, compare_object, true));
880     /// Special case for full mode; extract the mismatches positions from the
881     /// ones calculated for the full alignment
882     if (compare_object.m_Mode == e_Full) {
883 	slice->query_mismatches = query_mismatches;
884 	slice->query_mismatches &= slice_align->GetSeqRange(0);
885 	slice->subject_mismatches = subject_mismatches;
886 	slice->subject_mismatches &= slice_align->GetSeqRange(1);
887     }
888     return slice;
889 }
890 
x_SplitOnOverlaps(int group,int row)891 void CAlignCompare::x_SplitOnOverlaps(int group, int row)
892 {
893     list< AutoPtr<SAlignment> > orig_set;
894     list< AutoPtr<SAlignment> > &transformed_set =
895         group == 1 ? m_CurrentSet1Group : m_CurrentSet2Group;
896     CSeq_id_Handle id = row == 0 ? transformed_set.front()->query
897                                  : transformed_set.front()->subject;
898     if (!id) {
899         return;
900     }
901     orig_set.swap(transformed_set);
902     ITERATE (list< AutoPtr<SAlignment> >, it, orig_set) {
903         list< AutoPtr<CAlignCompare::SAlignment> > parts =
904             (*it)->BreakOnBoundaries(row);
905         if (parts.empty()) {
906             transformed_set.push_back(*it);
907         } else {
908             transformed_set.splice(transformed_set.end(), parts);
909         }
910     }
911 }
912 
PopulateBoundariesMap()913 void CAlignCompare::PopulateBoundariesMap()
914 {
915     while (!m_Set1.EndOfData()) {
916         x_NextAlignment(1, false)->PopulateBoundariesMap();
917     }
918     m_Set1.Reset();
919     while (!m_Set2.EndOfData()) {
920         x_NextAlignment(2, false)->PopulateBoundariesMap();
921     }
922     m_Set2.Reset();
923 }
924 
NextGroup()925 vector<const CAlignCompare::SAlignment *> CAlignCompare::NextGroup()
926 {
927     int next_group_set = x_DetermineNextGroupSet();
928     if (next_group_set & 1) {
929         x_GetCurrentGroup(1);
930     }
931     if (next_group_set & 2) {
932         x_GetCurrentGroup(2);
933     }
934 
935     vector<const SAlignment *> group;
936     switch (next_group_set) {
937     case 1:
938         if (!m_IgnoreNotPresent) {
939             m_CountOnlySet1 += m_CurrentSet1Group.size();
940             m_CountSplitSet1 += m_CurrentSet1Group.size();
941             ITERATE (list< AutoPtr<SAlignment> >, it, m_CurrentSet1Group) {
942                 m_CountBasesOnlySet1 += (*it)->length;
943                 group.push_back(&**it);
944             }
945         }
946         break;
947 
948     case 2:
949         if (!m_IgnoreNotPresent) {
950             m_CountOnlySet2 += m_CurrentSet2Group.size();
951             m_CountSplitSet2 += m_CurrentSet2Group.size();
952             ITERATE (list< AutoPtr<SAlignment> >, it, m_CurrentSet2Group) {
953                 m_CountBasesOnlySet2 += (*it)->length;
954                 group.push_back(&**it);
955             }
956         }
957         break;
958 
959     default:
960     {{
961         if (!m_BoundariesMap.empty()) {
962             x_SplitOnOverlaps(1, 0);
963             x_SplitOnOverlaps(1, 1);
964             x_SplitOnOverlaps(2, 0);
965             x_SplitOnOverlaps(2, 1);
966             m_CountSplitSet1 += m_CurrentSet1Group.size();
967             m_CountSplitSet2 += m_CurrentSet2Group.size();
968         }
969         TAlignPtrSet set1_aligns;
970         NON_CONST_ITERATE (list< AutoPtr<SAlignment> >, it, m_CurrentSet1Group)
971         {
972             set1_aligns.insert(&**it);
973         }
974 
975         TAlignPtrSet set2_aligns;
976         NON_CONST_ITERATE (list< AutoPtr<SAlignment> >, it, m_CurrentSet2Group)
977         {
978             set2_aligns.insert(&**it);
979         }
980 
981         set<SAlignment const*> red_color; // alignments from set2 that have equivalent mate
982                                     // from opposite set.
983         vector<TComp> comparisons;
984 
985         ITERATE (TAlignPtrSet, set1_it, set1_aligns) {
986             SAlignment const* lhs = *set1_it;
987 
988             ITERATE (TAlignPtrSet, set2_it, set2_aligns) {
989                 SAlignment const* rhs = *set2_it;
990                 // Check for equivalent alignment.
991                 // In strict mode we do not combine overlapping and equiv. alignments.
992                 if ( m_Strict && red_color.count(rhs) > 0 ) {
993                     continue;
994                 }
995 
996                 if ( false == s_IsOverlapping(lhs, rhs, m_Row) ) {
997                     continue;
998                 }
999                 // Check for overlap.
1000                 comparisons.push_back(TComp(TPtrPair(const_cast<SAlignment*>(lhs), const_cast<SAlignment*>(rhs)),
1001                                             SComparison(*lhs, *rhs,
1002                                                         m_RealScoreTolerance)));
1003 
1004                 // Post-processing:
1005                 //  - if two alignments are equivalent
1006                 //  -- color both alignments in red,
1007                 //  -- break out of the loop.
1008                 TComp const& record = comparisons.back();
1009                 SComparison const& comp = record.second;
1010                 if ( comp.is_equivalent ) {
1011                     red_color.insert(rhs);  // set2's alignment.
1012                     // In strict mode we do not combine overlapping and equiv. alignments.
1013                     if ( m_Strict ) {
1014                         break;
1015                     }
1016                 }
1017             }
1018         }
1019 
1020         std::sort(comparisons.begin(), comparisons.end(), SComp_Less(m_Strict));
1021 
1022         typedef pair<TAlignPtrSet, EMatchLevel> TAlignGroup;
1023 
1024         list<TAlignGroup> groups;
1025         map<const SAlignment *, list<TAlignGroup>::iterator> group_map;
1026 
1027         ITERATE (vector<TComp>, it, comparisons) {
1028             bool is_equivalent = it->second.is_equivalent;
1029             /// This comparison counts if the two alignments are equivalent, or
1030             /// they overlap and we haven't yet seen an equivalence for either
1031             if (is_equivalent ||
1032                     (it->second.overlap > 0 &&
1033                      it->first.first->match_level != e_Equiv &&
1034                      it->first.second->match_level != e_Equiv))
1035             {
1036                 list<TAlignGroup>::iterator align1_group = groups.end(),
1037                                             align2_group = groups.end();
1038                 if (set1_aligns.erase(it->first.first)) {
1039                     it->first.first->match_level =
1040                         is_equivalent ? e_Equiv : e_Overlap;
1041                     group.push_back(it->first.first);
1042                     if (is_equivalent) {
1043                         ++m_CountEquivSet1;
1044                         m_CountBasesEquivSet1 += it->second.spans_in_common;
1045                     } else {
1046                         ++m_CountOverlapSet1;
1047                         m_CountBasesOverlapSet1 += it->second.spans_in_common
1048                                                  + it->second.spans_overlap;
1049                         m_CountBasesOnlySet1 += it->second.spans_unique_first;
1050                     }
1051                 } else {
1052                     align1_group = group_map[it->first.first];
1053                 }
1054                 if (set2_aligns.erase(it->first.second)) {
1055                     it->first.second->match_level =
1056                         is_equivalent ? e_Equiv : e_Overlap;
1057                     group.push_back(it->first.second);
1058                     if (is_equivalent) {
1059                         ++m_CountEquivSet2;
1060                         m_CountBasesEquivSet2 += it->second.spans_in_common;
1061                     } else {
1062                         ++m_CountOverlapSet2;
1063                         m_CountBasesOverlapSet2 += it->second.spans_in_common
1064                                                  + it->second.spans_overlap;
1065                         m_CountBasesOnlySet2 += it->second.spans_unique_second;
1066                     }
1067                 } else {
1068                     align2_group = group_map[it->first.second];
1069                 }
1070                 if (align1_group == groups.end() &&
1071                     align2_group == groups.end())
1072                 {
1073                     /// Neither alignemnts was encountered before, so create
1074                     /// new group
1075                     list<TAlignGroup>::iterator new_group =
1076                         groups.insert(groups.end(), TAlignGroup());
1077                     new_group->first.insert(it->first.first);
1078                     new_group->first.insert(it->first.second);
1079                     new_group->second = it->first.first->match_level;
1080                     group_map[it->first.first] = new_group;
1081                     group_map[it->first.second] = new_group;
1082                     ++(is_equivalent ? m_CountEquivGroups
1083                                      : m_CountOverlapGroups);
1084                 } else if(align1_group == groups.end()) {
1085                     /// alignment 1 is new, add it to existing group
1086                     align2_group->first.insert(it->first.first);
1087                     group_map[it->first.first] = align2_group;
1088                 } else if(align2_group == groups.end()) {
1089                     /// alignment 2 is new, add it to existing group
1090                     align1_group->first.insert(it->first.second);
1091                     group_map[it->first.second] = align1_group;
1092                 } else if (align1_group != align2_group) {
1093                     /// The alignments are in two separate groups; merge them
1094                     ITERATE (TAlignPtrSet, group2_it, align2_group->first) {
1095                         align1_group->first.insert(*group2_it);
1096                         group_map[*group2_it] = align1_group;
1097                     }
1098                     if (align2_group->second == e_Overlap) {
1099                         --m_CountOverlapGroups;
1100                         if (align1_group->second == e_Equiv) {
1101                             /// Change the group from equivalence to overlap
1102                             align1_group->second = e_Overlap;
1103                             ++m_CountOverlapGroups;
1104                             --m_CountEquivGroups;
1105                         }
1106                     } else {
1107                         --m_CountEquivGroups;
1108                     }
1109                     groups.erase(align2_group);
1110                 }
1111             }
1112         }
1113 
1114         ITERATE (list<TAlignGroup>, group_it, groups) {
1115             if (group_it->second == e_NoMatch) {
1116                 continue;
1117             }
1118             if (group_it->second == e_Overlap && !m_QualityScores.empty())
1119             {
1120                 /// Find which side is better
1121                 vector<SAlignment *> best(3, static_cast<SAlignment *>(NULL));
1122                 ITERATE (TAlignPtrSet, align_it, group_it->first) {
1123                     SAlignment *&side_best = best[(*align_it)->source_set];
1124                     if (!side_best || (*align_it)->quality_scores >
1125                                       side_best->quality_scores)
1126                     {
1127                         side_best = *align_it;
1128                     }
1129                 }
1130                 if (best[1]->quality_scores != best[2]->quality_scores) {
1131                     int better_side =
1132                         best[1]->quality_scores > best[2]->quality_scores
1133                                   ? 1 : 2;
1134                     ITERATE (TAlignPtrSet, align_it, group_it->first) {
1135                         (*align_it)->match_level =
1136                              (*align_it)->source_set == better_side
1137                                  ? e_OverlapBetter : e_OverlapWorse;
1138                     }
1139                 }
1140             }
1141             ITERATE (TAlignPtrSet, align1_it, group_it->first) {
1142                 if ((*align1_it)->source_set != 1) {
1143                     continue;
1144                 }
1145                 ITERATE (TAlignPtrSet, align2_it, group_it->first) {
1146                     if ((*align2_it)->source_set != 2) {
1147                         continue;
1148                     }
1149                     (*align1_it)->matched_alignments.push_back(*align2_it);
1150                     (*align2_it)->matched_alignments.push_back(*align1_it);
1151                 }
1152             }
1153         }
1154 
1155         /// Add remaining alignments, for which no match was found, in order
1156         /// of their appearance in alignment comparisons
1157         m_CountOnlySet1 += set1_aligns.size();
1158         m_CountOnlySet2 += set2_aligns.size();
1159         ITERATE (vector<TComp>, comp_it, comparisons) {
1160             if (set1_aligns.empty() && set2_aligns.empty()) {
1161                 /// Found best comparison for all alignments
1162                 break;
1163             }
1164             if (comp_it->second.overlap == 0) {
1165                 if (set1_aligns.erase(comp_it->first.first)) {
1166                     group.push_back(comp_it->first.first);
1167                     m_CountBasesOnlySet1 += comp_it->first.first->length;
1168                 }
1169                 if (set2_aligns.erase(comp_it->first.second)) {
1170                     group.push_back(comp_it->first.second);
1171                     m_CountBasesOnlySet2 += comp_it->first.second->length;
1172                 }
1173             }
1174         }
1175         ITERATE (TAlignPtrSet, set1_it, set1_aligns) {
1176             group.push_back(*set1_it);
1177             m_CountBasesOnlySet1 += (*set1_it)->length;
1178         }
1179         ITERATE (TAlignPtrSet, set2_it, set2_aligns) {
1180             group.push_back(*set2_it);
1181             m_CountBasesOnlySet2 += (*set2_it)->length;
1182         }
1183     }}
1184     }
1185 
1186     return group;
1187 }
1188 
s_IsOverlapping(CAlignCompare::SAlignment const * lhs,CAlignCompare::SAlignment const * rhs,CAlignCompare::ERowComparison row)1189 bool s_IsOverlapping(CAlignCompare::SAlignment const* lhs, CAlignCompare::SAlignment const* rhs, CAlignCompare::ERowComparison row)
1190 {
1191     bool overlap;
1192 
1193     switch ( row ) {
1194         case CAlignCompare::e_Query:
1195             overlap = lhs->query_range.IntersectingWith(rhs->query_range);
1196             break;
1197 
1198         case CAlignCompare::e_Subject:
1199             overlap = lhs->subject_range.IntersectingWith(rhs->subject_range);
1200             break;
1201 
1202         default: //CAlignCompare::e_Both:
1203             if ( false == lhs->subject_range.IntersectingWith(rhs->subject_range) &&
1204                     false == lhs->query_range.IntersectingWith(rhs->query_range) )
1205             {
1206                 overlap = false;
1207             }
1208             else {
1209                 overlap = true;
1210             }
1211             break;
1212     }
1213     return overlap;
1214 }
1215 
1216 END_NCBI_SCOPE
1217