1 /*  $Id: score_builder_base.cpp 628386 2021-03-30 17:19:29Z mozese2 $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <objtools/alnmgr/score_builder_base.hpp>
34 #include <objtools/alnmgr/alntext.hpp>
35 
36 #include <util/sequtil/sequtil_manip.hpp>
37 
38 #include <objtools/alnmgr/alnvec.hpp>
39 #include <objtools/alnmgr/pairwise_aln.hpp>
40 #include <objtools/alnmgr/aln_converters.hpp>
41 
42 #include <objmgr/objmgr_exception.hpp>
43 #include <objmgr/seq_vector.hpp>
44 #include <objmgr/feat_ci.hpp>
45 
46 #include <objects/seqloc/Seq_loc.hpp>
47 #include <objects/seq/Annot_descr.hpp>
48 #include <objects/seq/Annotdesc.hpp>
49 #include <objects/seqalign/Seq_align.hpp>
50 #include <objects/seqalign/Std_seg.hpp>
51 #include <objects/seqalign/Spliced_seg.hpp>
52 #include <objects/seqalign/Spliced_exon.hpp>
53 #include <objects/seqalign/Spliced_exon_chunk.hpp>
54 #include <objects/seqalign/Product_pos.hpp>
55 #include <objects/seqalign/Prot_pos.hpp>
56 
57 #include <objmgr/util/sequence.hpp>
58 #include <objects/seqfeat/Org_ref.hpp>
59 #include <objects/seqfeat/OrgName.hpp>
60 #include <objects/seqfeat/Genetic_code_table.hpp>
61 
62 BEGIN_NCBI_SCOPE
63 USING_SCOPE(objects);
64 
65 /// Default constructor
CScoreBuilderBase()66 CScoreBuilderBase::CScoreBuilderBase()
67 : m_ErrorMode(eError_Throw)
68 , m_SubstMatrixName("BLOSUM62")
69 {
70 }
71 
72 /// Destructor
~CScoreBuilderBase()73 CScoreBuilderBase::~CScoreBuilderBase()
74 {
75 }
76 
77 /// Get length of intersection between a range and a range collection
78 static inline
s_IntersectionLength(const CRangeCollection<TSeqPos> & ranges,const TSeqRange & range)79 TSeqPos s_IntersectionLength(const CRangeCollection<TSeqPos> &ranges,
80                              const TSeqRange &range)
81 {
82     TSeqPos length = 0;
83     ITERATE (CRangeCollection<TSeqPos>, it, ranges) {
84         length += it->IntersectionWith(range).GetLength();
85     }
86     return length;
87 }
88 
89 ///
90 /// calculate mismatches and identities in a seq-align
91 ///
92 
s_GetNucIdentityMismatch(const vector<string> & data,int * identities,int * mismatches)93 static void s_GetNucIdentityMismatch(const vector<string>& data,
94                                      int* identities,
95                                      int* mismatches)
96 {
97     if ( data.empty() ) {
98         return;
99     }
100     size_t rows = data.size();
101     size_t size = data[0].size();
102     for (size_t i = 1; i < rows; ++i ) {
103         if ( data[i].size() != size ) {
104             NCBI_THROW(CSeqalignException, eInvalidInputData,
105                        "Rows have different lengths");
106         }
107     }
108     for (size_t a = 0;  a < size;  ++a) {
109         bool is_mismatch = false;
110         char c = data[0][a];
111         for (size_t b = 1;  b < rows;  ++b) {
112             if (data[b][a] != c) {
113                 is_mismatch = true;
114                 break;
115             }
116         }
117 
118         if (is_mismatch) {
119             ++(*mismatches);
120         } else {
121             ++(*identities);
122         }
123     }
124 }
125 
126 
s_GetSplicedSegIdentityMismatch(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,int * identities,int * mismatches)127 static void s_GetSplicedSegIdentityMismatch(CScope& scope,
128                                             const CSeq_align& align,
129                                             const CRangeCollection<TSeqPos> &ranges,
130                                             int* identities,
131                                             int* mismatches)
132 {
133     ///
134     /// easy route
135     /// use the alignment manager
136     ///
137     TAlnSeqIdIRef id1(new CAlnSeqId(align.GetSeq_id(0)));
138     TAlnSeqIdIRef id2(new CAlnSeqId(align.GetSeq_id(1)));
139     CRef<CPairwiseAln> pairwise(new CPairwiseAln(id1, id2));
140     ConvertSeqAlignToPairwiseAln(*pairwise, align, 0, 1);
141 
142     CBioseq_Handle prod_bsh    = scope.GetBioseqHandle(align.GetSeq_id(0));
143     CBioseq_Handle genomic_bsh = scope.GetBioseqHandle(align.GetSeq_id(1));
144     if ( !prod_bsh  ||  !genomic_bsh ) {
145         const CSeq_id &failed_id = align.GetSeq_id(genomic_bsh ? 0 : 1);
146         NCBI_THROW(CSeqalignException, eInvalidSeqId,
147                    "Can't get sequence data for " + failed_id.AsFastaString() +
148                    " in order to count identities/mismatches");
149     }
150 
151     CSeqVector prod(prod_bsh, CBioseq_Handle::eCoding_Iupac);
152 
153     switch (align.GetSegs().GetSpliced().GetProduct_type()) {
154     case CSpliced_seg::eProduct_type_transcript:
155         {{
156              CSeqVector gen(genomic_bsh, CBioseq_Handle::eCoding_Iupac);
157              ITERATE (CPairwiseAln, it, *pairwise) {
158                  const CPairwiseAln::TAlnRng& range = *it;
159                  TSeqRange r1(range.GetFirstFrom(), range.GetFirstTo());
160                  TSeqRange r2(range.GetSecondFrom(), range.GetSecondTo());
161                  string prod_data;
162                  prod.GetSeqData(r1.GetFrom(), r1.GetTo() + 1, prod_data);
163                  string gen_data;
164                  gen.GetSeqData(r2.GetFrom(), r2.GetTo() + 1, gen_data);
165                  if (range.IsReversed()) {
166                      CSeqManip::ReverseComplement(gen_data,
167                                                   CSeqUtil::e_Iupacna,
168                                                   0, gen_data.size());
169                  }
170 
171                  CRangeCollection<TSeqPos> seg_ranges = ranges;
172                  seg_ranges.IntersectWith(r1);
173                  ITERATE (CRangeCollection<TSeqPos>, range_it, seg_ranges) {
174                      TSeqPos start_offset = range_it->GetFrom() - r1.GetFrom(),
175                              end_offset = range_it->GetToOpen() - r1.GetFrom();
176                      string::const_iterator pit = prod_data.begin()
177                                                 + start_offset;
178                      string::const_iterator pit_end = prod_data.begin()
179                                                 + end_offset;
180                      string::const_iterator git = gen_data.begin()
181                                                 + start_offset;
182                      string::const_iterator git_end = gen_data.begin()
183                                                 + end_offset;
184 
185                      for ( ;  pit != pit_end  &&  git != git_end;  ++pit, ++git)
186                      {
187                          bool match = (*pit == *git);
188                          *identities +=  match;
189                          *mismatches += !match;
190                      }
191                  }
192              }
193          }}
194         break;
195 
196     case CSpliced_seg::eProduct_type_protein:
197         {{
198              int gcode = 1;
199              try {
200                  const COrg_ref& org_ref = sequence::GetOrg_ref(genomic_bsh);
201                  gcode = org_ref.GetOrgname().GetGcode();
202              }
203              catch (CException&) {
204              }
205              const CTrans_table& tbl = CGen_code_table::GetTransTable(gcode);
206 
207              char codon[3];
208              codon[0] = codon[1] = codon[2] = 'N';
209 
210              TSeqRange last_r1(0, 0);
211              ITERATE (CPairwiseAln, it, *pairwise) {
212                  const CPairwiseAln::TAlnRng& range = *it;
213                  TSeqRange r1(range.GetFirstFrom(), range.GetFirstTo());
214                  TSeqRange r2(range.GetSecondFrom(), range.GetSecondTo());
215 
216                  if (last_r1.GetTo() + 1 != r1.GetFrom()) {
217                      size_t i = last_r1.GetTo() + 1;
218                      size_t count = 0;
219                      for ( ;  i != r1.GetFrom()  &&  count < 3;  ++i, ++count) {
220                          codon[ i % 3 ] = 'N';
221                      }
222                  }
223                  last_r1 = r1;
224 
225                  string gen_data;
226                  CSeqVector gen(genomic_bsh, CBioseq_Handle::eCoding_Iupac);
227                  gen.GetSeqData(r2.GetFrom(), r2.GetTo() + 1, gen_data);
228                  if (range.IsReversed()) {
229                      CSeqManip::ReverseComplement(gen_data,
230                                                   CSeqUtil::e_Iupacna,
231                                                   0, gen_data.size());
232 
233                      //LOG_POST(Error << "reverse range: [" << r1.GetFrom() << ", " << r1.GetTo() << "] - [" << r2.GetFrom() << ", " << r2.GetTo() << "]");
234                  } else {
235                      //LOG_POST(Error << "forward range: [" << r1.GetFrom() << ", " << r1.GetTo() << "] - [" << r2.GetFrom() << ", " << r2.GetTo() << "]");
236                  }
237 
238                  /// compare product range to conceptual translation
239                  TSeqPos prod_pos = r1.GetFrom();
240                  //LOG_POST(Error << "  genomic = " << gen_data);
241                  for (size_t i = 0;  i < gen_data.size();  ++i, ++prod_pos) {
242                      codon[ prod_pos % 3 ] = gen_data[i];
243                      //LOG_POST(Error << "    filling: " << prod_pos << ": " << prod_pos % 3 << ": " << gen_data[i]);
244 
245                      if (prod_pos % 3 == 2) {
246                          int state = tbl.SetCodonState(codon[0], codon[1], codon[2]);
247                          char residue = (prod_pos == 2
248                                          ? tbl.GetStartResidue(state)
249                                          : tbl.GetCodonResidue(state));
250 
251                          /// NOTE:
252                          /// we increment identities/mismatches by 3 here,
253                          /// counting identities in nucleotide space!!
254                          if (residue == prod[prod_pos / 3]  &&
255                              residue != 'X'  &&  residue != '-') {
256                              *identities += 3;
257                          } else {
258                              *mismatches += 3;
259                          }
260                      }
261                  }
262              }
263          }}
264         break;
265 
266     default:
267         break;
268     }
269 
270     /*
271      * NB: leave this here; it's useful for validation
272     int actual_identities = 0;
273     if (align.GetNamedScore("N of matches", actual_identities)) {
274         if (actual_identities != *identities) {
275             LOG_POST(Error << "actual identities: " << actual_identities
276                      << "  computed identities: " << *identities);
277 
278             //cerr << MSerial_AsnText << align;
279         }
280     }
281     **/
282 }
283 
284 
s_GetCountIdentityMismatch(CScope & scope,const CSeq_align & align,int * identities,int * mismatches,const CRangeCollection<TSeqPos> & ranges=CRangeCollection<TSeqPos> (TSeqRange::GetWhole ()))285 static void s_GetCountIdentityMismatch(CScope& scope, const CSeq_align& align,
286                                        int* identities, int* mismatches,
287                                        const CRangeCollection<TSeqPos> &ranges =
288                                             CRangeCollection<TSeqPos>(TSeqRange::GetWhole()))
289 {
290     _ASSERT(identities  &&  mismatches);
291     if (ranges.empty()) {
292         return;
293     }
294 
295     {{
296          ///
297          /// shortcut: if 'num_ident' is present, we trust it
298          ///
299          int num_ident = 0;
300          if (ranges.begin()->IsWhole() &&
301              align.GetNamedScore(CSeq_align::eScore_IdentityCount, num_ident))
302          {
303              size_t len     = align.GetAlignLength(false /*ignore gaps*/);
304              *identities += num_ident;
305              *mismatches += (len - num_ident);
306              return;
307          }
308      }}
309 
310     switch (align.GetSegs().Which()) {
311     case CSeq_align::TSegs::e_Denseg:
312         {{
313             const CDense_seg& ds = align.GetSegs().GetDenseg();
314             vector<string> data;
315             CAlnVec vec(ds, scope);
316             data.resize(vec.GetNumRows());
317             for (int seg = 0;  seg < vec.GetNumSegs();  ++seg) {
318                 bool has_gap = false;
319                 for (int i = 0;  !has_gap && i < vec.GetNumRows();  ++i) {
320                     if (vec.GetStart(i, seg) == -1) {
321                         has_gap = true;
322                     }
323                 }
324                 if (has_gap) {
325                     /// we compute ungapped identities
326                     /// gap on at least one row, so we skip this segment
327                     continue;
328                 }
329 
330                 TSeqPos seg_start = vec.GetStart(0, seg),
331                         seg_stop = vec.GetStop(0, seg);
332                 CRangeCollection<TSeqPos> seg_ranges = ranges;
333                 seg_ranges.IntersectWith(TSeqRange(seg_start, seg_stop));
334                 for (int i = 0;  i < vec.GetNumRows();  ++i) {
335                     TSeqPos offset = vec.GetStart(i, seg) - seg_start;
336                     ITERATE (CRangeCollection<TSeqPos>, range_it, seg_ranges) {
337                         string seq_string;
338                         vec.GetSeqString(seq_string, i,
339                                          range_it->GetFrom()+offset,
340                                          range_it->GetTo()+offset);
341                         data[i] += seq_string;
342                     }
343                 }
344             }
345             s_GetNucIdentityMismatch(data, identities, mismatches);
346         }}
347         break;
348 
349     case CSeq_align::TSegs::e_Disc:
350         {{
351             ITERATE (CSeq_align::TSegs::TDisc::Tdata, iter,
352                      align.GetSegs().GetDisc().Get()) {
353                 s_GetCountIdentityMismatch(scope, **iter,
354                                            identities, mismatches, ranges);
355             }
356         }}
357         break;
358 
359     case CSeq_align::TSegs::e_Std:
360         NCBI_THROW(CSeqalignException, eNotImplemented,
361                    "identity + mismatch function not implemented for std-seg");
362         break;
363 
364     case CSeq_align::TSegs::e_Spliced:
365         {{
366              int aln_identities = 0;
367              int aln_mismatches = 0;
368              bool has_non_standard = false;
369              ITERATE (CSpliced_seg::TExons, iter,
370                       align.GetSegs().GetSpliced().GetExons()) {
371                  const CSpliced_exon& exon = **iter;
372                  TSeqRange product_span;
373                  product_span.Set(exon.GetProduct_start().AsSeqPos(),
374                                   exon.GetProduct_end().AsSeqPos());
375                  if (exon.IsSetParts()) {
376                      TSeqPos part_start = product_span.GetFrom();
377                      ITERATE (CSpliced_exon::TParts, it, exon.GetParts()) {
378                          const CSpliced_exon_chunk& chunk = **it;
379                          int part_len = 0;
380                          switch (chunk.Which()) {
381                          case CSpliced_exon_chunk::e_Match:
382                              part_len = chunk.GetMatch();
383                              aln_identities += s_IntersectionLength(ranges,
384                                                 TSeqRange(part_start,
385                                                           part_start+part_len-1));
386                              break;
387 
388                          case CSpliced_exon_chunk::e_Mismatch:
389                              part_len = chunk.GetMismatch();
390                              aln_mismatches += s_IntersectionLength(ranges,
391                                                 TSeqRange(part_start,
392                                                           part_start+part_len-1));
393                              break;
394 
395                          case CSpliced_exon_chunk::e_Diag:
396                              part_len = chunk.GetDiag();
397                              if (s_IntersectionLength(ranges,
398                                      TSeqRange(part_start,
399                                                part_start+part_len-1)))
400                              {
401                                  has_non_standard = true;
402                              }
403                              break;
404 
405                          case CSpliced_exon_chunk::e_Product_ins:
406                              part_len = chunk.GetProduct_ins();
407                              break;
408 
409                          default:
410                              break;
411                          }
412                          part_start += part_len;
413                      }
414                  } else {
415                      has_non_standard = true;
416                      break;
417                  }
418              }
419              if ( !has_non_standard ) {
420                  *identities += aln_identities;
421                  *mismatches += aln_mismatches;
422                  break;
423              }
424 
425              /// we must compute match and mismatch based on first
426              /// prinicples. Sometimes loader will fail in getting
427              /// all components of the genomic sequence; in that case
428              /// throw an exception, but make it somewhat more informative
429              try {
430                  s_GetSplicedSegIdentityMismatch(scope, align, ranges,
431                                                  identities, mismatches);
432              } catch (CLoaderException &e) {
433                  NCBI_RETHROW_SAME(e,
434                                    "Can't calculate identities/mismatches for "
435                                    "alignment with genomic sequence " +
436                                    align.GetSeq_id(1).AsFastaString() +
437                                    "; Loader can't load all required "
438                                    "components of sequence");
439              }
440          }}
441         break;
442 
443     default:
444         _ASSERT(false);
445         break;
446     }
447 }
448 
449 ///
450 /// calculate the percent identity
451 /// we also return the count of identities and mismatches
452 ///
s_GetPercentIdentity(CScope & scope,const CSeq_align & align,int * identities,int * mismatches,double * pct_identity,CScoreBuilderBase::EPercentIdentityType type,const CRangeCollection<TSeqPos> & ranges=CRangeCollection<TSeqPos> (TSeqRange::GetWhole ()))453 static void s_GetPercentIdentity(CScope& scope, const CSeq_align& align,
454                                  int* identities,
455                                  int* mismatches,
456                                  double* pct_identity,
457                                  CScoreBuilderBase::EPercentIdentityType type,
458                                  const CRangeCollection<TSeqPos> &ranges =
459                                       CRangeCollection<TSeqPos>(TSeqRange::GetWhole()))
460 {
461     size_t count_aligned = 0;
462     switch (type) {
463     case CScoreBuilderBase::eGapped:
464         count_aligned = align.GetAlignLengthWithinRanges(ranges, true /* include gaps */);
465         break;
466 
467     case CScoreBuilderBase::eUngapped:
468         count_aligned = align.GetAlignLengthWithinRanges(ranges, false /* omit gaps */);
469         break;
470 
471     case CScoreBuilderBase::eGBDNA:
472         count_aligned  = align.GetAlignLengthWithinRanges(ranges, false /* omit gaps */);
473         count_aligned += align.GetNumGapOpeningsWithinRanges(ranges);
474         break;
475     }
476 
477     s_GetCountIdentityMismatch(scope, align, identities, mismatches, ranges);
478     if (count_aligned) {
479         *pct_identity = 100.0f * double(*identities) / count_aligned;
480     } else {
481         *pct_identity = 0;
482     }
483 }
484 
485 
486 ///
487 /// calculate the percent coverage
488 ///
s_SequenceIsProtein(CScope & scope,const CSeq_id & id)489 static bool s_SequenceIsProtein(CScope& scope,
490                                 const CSeq_id& id)
491 {
492     CSeq_inst::TMol mol = scope.GetSequenceType(id);
493     if (mol == CSeq_inst::eMol_not_set) {
494         CBioseq_Handle bsh = scope.GetBioseqHandle(id);
495         if ( !bsh ) {
496             NCBI_THROW(CException, eUnknown,
497                        "failed to retrieve sequence: " + id.AsFastaString());
498         }
499         return bsh.IsAa();
500     }
501 
502     return (mol == CSeq_inst::eMol_aa);
503 }
504 
505 
s_IsProteinToGenomic(CScope & scope,const CSeq_align & align)506 static bool s_IsProteinToGenomic(CScope& scope,
507                                  const CSeq_align& align)
508 {
509     if (align.GetSegs().IsSpliced()) {
510         return align.GetSegs().GetSpliced()
511             .GetProduct_type() == CSpliced_seg::eProduct_type_protein;
512     }
513 
514     if (align.GetSegs().IsDenseg()) {
515         const CDense_seg& seg = align.GetSegs().GetDenseg();
516         if (seg.IsSetWidths()) {
517             // FIXME: I can't remember what the rule for widths is
518             //
519         }
520         else {
521             // we must be protein-to-protein or nuc-to-nuc
522             return false;
523         }
524     }
525 
526     // our short-cuts are exhausted
527     // fall back to a check of sequence type
528     const CSeq_id& id0 = align.GetSeq_id(0);
529     if ( !s_SequenceIsProtein(scope, id0) ) {
530         return false;
531     }
532     const CSeq_id& id1 = align.GetSeq_id(1);
533     return s_SequenceIsProtein(scope, id1);
534 }
535 
536 
s_GetPercentCoverage(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,double * pct_coverage,unsigned query=0)537 static void s_GetPercentCoverage(CScope& scope, const CSeq_align& align,
538                                  const CRangeCollection<TSeqPos>& ranges,
539                                  double* pct_coverage, unsigned query = 0)
540 {
541     if (!ranges.empty() && ranges.begin()->IsWhole() &&
542         align.GetNamedScore(CSeq_align::eScore_PercentCoverage,
543                             *pct_coverage)) {
544         return;
545     }
546 
547     size_t covered_bases = align.GetAlignLengthWithinRanges
548                                (ranges, false /* don't include gaps */);
549     size_t seq_len = 0;
550     if(ranges.empty() || !ranges.begin()->IsWhole()){
551         seq_len = ranges.GetCoveredLength();
552     } else {
553         if (align.GetSegs().IsSpliced()  &&
554             align.GetSegs().GetSpliced().IsSetProduct_length())
555         {
556             seq_len = align.GetSegs().GetSpliced().GetProduct_length();
557         } else {
558             const auto &query_id = align.GetSeq_id(query);
559             const objects::CBioseq_Handle& bsh_seq = scope.GetBioseqHandle(query_id);
560             if (!bsh_seq) {
561                 *pct_coverage = 0;
562                 NCBI_THROW(CSeqalignException, eInvalidSeqId,
563                     "Can't get sequence data for " + query_id.AsFastaString() +
564                     " in order to calculate coverage");
565             }
566             seq_len = bsh_seq.GetBioseqLength();
567         }
568         if (align.GetSegs().IsSpliced()  &&
569             align.GetSegs().GetSpliced().IsSetPoly_a()) {
570 
571             if (align.GetSegs().GetSpliced().IsSetProduct_strand()  &&
572                 align.GetSegs().GetSpliced().GetProduct_strand() == eNa_strand_minus) {
573                 seq_len -= align.GetSegs().GetSpliced().GetPoly_a();
574             } else {
575                 seq_len = align.GetSegs().GetSpliced().GetPoly_a();
576             }
577         }
578 
579 
580         //
581         // determine if the alignment is protein-to-genomic
582         //
583         bool is_protein_to_genomic = s_IsProteinToGenomic(scope, align);
584         if (is_protein_to_genomic) {
585             /// alignment is protein-to-genomic alignment
586             /// NOTE: alignment length is always reported in nucleotide
587             /// coordinates
588             seq_len *= 3;
589             if (align.GetSegs().IsStd()) {
590                 /// odd corner case:
591                 /// std-seg alignments of protein to nucleotide
592                 covered_bases *= 3;
593             }
594         }
595     }
596 
597     if (covered_bases) {
598         *pct_coverage = 100.0f * double(covered_bases) / double(seq_len);
599     } else {
600         *pct_coverage = 0;
601     }
602 }
603 
604 /////////////////////////////////////////////////////////////////////////////
x_GetMatrixCounts(CScope & scope,const CSeq_align & align,int * positives,int * negatives)605 void CScoreBuilderBase::x_GetMatrixCounts(CScope& scope,
606                        const CSeq_align& align,
607                        int* positives, int* negatives)
608 {
609     if (!align.GetSegs().IsSpliced() ||
610          align.GetSegs().GetSpliced().GetProduct_type() !=
611                 CSpliced_seg::eProduct_type_protein)
612     {
613         NCBI_THROW(CSeqalignException, eUnsupported,
614                    "num_positives and num_negatives scores only defined "
615                    "for protein alignment");
616     }
617     CProteinAlignText pro_text(scope, align, m_SubstMatrixName);
618     const string& prot = pro_text.GetProtein();
619     const string& dna = pro_text.GetDNA();
620     const string& match = pro_text.GetMatch();
621     for(string::size_type i=0;i<match.size(); ++i) {
622         if( isalpha(prot[i]) && (dna[i] != '-')) {
623             int increment = isupper(prot[i]) ? 3 : 1;
624             switch(match[i]) {
625             case '|':
626             case '+':
627                 *positives += increment;
628                 break;
629             case 'X': /// skip introns and bad parts
630                 break;
631             default://mismatch
632                 *negatives += increment;
633                 break;
634             }
635         }
636     }
637 }
638 
639 
640 
641 
SetSubstMatrix(const string & name)642 void CScoreBuilderBase::SetSubstMatrix(const string &name)
643 {
644     m_SubstMatrixName = name;
645 }
646 
GetPercentIdentity(CScope & scope,const CSeq_align & align,EPercentIdentityType type)647 double CScoreBuilderBase::GetPercentIdentity(CScope& scope,
648                                          const CSeq_align& align,
649                                          EPercentIdentityType type)
650 {
651     int identities = 0;
652     int mismatches = 0;
653     double pct_identity = 0;
654     s_GetPercentIdentity(scope, align,
655                          &identities, &mismatches, &pct_identity, type);
656     return pct_identity;
657 }
658 
659 
GetPercentIdentity(CScope & scope,const CSeq_align & align,const TSeqRange & range,EPercentIdentityType type)660 double CScoreBuilderBase::GetPercentIdentity(CScope& scope,
661                                          const CSeq_align& align,
662                                          const TSeqRange &range,
663                                          EPercentIdentityType type)
664 {
665     int identities = 0;
666     int mismatches = 0;
667     double pct_identity = 0;
668     s_GetPercentIdentity(scope, align,
669                          &identities, &mismatches, &pct_identity, type,
670                           CRangeCollection<TSeqPos>(range));
671     return pct_identity;
672 }
673 
674 
GetPercentIdentity(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,EPercentIdentityType type)675 double CScoreBuilderBase::GetPercentIdentity(CScope& scope,
676                                          const CSeq_align& align,
677                                          const CRangeCollection<TSeqPos> &ranges,
678                                          EPercentIdentityType type)
679 {
680     int identities = 0;
681     int mismatches = 0;
682     double pct_identity = 0;
683     s_GetPercentIdentity(scope, align,
684                          &identities, &mismatches, &pct_identity, type, ranges);
685     return pct_identity;
686 }
687 
688 
GetPercentCoverage(CScope & scope,const CSeq_align & align,unsigned query)689 double CScoreBuilderBase::GetPercentCoverage(CScope& scope,
690                                          const CSeq_align& align,
691                                          unsigned query)
692 {
693     double pct_coverage = 0;
694     s_GetPercentCoverage(scope, align,
695                          CRangeCollection<TSeqPos>(TSeqRange::GetWhole()),
696                          &pct_coverage,
697                          query);
698     return pct_coverage;
699 }
700 
GetPercentCoverage(CScope & scope,const CSeq_align & align,const TSeqRange & range,unsigned query)701 double CScoreBuilderBase::GetPercentCoverage(CScope& scope,
702                                          const CSeq_align& align,
703                                          const TSeqRange& range,
704                                          unsigned query)
705 {
706     double pct_coverage = 0;
707     s_GetPercentCoverage(scope, align, CRangeCollection<TSeqPos>(range), &pct_coverage, query);
708     return pct_coverage;
709 }
710 
GetPercentCoverage(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,unsigned query)711 double CScoreBuilderBase::GetPercentCoverage(CScope& scope,
712                                          const CSeq_align& align,
713                                          const CRangeCollection<TSeqPos>& ranges,
714                                          unsigned query)
715 {
716     double pct_coverage = 0;
717     s_GetPercentCoverage(scope, align, ranges, &pct_coverage, query);
718     return pct_coverage;
719 }
720 
GetIdentityCount(CScope & scope,const CSeq_align & align)721 int CScoreBuilderBase::GetIdentityCount(CScope& scope, const CSeq_align& align)
722 {
723     int identities = 0;
724     int mismatches = 0;
725     s_GetCountIdentityMismatch(scope, align, &identities, &mismatches);
726     return identities;
727 }
728 
729 
GetMismatchCount(CScope & scope,const CSeq_align & align)730 int CScoreBuilderBase::GetMismatchCount(CScope& scope, const CSeq_align& align)
731 {
732     int identities = 0;
733     int mismatches = 0;
734     s_GetCountIdentityMismatch(scope, align, &identities,&mismatches);
735     return mismatches;
736 }
737 
738 
GetMismatchCount(CScope & scope,const CSeq_align & align,int & identities,int & mismatches)739 void CScoreBuilderBase::GetMismatchCount(CScope& scope, const CSeq_align& align,
740                                      int& identities, int& mismatches)
741 {
742     identities = 0;
743     mismatches = 0;
744     s_GetCountIdentityMismatch(scope, align, &identities, &mismatches);
745 }
746 
747 
GetIdentityCount(CScope & scope,const CSeq_align & align,const TSeqRange & range)748 int CScoreBuilderBase::GetIdentityCount(CScope& scope, const CSeq_align& align,
749                                         const TSeqRange& range)
750 {
751     int identities = 0;
752     int mismatches = 0;
753     s_GetCountIdentityMismatch(scope, align, &identities, &mismatches,
754                                CRangeCollection<TSeqPos>(range));
755     return identities;
756 }
757 
758 
GetMismatchCount(CScope & scope,const CSeq_align & align,const TSeqRange & range)759 int CScoreBuilderBase::GetMismatchCount(CScope& scope, const CSeq_align& align,
760                                         const TSeqRange& range)
761 {
762     int identities = 0;
763     int mismatches = 0;
764     s_GetCountIdentityMismatch(scope, align, &identities,&mismatches,
765                                CRangeCollection<TSeqPos>(range));
766     return mismatches;
767 }
768 
769 
GetMismatchCount(CScope & scope,const CSeq_align & align,const TSeqRange & range,int & identities,int & mismatches)770 void CScoreBuilderBase::GetMismatchCount(CScope& scope, const CSeq_align& align,
771                                         const TSeqRange& range,
772                                      int& identities, int& mismatches)
773 {
774     identities = 0;
775     mismatches = 0;
776     s_GetCountIdentityMismatch(scope, align, &identities, &mismatches,
777                                CRangeCollection<TSeqPos>(range));
778 }
779 
780 
GetIdentityCount(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges)781 int CScoreBuilderBase::GetIdentityCount(CScope& scope, const CSeq_align& align,
782                                         const CRangeCollection<TSeqPos> &ranges)
783 {
784     int identities = 0;
785     int mismatches = 0;
786     s_GetCountIdentityMismatch(scope, align, &identities, &mismatches, ranges);
787     return identities;
788 }
789 
790 
GetMismatchCount(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges)791 int CScoreBuilderBase::GetMismatchCount(CScope& scope, const CSeq_align& align,
792                                         const CRangeCollection<TSeqPos> &ranges)
793 {
794     int identities = 0;
795     int mismatches = 0;
796     s_GetCountIdentityMismatch(scope, align, &identities,&mismatches, ranges);
797     return mismatches;
798 }
799 
800 
GetMismatchCount(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,int & identities,int & mismatches)801 void CScoreBuilderBase::GetMismatchCount(CScope& scope, const CSeq_align& align,
802                                         const CRangeCollection<TSeqPos> &ranges,
803                                      int& identities, int& mismatches)
804 {
805     identities = 0;
806     mismatches = 0;
807     s_GetCountIdentityMismatch(scope, align, &identities, &mismatches, ranges);
808 }
809 
810 
GetPositiveCount(CScope & scope,const CSeq_align & align)811 int CScoreBuilderBase::GetPositiveCount(CScope& scope, const CSeq_align& align)
812 {
813     int positives = 0;
814     int negatives = 0;
815     x_GetMatrixCounts(scope, align, &positives, &negatives);
816     return positives;
817 }
818 
819 
GetNegativeCount(CScope & scope,const CSeq_align & align)820 int CScoreBuilderBase::GetNegativeCount(CScope& scope, const CSeq_align& align)
821 {
822     int positives = 0;
823     int negatives = 0;
824     x_GetMatrixCounts(scope, align, &positives, &negatives);
825     return negatives;
826 }
827 
828 
GetMatrixCounts(CScope & scope,const CSeq_align & align,int & positives,int & negatives)829 void CScoreBuilderBase::GetMatrixCounts(CScope& scope, const CSeq_align& align,
830                                      int& positives, int& negatives)
831 {
832     positives = 0;
833     negatives = 0;
834     x_GetMatrixCounts(scope, align, &positives, &negatives);
835 }
836 
837 
GetGapBaseCount(const CSeq_align & align)838 int CScoreBuilderBase::GetGapBaseCount(const CSeq_align& align)
839 {
840     return align.GetTotalGapCount();
841 }
842 
843 
GetGapCount(const CSeq_align & align)844 int CScoreBuilderBase::GetGapCount(const CSeq_align& align)
845 {
846     return align.GetNumGapOpenings();
847 }
848 
849 
GetAlignLength(const CSeq_align & align,bool ungapped)850 TSeqPos CScoreBuilderBase::GetAlignLength(const CSeq_align& align, bool ungapped)
851 {
852     return align.GetAlignLength( !ungapped /* true = include gaps = !ungapped */);
853 }
854 
855 
GetGapBaseCount(const CSeq_align & align,const TSeqRange & range)856 int CScoreBuilderBase::GetGapBaseCount(const CSeq_align& align,
857                                        const TSeqRange &range)
858 {
859     return align.GetTotalGapCountWithinRange(range);
860 }
861 
862 
GetGapCount(const CSeq_align & align,const TSeqRange & range)863 int CScoreBuilderBase::GetGapCount(const CSeq_align& align,
864                                    const TSeqRange &range)
865 {
866     return align.GetNumGapOpeningsWithinRange(range);
867 }
868 
869 
GetAlignLength(const CSeq_align & align,const TSeqRange & range,bool ungapped)870 TSeqPos CScoreBuilderBase::GetAlignLength(const CSeq_align& align,
871                                           const TSeqRange &range,
872                                           bool ungapped)
873 {
874     return align.GetAlignLengthWithinRange(range, !ungapped
875           /* true = include gaps = !ungapped */);
876 }
877 
878 
GetGapBaseCount(const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges)879 int CScoreBuilderBase::GetGapBaseCount(const CSeq_align& align,
880                                        const CRangeCollection<TSeqPos> &ranges)
881 {
882     return align.GetTotalGapCountWithinRanges(ranges);
883 }
884 
885 
GetGapCount(const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges)886 int CScoreBuilderBase::GetGapCount(const CSeq_align& align,
887                                    const CRangeCollection<TSeqPos> &ranges)
888 {
889     return align.GetNumGapOpeningsWithinRanges(ranges);
890 }
891 
892 
GetAlignLength(const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,bool ungapped)893 TSeqPos CScoreBuilderBase::GetAlignLength(const CSeq_align& align,
894                                    const CRangeCollection<TSeqPos> &ranges,
895                                    bool ungapped)
896 {
897     return align.GetAlignLengthWithinRanges(ranges, !ungapped
898           /* true = include gaps = !ungapped */);
899 }
900 
901 
902 /////////////////////////////////////////////////////////////////////////////
903 
AddScore(CScope & scope,list<CRef<CSeq_align>> & aligns,CSeq_align::EScoreType score)904 void CScoreBuilderBase::AddScore(CScope& scope, list< CRef<CSeq_align> >& aligns,
905                              CSeq_align::EScoreType score)
906 {
907     NON_CONST_ITERATE (list< CRef<CSeq_align> >, iter, aligns) {
908         AddScore(scope, **iter, score);
909     }
910 }
911 
AddScore(CScope & scope,CSeq_align & align,CSeq_align::EScoreType score)912 void CScoreBuilderBase::AddScore(CScope& scope, CSeq_align& align,
913                                  CSeq_align::EScoreType score)
914 {
915     try {
916         switch (score) {
917         /// Special cases for the three precent-identity scores, to add
918         /// the num_ident and num_mismatch scores as well
919         case CSeq_align::eScore_PercentIdentity_Gapped:
920         case CSeq_align::eScore_PercentIdentity_Ungapped:
921         case CSeq_align::eScore_PercentIdentity_GapOpeningOnly:
922             {{
923                 int identities      = 0;
924                 int mismatches      = 0;
925                 double pct_identity = 0;
926                 s_GetPercentIdentity(scope, align, &identities, &mismatches,
927                     &pct_identity,
928                     static_cast<EPercentIdentityType>(
929                         score - CSeq_align::eScore_PercentIdentity_Gapped));
930                 align.SetNamedScore(score, pct_identity);
931                 align.SetNamedScore(CSeq_align::eScore_IdentityCount,   identities);
932                 align.SetNamedScore(CSeq_align::eScore_MismatchCount,   mismatches);
933             }}
934             break;
935 
936         default:
937             {{
938                 double score_value = ComputeScore(scope, align, score);
939                 if (CSeq_align::IsIntegerScore(score)) {
940                     align.SetNamedScore(score, (int)score_value);
941                 } else {
942                     if (score_value == numeric_limits<double>::infinity()) {
943                         score_value = numeric_limits<double>::max() / 10.0;
944                     }
945                     align.SetNamedScore(score, score_value);
946                 }
947             }}
948         }
949     } catch (CSeqalignException& e) {
950         // Unimplemented (code missing) or unsupported (score cannot be defined)
951         // is handled according to the error handling mode. All other
952         // errors always throw.
953         switch (e.GetErrCode()) {
954         case CSeqalignException::eUnsupported:
955         case CSeqalignException::eNotImplemented:
956             break;
957         default:
958             throw;
959         }
960 
961         switch (GetErrorMode()) {
962         case eError_Throw:
963             throw;
964         case eError_Report:
965             ERR_POST(Error
966                 << "CScoreBuilderBase::AddScore(): error computing score: "
967                 << e);
968         default:
969             break;
970         }
971     }
972 }
973 
GetDonor(const objects::CSpliced_exon & exon)974 string GetDonor(const objects::CSpliced_exon& exon) {
975     if( exon.CanGetDonor_after_exon() && exon.GetDonor_after_exon().CanGetBases() ) {
976         return exon.GetDonor_after_exon().GetBases();
977     }
978     return string();
979 }
980 
GetAcceptor(const objects::CSpliced_exon & exon)981 string GetAcceptor(const objects::CSpliced_exon& exon) {
982     if( exon.CanGetAcceptor_before_exon() && exon.GetAcceptor_before_exon().CanGetBases()  ) {
983         return exon.GetAcceptor_before_exon().GetBases();
984     }
985     return string();
986 }
987 
988 //returns true for GT/AG, GC/AG AND AT/AC
IsConsSplice(const string & donor,const string acc)989 bool IsConsSplice(const string& donor, const string acc) {
990     if(donor.length()<2 || acc.length()<2) return false;
991     if(toupper(Uchar(acc.c_str()[0])) != 'A') return false;
992     switch(toupper(Uchar(acc.c_str()[1]))) {
993     case 'C':
994         if( toupper(Uchar(donor.c_str()[0])) == 'A' && toupper(Uchar(donor.c_str()[1])) == 'T' ) return true;
995         else return false;
996         break;
997     case 'G':
998         if( toupper(Uchar(donor.c_str()[0])) == 'G' ) {
999             char don2 = toupper(Uchar(donor.c_str()[1]));
1000             if(don2 == 'T' || don2 == 'C') return true;
1001         }
1002         return false;
1003         break;
1004     default:
1005         return false;
1006         break;
1007     }
1008     return false;
1009 }
1010 
1011 
AddSplignScores(const CSeq_align & align,CSeq_align::TScore & scores)1012 void CScoreBuilderBase::AddSplignScores(const CSeq_align& align,
1013                                         CSeq_align::TScore &scores)
1014 {
1015     typedef CSeq_align::TSegs::TSpliced TSpliced;
1016     const TSpliced & spliced (align.GetSegs().GetSpliced());
1017     if(spliced.GetProduct_type() != CSpliced_seg::eProduct_type_transcript) {
1018         NCBI_THROW(CSeqalignException, eUnsupported,
1019                    "CScoreBuilderBase::AddSplignScores(): Unsupported product type");
1020     }
1021 
1022     const bool qstrand (spliced.GetProduct_strand() != eNa_strand_minus);
1023 
1024     typedef TSpliced::TExons TExons;
1025     const TExons & exons (spliced.GetExons());
1026 
1027     size_t matches (0),
1028         aligned_query_bases (0),  // matches, mismatches and indels
1029         aln_length_exons (0),
1030         aln_length_gaps (0),
1031         splices_total (0),        // twice the number of introns
1032         splices_consensus (0);
1033 
1034     const TSeqPos  qlen (spliced.GetProduct_length());
1035     const TSeqPos polya (spliced.CanGetPoly_a()?
1036                          spliced.GetPoly_a(): (qstrand? qlen: TSeqPos(-1)));
1037     const TSeqPos prod_length_no_polya (qstrand? polya: qlen - 1 - polya);
1038 
1039     typedef CSpliced_exon TExon;
1040     TSeqPos qprev (qstrand? TSeqPos(-1): qlen);
1041     string donor;
1042     ITERATE(TExons, ii2, exons) {
1043 
1044         const TExon & exon (**ii2);
1045         const TSeqPos qmin (exon.GetProduct_start().GetNucpos()),
1046             qmax (exon.GetProduct_end().GetNucpos());
1047 
1048         const TSeqPos qgap (qstrand? qmin - qprev - 1: qprev - qmax - 1);
1049 
1050         if(qgap > 0) {
1051             aln_length_gaps += qgap;
1052             donor.clear();
1053         }
1054         else if (ii2 != exons.begin()) {
1055             splices_total += 2;
1056             if(IsConsSplice(donor, GetAcceptor(exon))) { splices_consensus += 2; }
1057         }
1058 
1059         typedef TExon::TParts TParts;
1060         const TParts & parts (exon.GetParts());
1061         string errmsg;
1062         ITERATE(TParts, ii3, parts) {
1063             const CSpliced_exon_chunk & part (**ii3);
1064             const CSpliced_exon_chunk::E_Choice choice (part.Which());
1065             TSeqPos len (0);
1066             switch(choice) {
1067             case CSpliced_exon_chunk::e_Match:
1068                 len = part.GetMatch();
1069                 matches += len;
1070                 aligned_query_bases += len;
1071                 break;
1072             case CSpliced_exon_chunk::e_Mismatch:
1073                 len = part.GetMismatch();
1074                 aligned_query_bases += len;
1075                 break;
1076             case CSpliced_exon_chunk::e_Product_ins:
1077                 len = part.GetProduct_ins();
1078                 aligned_query_bases += len;
1079                 break;
1080             case CSpliced_exon_chunk::e_Genomic_ins:
1081                 len = part.GetGenomic_ins();
1082                 break;
1083             default:
1084                 errmsg = "Unexpected spliced exon chunk part: "
1085                     + part.SelectionName(choice);
1086                 NCBI_THROW(CSeqalignException, eUnsupported, errmsg);
1087             }
1088             aln_length_exons += len;
1089         }
1090 
1091         donor = GetDonor(exon);
1092         qprev = qstrand? qmax: qmin;
1093     } // TExons
1094 
1095     const TSeqPos qgap (qstrand? polya - qprev - 1: qprev - polya - 1);
1096     aln_length_gaps += qgap;
1097 
1098     for (CSeq_align::TScore::iterator it = scores.begin(); it != scores.end(); )
1099     {
1100         CSeq_align::EScoreType score_type = CSeq_align::eScore_Score;
1101         if ((*it)->GetId().IsStr()) {
1102             CSeq_align::TScoreNameMap::const_iterator score =
1103                     CSeq_align::ScoreNameMap()
1104                              . find((*it)->GetId().GetStr());
1105             if (score != CSeq_align::ScoreNameMap().end()) {
1106                 score_type = score->second;
1107             }
1108         }
1109         if (score_type >= CSeq_align::eScore_Matches &&
1110             score_type <= CSeq_align::eScore_ExonIdentity)
1111         {
1112             it = scores.erase(it);
1113         } else {
1114             ++it;
1115         }
1116     }
1117 
1118         {
1119             CRef<CScore> score_matches (new CScore());
1120             score_matches->SetId().SetStr(
1121                 CSeq_align::ScoreName(CSeq_align::eScore_Matches));
1122             score_matches->SetValue().SetInt(matches);
1123             scores.push_back(score_matches);
1124         }
1125         {
1126             CRef<CScore> score_overall_identity (new CScore());
1127             score_overall_identity->SetId().SetStr(
1128                 CSeq_align::ScoreName(CSeq_align::eScore_OverallIdentity));
1129             score_overall_identity->SetValue().
1130                 SetReal(double(matches)/(aln_length_exons + aln_length_gaps));
1131             scores.push_back(score_overall_identity);
1132         }
1133         {
1134             CRef<CScore> score_splices (new CScore());
1135             score_splices->SetId().SetStr(
1136                 CSeq_align::ScoreName(CSeq_align::eScore_Splices));
1137             score_splices->SetValue().SetInt(splices_total);
1138             scores.push_back(score_splices);
1139         }
1140         {
1141             CRef<CScore> score_splices_consensus (new CScore());
1142             score_splices_consensus->SetId().SetStr(
1143                 CSeq_align::ScoreName(CSeq_align::eScore_ConsensusSplices));
1144             score_splices_consensus->SetValue().SetInt(splices_consensus);
1145             scores.push_back(score_splices_consensus);
1146         }
1147         {
1148             CRef<CScore> score_coverage (new CScore());
1149             score_coverage->SetId().SetStr(
1150                 CSeq_align::ScoreName(CSeq_align::eScore_ProductCoverage));
1151             score_coverage->SetValue().
1152                 SetReal(double(aligned_query_bases) / prod_length_no_polya);
1153             scores.push_back(score_coverage);
1154         }
1155         {
1156             CRef<CScore> score_exon_identity (new CScore());
1157             score_exon_identity->SetId().SetStr(
1158                 CSeq_align::ScoreName(CSeq_align::eScore_ExonIdentity));
1159             score_exon_identity->SetValue().
1160                 SetReal(double(matches) / aln_length_exons);
1161             scores.push_back(score_exon_identity);
1162         }
1163 
1164 }
1165 
ComputeScore(CScope & scope,const CSeq_align & align,CSeq_align::EScoreType score)1166 double CScoreBuilderBase::ComputeScore(CScope& scope, const CSeq_align& align,
1167                                        CSeq_align::EScoreType score)
1168 {
1169     return ComputeScore(scope, align,
1170                       CRangeCollection<TSeqPos>(TSeqRange::GetWhole()), score);
1171 }
1172 
ComputeScore(CScope & scope,const CSeq_align & align,const TSeqRange & range,CSeq_align::EScoreType score)1173 double CScoreBuilderBase::ComputeScore(CScope& scope, const CSeq_align& align,
1174                                        const TSeqRange &range,
1175                                        CSeq_align::EScoreType score)
1176 {
1177     return ComputeScore(scope, align, CRangeCollection<TSeqPos>(range), score);
1178 }
1179 
ComputeScore(CScope & scope,const CSeq_align & align,const CRangeCollection<TSeqPos> & ranges,CSeq_align::EScoreType score)1180 double CScoreBuilderBase::ComputeScore(CScope& scope, const CSeq_align& align,
1181                                        const CRangeCollection<TSeqPos> &ranges,
1182                                        CSeq_align::EScoreType score)
1183 {
1184     switch (score) {
1185     case CSeq_align::eScore_Score:
1186         {{
1187              NCBI_THROW(CSeqalignException, eUnsupported,
1188                         "CScoreBuilderBase::ComputeScore(): "
1189                         "generic 'score' computation is undefined");
1190          }}
1191         break;
1192 
1193     case CSeq_align::eScore_Blast:
1194     case CSeq_align::eScore_BitScore:
1195     case CSeq_align::eScore_EValue:
1196     case CSeq_align::eScore_SumEValue:
1197     case CSeq_align::eScore_CompAdjMethod:
1198         NCBI_THROW(CSeqalignException, eNotImplemented,
1199                    "CScoreBuilderBase::ComputeScore(): "
1200                    "BLAST scores are available in CScoreBuilder, "
1201                    "not CScoreBuilderBase");
1202         break;
1203 
1204     case CSeq_align::eScore_IdentityCount:
1205         return GetIdentityCount(scope, align, ranges);
1206 
1207     case CSeq_align::eScore_PositiveCount:
1208         if (ranges.empty() || !ranges.begin()->IsWhole()) {
1209             NCBI_THROW(CSeqalignException, eNotImplemented,
1210                        "positive-count score not supported within a range");
1211         }
1212         return GetPositiveCount(scope, align);
1213 
1214     case CSeq_align::eScore_NegativeCount:
1215         if (ranges.empty() || !ranges.begin()->IsWhole()) {
1216             NCBI_THROW(CSeqalignException, eNotImplemented,
1217                        "positive-count score not supported within a range");
1218         }
1219         return GetNegativeCount(scope, align);
1220 
1221     case CSeq_align::eScore_MismatchCount:
1222         return GetMismatchCount(scope, align, ranges);
1223 
1224     case CSeq_align::eScore_GapCount:
1225         return GetGapCount(align, ranges);
1226 
1227     case CSeq_align::eScore_AlignLength:
1228         return align.GetAlignLengthWithinRanges(ranges, true /* include gaps */);
1229 
1230     case CSeq_align::eScore_PercentIdentity_Gapped:
1231         {{
1232             int identities      = 0;
1233             int mismatches      = 0;
1234             double pct_identity = 0;
1235             s_GetPercentIdentity(scope, align,
1236                                  &identities, &mismatches, &pct_identity,
1237                                  eGapped, ranges);
1238             return pct_identity;
1239         }}
1240         break;
1241 
1242     case CSeq_align::eScore_PercentIdentity_Ungapped:
1243         {{
1244             int identities      = 0;
1245             int mismatches      = 0;
1246             double pct_identity = 0;
1247             s_GetPercentIdentity(scope, align,
1248                                  &identities, &mismatches, &pct_identity,
1249                                  eUngapped, ranges);
1250             return pct_identity;
1251         }}
1252         break;
1253 
1254     case CSeq_align::eScore_PercentIdentity_GapOpeningOnly:
1255         {{
1256             int identities      = 0;
1257             int mismatches      = 0;
1258             double pct_identity = 0;
1259             s_GetPercentIdentity(scope, align,
1260                                  &identities, &mismatches, &pct_identity,
1261                                  eGBDNA, ranges);
1262             return pct_identity;
1263         }}
1264         break;
1265 
1266     case CSeq_align::eScore_PercentCoverage:
1267         {{
1268             double pct_coverage = 0;
1269             s_GetPercentCoverage(scope, align, ranges, &pct_coverage);
1270             return pct_coverage;
1271         }}
1272         break;
1273 
1274     case CSeq_align::eScore_HighQualityPercentCoverage:
1275         {{
1276             if(align.GetSegs().Which() == CSeq_align::TSegs::e_Std)
1277                 /// high-quality-coverage calculatino is not possbile for standard segs
1278                 NCBI_THROW(CSeqalignException, eUnsupported,
1279                             "High-quality percent coverage not supported "
1280                             "for standard seg representation");
1281 
1282             if (ranges.empty() || !ranges.begin()->IsWhole()) {
1283                 NCBI_THROW(CSeqalignException, eNotImplemented,
1284                            "High-quality percent coverage not supported "
1285                            "within a range");
1286             }
1287             /// If we have annotation for a high-quality region, it is in a ftable named
1288             /// "NCBI_GPIPE", containing a region Seq-feat named "alignable"
1289             TSeqRange alignable_range = TSeqRange::GetWhole();
1290             CBioseq_Handle query = scope.GetBioseqHandle(align.GetSeq_id(0));
1291             for(CFeat_CI feat_it(query,
1292                                  SAnnotSelector(CSeqFeatData::e_Region).
1293                                  SetExcludeExternal());
1294                     feat_it; ++feat_it)
1295             {
1296                 if(feat_it->GetData().GetRegion() == "alignable" &&
1297                    feat_it->GetAnnot().IsNamed() &&
1298                    feat_it->GetAnnot().GetName() == "NCBI_GPIPE")
1299                 {
1300                     alignable_range = feat_it->GetRange();
1301                     break;
1302                 }
1303             }
1304             double pct_coverage = 0;
1305             s_GetPercentCoverage(scope, align,
1306                                  CRangeCollection<TSeqPos>(alignable_range),
1307                                  &pct_coverage);
1308             return pct_coverage;
1309         }}
1310         break;
1311 
1312     case CSeq_align::eScore_Matches:
1313     case CSeq_align::eScore_OverallIdentity:
1314     case CSeq_align::eScore_Splices:
1315     case CSeq_align::eScore_ConsensusSplices:
1316     case CSeq_align::eScore_ProductCoverage:
1317     case CSeq_align::eScore_ExonIdentity:
1318         {{
1319              if (ranges.empty() || !ranges.begin()->IsWhole()) {
1320                  NCBI_THROW(CSeqalignException, eNotImplemented,
1321                             "splign scores not supported within a range");
1322              }
1323              CSeq_align::TScore scores;
1324              AddSplignScores(align, scores);
1325              ITERATE (CSeq_align::TScore, it, scores) {
1326                  if ((*it)->GetId().GetStr() == CSeq_align::ScoreName(score))
1327                  {
1328                      if ((*it)->GetValue().IsInt()) {
1329                          return (*it)->GetValue().GetInt();
1330                      } else {
1331                          return (*it)->GetValue().GetReal();
1332                      }
1333                  }
1334              }
1335              NCBI_ASSERT(false, "Should never reach this point");
1336         }}
1337 
1338     default:
1339         {{
1340             NCBI_THROW(CSeqalignException, eNotImplemented,
1341                        "Unknown score");
1342             return 0;
1343         }}
1344     }
1345 }
1346 
1347 
1348 
1349 
1350 
1351 END_NCBI_SCOPE
1352 
1353