/*  $Id: validerror_graph.cpp 632625 2021-06-03 17:38:33Z ivanov $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko......
 *
 * File Description:
 *   validation of seq_graph
 *   .......
 *
 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <objects/seq/Bioseq.hpp>
36 #include <objects/seq/Seq_annot.hpp>
37 #include <objects/seqres/Seq_graph.hpp>
38 #include <objmgr/graph_ci.hpp>
39 #include <objtools/validator/validerror_graph.hpp>
40 #include <objtools/validator/utilities.hpp>
41 
42 
43 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)44 BEGIN_SCOPE(objects)
45 BEGIN_SCOPE(validator)
46 
47 
48 CValidError_graph::CValidError_graph(CValidError_imp& imp) :
49     CValidError_base(imp)
50 {
51 }
52 
53 
~CValidError_graph(void)54 CValidError_graph::~CValidError_graph(void)
55 {
56 }
57 
58 
ValidateSeqGraph(const CSeq_graph & graph)59 void CValidError_graph::ValidateSeqGraph(const CSeq_graph& graph)
60 {
61     if (graph.GetGraph().IsByte()) {
62         const CByte_graph& bg = graph.GetGraph().GetByte();
63 
64         // Test that min / max values are in the 0 - 100 range.
65         x_ValidateMinValues(bg, graph);
66         x_ValidateMaxValues(bg, graph);
67     }
68 }
69 
70 
x_ValidateMinValues(const CByte_graph & bg,const CSeq_graph & graph)71 void CValidError_graph::x_ValidateMinValues(const CByte_graph& bg, const CSeq_graph& graph)
72 {
73     int min = bg.GetMin();
74     if ( min < 0  ||  min > 100 ) {
75         PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphMin,
76             "Graph min (" + NStr::IntToString(min) + ") out of range",
77             graph);
78     }
79 }
80 
81 
x_ValidateMaxValues(const CByte_graph & bg,const CSeq_graph & graph)82 void CValidError_graph::x_ValidateMaxValues(const CByte_graph& bg, const CSeq_graph& graph)
83 {
84     int max = bg.GetMax();
85     if ( max <= 0  ||  max > 100 ) {
86         EDiagSev sev = (max <= 0) ? eDiag_Error : eDiag_Warning;
87         PostErr(sev, eErr_SEQ_GRAPH_GraphMax,
88             "Graph max (" + NStr::IntToString(max) + ") out of range",
89             graph);
90     }
91 }
92 
93 
ValidateSeqGraphContext(const CSeq_graph & graph,const CBioseq_set & set)94 void CValidError_graph::ValidateSeqGraphContext(const CSeq_graph& graph, const CBioseq_set& set)
95 {
96     m_Imp.IncrementMisplacedGraphCount();
97 }
98 
99 
ValidateSeqGraphContext(const CSeq_graph & graph,const CBioseq & seq)100 void CValidError_graph::ValidateSeqGraphContext(const CSeq_graph& graph, const CBioseq& seq)
101 {
102     if (!graph.IsSetLoc()) {
103         m_Imp.IncrementMisplacedGraphCount();
104     } else {
105         CBioseq_Handle bsh = GetCache().GetBioseqHandleFromLocation(
106             m_Scope,
107             graph.GetLoc(), m_Imp.GetTSE_Handle());
108         if (m_Scope->GetBioseqHandle(seq) != bsh) {
109             m_Imp.IncrementMisplacedGraphCount();
110         }
111     }
112 
113     if (graph.GetGraph().IsByte()) {
114         const CByte_graph& bg = graph.GetGraph().GetByte();
115 
116         TSeqPos numval = graph.GetNumval();
117         if (numval != bg.GetValues().size()) {
118             PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphByteLen,
119                 "SeqGraph (" + NStr::SizetToString(numval) + ") " +
120                 "and ByteStore (" + NStr::SizetToString(bg.GetValues().size()) +
121                 ") length mismatch", seq, graph);
122         }
123     }
124 }
125 
126 
x_ValidateGraphLocation(const CSeq_graph & graph)127 bool CValidError_graph::x_ValidateGraphLocation (const CSeq_graph& graph)
128 {
129     if (!graph.IsSetLoc() || graph.GetLoc().Which() == CSeq_loc::e_not_set) {
130         PostErr (eDiag_Error, eErr_SEQ_GRAPH_GraphLocInvalid, "SeqGraph location (Unknown) is invalid", graph);
131         return false;
132     } else {
133         const CSeq_loc& loc = graph.GetLoc();
134         const CBioseq_Handle & bsh =
135             GetCache().GetBioseqHandleFromLocation(
136                 m_Scope, loc, m_Imp.GetTSE_Handle());
137         if (!bsh) {
138             string label;
139             if (loc.GetId() != 0) {
140                loc.GetId()->GetLabel(&label, CSeq_id::eContent);
141             }
142             if (NStr::IsBlank(label)) {
143                 label = "unknown";
144             }
145             PostErr (eDiag_Warning, eErr_SEQ_GRAPH_GraphBioseqId,
146                      "Bioseq not found for Graph location " + label, graph);
147             return false;
148         }
149         TSeqPos start = loc.GetStart(eExtreme_Positional);
150         TSeqPos stop = loc.GetStop(eExtreme_Positional);
151         if (start >= bsh.GetBioseqLength() || stop >= bsh.GetBioseqLength()
152             || !loc.IsInt() || loc.GetStrand() == eNa_strand_minus) {
153             string label = GetValidatorLocationLabel (loc, *m_Scope);
154             PostErr (eDiag_Error, eErr_SEQ_GRAPH_GraphLocInvalid,
155                      "SeqGraph location (" + label + ") is invalid", graph);
156             return false;
157         }
158     }
159     return true;
160 }
161 
162 
s_CompareTwoSeqGraphs(const CRef<CSeq_graph> g1,const CRef<CSeq_graph> g2)163 bool s_CompareTwoSeqGraphs(const CRef <CSeq_graph> g1,
164                                     const CRef <CSeq_graph> g2)
165 {
166     if (!g1->IsSetLoc()) {
167         return true;
168     } else if (!g2->IsSetLoc()) {
169         return false;
170     }
171 
172     TSeqPos start1 = g1->GetLoc().GetStart(eExtreme_Positional);
173     TSeqPos stop1 = g1->GetLoc().GetStop(eExtreme_Positional);
174     TSeqPos start2 = g2->GetLoc().GetStart(eExtreme_Positional);
175     TSeqPos stop2 = g2->GetLoc().GetStop(eExtreme_Positional);
176 
177     if (start1 < start2) {
178         return true;
179     } else if (start1 == start2 && stop1 < stop2) {
180         return true;
181     } else {
182         return false;
183     }
184 }
185 
186 
ValidateGraphsOnBioseq(const CBioseq & seq)187 void CValidError_graph::ValidateGraphsOnBioseq(const CBioseq& seq)
188 {
189     if ( !seq.IsNa() || !seq.IsSetAnnot() ) {
190         return;
191     }
192 
193     vector <CRef <CSeq_graph> > graph_list;
194 
195     FOR_EACH_ANNOT_ON_BIOSEQ (it, seq) {
196         if ((*it)->IsGraph()) {
197             FOR_EACH_GRAPH_ON_ANNOT(git, **it) {
198                 if (IsSupportedGraphType(**git)) {
199                     CRef <CSeq_graph> r(*git);
200                     graph_list.push_back(r);
201                 }
202             }
203         }
204     }
205 
206     if (graph_list.size() == 0) {
207         return;
208     }
209 
210     int     last_loc = -1;
211     bool    overlaps = false;
212     const CSeq_graph* overlap_graph = 0;
213     SIZE_TYPE num_graphs = 0;
214     SIZE_TYPE graphs_len = 0;
215 
216     const CSeq_inst& inst = seq.GetInst();
217 
218     x_ValidateGraphOrderOnBioseq (seq, graph_list);
219 
220     // now sort, so that we can look for coverage
221     sort (graph_list.begin(), graph_list.end(), s_CompareTwoSeqGraphs);
222 
223     SIZE_TYPE Ns_with_score = 0,
224         gaps_with_score = 0,
225         ACGTs_without_score = 0,
226         vals_below_min = 0,
227         vals_above_max = 0,
228         num_bases = 0;
229 
230     int first_N = -1,
231         first_ACGT = -1;
232 
233     for (vector<CRef <CSeq_graph> >::iterator grp = graph_list.begin(); grp != graph_list.end(); ++grp) {
234         const CSeq_graph& graph = **grp;
235 
236         // Currently we support only byte graphs
237         x_ValidateGraphValues(graph, seq, first_N, first_ACGT, num_bases, Ns_with_score, gaps_with_score, ACGTs_without_score, vals_below_min, vals_above_max);
238 
239         if (graph.IsSetLoc() && graph.GetLoc().Which() != CSeq_loc::e_not_set) {
240             // Test for overlapping graphs
241             const CSeq_loc& loc = graph.GetLoc();
242             if ( (int)loc.GetTotalRange().GetFrom() <= last_loc ) {
243                 overlaps = true;
244                 overlap_graph = &graph;
245             }
246             last_loc = loc.GetTotalRange().GetTo();
247         }
248 
249         graphs_len += graph.GetNumval();
250         ++num_graphs;
251     }
252 
253     if ( ACGTs_without_score > 0 ) {
254         if (ACGTs_without_score * 10 > num_bases) {
255             double pct = (double) (ACGTs_without_score) * 100.0 / (double) num_bases;
256             PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphACGTScoreMany,
257                     NStr::SizetToString (ACGTs_without_score) + " ACGT bases ("
258                     + NStr::DoubleToString (pct, 2) + "%) have zero score value - first one at position "
259                     + NStr::IntToString (first_ACGT + 1),
260                     seq);
261         } else {
262             PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphACGTScore,
263                 NStr::SizetToString(ACGTs_without_score) +
264                 " ACGT bases have zero score value - first one at position " +
265                 NStr::IntToString(first_ACGT + 1), seq);
266         }
267     }
268     if ( Ns_with_score > 0 ) {
269         if (Ns_with_score * 10 > num_bases) {
270             double pct = (double) (Ns_with_score) * 100.0 / (double) num_bases;
271             PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphNScoreMany,
272                     NStr::SizetToString(Ns_with_score) + " N bases ("
273                     + NStr::DoubleToString(pct, 2) + "%) have positive score value - first one at position "
274                     + NStr::IntToString(first_N + 1),
275                     seq);
276         } else {
277             PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphNScore,
278                 NStr::SizetToString(Ns_with_score) +
279                 " N bases have positive score value - first one at position " +
280                 NStr::IntToString(first_N + 1), seq);
281         }
282     }
283     if ( gaps_with_score > 0 ) {
284         PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphGapScore,
285             NStr::SizetToString(gaps_with_score) +
286             " gap bases have positive score value",
287             seq);
288     }
289     if ( vals_below_min > 0 ) {
290         PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphBelow,
291             NStr::SizetToString(vals_below_min) +
292             " quality scores have values below the reported minimum or 0",
293             seq);
294     }
295     if ( vals_above_max > 0 ) {
296         PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphAbove,
297             NStr::SizetToString(vals_above_max) +
298             " quality scores have values above the reported maximum or 100",
299             seq);
300     }
301 
302     if ( overlaps ) {
303         PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphOverlap,
304             "Graph components overlap, with multiple scores for "
305             "a single base", seq, *overlap_graph);
306     }
307 
308     SIZE_TYPE seq_len = GetUngappedSeqLen(seq);
309     if ( (seq_len != graphs_len)  &&  (inst.GetLength() != graphs_len) ) {
310         PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphBioseqLen,
311             "SeqGraph (" + NStr::SizetToString(graphs_len) + ") and Bioseq (" +
312             NStr::SizetToString(seq_len) + ") length mismatch", seq);
313     }
314 
315     if ( inst.GetRepr() == CSeq_inst::eRepr_delta  &&  num_graphs > 1 ) {
316         x_ValidateGraphOnDeltaBioseq(seq);
317     }
318 
319 }
320 
321 
322 //look for Seq-graphs out of order
x_ValidateGraphOrderOnBioseq(const CBioseq & seq,vector<CRef<CSeq_graph>> graph_list)323 void CValidError_graph::x_ValidateGraphOrderOnBioseq (const CBioseq& seq, vector <CRef <CSeq_graph> > graph_list)
324 {
325     if (graph_list.size() < 2) {
326         return;
327     }
328 
329     TSeqPos last_left = graph_list[0]->GetLoc().GetStart(eExtreme_Positional);
330     TSeqPos last_right = graph_list[0]->GetLoc().GetStop(eExtreme_Positional);
331 
332     for (size_t i = 1; i < graph_list.size(); i++) {
333         TSeqPos left = graph_list[i]->GetLoc().GetStart(eExtreme_Positional);
334         TSeqPos right = graph_list[i]->GetLoc().GetStop(eExtreme_Positional);
335 
336         if (left < last_left
337             || (left == last_left && right < last_right)) {
338             PostErr (eDiag_Warning, eErr_SEQ_GRAPH_GraphOutOfOrder,
339                      "Graph components are out of order - may be a software bug",
340                      *graph_list[i]);
341             return;
342         }
343         last_left = left;
344         last_right = right;
345     }
346 }
347 
348 
x_ValidateGraphValues(const CSeq_graph & graph,const CBioseq & seq,int & first_N,int & first_ACGT,size_t & num_bases,size_t & Ns_with_score,size_t & gaps_with_score,size_t & ACGTs_without_score,size_t & vals_below_min,size_t & vals_above_max)349 void CValidError_graph::x_ValidateGraphValues
350 (const CSeq_graph& graph,
351  const CBioseq& seq,
352  int& first_N,
353  int& first_ACGT,
354  size_t& num_bases,
355  size_t& Ns_with_score,
356  size_t& gaps_with_score,
357  size_t& ACGTs_without_score,
358  size_t& vals_below_min,
359  size_t& vals_above_max)
360 {
361     string label;
362     seq.GetFirstId()->GetLabel(&label);
363 
364     if (!x_ValidateGraphLocation(graph)) {
365         return;
366     }
367 
368     try {
369         const CByte_graph& bg = graph.GetGraph().GetByte();
370         int min = bg.GetMin();
371         int max = bg.GetMax();
372 
373         const CSeq_loc& gloc = graph.GetLoc();
374         CRef<CSeq_loc> tmp(new CSeq_loc());
375         tmp->Assign(gloc);
376         tmp->SetStrand(eNa_strand_plus);
377 
378         CSeqVector vec(*tmp, *m_Scope,
379             CBioseq_Handle::eCoding_Ncbi,
380             sequence::GetStrand(gloc, m_Scope));
381         vec.SetCoding(CSeq_data::e_Ncbi4na);
382 
383         CSeqVector::const_iterator seq_begin = vec.begin();
384         CSeqVector::const_iterator seq_end = vec.end();
385         CSeqVector::const_iterator seq_iter = seq_begin;
386 
387         const CByte_graph::TValues& values = bg.GetValues();
388         CByte_graph::TValues::const_iterator val_iter = values.begin();
389         CByte_graph::TValues::const_iterator val_end = values.end();
390 
391         size_t score_pos = 0;
392 
393         while (seq_iter != seq_end && score_pos < graph.GetNumval()) {
394             CSeqVector::TResidue res = *seq_iter;
395             if (IsResidue(res)) {
396                 short val;
397                 if (val_iter == val_end) {
398                     val = 0;
399                 } else {
400                     val = (short)(*val_iter);
401                     ++val_iter;
402                 }
403                 // counting total number of bases, to look for percentage of bases with score of zero
404                 num_bases++;
405 
406                 if ((val < min) || (val < 0)) {
407                     vals_below_min++;
408                 }
409                 if ((val > max) || (val > 100)) {
410                     vals_above_max++;
411                 }
412 
413                 switch (res) {
414                 case 0:     // gap
415                     if (val > 0) {
416                         gaps_with_score++;
417                     }
418                     break;
419 
420                 case 1:     // A
421                 case 2:     // C
422                 case 4:     // G
423                 case 8:     // T
424                     if (val == 0) {
425                         ACGTs_without_score++;
426                         if (first_ACGT == -1) {
427                             first_ACGT = seq_iter.GetPos() + gloc.GetStart(eExtreme_Positional);
428                         }
429                     }
430                     break;
431 
432                 case 15:    // N
433                     if (val > 0) {
434                         Ns_with_score++;
435                         if (first_N == -1) {
436                             first_N = seq_iter.GetPos() + gloc.GetStart(eExtreme_Positional);
437                         }
438                     }
439                     break;
440                 }
441             }
442             ++seq_iter;
443             ++score_pos;
444         }
445     } catch (CException& e) {
446         PostErr(eDiag_Fatal, eErr_INTERNAL_Exception,
447             string("Exception while validating graph values. EXCEPTION: ") +
448             e.what(), seq);
449     }
450 
451 }
452 
453 
x_ValidateGraphOnDeltaBioseq(const CBioseq & seq)454 void CValidError_graph::x_ValidateGraphOnDeltaBioseq(const CBioseq& seq)
455 {
456     const CDelta_ext& delta = seq.GetInst().GetExt().GetDelta();
457     CDelta_ext::Tdata::const_iterator curr = delta.Get().begin(),
458         next = curr,
459         end = delta.Get().end();
460 
461     SIZE_TYPE   num_delta_seq = 0;
462     TSeqPos offset = 0;
463 
464     CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq);
465     CGraph_CI grp(bsh);
466     while (grp && !IsSupportedGraphType(grp->GetOriginalGraph())) {
467         ++grp;
468     }
469     while ( curr != end && grp ) {
470         const CSeq_graph& graph = grp->GetOriginalGraph();
471         ++next;
472         switch ( (*curr)->Which() ) {
473             case CDelta_seq::e_Loc:
474                 {
475                     const CSeq_loc& loc = (*curr)->GetLoc();
476                     if ( !loc.IsNull() ) {
477                         TSeqPos loclen = sequence::GetLength(loc, m_Scope);
478                         if ( graph.GetNumval() != loclen ) {
479                             PostErr(eDiag_Warning, eErr_SEQ_GRAPH_GraphSeqLocLen,
480                                 "SeqGraph (" + NStr::IntToString(graph.GetNumval()) +
481                                 ") and SeqLoc (" + NStr::IntToString(loclen) +
482                                 ") length mismatch", graph);
483                         }
484                         offset += loclen;
485                         ++num_delta_seq;
486                     }
487                     ++grp;
488                     while (grp && !IsSupportedGraphType(grp->GetOriginalGraph())) {
489                         ++grp;
490                     }
491                 }
492                 break;
493 
494             case CDelta_seq::e_Literal:
495                 {
496                     const CSeq_literal& lit = (*curr)->GetLiteral();
497                     TSeqPos litlen = lit.GetLength(),
498                         nextlen = 0;
499                     if ( lit.IsSetSeq_data() && !lit.GetSeq_data().IsGap() ) {
500                         while (next != end  &&  x_GetLitLength(**next, nextlen)) {
501                             litlen += nextlen;
502                             ++next;
503                         }
504                         if ( graph.GetNumval() != litlen ) {
505                             PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphSeqLitLen,
506                                 "SeqGraph (" + NStr::IntToString(graph.GetNumval()) +
507                                 ") and SeqLit (" + NStr::IntToString(litlen) +
508                                 ") length mismatch", graph);
509                         }
510                         const CSeq_loc& graph_loc = graph.GetLoc();
511                         if ( graph_loc.IsInt() ) {
512                             TSeqPos from = graph_loc.GetTotalRange().GetFrom();
513                             TSeqPos to = graph_loc.GetTotalRange().GetTo();
514                             if (  from != offset ) {
515                                 PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphStartPhase,
516                                     "SeqGraph (" + NStr::IntToString(from) +
517                                     ") and SeqLit (" + NStr::IntToString(offset) +
518                                     ") start do not coincide",
519                                     graph);
520                             }
521 
522                             if ( to != offset + litlen - 1 ) {
523                                 PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphStopPhase,
524                                     "SeqGraph (" + NStr::IntToString(to) +
525                                     ") and SeqLit (" +
526                                     NStr::IntToString(litlen + offset - 1) +
527                                     ") stop do not coincide",
528                                     graph);
529                             }
530                         }
531                         ++grp;
532                         while (grp && !IsSupportedGraphType(grp->GetOriginalGraph())) {
533                             ++grp;
534                         }
535                         ++num_delta_seq;
536                     }
537                     offset += litlen;
538                 }
539                 break;
540 
541             default:
542                 break;
543         }
544         curr = next;
545     }
546 
547     // if there are any left, count the remaining delta seqs that should have graphs
548     while ( curr != end) {
549         ++next;
550         switch ( (*curr)->Which() ) {
551             case CDelta_seq::e_Loc:
552                 {
553                     const CSeq_loc& loc = (*curr)->GetLoc();
554                     if ( !loc.IsNull() ) {
555                         ++num_delta_seq;
556                     }
557                 }
558                 break;
559 
560             case CDelta_seq::e_Literal:
561                 {
562                     const CSeq_literal& lit = (*curr)->GetLiteral();
563                     TSeqPos litlen = lit.GetLength(),
564                         nextlen = 0;
565                     if ( lit.IsSetSeq_data() ) {
566                         while (next != end  &&  x_GetLitLength(**next, nextlen)) {
567                             litlen += nextlen;
568                             ++next;
569                         }
570                         ++num_delta_seq;
571                     }
572                 }
573                 break;
574 
575             default:
576                 break;
577         }
578         curr = next;
579     }
580 
581     SIZE_TYPE num_graphs = 0;
582     grp.Rewind();
583     while (grp) {
584         if (IsSupportedGraphType(grp->GetOriginalGraph())) {
585             ++num_graphs;
586         }
587         ++grp;
588     }
589 
590     if ( num_delta_seq != num_graphs ) {
591         PostErr(eDiag_Error, eErr_SEQ_GRAPH_GraphDiffNumber,
592             "Different number of SeqGraph (" +
593             NStr::SizetToString(num_graphs) + ") and SeqLit (" +
594             NStr::SizetToString(num_delta_seq) + ") components",
595             seq);
596     }
597 }
598 
599 
600 // Currently we support only phrap, phred or gap4 types with byte values.
IsSupportedGraphType(const CSeq_graph & graph)601 bool CValidError_graph::IsSupportedGraphType(const CSeq_graph& graph)
602 {
603     string title;
604     if ( graph.IsSetTitle() ) {
605         title = graph.GetTitle();
606     }
607     if ( NStr::CompareNocase(title, "Phrap Quality") == 0  ||
608          NStr::CompareNocase(title, "Phred Quality") == 0  ||
609          NStr::CompareNocase(title, "Gap4") == 0 ) {
610         if ( graph.GetGraph().IsByte() ) {
611             return true;
612         }
613     }
614     return false;
615 }
616 
617 
618 // NOTE - this returns the length of the non-gap portions of the sequence
GetUngappedSeqLen(const CBioseq & seq)619 SIZE_TYPE CValidError_graph::GetUngappedSeqLen(const CBioseq& seq)
620 {
621     SIZE_TYPE seq_len = 0;
622     const CSeq_inst & inst = seq.GetInst();
623 
624     if ( inst.GetRepr() == CSeq_inst::eRepr_raw ) {
625         seq_len = inst.GetLength();
626     } else if ( inst.GetRepr() == CSeq_inst::eRepr_delta ) {
627         const CDelta_ext& delta = inst.GetExt().GetDelta();
628         ITERATE( CDelta_ext::Tdata, dseq, delta.Get() ) {
629             switch( (*dseq)->Which() ) {
630             case CDelta_seq::e_Loc:
631                 seq_len += sequence::GetLength((*dseq)->GetLoc(), m_Scope);
632                 break;
633             case CDelta_seq::e_Literal:
634                 if ( (*dseq)->GetLiteral().IsSetSeq_data() && !(*dseq)->GetLiteral().GetSeq_data().IsGap() ) {
635                     seq_len += (*dseq)->GetLiteral().GetLength();
636                 }
637                 break;
638             default:
639                 break;
640             }
641         }
642     }
643     return seq_len;
644 }
645 
646 
x_GetLitLength(const CDelta_seq & delta,TSeqPos & len)647 bool CValidError_graph::x_GetLitLength(const CDelta_seq& delta, TSeqPos& len)
648 {
649     len = 0;
650     if ( delta.IsLiteral() ) {
651         const CSeq_literal& lit = delta.GetLiteral();
652         if ( lit.IsSetSeq_data() && !lit.GetSeq_data().IsGap()) {
653             len = lit.GetLength();
654             return true;
655         }
656     }
657     return false;
658 }
659 
660 
661 
662 
663 
664 END_SCOPE(validator)
665 END_SCOPE(objects)
666 END_NCBI_SCOPE
667