1 /* $Id: validerror_format.cpp 632625 2021-06-03 17:38:33Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko.......
27  *
28  * File Description:
29  *   Validates CSeq_entries and CSeq_submits
30  *
31  */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <serial/serialbase.hpp>
35 #include <objects/submit/Seq_submit.hpp>
36 #include <objects/general/Dbtag.hpp>
37 #include <objects/pub/Pub_equiv.hpp>
38 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seq/Pubdesc.hpp>
40 #include <objects/seq/Seqdesc.hpp>
41 #include <objects/seqfeat/Feat_id.hpp>
42 #include <objects/seqfeat/Imp_feat.hpp>
43 #include <objects/seqfeat/Org_ref.hpp>
44 #include <objects/seqfeat/OrgName.hpp>
45 #include <objects/seqfeat/OrgMod.hpp>
46 #include <objects/general/User_object.hpp>
47 #include <objmgr/object_manager.hpp>
48 #include <objmgr/util/sequence.hpp>
49 #include <objtools/validator/validerror_format.hpp>
50 //#include <objtools/validator/validatorp.hpp>
51 #include <objtools/validator/utilities.hpp>
52 #include <util/static_map.hpp>
53 
54 
55 
56 BEGIN_NCBI_SCOPE
57 BEGIN_SCOPE(objects)
58 BEGIN_SCOPE(validator)
59 USING_SCOPE(sequence);
60 
61 
62 // *********************** CValidErrorFormat implementation **********************
63 
64 
65 //LCOV_EXCL_START
66 //used by Genome Workbench to create submitter report,
67 //not used by asnvalidate
CValidErrorFormat(CObjectManager & objmgr)68 CValidErrorFormat::CValidErrorFormat(CObjectManager& objmgr) :
69     m_ObjMgr(&objmgr)
70 {
71 }
72 
73 
~CValidErrorFormat(void)74 CValidErrorFormat::~CValidErrorFormat(void)
75 {
76 }
77 
78 
GetSubmitterFormatErrorGroup(CValidErrItem::TErrIndex err_code) const79 ESubmitterFormatErrorGroup CValidErrorFormat::GetSubmitterFormatErrorGroup(CValidErrItem::TErrIndex err_code) const
80 {
81     ESubmitterFormatErrorGroup rval = eSubmitterFormatErrorGroup_Default;
82 
83     switch(err_code) {
84         case eErr_SEQ_FEAT_NotSpliceConsensus:
85         case eErr_SEQ_FEAT_NotSpliceConsensusDonor:
86         case eErr_SEQ_FEAT_NotSpliceConsensusAcceptor:
87         case eErr_SEQ_FEAT_RareSpliceConsensusDonor:
88         case eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron:
89         case eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron:
90             rval = eSubmitterFormatErrorGroup_ConsensusSplice;
91             break;
92         case eErr_SEQ_FEAT_BadEcNumberFormat:
93             rval = eSubmitterFormatErrorGroup_BadEcNumberFormat;
94             break;
95         case eErr_SEQ_FEAT_BadEcNumberValue:
96         case eErr_SEQ_FEAT_DeletedEcNumber:
97         case eErr_SEQ_FEAT_ReplacedEcNumber:
98         case eErr_SEQ_FEAT_SplitEcNumber:
99             rval = eSubmitterFormatErrorGroup_BadEcNumberValue;
100             break;
101         case eErr_SEQ_FEAT_EcNumberProblem:
102             rval = eSubmitterFormatErrorGroup_BadEcNumberProblem;
103             break;
104         case eErr_SEQ_DESCR_BadSpecificHost:
105             rval = eSubmitterFormatErrorGroup_BadSpecificHost;
106             break;
107         case eErr_SEQ_DESCR_BadInstitutionCode:
108             rval = eSubmitterFormatErrorGroup_BadInstitutionCode;
109             break;
110         case eErr_SEQ_DESCR_LatLonCountry:
111         case eErr_SEQ_DESCR_LatLonWater:
112             rval = eSubmitterFormatErrorGroup_LatLonCountry;
113             break;
114         default:
115             break;
116     }
117     return rval;
118 }
119 
120 
GetSubmitterFormatErrorGroupTitle(CValidErrItem::TErrIndex err_code) const121 string CValidErrorFormat::GetSubmitterFormatErrorGroupTitle(CValidErrItem::TErrIndex err_code) const
122 {
123     string rval;
124     switch(err_code) {
125         case eErr_SEQ_FEAT_NotSpliceConsensus:
126         case eErr_SEQ_FEAT_NotSpliceConsensusDonor:
127         case eErr_SEQ_FEAT_NotSpliceConsensusAcceptor:
128         case eErr_SEQ_FEAT_RareSpliceConsensusDonor:
129             rval = "Not Splice Consensus";
130             break;
131         case eErr_SEQ_FEAT_BadEcNumberFormat:
132             rval = "EC Number Format";
133             break;
134         case eErr_SEQ_FEAT_BadEcNumberValue:
135         case eErr_SEQ_FEAT_DeletedEcNumber:
136         case eErr_SEQ_FEAT_ReplacedEcNumber:
137         case eErr_SEQ_FEAT_SplitEcNumber:
138             rval = "EC Number Value";
139             break;
140         case eErr_SEQ_FEAT_EcNumberProblem:
141             rval = "EC Number Problem";
142             break;
143         case eErr_SEQ_DESCR_BadSpecificHost:
144             rval = "Bad Specific-host Values";
145             break;
146         case eErr_SEQ_DESCR_BadInstitutionCode:
147             rval = "Bad Institution Codes";
148             break;
149         case eErr_SEQ_DESCR_LatLonCountry:
150         case eErr_SEQ_DESCR_LatLonWater:
151             rval = "LatLonCountry Errors";
152             break;
153         default:
154             rval = CValidErrItem::ConvertErrCode(err_code);
155             break;
156     }
157 
158     return rval;
159 }
160 
161 
FormatForSubmitterReport(const CValidErrItem & error,CScope & scope) const162 string CValidErrorFormat::FormatForSubmitterReport(const CValidErrItem& error, CScope& scope) const
163 {
164     string rval;
165 
166     switch (error.GetErrIndex()) {
167         case eErr_SEQ_FEAT_NotSpliceConsensus:
168         case eErr_SEQ_FEAT_NotSpliceConsensusDonor:
169         case eErr_SEQ_FEAT_NotSpliceConsensusAcceptor:
170         case eErr_SEQ_FEAT_RareSpliceConsensusDonor:
171         case eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron:
172         case eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron:
173             rval = x_FormatConsensusSpliceForSubmitterReport(error, scope);
174             break;
175         case eErr_SEQ_FEAT_BadEcNumberFormat:
176         case eErr_SEQ_FEAT_BadEcNumberValue:
177         case eErr_SEQ_FEAT_EcNumberProblem:
178         case eErr_SEQ_FEAT_DeletedEcNumber:
179         case eErr_SEQ_FEAT_ReplacedEcNumber:
180         case eErr_SEQ_FEAT_SplitEcNumber:
181             rval = x_FormatECNumberForSubmitterReport(error, scope);
182             break;
183         case eErr_SEQ_DESCR_BadSpecificHost:
184             rval = x_FormatBadSpecificHostForSubmitterReport(error);
185             break;
186         case eErr_SEQ_DESCR_BadInstitutionCode:
187             rval = x_FormatBadInstCodeForSubmitterReport(error);
188             break;
189         case eErr_SEQ_DESCR_LatLonCountry:
190         case eErr_SEQ_DESCR_LatLonWater:
191             rval = x_FormatLatLonCountryForSubmitterReport(error);
192             break;
193         default:
194             rval = x_FormatGenericForSubmitterReport(error, scope);
195             break;
196     }
197 
198     return rval;
199 }
200 
201 
x_FormatConsensusSpliceForSubmitterReport(const CValidErrItem & error,CScope & scope) const202 string CValidErrorFormat::x_FormatConsensusSpliceForSubmitterReport(const CValidErrItem& error, CScope& scope) const
203 {
204     string rval;
205     if (!error.IsSetMsg() || NStr::IsBlank(error.GetMsg())) {
206         return rval;
207     }
208     string msg = error.GetMsg();
209     if (NStr::Find(msg, "(AG) not found") != string::npos) {
210         rval = "AG";
211     }
212     else if (NStr::Find(msg, "(GT) not found") != string::npos) {
213         rval = "GT";
214     } else if (NStr::Find(msg, "(AT-AC) found instead of (GT-AG)") != string::npos) {
215         rval = "(AT-AC) instead of (GT-AG)";
216     } else if (NStr::Find(msg, "(GC-AG) found instead of (GT-AG)") != string::npos) {
217         rval = "(GC-AG) instead of (GT-AG)";
218     }
219     if (NStr::IsBlank(rval)) {
220         return rval;
221     }
222 
223     size_t position_pos = NStr::Find(msg, "ending at position ");
224     size_t other_clue = NStr::Find(msg, "and before exon");
225     if (position_pos == string::npos || other_clue == string::npos) {
226         position_pos = NStr::Find(msg, "position ");
227         if (position_pos != string::npos) {
228             string pos_str = msg.substr(position_pos);
229             long int pos;
230             if (sscanf(pos_str.c_str(), "position %ld of ", &pos) == 1) {
231                 rval += " at " + NStr::NumericToString(pos);
232                 size_t seq_pos = NStr::Find(pos_str, " of ");
233                 if (seq_pos != string::npos) {
234                     rval = pos_str.substr(seq_pos + 4) + "\t" + rval;
235                 }
236             }
237         }
238     } else {
239         string pos_str = msg.substr(position_pos);
240         long int pos1, pos2;
241         if (sscanf(pos_str.c_str(), "ending at position %ld and before exon starting at position %ld of ", &pos1, &pos2) == 2) {
242             rval += " at " + NStr::NumericToString(pos1) + ", " + NStr::NumericToString(pos2);
243             size_t seq_pos = NStr::Find(pos_str, " of ");
244             if (seq_pos != string::npos) {
245                 rval = pos_str.substr(seq_pos + 4) + "\t" + rval;
246             }
247         }
248     }
249 
250     string obj_desc = error.GetObjDesc();
251     size_t type_pos = NStr::Find(obj_desc, "FEATURE: ");
252     if (type_pos != string::npos) {
253         obj_desc = obj_desc.substr(type_pos + 9);
254         size_t space_pos = NStr::Find(obj_desc, ":");
255         if (space_pos != string::npos) {
256             obj_desc = obj_desc.substr(0, space_pos);
257         }
258     }
259 
260     rval = obj_desc + "\t" + rval;
261 
262     return rval;
263 }
264 
265 
RemovePrefix(string & str,const string & prefix)266 void RemovePrefix(string& str, const string& prefix)
267 {
268     size_t type_pos = NStr::Find(str, prefix);
269     if (type_pos != string::npos) {
270         str = str.substr(type_pos + prefix.length());
271     }
272 }
273 
RemoveSuffix(string & str,const string & suffix)274 void RemoveSuffix(string& str, const string& suffix)
275 {
276     size_t type_pos = NStr::Find(str, suffix);
277     if (type_pos != string::npos) {
278         str = str.substr(0, type_pos);
279     }
280 }
281 
282 
x_FormatGenericForSubmitterReport(const CValidErrItem & error,CScope & scope) const283 string CValidErrorFormat::x_FormatGenericForSubmitterReport(const CValidErrItem& error, CScope& scope) const
284 {
285     string obj_desc = error.GetObjDesc();
286     if (NStr::StartsWith(obj_desc, "FEATURE") && error.IsSetObj_content()) {
287         obj_desc = error.GetObj_content();
288         NStr::ReplaceInPlace(obj_desc, ":", "\t", 0, 1);
289         // Add feature location part of label
290         if (error.IsSetLocation()) {
291             obj_desc += "\t" + error.GetLocation();
292         }
293         if (error.IsSetLocus_tag()) {
294             obj_desc += "\t" + error.GetLocus_tag();
295         } else if (error.IsSetObject() && error.GetObject().GetThisTypeInfo() == CSeq_feat:: GetTypeInfo()) {
296             const CSeq_feat* sf = static_cast<const CSeq_feat*>(&(error.GetObject()));
297             if (sf) {
298                 obj_desc += "\t" + x_GetLocusTag(*sf, scope);
299             }
300         }
301     } else {
302         RemovePrefix(obj_desc, "DESCRIPTOR: ");
303         RemovePrefix(obj_desc, "BioSrc: ");
304         RemoveSuffix(obj_desc, " BIOSEQ: ");
305         RemoveSuffix(obj_desc, " BIOSEQ-SET: ");
306 
307         NStr::ReplaceInPlace(obj_desc, ":", "\t", 0, 1);
308         size_t close_pos = NStr::Find(obj_desc, "]");
309         if (close_pos != string::npos) {
310             obj_desc = obj_desc.substr(0, close_pos);
311             NStr::ReplaceInPlace(obj_desc, "[", "\t");
312         }
313     }
314     string rval = error.GetAccession() + ":" + obj_desc;
315 
316     return rval;
317 }
318 
319 
x_FormatECNumberForSubmitterReport(const CValidErrItem & error,CScope & scope) const320 string CValidErrorFormat::x_FormatECNumberForSubmitterReport(const CValidErrItem& error, CScope& scope) const
321 {
322     string rval;
323     string ec_numbers;
324     string prot_name;
325     string locus_tag;
326 
327     // want: accession number for sequence, ec numbers, locus tag, protein name
328 
329     if (error.GetObject().GetThisTypeInfo() != CSeq_feat::GetTypeInfo()) {
330         return rval;
331     }
332     const CSeq_feat* feat = static_cast<const CSeq_feat*>(&(error.GetObject()));
333     if (!feat) {
334         return rval;
335     }
336 
337     // look for EC number in quals
338     if (feat->IsSetQual()) {
339         ITERATE(CSeq_feat::TQual, it, feat->GetQual()) {
340             if ((*it)->IsSetQual() &&
341                 NStr::EqualNocase((*it)->GetQual(), "EC_number") &&
342                 (*it)->IsSetVal() &&
343                 !NStr::IsBlank((*it)->GetVal())) {
344                 if (!NStr::IsBlank(ec_numbers)) {
345                     ec_numbers += ";";
346                 }
347                 ec_numbers += (*it)->GetVal();
348             }
349         }
350     }
351     // look for EC number in prot-ref
352     if (feat->IsSetData() && feat->GetData().IsProt() &&
353         feat->GetData().GetProt().IsSetEc()) {
354         ITERATE(CProt_ref::TEc, it, feat->GetData().GetProt().GetEc()) {
355             if (!NStr::IsBlank(ec_numbers)) {
356                     ec_numbers += ";";
357             }
358             ec_numbers += *it;
359         }
360     }
361 
362     if (NStr::IsBlank(ec_numbers)) {
363         ec_numbers = "Blank EC number";
364     }
365 
366     // look for protein name
367     if (feat->IsSetData() && feat->GetData().IsProt() &&
368         feat->GetData().GetProt().IsSetName() &&
369         !feat->GetData().GetProt().GetName().empty()) {
370         prot_name = feat->GetData().GetProt().GetName().front();
371     }
372 
373     // get locus tag
374     CConstRef <CSeq_feat> gene = sequence::GetGeneForFeature(*feat, scope);
375     if (gene && gene->GetData().GetGene().IsSetLocus_tag()) {
376         locus_tag = gene->GetData().GetGene().GetLocus_tag();
377     }
378 
379     rval = error.GetAccnver() + "\t" + ec_numbers + "\t" + locus_tag + "\t" + prot_name;
380     return rval;
381 }
382 
383 
s_GetSpecificHostFromBioSource(const CBioSource & biosrc)384 string s_GetSpecificHostFromBioSource(const CBioSource& biosrc)
385 {
386     string rval;
387 
388     if (biosrc.IsSetOrg() &&
389         biosrc.GetOrg().IsSetOrgname() &&
390         biosrc.GetOrg().GetOrgname().IsSetMod()) {
391         ITERATE(COrgName::TMod, it, biosrc.GetOrg().GetOrgname().GetMod()) {
392             if ((*it)->IsSetSubtype() &&
393                 (*it)->GetSubtype() == COrgMod::eSubtype_nat_host &&
394                 (*it)->IsSetSubname() &&
395                 !NStr::IsBlank((*it)->GetSubname())) {
396                 if (!NStr::IsBlank(rval)) {
397                     rval += ";";
398                 }
399                 rval += (*it)->GetSubname();
400             }
401         }
402     }
403     return rval;
404 }
405 
406 
x_FormatBadSpecificHostForSubmitterReport(const CValidErrItem & error) const407 string CValidErrorFormat::x_FormatBadSpecificHostForSubmitterReport(const CValidErrItem& error) const
408 {
409     string rval;
410     string spec_host;
411     const string kAlternateName = "Specific host value is alternate name: ";
412     if (NStr::StartsWith(error.GetMsg(), kAlternateName)) {
413         spec_host = error.GetMsg().substr(kAlternateName.length());
414     } else if (error.GetObject().GetThisTypeInfo() == CSeqdesc::GetTypeInfo()) {
415         const CSeqdesc* desc = static_cast<const CSeqdesc *>(&(error.GetObject()));
416         if (desc && desc->IsSource()) {
417             spec_host = s_GetSpecificHostFromBioSource(desc->GetSource());
418         }
419     } else if (error.GetObject().GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
420         const CSeq_feat* feat = static_cast<const CSeq_feat *>(&(error.GetObject()));
421         if (feat && feat->IsSetData() && feat->GetData().IsBiosrc()) {
422             spec_host = s_GetSpecificHostFromBioSource(feat->GetData().GetBiosrc());
423         }
424     }
425 
426     if (!NStr::IsBlank(spec_host)) {
427         rval = error.GetAccession() + "\t" + spec_host;
428     }
429 
430     return rval;
431 }
432 
433 
s_GetInstCodeFromBioSource(const CBioSource & biosrc)434 string s_GetInstCodeFromBioSource(const CBioSource& biosrc)
435 {
436     string rval;
437 
438     if (biosrc.IsSetOrg() &&
439         biosrc.GetOrg().IsSetOrgname() &&
440         biosrc.GetOrg().GetOrgname().IsSetMod()) {
441         ITERATE(COrgName::TMod, it, biosrc.GetOrg().GetOrgname().GetMod()) {
442             if ((*it)->IsSetSubtype() &&
443                 ((*it)->GetSubtype() == COrgMod::eSubtype_bio_material ||
444                  (*it)->GetSubtype() == COrgMod::eSubtype_culture_collection ||
445                  (*it)->GetSubtype() == COrgMod::eSubtype_specimen_voucher) &&
446                 (*it)->IsSetSubname() &&
447                 !NStr::IsBlank((*it)->GetSubname())) {
448                 size_t pos = NStr::Find((*it)->GetSubname(), ":");
449                 if (pos != string::npos) {
450                     string code = (*it)->GetSubname().substr(0, pos);
451                     if (!NStr::IsBlank(code)) {
452                         if (!NStr::IsBlank(rval)) {
453                             rval += ";";
454                         }
455                         rval += code;
456                     }
457                 }
458             }
459         }
460     }
461     return rval;
462 }
463 
464 
x_FormatBadInstCodeForSubmitterReport(const CValidErrItem & error) const465 string CValidErrorFormat::x_FormatBadInstCodeForSubmitterReport(const CValidErrItem& error) const
466 {
467     string rval;
468 
469     string codes;
470     if (error.GetObject().GetThisTypeInfo() == CSeqdesc::GetTypeInfo()) {
471         const CSeqdesc* desc = static_cast<const CSeqdesc *>(&(error.GetObject()));
472         if (desc && desc->IsSource()) {
473             codes = s_GetInstCodeFromBioSource(desc->GetSource());
474         }
475     }
476     else if (error.GetObject().GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
477         const CSeq_feat* feat = static_cast<const CSeq_feat *>(&(error.GetObject()));
478         if (feat && feat->IsSetData() && feat->GetData().IsBiosrc()) {
479             codes = s_GetInstCodeFromBioSource(feat->GetData().GetBiosrc());
480         }
481     }
482 
483     if (!NStr::IsBlank(codes)) {
484         rval = error.GetAccession() + "\t" + codes;
485     }
486 
487     return rval;
488 }
489 
490 
FormatForSubmitterReport(const CValidError & errors,CScope & scope,CValidErrItem::TErrIndex err_code) const491 string CValidErrorFormat::FormatForSubmitterReport(const CValidError& errors, CScope& scope, CValidErrItem::TErrIndex err_code) const
492 {
493     string rval;
494     for ( CValidError_CI vit(errors); vit; ++vit) {
495         if (err_code == vit->GetErrIndex()) {
496             string this_val = FormatForSubmitterReport(*vit, scope);
497             if (!NStr::IsBlank(this_val)) {
498                 if (NStr::IsBlank(rval)) {
499                     rval += GetSubmitterFormatErrorGroupTitle(err_code) + "\n";
500                 }
501                 rval += this_val + "\n";
502             }
503         }
504     }
505     return rval;
506 }
507 
508 
FormatCategoryForSubmitterReport(const CValidError & errors,CScope & scope,ESubmitterFormatErrorGroup grp) const509 string CValidErrorFormat::FormatCategoryForSubmitterReport
510     (const CValidError& errors, CScope& scope, ESubmitterFormatErrorGroup grp) const
511 {
512     string rval;
513     for ( CValidError_CI vit(errors); vit; ++vit) {
514         CValidErrItem::TErrIndex err_code = vit->GetErrIndex();
515         if (GetSubmitterFormatErrorGroup(err_code) == grp) {
516             string this_val = FormatForSubmitterReport(*vit, scope);
517             if (!NStr::IsBlank(this_val)) {
518                 if (NStr::IsBlank(rval)) {
519                     rval += GetSubmitterFormatErrorGroupTitle(err_code) + "\n";
520                 }
521                 rval += this_val + "\n";
522             }
523         }
524     }
525     return rval;
526 }
527 
528 
x_FormatLatLonCountryForSubmitterReport(const CValidErrItem & error) const529 string CValidErrorFormat::x_FormatLatLonCountryForSubmitterReport(const CValidErrItem& error) const
530 {
531     string rval = error.GetAccession() + ":" + error.GetMsg();
532     return rval;
533 }
534 
535 
GetListOfErrorCodes(const CValidError & errors) const536 vector<unsigned int> CValidErrorFormat::GetListOfErrorCodes(const CValidError& errors) const
537 {
538     vector<unsigned int> list;
539 
540     for ( CValidError_CI vit(errors); vit; ++vit) {
541         list.push_back(vit->GetErrIndex());
542     }
543     sort(list.begin(), list.end());
544     list.erase(unique(list.begin(), list.end()), list.end());
545     return list;
546 }
547 
548 
FormatCompleteSubmitterReport(const CValidError & errors,CScope & scope) const549 vector<string> CValidErrorFormat::FormatCompleteSubmitterReport(const CValidError& errors, CScope& scope) const
550 {
551     vector<string> list;
552 
553     // first, do special categories
554     for (unsigned int t = eSubmitterFormatErrorGroup_ConsensusSplice; t < eSubmitterFormatErrorGroup_Default; ++t) {
555         string this_val = FormatCategoryForSubmitterReport(errors, scope, (ESubmitterFormatErrorGroup)t);
556         if (!NStr::IsBlank(this_val)) {
557             list.push_back(this_val);
558         }
559     }
560 
561     // now do errors not in special categories
562     vector<unsigned int> codes = GetListOfErrorCodes(errors);
563     ITERATE(vector<unsigned int>, it, codes) {
564         if (GetSubmitterFormatErrorGroup(*it) == eSubmitterFormatErrorGroup_Default) {
565             string this_val = FormatForSubmitterReport(errors, scope, *it);
566             if (!NStr::IsBlank(this_val)) {
567                 list.push_back(this_val);
568             }
569         }
570     }
571     return list;
572 }
573 //LCOV_EXCL_STOP
574 
575 
s_GetFeatureIdLabel(const CObject_id & object_id)576 static string s_GetFeatureIdLabel (const CObject_id& object_id)
577 {
578     string feature_id;
579     if (object_id.IsId()) {
580         feature_id = NStr::IntToString(object_id.GetId());
581     } else if (object_id.IsStr()) {
582         feature_id = object_id.GetStr();
583     }
584     return feature_id;
585 }
586 
587 
GetFeatureIdLabel(const CFeat_id & feat_id)588 string CValidErrorFormat::GetFeatureIdLabel (const CFeat_id& feat_id)
589 {
590     string feature_id;
591     if (feat_id.IsLocal()) {
592         feature_id = s_GetFeatureIdLabel(feat_id.GetLocal());
593     } else if (feat_id.IsGeneral()) {
594         if (feat_id.GetGeneral().IsSetDb()) {
595             feature_id += feat_id.GetGeneral().GetDb();
596         }
597         feature_id += ":";
598         if (feat_id.GetGeneral().IsSetTag()) {
599             feature_id += s_GetFeatureIdLabel (feat_id.GetGeneral().GetTag());
600         }
601     }
602     return feature_id;
603 }
604 
605 
GetFeatureIdLabel(const CSeq_feat & ft)606 string CValidErrorFormat::GetFeatureIdLabel(const CSeq_feat& ft)
607 {
608     string feature_id;
609     if (ft.IsSetId()) {
610         feature_id = CValidErrorFormat::GetFeatureIdLabel(ft.GetId());
611     } else if (ft.IsSetIds()) {
612         ITERATE(CSeq_feat::TIds, id_it, ft.GetIds()) {
613             feature_id = CValidErrorFormat::GetFeatureIdLabel((**id_it));
614             if (!NStr::IsBlank(feature_id)) {
615                 break;
616             }
617         }
618     }
619     return feature_id;
620 }
621 
622 
s_FixBioseqLabelProblems(string & str)623 static void s_FixBioseqLabelProblems (string& str)
624 {
625     size_t pos = NStr::Find(str, ",");
626     if (pos != string::npos && str.c_str()[pos + 1] != 0 && str.c_str()[pos + 1] != ' ') {
627         str = str.substr(0, pos + 1) + " " + str.substr(pos + 1);
628     }
629     pos = NStr::Find(str, "=");
630     if (pos != string::npos && str.c_str()[pos + 1] != 0 && str.c_str()[pos + 1] != ' ') {
631         str = str.substr(0, pos + 1) + " " + str.substr(pos + 1);
632     }
633 }
634 
635 
636 
s_GetOrgRefContentLabel(const COrg_ref & org)637 static string s_GetOrgRefContentLabel (const COrg_ref& org)
638 {
639     string content;
640     if (org.IsSetTaxname()) {
641         content = org.GetTaxname();
642     } else if (org.IsSetCommon()) {
643         content = org.GetCommon();
644     } else if (org.IsSetDb() && !org.GetDb().empty()) {
645         org.GetDb().front()->GetLabel(&content);
646     }
647     return content;
648 }
649 
650 
s_GetBioSourceContentLabel(const CBioSource & bsrc)651 static string s_GetBioSourceContentLabel (const CBioSource& bsrc)
652 {
653     string content;
654     if (bsrc.IsSetOrg()) {
655         content = s_GetOrgRefContentLabel(bsrc.GetOrg());
656     }
657     return content;
658 }
659 
660 
s_GetFeatureContentLabelExtras(const CSeq_feat & feat)661 static string s_GetFeatureContentLabelExtras (const CSeq_feat& feat)
662 {
663     string tlabel;
664 
665     // Put Seq-feat qual into label
666     if (feat.IsSetQual()) {
667         string prefix("/");
668         ITERATE(CSeq_feat::TQual, it, feat.GetQual()) {
669             tlabel += prefix + (**it).GetQual();
670             prefix = " ";
671             if (!(**it).GetVal().empty()) {
672                 tlabel += "=" + (**it).GetVal();
673             }
674         }
675     }
676 
677     // Put Seq-feat comment into label
678     if (feat.IsSetComment()) {
679         if (tlabel.empty()) {
680             tlabel = feat.GetComment();
681         } else {
682             tlabel += "; " + feat.GetComment();
683         }
684     }
685     return tlabel;
686 }
687 
688 
s_GetCdregionContentLabel(const CSeq_feat & feat,CRef<CScope> scope)689 static string s_GetCdregionContentLabel (const CSeq_feat& feat, CRef<CScope> scope)
690 {
691     string content;
692 
693     // Check that feature data is Cdregion
694     if (!feat.GetData().IsCdregion()) {
695         return content;
696     }
697 
698     const CGene_ref* gref = 0;
699     const CProt_ref* pref = 0;
700 
701     // Look for CProt_ref object to create a label from
702     if (feat.IsSetXref()) {
703         ITERATE ( CSeq_feat::TXref, it, feat.GetXref()) {
704             const CSeqFeatXref& xref = **it;
705             if ( !xref.IsSetData() ) {
706                 continue;
707             }
708 
709             switch (xref.GetData().Which()) {
710             case CSeqFeatData::e_Prot:
711                 pref = &xref.GetData().GetProt();
712                 break;
713             case CSeqFeatData::e_Gene:
714                 gref = &xref.GetData().GetGene();
715                 break;
716             default:
717                 break;
718             }
719         }
720     }
721 
722     // Try and create a label from a CProt_ref in CSeqFeatXref in feature
723     if (pref) {
724         pref->GetLabel(&content);
725         return content;
726     }
727 
728     // Try and create a label from a CProt_ref in the feat product and
729     // return if found
730     if (feat.IsSetProduct()  &&  scope) {
731         try {
732             const CSeq_id& id = GetId(feat.GetProduct(), scope);
733             CBioseq_Handle hnd = scope->GetBioseqHandle(id);
734             if (hnd) {
735                 const CBioseq& seq = *hnd.GetCompleteBioseq();
736 
737                 // Now look for a CProt_ref feature in seq and
738                 // if found call GetLabel() on the CProt_ref
739                 CTypeConstIterator<CSeqFeatData> it = ConstBegin(seq);
740                 for (;it; ++it) {
741                     if (it->IsProt()) {
742                         it->GetProt().GetLabel(&content);
743                         return content;
744                     }
745                 }
746             }
747         } catch (CObjmgrUtilException&) {}
748     }
749 
750     // Try and create a label from a CGene_ref in CSeqFeatXref in feature
751     if (gref) {
752         gref->GetLabel(&content);
753     }
754 
755     if (NStr::IsBlank(content)) {
756         content = s_GetFeatureContentLabelExtras(feat);
757     }
758 
759     return content;
760 }
761 
762 
GetFeatureContentLabel(const CSeq_feat & feat,CRef<CScope> scope)763 string CValidErrorFormat::GetFeatureContentLabel (const CSeq_feat& feat, CRef<CScope> scope)
764 {
765     string content_label;
766 
767     switch (feat.GetData().Which()) {
768         case CSeqFeatData::e_Pub:
769             content_label = "Cit: ";
770             feat.GetData().GetPub().GetPub().GetLabel(&content_label);
771             break;
772         case CSeqFeatData::e_Biosrc:
773             content_label = "Src: " + s_GetBioSourceContentLabel (feat.GetData().GetBiosrc());
774             break;
775         case CSeqFeatData::e_Imp:
776             {
777                 feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
778                 if (feat.GetData().GetImp().IsSetKey()) {
779                     string key = feat.GetData().GetImp().GetKey();
780                     string tmp = "[" + key + "]";
781                     if (NStr::StartsWith(content_label, tmp)) {
782                         content_label = key + content_label.substr(tmp.length());
783                     }
784                 }
785             }
786             break;
787         case CSeqFeatData::e_Rna:
788             feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
789             if (feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA
790                 && NStr::Equal(content_label, "tRNA: tRNA")) {
791                 content_label = "tRNA: ";
792             }
793             break;
794         case CSeqFeatData::e_Cdregion:
795             content_label = "CDS: " + s_GetCdregionContentLabel(feat, scope);
796             break;
797         case CSeqFeatData::e_Prot:
798             feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
799             if (feat.GetData().GetProt().IsSetProcessed()) {
800                 switch (feat.GetData().GetProt().GetProcessed()) {
801                     case CProt_ref::eProcessed_mature:
802                         content_label = "mat_peptide: " + content_label.substr(6);
803                         break;
804                     case CProt_ref::eProcessed_signal_peptide:
805                         content_label = "sig_peptide: " + content_label.substr(6);
806                         break;
807                     case CProt_ref::eProcessed_transit_peptide:
808                         content_label = "trans_peptide: " + content_label.substr(6);
809                         break;
810                     default:
811                         break;
812                 }
813             }
814             break;
815         default:
816             feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
817             break;
818     }
819     return content_label;
820 }
821 
822 
GetFeatureBioseqLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)823 string CValidErrorFormat::GetFeatureBioseqLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
824 {
825     string desc;
826     // Append label for bioseq of feature location
827     if (!suppress_context && scope) {
828         bool find_failed = false;
829         try {
830             CBioseq_Handle hnd;
831             try {
832                 hnd = scope->GetBioseqHandle(ft.GetLocation());
833             } catch (CException&) {
834                 CSeq_loc_CI li(ft.GetLocation());
835                 while (li && !hnd) {
836                     hnd = scope->GetBioseqHandle(li.GetSeq_id());
837                     ++li;
838                 }
839             }
840             if (hnd) {
841                 desc += CValidErrorFormat::GetBioseqLabel(hnd);
842             }
843         } catch (CObjMgrException& ex) {
844             if (ex.GetErrCode() == CObjMgrException::eFindFailed) {
845                 find_failed = true;
846             }
847         } catch (CException) {
848         } catch (std::exception) {
849         };
850         if (find_failed) {
851             try {
852                 CSeq_loc_CI li(ft.GetLocation());
853                 CBioseq_Handle hnd = scope->GetBioseqHandle(li.GetSeq_id());
854                 if (hnd) {
855                     desc += CValidErrorFormat::GetBioseqLabel(hnd);
856                 }
857 
858             } catch (CException) {
859             } catch (std::exception) {
860             };
861         }
862     }
863     return desc;
864 }
865 
866 
GetFeatureProductLocLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)867 string CValidErrorFormat::GetFeatureProductLocLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
868 {
869     string desc;
870     // Append label for product of feature
871     if (ft.IsSetProduct() && scope) {
872         string loc_label;
873         if (suppress_context) {
874             CSeq_loc loc;
875             loc.Assign(ft.GetProduct());
876             ChangeSeqLocId(&loc, false, scope);
877             loc_label = GetValidatorLocationLabel(loc, *scope);
878         } else {
879             loc_label = GetValidatorLocationLabel(ft.GetProduct(), *scope);
880         }
881         if (loc_label.size() > 800) {
882             loc_label.replace(797, NPOS, "...");
883         }
884         if (!loc_label.empty()) {
885             desc += "[";
886             desc += loc_label;
887             desc += "]";
888         }
889     }
890     return desc;
891 }
892 
893 
GetFeatureLocationLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)894 string CValidErrorFormat::GetFeatureLocationLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
895 {
896     string loc_label;
897     // Add feature location part of label
898     if (ft.IsSetLocation() && scope) {
899         if (suppress_context) {
900             CSeq_loc loc;
901             loc.Assign(ft.GetLocation());
902             ChangeSeqLocId(&loc, false, scope);
903             loc_label = GetValidatorLocationLabel(loc, *scope);
904         } else {
905             loc_label = GetValidatorLocationLabel(ft.GetLocation(), *scope);
906         }
907         if (loc_label.size() > 800) {
908             loc_label.replace(795, NPOS, "...");
909         }
910     }
911     return loc_label;
912 }
913 
914 //LCOV_EXCL_START
915 //not used
GetFeatureLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)916 string CValidErrorFormat::GetFeatureLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
917 {
918     // Add feature part of label
919     string desc = "FEATURE: ";
920     string content_label = CValidErrorFormat::GetFeatureContentLabel(ft, scope);
921     desc += content_label;
922 
923     // Add feature ID part of label (if present)
924     string feature_id = GetFeatureIdLabel(ft);
925     if (!NStr::IsBlank(feature_id)) {
926         desc += " <" + feature_id + "> ";
927     }
928 
929     // Add feature location part of label
930     string loc_label = GetFeatureLocationLabel(ft, scope, suppress_context);
931     if (!NStr::IsBlank(loc_label)) {
932         desc += " [" + loc_label + "]";
933     }
934 
935     // Append label for bioseq of feature location
936     string bioseq_label = GetFeatureBioseqLabel(ft, scope, suppress_context);
937     if (!NStr::IsBlank(bioseq_label)) {
938         desc += bioseq_label;
939     }
940 
941     // Append label for product of feature
942     string product_label = GetFeatureProductLocLabel(ft, scope, suppress_context);
943     if (!NStr::IsBlank(product_label)) {
944         desc += product_label;
945     }
946     return desc;
947 }
948 //LCOV_EXCL_STOP
949 
950 
GetDescriptorContent(const CSeqdesc & ds)951 string CValidErrorFormat::GetDescriptorContent (const CSeqdesc& ds)
952 {
953     string content;
954 
955     switch (ds.Which()) {
956         case CSeqdesc::e_Pub:
957             content = "Pub: ";
958             ds.GetPub().GetPub().GetLabel(&content);
959             break;
960         case CSeqdesc::e_Source:
961             content = "BioSource: " + s_GetBioSourceContentLabel(ds.GetSource());
962             break;
963         case CSeqdesc::e_Modif:
964             ds.GetLabel(&content, CSeqdesc::eBoth);
965             if (NStr::StartsWith(content, "modif: ,")) {
966                 content = "Modifier: " + content.substr(8);
967             }
968             break;
969         case CSeqdesc::e_Molinfo:
970             ds.GetLabel(&content, CSeqdesc::eBoth);
971             if (NStr::StartsWith(content, "molinfo: ,")) {
972                 content = "molInfo: " + content.substr(10);
973             }
974             break;
975         case CSeqdesc::e_Comment:
976             ds.GetLabel(&content, CSeqdesc::eBoth);
977             if (NStr::StartsWith(content, "comment: ") && NStr::IsBlank(content.substr(9))) {
978                 content = "comment: ";
979             }
980             break;
981         case CSeqdesc::e_User:
982             content = "UserObj: ";
983             if (ds.GetUser().IsSetClass()) {
984                 content += ds.GetUser().GetClass();
985             } else if (ds.GetUser().IsSetType() && ds.GetUser().GetType().IsStr()) {
986                 content += ds.GetUser().GetType().GetStr();
987             }
988             break;
989         default:
990             ds.GetLabel(&content, CSeqdesc::eBoth);
991             break;
992     }
993     // fix descriptor type names
994     string first = content.substr(0, 1);
995     NStr::ToUpper(first);
996     content = first + content.substr(1);
997     size_t colon_pos = NStr::Find(content, ":");
998     if (colon_pos != string::npos) {
999         size_t dash_pos = NStr::Find(content.substr(0, colon_pos), "-");
1000         if (dash_pos != string::npos) {
1001             string after_dash = content.substr(dash_pos + 1, 1);
1002             NStr::ToUpper (after_dash);
1003             content = content.substr(0, dash_pos) + after_dash + content.substr(dash_pos + 2);
1004         }
1005     }
1006     if (NStr::StartsWith(content, "BioSource:")) {
1007         content = "BioSrc:" + content.substr(10);
1008     } else if (NStr::StartsWith(content, "Modif:")) {
1009         content = "Modifier:" + content.substr(6);
1010     } else if (NStr::StartsWith(content, "Embl:")) {
1011         content = "EMBL:" + content.substr(5);
1012     } else if (NStr::StartsWith(content, "Pir:")) {
1013         content = "PIR:" + content.substr(4);
1014     }
1015     return content;
1016 }
1017 
1018 
GetDescriptorLabel(const CSeqdesc & ds,const CSeq_entry & ctx,CRef<CScope> scope,bool suppress_context)1019 string CValidErrorFormat::GetDescriptorLabel(const CSeqdesc& ds, const CSeq_entry& ctx, CRef<CScope> scope, bool suppress_context)
1020 {
1021     string desc("DESCRIPTOR: ");
1022 
1023     string content = CValidErrorFormat::GetDescriptorContent (ds);
1024 
1025     desc += content;
1026 
1027     desc += " ";
1028     if (ctx.IsSeq()) {
1029         AppendBioseqLabel(desc, ctx.GetSeq(), suppress_context);
1030     } else {
1031         desc += CValidErrorFormat::GetBioseqSetLabel(ctx.GetSet(), scope, suppress_context);
1032     }
1033    return desc;
1034 }
1035 
1036 
GetBioseqLabel(CBioseq_Handle bh)1037 string CValidErrorFormat::GetBioseqLabel (CBioseq_Handle bh)
1038 {
1039     string desc;
1040 
1041     CBioseq_Handle::TBioseqCore bc = bh.GetBioseqCore();
1042     desc += " [";
1043     string bc_label;
1044     bc->GetLabel(&bc_label, CBioseq::eBoth);
1045     s_FixBioseqLabelProblems(bc_label);
1046     desc += bc_label;
1047     desc += "]";
1048     return desc;
1049 }
1050 
1051 
GetBioseqSetLabel(const CBioseq_set & st,CRef<CScope> scope,bool suppress_context)1052 string CValidErrorFormat::GetBioseqSetLabel(const CBioseq_set& st, CRef<CScope> scope, bool suppress_context)
1053 {
1054     string str;
1055     // GetLabel for CBioseq_set does not follow C Toolkit conventions
1056     // AND is a horrible performance hit for sets with lots of sequences
1057 
1058     const CBioseq* best = 0;
1059     CTypeConstIterator<CBioseq> si(ConstBegin(st));
1060     if (si) {
1061         best = &(*si);
1062     }
1063     // Add content to label.
1064     if (!best) {
1065         str += "BIOSEQ-SET: ";
1066         if (!suppress_context && st.IsSetClass()) {
1067             const CEnumeratedTypeValues* tv =
1068                 CBioseq_set::GetTypeInfo_enum_EClass();
1069             const string& cn = tv->FindName(st.GetClass(), true);
1070             str += cn;
1071             str += ": ";
1072         }
1073 
1074         str += "(No Bioseqs)";
1075     } else if (st.IsSetClass()) {
1076         str += "BIOSEQ-SET: ";
1077         if (!suppress_context) {
1078             const CEnumeratedTypeValues* tv =
1079                 CBioseq_set::GetTypeInfo_enum_EClass();
1080             const string& cn = tv->FindName(st.GetClass(), true);
1081             str += cn;
1082             str += ": ";
1083         }
1084         if (scope) {
1085             string content;
1086             int version = 0;
1087             const string& accn = GetAccessionFromObjects(&st, NULL, *scope, &version);
1088             content += accn;
1089             // best->GetLabel(&content, CBioseq::eContent, supress_context);
1090             // fix problems with label
1091             s_FixBioseqLabelProblems(content);
1092             str += content;
1093         }
1094     } else {
1095         AppendBioseqLabel(str, *best, suppress_context);
1096     }
1097     return str;
1098 }
1099 
1100 
1101 //LCOV_EXCL_START
1102 //not used
GetObjectLabel(const CObject & obj,const CSeq_entry & ctx,CRef<CScope> scope,bool suppress_context)1103 string CValidErrorFormat::GetObjectLabel(const CObject& obj, const CSeq_entry& ctx, CRef<CScope> scope, bool suppress_context)
1104 {
1105     string label = "Unknown object";
1106 
1107     const CSeq_feat* ft = dynamic_cast<const CSeq_feat*>(&obj);
1108     const CSeqdesc* ds = dynamic_cast<const CSeqdesc*>(&obj);
1109     const CBioseq* b = dynamic_cast<const CBioseq*>(&obj);
1110     const CBioseq_set* set = dynamic_cast<const CBioseq_set*>(&obj);
1111 
1112     if (ft) {
1113         label = GetFeatureLabel(*ft, scope, suppress_context);
1114     } else if (ds) {
1115         label = GetDescriptorLabel(*ds, ctx, scope, suppress_context);
1116     } else if (b) {
1117         label = GetBioseqLabel(scope->GetBioseqHandle(*b));
1118     } else if (set) {
1119         label = GetBioseqSetLabel(*set, scope, suppress_context);
1120     }
1121     return label;
1122 }
1123 //LCOV_EXCL_STOP
1124 
1125 
1126 //LCOV_EXCL_START
1127 //added for GPIPE, not used for asnvalidate
1128 const string kSuppressFieldLabel = "Suppress";
1129 
s_IsSuppressField(const CUser_field & field)1130 bool s_IsSuppressField (const CUser_field& field)
1131 {
1132     if (field.IsSetLabel() &&
1133         field.GetLabel().IsStr() &&
1134         NStr::EqualNocase(field.GetLabel().GetStr(), kSuppressFieldLabel)) {
1135         return true;
1136     } else {
1137         return false;
1138     }
1139 }
1140 
1141 
AddSuppression(CUser_object & user,unsigned int error_code)1142 void CValidErrorFormat::AddSuppression(CUser_object& user, unsigned int error_code)
1143 {
1144     bool found = false;
1145     if (user.IsSetData()) {
1146         NON_CONST_ITERATE(CUser_object::TData, it, user.SetData()) {
1147             if (s_IsSuppressField(**it)) {
1148                 if ((*it)->IsSetData()) {
1149                     if ((*it)->GetData().IsInt()) {
1150                         unsigned int old_val = (*it)->GetData().GetInt();
1151                         if (old_val == error_code) {
1152                             // do nothing, already there
1153                         } else {
1154                             (*it)->SetData().SetInts().push_back(old_val);
1155                             (*it)->SetData().SetInts().push_back(error_code);
1156                         }
1157                         found = true;
1158                         break;
1159                     } else if ((*it)->GetData().IsInts()) {
1160                         ITERATE(CUser_field::TData::TInts, ii, (*it)->GetData().GetInts()) {
1161                             if (*ii == error_code) {
1162                                 found = true;
1163                                 break;
1164                             }
1165                         }
1166                         if (!found) {
1167                             (*it)->SetData().SetInts().push_back(error_code);
1168                             found = true;
1169                         }
1170                         break;
1171                     }
1172                 }
1173             }
1174         }
1175     }
1176     if (!found) {
1177         CRef<CUser_field> field(new CUser_field());
1178         field->SetLabel().SetStr(kSuppressFieldLabel);
1179         field->SetData().SetInts().push_back(error_code);
1180         user.SetData().push_back(field);
1181     }
1182 }
1183 
1184 
SetSuppressionRules(const CUser_object & user,CValidError & errors)1185 void CValidErrorFormat::SetSuppressionRules(const CUser_object& user, CValidError& errors)
1186 {
1187     if (!user.IsSetData()) {
1188         return;
1189     }
1190     ITERATE(CUser_object::TData, it, user.GetData()) {
1191         if ((*it)->IsSetData() && s_IsSuppressField(**it)) {
1192             if ((*it)->GetData().IsInt()) {
1193                 errors.SuppressError((*it)->GetData().GetInt());
1194             } else if ((*it)->GetData().IsInts()) {
1195                 ITERATE(CUser_field::TData::TInts, ii, (*it)->GetData().GetInts()) {
1196                     errors.SuppressError(*ii);
1197                 }
1198             } else if ((*it)->GetData().IsStr()) {
1199                 unsigned int ec = CValidErrItem::ConvertToErrCode((*it)->GetData().GetStr());
1200                 if (ec != eErr_MAX) {
1201                     errors.SuppressError(ec);
1202                 }
1203             } else if ((*it)->GetData().IsStrs()) {
1204                 ITERATE(CUser_field::TData::TStrs, si, (*it)->GetData().GetStrs()) {
1205                     unsigned int ec = CValidErrItem::ConvertToErrCode(*si);
1206                     if (ec != eErr_MAX) {
1207                         errors.SuppressError(ec);
1208                     }
1209                 }
1210             }
1211         }
1212     }
1213 }
1214 
1215 
SetSuppressionRules(const CSeq_entry & se,CValidError & errors)1216 void CValidErrorFormat::SetSuppressionRules(const CSeq_entry& se, CValidError& errors)
1217 {
1218     if (se.IsSeq()) {
1219         SetSuppressionRules(se.GetSeq(), errors);
1220     } else if (se.IsSet()) {
1221         const CBioseq_set& set = se.GetSet();
1222         if (set.IsSetDescr()) {
1223             ITERATE(CBioseq_set::TDescr::Tdata, it, set.GetDescr().Get()) {
1224                 if ((*it)->IsUser() &&
1225                     (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_ValidationSuppression) {
1226                     SetSuppressionRules((*it)->GetUser(), errors);
1227                 }
1228             }
1229         }
1230         if (set.IsSetSeq_set()) {
1231             ITERATE(CBioseq_set::TSeq_set, it, set.GetSeq_set()) {
1232                 SetSuppressionRules(**it, errors);
1233             }
1234         }
1235     }
1236 }
1237 
1238 
SetSuppressionRules(const CSeq_entry_Handle & se,CValidError & errors)1239 void CValidErrorFormat::SetSuppressionRules(const CSeq_entry_Handle& se, CValidError& errors)
1240 {
1241     SetSuppressionRules(*(se.GetCompleteSeq_entry()), errors);
1242 }
1243 
1244 
SetSuppressionRules(const CSeq_submit & ss,CValidError & errors)1245 void CValidErrorFormat::SetSuppressionRules(const CSeq_submit& ss, CValidError& errors)
1246 {
1247     if (ss.IsEntrys()) {
1248         ITERATE(CSeq_submit::TData::TEntrys, it, ss.GetData().GetEntrys()) {
1249             SetSuppressionRules(**it, errors);
1250         }
1251     }
1252 }
1253 
1254 
SetSuppressionRules(const CBioseq & seq,CValidError & errors)1255 void CValidErrorFormat::SetSuppressionRules(const CBioseq& seq, CValidError& errors)
1256 {
1257     if (seq.IsSetDescr()) {
1258         ITERATE(CBioseq::TDescr::Tdata, it, seq.GetDescr().Get()) {
1259             if ((*it)->IsUser() &&
1260                 (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_ValidationSuppression) {
1261                 SetSuppressionRules((*it)->GetUser(), errors);
1262             }
1263         }
1264     }
1265 }
1266 //LCOV_EXCL_STOP
1267 
1268 
1269 //LCOV_EXCL_START
1270 //not used by asnvalidate but may be useful for other clients of validator library
AddLocusTags(CValidError & errors,CScope & scope)1271 void CValidErrorFormat::AddLocusTags(CValidError& errors, CScope& scope)
1272 {
1273     for (auto it : errors.SetErrs()) {
1274         if (it->IsSetLocus_tag()) {
1275             continue;
1276         }
1277         if (it->IsSetObjectType() &&
1278             it->GetObjectType() == CValidErrItem::eObjectType_seqfeat &&
1279             it->IsSetObject() &&
1280             it->GetObject().GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
1281             const CSeq_feat* sf = static_cast<const CSeq_feat *>(&(it->GetObject()));
1282             if (sf && sf->IsSetData()) {
1283                 it->SetLocus_tag(x_GetLocusTag(*sf, scope));
1284             }
1285         }
1286     }
1287 }
1288 
1289 
x_GetLocusTag(const CSeq_feat & sf,CScope & scope)1290 const string& CValidErrorFormat::x_GetLocusTag(const CSeq_feat& sf, CScope& scope)
1291 {
1292     const string* rval = &kEmptyStr;
1293 
1294     if (sf.GetData().IsGene()) {
1295         if (sf.GetData().GetGene().IsSetLocus_tag()) {
1296             rval = &sf.GetData().GetGene().GetLocus_tag();
1297         }
1298     } else {
1299         const CGene_ref* g = sf.GetGeneXref();
1300         if (g && g->IsSetLocus_tag()) {
1301             rval = &g->GetLocus_tag();
1302         } else {
1303             CConstRef<CSeq_feat> gene = sequence::GetGeneForFeature(sf, scope);
1304             if (gene && gene->GetData().GetGene().IsSetLocus_tag()) {
1305                 rval = &gene->GetData().GetGene().GetLocus_tag();
1306             }
1307         }
1308     }
1309     return *rval;
1310 }
1311 //LCOV_EXCL_STOP
1312 
1313 
1314 END_SCOPE(validator)
1315 END_SCOPE(objects)
1316 END_NCBI_SCOPE
1317