1 /* $Id: validerror_format.cpp 632625 2021-06-03 17:38:33Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko.......
27 *
28 * File Description:
29 * Validates CSeq_entries and CSeq_submits
30 *
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <serial/serialbase.hpp>
35 #include <objects/submit/Seq_submit.hpp>
36 #include <objects/general/Dbtag.hpp>
37 #include <objects/pub/Pub_equiv.hpp>
38 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seq/Pubdesc.hpp>
40 #include <objects/seq/Seqdesc.hpp>
41 #include <objects/seqfeat/Feat_id.hpp>
42 #include <objects/seqfeat/Imp_feat.hpp>
43 #include <objects/seqfeat/Org_ref.hpp>
44 #include <objects/seqfeat/OrgName.hpp>
45 #include <objects/seqfeat/OrgMod.hpp>
46 #include <objects/general/User_object.hpp>
47 #include <objmgr/object_manager.hpp>
48 #include <objmgr/util/sequence.hpp>
49 #include <objtools/validator/validerror_format.hpp>
50 //#include <objtools/validator/validatorp.hpp>
51 #include <objtools/validator/utilities.hpp>
52 #include <util/static_map.hpp>
53
54
55
56 BEGIN_NCBI_SCOPE
57 BEGIN_SCOPE(objects)
58 BEGIN_SCOPE(validator)
59 USING_SCOPE(sequence);
60
61
62 // *********************** CValidErrorFormat implementation **********************
63
64
65 //LCOV_EXCL_START
66 //used by Genome Workbench to create submitter report,
67 //not used by asnvalidate
CValidErrorFormat(CObjectManager & objmgr)68 CValidErrorFormat::CValidErrorFormat(CObjectManager& objmgr) :
69 m_ObjMgr(&objmgr)
70 {
71 }
72
73
~CValidErrorFormat(void)74 CValidErrorFormat::~CValidErrorFormat(void)
75 {
76 }
77
78
GetSubmitterFormatErrorGroup(CValidErrItem::TErrIndex err_code) const79 ESubmitterFormatErrorGroup CValidErrorFormat::GetSubmitterFormatErrorGroup(CValidErrItem::TErrIndex err_code) const
80 {
81 ESubmitterFormatErrorGroup rval = eSubmitterFormatErrorGroup_Default;
82
83 switch(err_code) {
84 case eErr_SEQ_FEAT_NotSpliceConsensus:
85 case eErr_SEQ_FEAT_NotSpliceConsensusDonor:
86 case eErr_SEQ_FEAT_NotSpliceConsensusAcceptor:
87 case eErr_SEQ_FEAT_RareSpliceConsensusDonor:
88 case eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron:
89 case eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron:
90 rval = eSubmitterFormatErrorGroup_ConsensusSplice;
91 break;
92 case eErr_SEQ_FEAT_BadEcNumberFormat:
93 rval = eSubmitterFormatErrorGroup_BadEcNumberFormat;
94 break;
95 case eErr_SEQ_FEAT_BadEcNumberValue:
96 case eErr_SEQ_FEAT_DeletedEcNumber:
97 case eErr_SEQ_FEAT_ReplacedEcNumber:
98 case eErr_SEQ_FEAT_SplitEcNumber:
99 rval = eSubmitterFormatErrorGroup_BadEcNumberValue;
100 break;
101 case eErr_SEQ_FEAT_EcNumberProblem:
102 rval = eSubmitterFormatErrorGroup_BadEcNumberProblem;
103 break;
104 case eErr_SEQ_DESCR_BadSpecificHost:
105 rval = eSubmitterFormatErrorGroup_BadSpecificHost;
106 break;
107 case eErr_SEQ_DESCR_BadInstitutionCode:
108 rval = eSubmitterFormatErrorGroup_BadInstitutionCode;
109 break;
110 case eErr_SEQ_DESCR_LatLonCountry:
111 case eErr_SEQ_DESCR_LatLonWater:
112 rval = eSubmitterFormatErrorGroup_LatLonCountry;
113 break;
114 default:
115 break;
116 }
117 return rval;
118 }
119
120
GetSubmitterFormatErrorGroupTitle(CValidErrItem::TErrIndex err_code) const121 string CValidErrorFormat::GetSubmitterFormatErrorGroupTitle(CValidErrItem::TErrIndex err_code) const
122 {
123 string rval;
124 switch(err_code) {
125 case eErr_SEQ_FEAT_NotSpliceConsensus:
126 case eErr_SEQ_FEAT_NotSpliceConsensusDonor:
127 case eErr_SEQ_FEAT_NotSpliceConsensusAcceptor:
128 case eErr_SEQ_FEAT_RareSpliceConsensusDonor:
129 rval = "Not Splice Consensus";
130 break;
131 case eErr_SEQ_FEAT_BadEcNumberFormat:
132 rval = "EC Number Format";
133 break;
134 case eErr_SEQ_FEAT_BadEcNumberValue:
135 case eErr_SEQ_FEAT_DeletedEcNumber:
136 case eErr_SEQ_FEAT_ReplacedEcNumber:
137 case eErr_SEQ_FEAT_SplitEcNumber:
138 rval = "EC Number Value";
139 break;
140 case eErr_SEQ_FEAT_EcNumberProblem:
141 rval = "EC Number Problem";
142 break;
143 case eErr_SEQ_DESCR_BadSpecificHost:
144 rval = "Bad Specific-host Values";
145 break;
146 case eErr_SEQ_DESCR_BadInstitutionCode:
147 rval = "Bad Institution Codes";
148 break;
149 case eErr_SEQ_DESCR_LatLonCountry:
150 case eErr_SEQ_DESCR_LatLonWater:
151 rval = "LatLonCountry Errors";
152 break;
153 default:
154 rval = CValidErrItem::ConvertErrCode(err_code);
155 break;
156 }
157
158 return rval;
159 }
160
161
FormatForSubmitterReport(const CValidErrItem & error,CScope & scope) const162 string CValidErrorFormat::FormatForSubmitterReport(const CValidErrItem& error, CScope& scope) const
163 {
164 string rval;
165
166 switch (error.GetErrIndex()) {
167 case eErr_SEQ_FEAT_NotSpliceConsensus:
168 case eErr_SEQ_FEAT_NotSpliceConsensusDonor:
169 case eErr_SEQ_FEAT_NotSpliceConsensusAcceptor:
170 case eErr_SEQ_FEAT_RareSpliceConsensusDonor:
171 case eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron:
172 case eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron:
173 rval = x_FormatConsensusSpliceForSubmitterReport(error, scope);
174 break;
175 case eErr_SEQ_FEAT_BadEcNumberFormat:
176 case eErr_SEQ_FEAT_BadEcNumberValue:
177 case eErr_SEQ_FEAT_EcNumberProblem:
178 case eErr_SEQ_FEAT_DeletedEcNumber:
179 case eErr_SEQ_FEAT_ReplacedEcNumber:
180 case eErr_SEQ_FEAT_SplitEcNumber:
181 rval = x_FormatECNumberForSubmitterReport(error, scope);
182 break;
183 case eErr_SEQ_DESCR_BadSpecificHost:
184 rval = x_FormatBadSpecificHostForSubmitterReport(error);
185 break;
186 case eErr_SEQ_DESCR_BadInstitutionCode:
187 rval = x_FormatBadInstCodeForSubmitterReport(error);
188 break;
189 case eErr_SEQ_DESCR_LatLonCountry:
190 case eErr_SEQ_DESCR_LatLonWater:
191 rval = x_FormatLatLonCountryForSubmitterReport(error);
192 break;
193 default:
194 rval = x_FormatGenericForSubmitterReport(error, scope);
195 break;
196 }
197
198 return rval;
199 }
200
201
x_FormatConsensusSpliceForSubmitterReport(const CValidErrItem & error,CScope & scope) const202 string CValidErrorFormat::x_FormatConsensusSpliceForSubmitterReport(const CValidErrItem& error, CScope& scope) const
203 {
204 string rval;
205 if (!error.IsSetMsg() || NStr::IsBlank(error.GetMsg())) {
206 return rval;
207 }
208 string msg = error.GetMsg();
209 if (NStr::Find(msg, "(AG) not found") != string::npos) {
210 rval = "AG";
211 }
212 else if (NStr::Find(msg, "(GT) not found") != string::npos) {
213 rval = "GT";
214 } else if (NStr::Find(msg, "(AT-AC) found instead of (GT-AG)") != string::npos) {
215 rval = "(AT-AC) instead of (GT-AG)";
216 } else if (NStr::Find(msg, "(GC-AG) found instead of (GT-AG)") != string::npos) {
217 rval = "(GC-AG) instead of (GT-AG)";
218 }
219 if (NStr::IsBlank(rval)) {
220 return rval;
221 }
222
223 size_t position_pos = NStr::Find(msg, "ending at position ");
224 size_t other_clue = NStr::Find(msg, "and before exon");
225 if (position_pos == string::npos || other_clue == string::npos) {
226 position_pos = NStr::Find(msg, "position ");
227 if (position_pos != string::npos) {
228 string pos_str = msg.substr(position_pos);
229 long int pos;
230 if (sscanf(pos_str.c_str(), "position %ld of ", &pos) == 1) {
231 rval += " at " + NStr::NumericToString(pos);
232 size_t seq_pos = NStr::Find(pos_str, " of ");
233 if (seq_pos != string::npos) {
234 rval = pos_str.substr(seq_pos + 4) + "\t" + rval;
235 }
236 }
237 }
238 } else {
239 string pos_str = msg.substr(position_pos);
240 long int pos1, pos2;
241 if (sscanf(pos_str.c_str(), "ending at position %ld and before exon starting at position %ld of ", &pos1, &pos2) == 2) {
242 rval += " at " + NStr::NumericToString(pos1) + ", " + NStr::NumericToString(pos2);
243 size_t seq_pos = NStr::Find(pos_str, " of ");
244 if (seq_pos != string::npos) {
245 rval = pos_str.substr(seq_pos + 4) + "\t" + rval;
246 }
247 }
248 }
249
250 string obj_desc = error.GetObjDesc();
251 size_t type_pos = NStr::Find(obj_desc, "FEATURE: ");
252 if (type_pos != string::npos) {
253 obj_desc = obj_desc.substr(type_pos + 9);
254 size_t space_pos = NStr::Find(obj_desc, ":");
255 if (space_pos != string::npos) {
256 obj_desc = obj_desc.substr(0, space_pos);
257 }
258 }
259
260 rval = obj_desc + "\t" + rval;
261
262 return rval;
263 }
264
265
RemovePrefix(string & str,const string & prefix)266 void RemovePrefix(string& str, const string& prefix)
267 {
268 size_t type_pos = NStr::Find(str, prefix);
269 if (type_pos != string::npos) {
270 str = str.substr(type_pos + prefix.length());
271 }
272 }
273
RemoveSuffix(string & str,const string & suffix)274 void RemoveSuffix(string& str, const string& suffix)
275 {
276 size_t type_pos = NStr::Find(str, suffix);
277 if (type_pos != string::npos) {
278 str = str.substr(0, type_pos);
279 }
280 }
281
282
x_FormatGenericForSubmitterReport(const CValidErrItem & error,CScope & scope) const283 string CValidErrorFormat::x_FormatGenericForSubmitterReport(const CValidErrItem& error, CScope& scope) const
284 {
285 string obj_desc = error.GetObjDesc();
286 if (NStr::StartsWith(obj_desc, "FEATURE") && error.IsSetObj_content()) {
287 obj_desc = error.GetObj_content();
288 NStr::ReplaceInPlace(obj_desc, ":", "\t", 0, 1);
289 // Add feature location part of label
290 if (error.IsSetLocation()) {
291 obj_desc += "\t" + error.GetLocation();
292 }
293 if (error.IsSetLocus_tag()) {
294 obj_desc += "\t" + error.GetLocus_tag();
295 } else if (error.IsSetObject() && error.GetObject().GetThisTypeInfo() == CSeq_feat:: GetTypeInfo()) {
296 const CSeq_feat* sf = static_cast<const CSeq_feat*>(&(error.GetObject()));
297 if (sf) {
298 obj_desc += "\t" + x_GetLocusTag(*sf, scope);
299 }
300 }
301 } else {
302 RemovePrefix(obj_desc, "DESCRIPTOR: ");
303 RemovePrefix(obj_desc, "BioSrc: ");
304 RemoveSuffix(obj_desc, " BIOSEQ: ");
305 RemoveSuffix(obj_desc, " BIOSEQ-SET: ");
306
307 NStr::ReplaceInPlace(obj_desc, ":", "\t", 0, 1);
308 size_t close_pos = NStr::Find(obj_desc, "]");
309 if (close_pos != string::npos) {
310 obj_desc = obj_desc.substr(0, close_pos);
311 NStr::ReplaceInPlace(obj_desc, "[", "\t");
312 }
313 }
314 string rval = error.GetAccession() + ":" + obj_desc;
315
316 return rval;
317 }
318
319
x_FormatECNumberForSubmitterReport(const CValidErrItem & error,CScope & scope) const320 string CValidErrorFormat::x_FormatECNumberForSubmitterReport(const CValidErrItem& error, CScope& scope) const
321 {
322 string rval;
323 string ec_numbers;
324 string prot_name;
325 string locus_tag;
326
327 // want: accession number for sequence, ec numbers, locus tag, protein name
328
329 if (error.GetObject().GetThisTypeInfo() != CSeq_feat::GetTypeInfo()) {
330 return rval;
331 }
332 const CSeq_feat* feat = static_cast<const CSeq_feat*>(&(error.GetObject()));
333 if (!feat) {
334 return rval;
335 }
336
337 // look for EC number in quals
338 if (feat->IsSetQual()) {
339 ITERATE(CSeq_feat::TQual, it, feat->GetQual()) {
340 if ((*it)->IsSetQual() &&
341 NStr::EqualNocase((*it)->GetQual(), "EC_number") &&
342 (*it)->IsSetVal() &&
343 !NStr::IsBlank((*it)->GetVal())) {
344 if (!NStr::IsBlank(ec_numbers)) {
345 ec_numbers += ";";
346 }
347 ec_numbers += (*it)->GetVal();
348 }
349 }
350 }
351 // look for EC number in prot-ref
352 if (feat->IsSetData() && feat->GetData().IsProt() &&
353 feat->GetData().GetProt().IsSetEc()) {
354 ITERATE(CProt_ref::TEc, it, feat->GetData().GetProt().GetEc()) {
355 if (!NStr::IsBlank(ec_numbers)) {
356 ec_numbers += ";";
357 }
358 ec_numbers += *it;
359 }
360 }
361
362 if (NStr::IsBlank(ec_numbers)) {
363 ec_numbers = "Blank EC number";
364 }
365
366 // look for protein name
367 if (feat->IsSetData() && feat->GetData().IsProt() &&
368 feat->GetData().GetProt().IsSetName() &&
369 !feat->GetData().GetProt().GetName().empty()) {
370 prot_name = feat->GetData().GetProt().GetName().front();
371 }
372
373 // get locus tag
374 CConstRef <CSeq_feat> gene = sequence::GetGeneForFeature(*feat, scope);
375 if (gene && gene->GetData().GetGene().IsSetLocus_tag()) {
376 locus_tag = gene->GetData().GetGene().GetLocus_tag();
377 }
378
379 rval = error.GetAccnver() + "\t" + ec_numbers + "\t" + locus_tag + "\t" + prot_name;
380 return rval;
381 }
382
383
s_GetSpecificHostFromBioSource(const CBioSource & biosrc)384 string s_GetSpecificHostFromBioSource(const CBioSource& biosrc)
385 {
386 string rval;
387
388 if (biosrc.IsSetOrg() &&
389 biosrc.GetOrg().IsSetOrgname() &&
390 biosrc.GetOrg().GetOrgname().IsSetMod()) {
391 ITERATE(COrgName::TMod, it, biosrc.GetOrg().GetOrgname().GetMod()) {
392 if ((*it)->IsSetSubtype() &&
393 (*it)->GetSubtype() == COrgMod::eSubtype_nat_host &&
394 (*it)->IsSetSubname() &&
395 !NStr::IsBlank((*it)->GetSubname())) {
396 if (!NStr::IsBlank(rval)) {
397 rval += ";";
398 }
399 rval += (*it)->GetSubname();
400 }
401 }
402 }
403 return rval;
404 }
405
406
x_FormatBadSpecificHostForSubmitterReport(const CValidErrItem & error) const407 string CValidErrorFormat::x_FormatBadSpecificHostForSubmitterReport(const CValidErrItem& error) const
408 {
409 string rval;
410 string spec_host;
411 const string kAlternateName = "Specific host value is alternate name: ";
412 if (NStr::StartsWith(error.GetMsg(), kAlternateName)) {
413 spec_host = error.GetMsg().substr(kAlternateName.length());
414 } else if (error.GetObject().GetThisTypeInfo() == CSeqdesc::GetTypeInfo()) {
415 const CSeqdesc* desc = static_cast<const CSeqdesc *>(&(error.GetObject()));
416 if (desc && desc->IsSource()) {
417 spec_host = s_GetSpecificHostFromBioSource(desc->GetSource());
418 }
419 } else if (error.GetObject().GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
420 const CSeq_feat* feat = static_cast<const CSeq_feat *>(&(error.GetObject()));
421 if (feat && feat->IsSetData() && feat->GetData().IsBiosrc()) {
422 spec_host = s_GetSpecificHostFromBioSource(feat->GetData().GetBiosrc());
423 }
424 }
425
426 if (!NStr::IsBlank(spec_host)) {
427 rval = error.GetAccession() + "\t" + spec_host;
428 }
429
430 return rval;
431 }
432
433
s_GetInstCodeFromBioSource(const CBioSource & biosrc)434 string s_GetInstCodeFromBioSource(const CBioSource& biosrc)
435 {
436 string rval;
437
438 if (biosrc.IsSetOrg() &&
439 biosrc.GetOrg().IsSetOrgname() &&
440 biosrc.GetOrg().GetOrgname().IsSetMod()) {
441 ITERATE(COrgName::TMod, it, biosrc.GetOrg().GetOrgname().GetMod()) {
442 if ((*it)->IsSetSubtype() &&
443 ((*it)->GetSubtype() == COrgMod::eSubtype_bio_material ||
444 (*it)->GetSubtype() == COrgMod::eSubtype_culture_collection ||
445 (*it)->GetSubtype() == COrgMod::eSubtype_specimen_voucher) &&
446 (*it)->IsSetSubname() &&
447 !NStr::IsBlank((*it)->GetSubname())) {
448 size_t pos = NStr::Find((*it)->GetSubname(), ":");
449 if (pos != string::npos) {
450 string code = (*it)->GetSubname().substr(0, pos);
451 if (!NStr::IsBlank(code)) {
452 if (!NStr::IsBlank(rval)) {
453 rval += ";";
454 }
455 rval += code;
456 }
457 }
458 }
459 }
460 }
461 return rval;
462 }
463
464
x_FormatBadInstCodeForSubmitterReport(const CValidErrItem & error) const465 string CValidErrorFormat::x_FormatBadInstCodeForSubmitterReport(const CValidErrItem& error) const
466 {
467 string rval;
468
469 string codes;
470 if (error.GetObject().GetThisTypeInfo() == CSeqdesc::GetTypeInfo()) {
471 const CSeqdesc* desc = static_cast<const CSeqdesc *>(&(error.GetObject()));
472 if (desc && desc->IsSource()) {
473 codes = s_GetInstCodeFromBioSource(desc->GetSource());
474 }
475 }
476 else if (error.GetObject().GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
477 const CSeq_feat* feat = static_cast<const CSeq_feat *>(&(error.GetObject()));
478 if (feat && feat->IsSetData() && feat->GetData().IsBiosrc()) {
479 codes = s_GetInstCodeFromBioSource(feat->GetData().GetBiosrc());
480 }
481 }
482
483 if (!NStr::IsBlank(codes)) {
484 rval = error.GetAccession() + "\t" + codes;
485 }
486
487 return rval;
488 }
489
490
FormatForSubmitterReport(const CValidError & errors,CScope & scope,CValidErrItem::TErrIndex err_code) const491 string CValidErrorFormat::FormatForSubmitterReport(const CValidError& errors, CScope& scope, CValidErrItem::TErrIndex err_code) const
492 {
493 string rval;
494 for ( CValidError_CI vit(errors); vit; ++vit) {
495 if (err_code == vit->GetErrIndex()) {
496 string this_val = FormatForSubmitterReport(*vit, scope);
497 if (!NStr::IsBlank(this_val)) {
498 if (NStr::IsBlank(rval)) {
499 rval += GetSubmitterFormatErrorGroupTitle(err_code) + "\n";
500 }
501 rval += this_val + "\n";
502 }
503 }
504 }
505 return rval;
506 }
507
508
FormatCategoryForSubmitterReport(const CValidError & errors,CScope & scope,ESubmitterFormatErrorGroup grp) const509 string CValidErrorFormat::FormatCategoryForSubmitterReport
510 (const CValidError& errors, CScope& scope, ESubmitterFormatErrorGroup grp) const
511 {
512 string rval;
513 for ( CValidError_CI vit(errors); vit; ++vit) {
514 CValidErrItem::TErrIndex err_code = vit->GetErrIndex();
515 if (GetSubmitterFormatErrorGroup(err_code) == grp) {
516 string this_val = FormatForSubmitterReport(*vit, scope);
517 if (!NStr::IsBlank(this_val)) {
518 if (NStr::IsBlank(rval)) {
519 rval += GetSubmitterFormatErrorGroupTitle(err_code) + "\n";
520 }
521 rval += this_val + "\n";
522 }
523 }
524 }
525 return rval;
526 }
527
528
x_FormatLatLonCountryForSubmitterReport(const CValidErrItem & error) const529 string CValidErrorFormat::x_FormatLatLonCountryForSubmitterReport(const CValidErrItem& error) const
530 {
531 string rval = error.GetAccession() + ":" + error.GetMsg();
532 return rval;
533 }
534
535
GetListOfErrorCodes(const CValidError & errors) const536 vector<unsigned int> CValidErrorFormat::GetListOfErrorCodes(const CValidError& errors) const
537 {
538 vector<unsigned int> list;
539
540 for ( CValidError_CI vit(errors); vit; ++vit) {
541 list.push_back(vit->GetErrIndex());
542 }
543 sort(list.begin(), list.end());
544 list.erase(unique(list.begin(), list.end()), list.end());
545 return list;
546 }
547
548
FormatCompleteSubmitterReport(const CValidError & errors,CScope & scope) const549 vector<string> CValidErrorFormat::FormatCompleteSubmitterReport(const CValidError& errors, CScope& scope) const
550 {
551 vector<string> list;
552
553 // first, do special categories
554 for (unsigned int t = eSubmitterFormatErrorGroup_ConsensusSplice; t < eSubmitterFormatErrorGroup_Default; ++t) {
555 string this_val = FormatCategoryForSubmitterReport(errors, scope, (ESubmitterFormatErrorGroup)t);
556 if (!NStr::IsBlank(this_val)) {
557 list.push_back(this_val);
558 }
559 }
560
561 // now do errors not in special categories
562 vector<unsigned int> codes = GetListOfErrorCodes(errors);
563 ITERATE(vector<unsigned int>, it, codes) {
564 if (GetSubmitterFormatErrorGroup(*it) == eSubmitterFormatErrorGroup_Default) {
565 string this_val = FormatForSubmitterReport(errors, scope, *it);
566 if (!NStr::IsBlank(this_val)) {
567 list.push_back(this_val);
568 }
569 }
570 }
571 return list;
572 }
573 //LCOV_EXCL_STOP
574
575
s_GetFeatureIdLabel(const CObject_id & object_id)576 static string s_GetFeatureIdLabel (const CObject_id& object_id)
577 {
578 string feature_id;
579 if (object_id.IsId()) {
580 feature_id = NStr::IntToString(object_id.GetId());
581 } else if (object_id.IsStr()) {
582 feature_id = object_id.GetStr();
583 }
584 return feature_id;
585 }
586
587
GetFeatureIdLabel(const CFeat_id & feat_id)588 string CValidErrorFormat::GetFeatureIdLabel (const CFeat_id& feat_id)
589 {
590 string feature_id;
591 if (feat_id.IsLocal()) {
592 feature_id = s_GetFeatureIdLabel(feat_id.GetLocal());
593 } else if (feat_id.IsGeneral()) {
594 if (feat_id.GetGeneral().IsSetDb()) {
595 feature_id += feat_id.GetGeneral().GetDb();
596 }
597 feature_id += ":";
598 if (feat_id.GetGeneral().IsSetTag()) {
599 feature_id += s_GetFeatureIdLabel (feat_id.GetGeneral().GetTag());
600 }
601 }
602 return feature_id;
603 }
604
605
GetFeatureIdLabel(const CSeq_feat & ft)606 string CValidErrorFormat::GetFeatureIdLabel(const CSeq_feat& ft)
607 {
608 string feature_id;
609 if (ft.IsSetId()) {
610 feature_id = CValidErrorFormat::GetFeatureIdLabel(ft.GetId());
611 } else if (ft.IsSetIds()) {
612 ITERATE(CSeq_feat::TIds, id_it, ft.GetIds()) {
613 feature_id = CValidErrorFormat::GetFeatureIdLabel((**id_it));
614 if (!NStr::IsBlank(feature_id)) {
615 break;
616 }
617 }
618 }
619 return feature_id;
620 }
621
622
s_FixBioseqLabelProblems(string & str)623 static void s_FixBioseqLabelProblems (string& str)
624 {
625 size_t pos = NStr::Find(str, ",");
626 if (pos != string::npos && str.c_str()[pos + 1] != 0 && str.c_str()[pos + 1] != ' ') {
627 str = str.substr(0, pos + 1) + " " + str.substr(pos + 1);
628 }
629 pos = NStr::Find(str, "=");
630 if (pos != string::npos && str.c_str()[pos + 1] != 0 && str.c_str()[pos + 1] != ' ') {
631 str = str.substr(0, pos + 1) + " " + str.substr(pos + 1);
632 }
633 }
634
635
636
s_GetOrgRefContentLabel(const COrg_ref & org)637 static string s_GetOrgRefContentLabel (const COrg_ref& org)
638 {
639 string content;
640 if (org.IsSetTaxname()) {
641 content = org.GetTaxname();
642 } else if (org.IsSetCommon()) {
643 content = org.GetCommon();
644 } else if (org.IsSetDb() && !org.GetDb().empty()) {
645 org.GetDb().front()->GetLabel(&content);
646 }
647 return content;
648 }
649
650
s_GetBioSourceContentLabel(const CBioSource & bsrc)651 static string s_GetBioSourceContentLabel (const CBioSource& bsrc)
652 {
653 string content;
654 if (bsrc.IsSetOrg()) {
655 content = s_GetOrgRefContentLabel(bsrc.GetOrg());
656 }
657 return content;
658 }
659
660
s_GetFeatureContentLabelExtras(const CSeq_feat & feat)661 static string s_GetFeatureContentLabelExtras (const CSeq_feat& feat)
662 {
663 string tlabel;
664
665 // Put Seq-feat qual into label
666 if (feat.IsSetQual()) {
667 string prefix("/");
668 ITERATE(CSeq_feat::TQual, it, feat.GetQual()) {
669 tlabel += prefix + (**it).GetQual();
670 prefix = " ";
671 if (!(**it).GetVal().empty()) {
672 tlabel += "=" + (**it).GetVal();
673 }
674 }
675 }
676
677 // Put Seq-feat comment into label
678 if (feat.IsSetComment()) {
679 if (tlabel.empty()) {
680 tlabel = feat.GetComment();
681 } else {
682 tlabel += "; " + feat.GetComment();
683 }
684 }
685 return tlabel;
686 }
687
688
s_GetCdregionContentLabel(const CSeq_feat & feat,CRef<CScope> scope)689 static string s_GetCdregionContentLabel (const CSeq_feat& feat, CRef<CScope> scope)
690 {
691 string content;
692
693 // Check that feature data is Cdregion
694 if (!feat.GetData().IsCdregion()) {
695 return content;
696 }
697
698 const CGene_ref* gref = 0;
699 const CProt_ref* pref = 0;
700
701 // Look for CProt_ref object to create a label from
702 if (feat.IsSetXref()) {
703 ITERATE ( CSeq_feat::TXref, it, feat.GetXref()) {
704 const CSeqFeatXref& xref = **it;
705 if ( !xref.IsSetData() ) {
706 continue;
707 }
708
709 switch (xref.GetData().Which()) {
710 case CSeqFeatData::e_Prot:
711 pref = &xref.GetData().GetProt();
712 break;
713 case CSeqFeatData::e_Gene:
714 gref = &xref.GetData().GetGene();
715 break;
716 default:
717 break;
718 }
719 }
720 }
721
722 // Try and create a label from a CProt_ref in CSeqFeatXref in feature
723 if (pref) {
724 pref->GetLabel(&content);
725 return content;
726 }
727
728 // Try and create a label from a CProt_ref in the feat product and
729 // return if found
730 if (feat.IsSetProduct() && scope) {
731 try {
732 const CSeq_id& id = GetId(feat.GetProduct(), scope);
733 CBioseq_Handle hnd = scope->GetBioseqHandle(id);
734 if (hnd) {
735 const CBioseq& seq = *hnd.GetCompleteBioseq();
736
737 // Now look for a CProt_ref feature in seq and
738 // if found call GetLabel() on the CProt_ref
739 CTypeConstIterator<CSeqFeatData> it = ConstBegin(seq);
740 for (;it; ++it) {
741 if (it->IsProt()) {
742 it->GetProt().GetLabel(&content);
743 return content;
744 }
745 }
746 }
747 } catch (CObjmgrUtilException&) {}
748 }
749
750 // Try and create a label from a CGene_ref in CSeqFeatXref in feature
751 if (gref) {
752 gref->GetLabel(&content);
753 }
754
755 if (NStr::IsBlank(content)) {
756 content = s_GetFeatureContentLabelExtras(feat);
757 }
758
759 return content;
760 }
761
762
GetFeatureContentLabel(const CSeq_feat & feat,CRef<CScope> scope)763 string CValidErrorFormat::GetFeatureContentLabel (const CSeq_feat& feat, CRef<CScope> scope)
764 {
765 string content_label;
766
767 switch (feat.GetData().Which()) {
768 case CSeqFeatData::e_Pub:
769 content_label = "Cit: ";
770 feat.GetData().GetPub().GetPub().GetLabel(&content_label);
771 break;
772 case CSeqFeatData::e_Biosrc:
773 content_label = "Src: " + s_GetBioSourceContentLabel (feat.GetData().GetBiosrc());
774 break;
775 case CSeqFeatData::e_Imp:
776 {
777 feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
778 if (feat.GetData().GetImp().IsSetKey()) {
779 string key = feat.GetData().GetImp().GetKey();
780 string tmp = "[" + key + "]";
781 if (NStr::StartsWith(content_label, tmp)) {
782 content_label = key + content_label.substr(tmp.length());
783 }
784 }
785 }
786 break;
787 case CSeqFeatData::e_Rna:
788 feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
789 if (feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA
790 && NStr::Equal(content_label, "tRNA: tRNA")) {
791 content_label = "tRNA: ";
792 }
793 break;
794 case CSeqFeatData::e_Cdregion:
795 content_label = "CDS: " + s_GetCdregionContentLabel(feat, scope);
796 break;
797 case CSeqFeatData::e_Prot:
798 feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
799 if (feat.GetData().GetProt().IsSetProcessed()) {
800 switch (feat.GetData().GetProt().GetProcessed()) {
801 case CProt_ref::eProcessed_mature:
802 content_label = "mat_peptide: " + content_label.substr(6);
803 break;
804 case CProt_ref::eProcessed_signal_peptide:
805 content_label = "sig_peptide: " + content_label.substr(6);
806 break;
807 case CProt_ref::eProcessed_transit_peptide:
808 content_label = "trans_peptide: " + content_label.substr(6);
809 break;
810 default:
811 break;
812 }
813 }
814 break;
815 default:
816 feature::GetLabel(feat, &content_label, feature::fFGL_Both, scope);
817 break;
818 }
819 return content_label;
820 }
821
822
GetFeatureBioseqLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)823 string CValidErrorFormat::GetFeatureBioseqLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
824 {
825 string desc;
826 // Append label for bioseq of feature location
827 if (!suppress_context && scope) {
828 bool find_failed = false;
829 try {
830 CBioseq_Handle hnd;
831 try {
832 hnd = scope->GetBioseqHandle(ft.GetLocation());
833 } catch (CException&) {
834 CSeq_loc_CI li(ft.GetLocation());
835 while (li && !hnd) {
836 hnd = scope->GetBioseqHandle(li.GetSeq_id());
837 ++li;
838 }
839 }
840 if (hnd) {
841 desc += CValidErrorFormat::GetBioseqLabel(hnd);
842 }
843 } catch (CObjMgrException& ex) {
844 if (ex.GetErrCode() == CObjMgrException::eFindFailed) {
845 find_failed = true;
846 }
847 } catch (CException) {
848 } catch (std::exception) {
849 };
850 if (find_failed) {
851 try {
852 CSeq_loc_CI li(ft.GetLocation());
853 CBioseq_Handle hnd = scope->GetBioseqHandle(li.GetSeq_id());
854 if (hnd) {
855 desc += CValidErrorFormat::GetBioseqLabel(hnd);
856 }
857
858 } catch (CException) {
859 } catch (std::exception) {
860 };
861 }
862 }
863 return desc;
864 }
865
866
GetFeatureProductLocLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)867 string CValidErrorFormat::GetFeatureProductLocLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
868 {
869 string desc;
870 // Append label for product of feature
871 if (ft.IsSetProduct() && scope) {
872 string loc_label;
873 if (suppress_context) {
874 CSeq_loc loc;
875 loc.Assign(ft.GetProduct());
876 ChangeSeqLocId(&loc, false, scope);
877 loc_label = GetValidatorLocationLabel(loc, *scope);
878 } else {
879 loc_label = GetValidatorLocationLabel(ft.GetProduct(), *scope);
880 }
881 if (loc_label.size() > 800) {
882 loc_label.replace(797, NPOS, "...");
883 }
884 if (!loc_label.empty()) {
885 desc += "[";
886 desc += loc_label;
887 desc += "]";
888 }
889 }
890 return desc;
891 }
892
893
GetFeatureLocationLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)894 string CValidErrorFormat::GetFeatureLocationLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
895 {
896 string loc_label;
897 // Add feature location part of label
898 if (ft.IsSetLocation() && scope) {
899 if (suppress_context) {
900 CSeq_loc loc;
901 loc.Assign(ft.GetLocation());
902 ChangeSeqLocId(&loc, false, scope);
903 loc_label = GetValidatorLocationLabel(loc, *scope);
904 } else {
905 loc_label = GetValidatorLocationLabel(ft.GetLocation(), *scope);
906 }
907 if (loc_label.size() > 800) {
908 loc_label.replace(795, NPOS, "...");
909 }
910 }
911 return loc_label;
912 }
913
914 //LCOV_EXCL_START
915 //not used
GetFeatureLabel(const CSeq_feat & ft,CRef<CScope> scope,bool suppress_context)916 string CValidErrorFormat::GetFeatureLabel(const CSeq_feat& ft, CRef<CScope> scope, bool suppress_context)
917 {
918 // Add feature part of label
919 string desc = "FEATURE: ";
920 string content_label = CValidErrorFormat::GetFeatureContentLabel(ft, scope);
921 desc += content_label;
922
923 // Add feature ID part of label (if present)
924 string feature_id = GetFeatureIdLabel(ft);
925 if (!NStr::IsBlank(feature_id)) {
926 desc += " <" + feature_id + "> ";
927 }
928
929 // Add feature location part of label
930 string loc_label = GetFeatureLocationLabel(ft, scope, suppress_context);
931 if (!NStr::IsBlank(loc_label)) {
932 desc += " [" + loc_label + "]";
933 }
934
935 // Append label for bioseq of feature location
936 string bioseq_label = GetFeatureBioseqLabel(ft, scope, suppress_context);
937 if (!NStr::IsBlank(bioseq_label)) {
938 desc += bioseq_label;
939 }
940
941 // Append label for product of feature
942 string product_label = GetFeatureProductLocLabel(ft, scope, suppress_context);
943 if (!NStr::IsBlank(product_label)) {
944 desc += product_label;
945 }
946 return desc;
947 }
948 //LCOV_EXCL_STOP
949
950
GetDescriptorContent(const CSeqdesc & ds)951 string CValidErrorFormat::GetDescriptorContent (const CSeqdesc& ds)
952 {
953 string content;
954
955 switch (ds.Which()) {
956 case CSeqdesc::e_Pub:
957 content = "Pub: ";
958 ds.GetPub().GetPub().GetLabel(&content);
959 break;
960 case CSeqdesc::e_Source:
961 content = "BioSource: " + s_GetBioSourceContentLabel(ds.GetSource());
962 break;
963 case CSeqdesc::e_Modif:
964 ds.GetLabel(&content, CSeqdesc::eBoth);
965 if (NStr::StartsWith(content, "modif: ,")) {
966 content = "Modifier: " + content.substr(8);
967 }
968 break;
969 case CSeqdesc::e_Molinfo:
970 ds.GetLabel(&content, CSeqdesc::eBoth);
971 if (NStr::StartsWith(content, "molinfo: ,")) {
972 content = "molInfo: " + content.substr(10);
973 }
974 break;
975 case CSeqdesc::e_Comment:
976 ds.GetLabel(&content, CSeqdesc::eBoth);
977 if (NStr::StartsWith(content, "comment: ") && NStr::IsBlank(content.substr(9))) {
978 content = "comment: ";
979 }
980 break;
981 case CSeqdesc::e_User:
982 content = "UserObj: ";
983 if (ds.GetUser().IsSetClass()) {
984 content += ds.GetUser().GetClass();
985 } else if (ds.GetUser().IsSetType() && ds.GetUser().GetType().IsStr()) {
986 content += ds.GetUser().GetType().GetStr();
987 }
988 break;
989 default:
990 ds.GetLabel(&content, CSeqdesc::eBoth);
991 break;
992 }
993 // fix descriptor type names
994 string first = content.substr(0, 1);
995 NStr::ToUpper(first);
996 content = first + content.substr(1);
997 size_t colon_pos = NStr::Find(content, ":");
998 if (colon_pos != string::npos) {
999 size_t dash_pos = NStr::Find(content.substr(0, colon_pos), "-");
1000 if (dash_pos != string::npos) {
1001 string after_dash = content.substr(dash_pos + 1, 1);
1002 NStr::ToUpper (after_dash);
1003 content = content.substr(0, dash_pos) + after_dash + content.substr(dash_pos + 2);
1004 }
1005 }
1006 if (NStr::StartsWith(content, "BioSource:")) {
1007 content = "BioSrc:" + content.substr(10);
1008 } else if (NStr::StartsWith(content, "Modif:")) {
1009 content = "Modifier:" + content.substr(6);
1010 } else if (NStr::StartsWith(content, "Embl:")) {
1011 content = "EMBL:" + content.substr(5);
1012 } else if (NStr::StartsWith(content, "Pir:")) {
1013 content = "PIR:" + content.substr(4);
1014 }
1015 return content;
1016 }
1017
1018
GetDescriptorLabel(const CSeqdesc & ds,const CSeq_entry & ctx,CRef<CScope> scope,bool suppress_context)1019 string CValidErrorFormat::GetDescriptorLabel(const CSeqdesc& ds, const CSeq_entry& ctx, CRef<CScope> scope, bool suppress_context)
1020 {
1021 string desc("DESCRIPTOR: ");
1022
1023 string content = CValidErrorFormat::GetDescriptorContent (ds);
1024
1025 desc += content;
1026
1027 desc += " ";
1028 if (ctx.IsSeq()) {
1029 AppendBioseqLabel(desc, ctx.GetSeq(), suppress_context);
1030 } else {
1031 desc += CValidErrorFormat::GetBioseqSetLabel(ctx.GetSet(), scope, suppress_context);
1032 }
1033 return desc;
1034 }
1035
1036
GetBioseqLabel(CBioseq_Handle bh)1037 string CValidErrorFormat::GetBioseqLabel (CBioseq_Handle bh)
1038 {
1039 string desc;
1040
1041 CBioseq_Handle::TBioseqCore bc = bh.GetBioseqCore();
1042 desc += " [";
1043 string bc_label;
1044 bc->GetLabel(&bc_label, CBioseq::eBoth);
1045 s_FixBioseqLabelProblems(bc_label);
1046 desc += bc_label;
1047 desc += "]";
1048 return desc;
1049 }
1050
1051
GetBioseqSetLabel(const CBioseq_set & st,CRef<CScope> scope,bool suppress_context)1052 string CValidErrorFormat::GetBioseqSetLabel(const CBioseq_set& st, CRef<CScope> scope, bool suppress_context)
1053 {
1054 string str;
1055 // GetLabel for CBioseq_set does not follow C Toolkit conventions
1056 // AND is a horrible performance hit for sets with lots of sequences
1057
1058 const CBioseq* best = 0;
1059 CTypeConstIterator<CBioseq> si(ConstBegin(st));
1060 if (si) {
1061 best = &(*si);
1062 }
1063 // Add content to label.
1064 if (!best) {
1065 str += "BIOSEQ-SET: ";
1066 if (!suppress_context && st.IsSetClass()) {
1067 const CEnumeratedTypeValues* tv =
1068 CBioseq_set::GetTypeInfo_enum_EClass();
1069 const string& cn = tv->FindName(st.GetClass(), true);
1070 str += cn;
1071 str += ": ";
1072 }
1073
1074 str += "(No Bioseqs)";
1075 } else if (st.IsSetClass()) {
1076 str += "BIOSEQ-SET: ";
1077 if (!suppress_context) {
1078 const CEnumeratedTypeValues* tv =
1079 CBioseq_set::GetTypeInfo_enum_EClass();
1080 const string& cn = tv->FindName(st.GetClass(), true);
1081 str += cn;
1082 str += ": ";
1083 }
1084 if (scope) {
1085 string content;
1086 int version = 0;
1087 const string& accn = GetAccessionFromObjects(&st, NULL, *scope, &version);
1088 content += accn;
1089 // best->GetLabel(&content, CBioseq::eContent, supress_context);
1090 // fix problems with label
1091 s_FixBioseqLabelProblems(content);
1092 str += content;
1093 }
1094 } else {
1095 AppendBioseqLabel(str, *best, suppress_context);
1096 }
1097 return str;
1098 }
1099
1100
1101 //LCOV_EXCL_START
1102 //not used
GetObjectLabel(const CObject & obj,const CSeq_entry & ctx,CRef<CScope> scope,bool suppress_context)1103 string CValidErrorFormat::GetObjectLabel(const CObject& obj, const CSeq_entry& ctx, CRef<CScope> scope, bool suppress_context)
1104 {
1105 string label = "Unknown object";
1106
1107 const CSeq_feat* ft = dynamic_cast<const CSeq_feat*>(&obj);
1108 const CSeqdesc* ds = dynamic_cast<const CSeqdesc*>(&obj);
1109 const CBioseq* b = dynamic_cast<const CBioseq*>(&obj);
1110 const CBioseq_set* set = dynamic_cast<const CBioseq_set*>(&obj);
1111
1112 if (ft) {
1113 label = GetFeatureLabel(*ft, scope, suppress_context);
1114 } else if (ds) {
1115 label = GetDescriptorLabel(*ds, ctx, scope, suppress_context);
1116 } else if (b) {
1117 label = GetBioseqLabel(scope->GetBioseqHandle(*b));
1118 } else if (set) {
1119 label = GetBioseqSetLabel(*set, scope, suppress_context);
1120 }
1121 return label;
1122 }
1123 //LCOV_EXCL_STOP
1124
1125
1126 //LCOV_EXCL_START
1127 //added for GPIPE, not used for asnvalidate
// Label of the user-object field that carries suppressed validator error
// codes.  Matched case-insensitively by s_IsSuppressField and used verbatim
// when AddSuppression creates a new field.
const string kSuppressFieldLabel = "Suppress";
1129
s_IsSuppressField(const CUser_field & field)1130 bool s_IsSuppressField (const CUser_field& field)
1131 {
1132 if (field.IsSetLabel() &&
1133 field.GetLabel().IsStr() &&
1134 NStr::EqualNocase(field.GetLabel().GetStr(), kSuppressFieldLabel)) {
1135 return true;
1136 } else {
1137 return false;
1138 }
1139 }
1140
1141
AddSuppression(CUser_object & user,unsigned int error_code)1142 void CValidErrorFormat::AddSuppression(CUser_object& user, unsigned int error_code)
1143 {
1144 bool found = false;
1145 if (user.IsSetData()) {
1146 NON_CONST_ITERATE(CUser_object::TData, it, user.SetData()) {
1147 if (s_IsSuppressField(**it)) {
1148 if ((*it)->IsSetData()) {
1149 if ((*it)->GetData().IsInt()) {
1150 unsigned int old_val = (*it)->GetData().GetInt();
1151 if (old_val == error_code) {
1152 // do nothing, already there
1153 } else {
1154 (*it)->SetData().SetInts().push_back(old_val);
1155 (*it)->SetData().SetInts().push_back(error_code);
1156 }
1157 found = true;
1158 break;
1159 } else if ((*it)->GetData().IsInts()) {
1160 ITERATE(CUser_field::TData::TInts, ii, (*it)->GetData().GetInts()) {
1161 if (*ii == error_code) {
1162 found = true;
1163 break;
1164 }
1165 }
1166 if (!found) {
1167 (*it)->SetData().SetInts().push_back(error_code);
1168 found = true;
1169 }
1170 break;
1171 }
1172 }
1173 }
1174 }
1175 }
1176 if (!found) {
1177 CRef<CUser_field> field(new CUser_field());
1178 field->SetLabel().SetStr(kSuppressFieldLabel);
1179 field->SetData().SetInts().push_back(error_code);
1180 user.SetData().push_back(field);
1181 }
1182 }
1183
1184
SetSuppressionRules(const CUser_object & user,CValidError & errors)1185 void CValidErrorFormat::SetSuppressionRules(const CUser_object& user, CValidError& errors)
1186 {
1187 if (!user.IsSetData()) {
1188 return;
1189 }
1190 ITERATE(CUser_object::TData, it, user.GetData()) {
1191 if ((*it)->IsSetData() && s_IsSuppressField(**it)) {
1192 if ((*it)->GetData().IsInt()) {
1193 errors.SuppressError((*it)->GetData().GetInt());
1194 } else if ((*it)->GetData().IsInts()) {
1195 ITERATE(CUser_field::TData::TInts, ii, (*it)->GetData().GetInts()) {
1196 errors.SuppressError(*ii);
1197 }
1198 } else if ((*it)->GetData().IsStr()) {
1199 unsigned int ec = CValidErrItem::ConvertToErrCode((*it)->GetData().GetStr());
1200 if (ec != eErr_MAX) {
1201 errors.SuppressError(ec);
1202 }
1203 } else if ((*it)->GetData().IsStrs()) {
1204 ITERATE(CUser_field::TData::TStrs, si, (*it)->GetData().GetStrs()) {
1205 unsigned int ec = CValidErrItem::ConvertToErrCode(*si);
1206 if (ec != eErr_MAX) {
1207 errors.SuppressError(ec);
1208 }
1209 }
1210 }
1211 }
1212 }
1213 }
1214
1215
SetSuppressionRules(const CSeq_entry & se,CValidError & errors)1216 void CValidErrorFormat::SetSuppressionRules(const CSeq_entry& se, CValidError& errors)
1217 {
1218 if (se.IsSeq()) {
1219 SetSuppressionRules(se.GetSeq(), errors);
1220 } else if (se.IsSet()) {
1221 const CBioseq_set& set = se.GetSet();
1222 if (set.IsSetDescr()) {
1223 ITERATE(CBioseq_set::TDescr::Tdata, it, set.GetDescr().Get()) {
1224 if ((*it)->IsUser() &&
1225 (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_ValidationSuppression) {
1226 SetSuppressionRules((*it)->GetUser(), errors);
1227 }
1228 }
1229 }
1230 if (set.IsSetSeq_set()) {
1231 ITERATE(CBioseq_set::TSeq_set, it, set.GetSeq_set()) {
1232 SetSuppressionRules(**it, errors);
1233 }
1234 }
1235 }
1236 }
1237
1238
SetSuppressionRules(const CSeq_entry_Handle & se,CValidError & errors)1239 void CValidErrorFormat::SetSuppressionRules(const CSeq_entry_Handle& se, CValidError& errors)
1240 {
1241 SetSuppressionRules(*(se.GetCompleteSeq_entry()), errors);
1242 }
1243
1244
SetSuppressionRules(const CSeq_submit & ss,CValidError & errors)1245 void CValidErrorFormat::SetSuppressionRules(const CSeq_submit& ss, CValidError& errors)
1246 {
1247 if (ss.IsEntrys()) {
1248 ITERATE(CSeq_submit::TData::TEntrys, it, ss.GetData().GetEntrys()) {
1249 SetSuppressionRules(**it, errors);
1250 }
1251 }
1252 }
1253
1254
SetSuppressionRules(const CBioseq & seq,CValidError & errors)1255 void CValidErrorFormat::SetSuppressionRules(const CBioseq& seq, CValidError& errors)
1256 {
1257 if (seq.IsSetDescr()) {
1258 ITERATE(CBioseq::TDescr::Tdata, it, seq.GetDescr().Get()) {
1259 if ((*it)->IsUser() &&
1260 (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_ValidationSuppression) {
1261 SetSuppressionRules((*it)->GetUser(), errors);
1262 }
1263 }
1264 }
1265 }
1266 //LCOV_EXCL_STOP
1267
1268
1269 //LCOV_EXCL_START
1270 //not used by asnvalidate but may be useful for other clients of validator library
AddLocusTags(CValidError & errors,CScope & scope)1271 void CValidErrorFormat::AddLocusTags(CValidError& errors, CScope& scope)
1272 {
1273 for (auto it : errors.SetErrs()) {
1274 if (it->IsSetLocus_tag()) {
1275 continue;
1276 }
1277 if (it->IsSetObjectType() &&
1278 it->GetObjectType() == CValidErrItem::eObjectType_seqfeat &&
1279 it->IsSetObject() &&
1280 it->GetObject().GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
1281 const CSeq_feat* sf = static_cast<const CSeq_feat *>(&(it->GetObject()));
1282 if (sf && sf->IsSetData()) {
1283 it->SetLocus_tag(x_GetLocusTag(*sf, scope));
1284 }
1285 }
1286 }
1287 }
1288
1289
x_GetLocusTag(const CSeq_feat & sf,CScope & scope)1290 const string& CValidErrorFormat::x_GetLocusTag(const CSeq_feat& sf, CScope& scope)
1291 {
1292 const string* rval = &kEmptyStr;
1293
1294 if (sf.GetData().IsGene()) {
1295 if (sf.GetData().GetGene().IsSetLocus_tag()) {
1296 rval = &sf.GetData().GetGene().GetLocus_tag();
1297 }
1298 } else {
1299 const CGene_ref* g = sf.GetGeneXref();
1300 if (g && g->IsSetLocus_tag()) {
1301 rval = &g->GetLocus_tag();
1302 } else {
1303 CConstRef<CSeq_feat> gene = sequence::GetGeneForFeature(sf, scope);
1304 if (gene && gene->GetData().GetGene().IsSetLocus_tag()) {
1305 rval = &gene->GetData().GetGene().GetLocus_tag();
1306 }
1307 }
1308 }
1309 return *rval;
1310 }
1311 //LCOV_EXCL_STOP
1312
1313
1314 END_SCOPE(validator)
1315 END_SCOPE(objects)
1316 END_NCBI_SCOPE
1317