1 /* $Id: validator.cpp 632625 2021-06-03 17:38:33Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko.......
27 *
28 * File Description:
29 * Validates CSeq_entries and CSeq_submits
30 *
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <serial/serialbase.hpp>
35 #include <objects/submit/Seq_submit.hpp>
36 #include <objects/seq/Bioseq.hpp>
37 #include <objects/seq/Seqdesc.hpp>
38 #include <objmgr/object_manager.hpp>
39 #include <objmgr/util/sequence.hpp>
40 #include <objtools/validator/validator.hpp>
41 #include <util/static_map.hpp>
42 #include <util/sgml_entity.hpp>
43 #include <objects/taxon3/itaxon3.hpp>
44 #include <objects/taxon3/taxon3.hpp>
45 #include <objects/taxon3/cached_taxon3.hpp>
46
47 #include <objtools/validator/validatorp.hpp>
48 #include <objtools/validator/validerror_format.hpp>
49
50
51 BEGIN_NCBI_SCOPE
52 BEGIN_SCOPE(objects)
53 BEGIN_SCOPE(validator)
54 USING_SCOPE(sequence);
55
56
57 // *********************** CValidator implementation **********************
58
59
CValidator(CObjectManager & objmgr,AutoPtr<ITaxon3> taxon)60 CValidator::CValidator(CObjectManager& objmgr,
61 AutoPtr<ITaxon3> taxon) :
62 m_ObjMgr(&objmgr),
63 m_PrgCallback(0),
64 m_UserData(0)
65 {
66 if (taxon.get() == NULL) {
67 AutoPtr<ITaxon3> taxon3(new CTaxon3);
68 taxon3->Init();
69 m_Taxon = taxon3;
70 } else {
71 m_Taxon = taxon;
72 }
73 m_Taxon->Init();
74 }
75
76
~CValidator(void)77 CValidator::~CValidator(void)
78 {
79 }
80
81
Validate(const CSeq_entry & se,CScope * scope,Uint4 options)82 CConstRef<CValidError> CValidator::Validate
83 (const CSeq_entry& se,
84 CScope* scope,
85 Uint4 options)
86 {
87 CRef<CValidError> errors(new CValidError(&se));
88 CValidErrorFormat::SetSuppressionRules(se, *errors);
89 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
90 imp.SetProgressCallback(m_PrgCallback, m_UserData);
91 if ( !imp.Validate(se, 0, scope) ) {
92 errors.Reset();
93 }
94 return errors;
95 }
96
97
98 //LCOV_EXCL_START
99 // not used by asnvalidate, used by external programs
Validate(const CSeq_entry_Handle & seh,Uint4 options)100 CConstRef<CValidError> CValidator::Validate
101 (const CSeq_entry_Handle& seh,
102 Uint4 options)
103 {
104 static unsigned int num_e = 0, mult = 0;
105
106 num_e++;
107 if (num_e % 200 == 0) {
108 num_e = 0;
109 mult++;
110 }
111
112 CRef<CValidError> errors(new CValidError(&*seh.GetCompleteSeq_entry()));
113 CValidErrorFormat::SetSuppressionRules(seh, *errors);
114 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
115 imp.SetProgressCallback(m_PrgCallback, m_UserData);
116 if ( !imp.Validate(seh, 0) ) {
117 errors.Reset();
118 }
119 return errors;
120 }
121
122
GetTSANStretchErrors(const CSeq_entry_Handle & se)123 CConstRef<CValidError> CValidator::GetTSANStretchErrors(const CSeq_entry_Handle& se)
124 {
125 CRef<CValidError> errors(new CValidError(&*se.GetCompleteSeq_entry()));
126 CValidErrorFormat::SetSuppressionRules(se, *errors);
127 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
128 imp.SetProgressCallback(m_PrgCallback, m_UserData);
129 if ( !imp.GetTSANStretchErrors(se) ) {
130 errors.Reset();
131 }
132 return errors;
133 }
134
135
GetTSACDSOnMinusStrandErrors(const CSeq_entry_Handle & se)136 CConstRef<CValidError> CValidator::GetTSACDSOnMinusStrandErrors (const CSeq_entry_Handle& se)
137 {
138 CRef<CValidError> errors(new CValidError(&*se.GetCompleteSeq_entry()));
139 CValidErrorFormat::SetSuppressionRules(se, *errors);
140 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
141 imp.SetProgressCallback(m_PrgCallback, m_UserData);
142 if ( !imp.GetTSACDSOnMinusStrandErrors(se) ) {
143 errors.Reset();
144 }
145 return errors;
146 }
147
148
GetTSAConflictingBiomolTechErrors(const CSeq_entry_Handle & se)149 CConstRef<CValidError> CValidator::GetTSAConflictingBiomolTechErrors (const CSeq_entry_Handle& se)
150 {
151 CRef<CValidError> errors(new CValidError(&*se.GetCompleteSeq_entry()));
152 CValidErrorFormat::SetSuppressionRules(se, *errors);
153 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
154 imp.SetProgressCallback(m_PrgCallback, m_UserData);
155 if ( !imp.GetTSAConflictingBiomolTechErrors(se) ) {
156 errors.Reset();
157 }
158 return errors;
159 }
160
161
GetTSANStretchErrors(const CBioseq & seq)162 CConstRef<CValidError> CValidator::GetTSANStretchErrors(const CBioseq& seq)
163 {
164
165 CRef<CValidError> errors(new CValidError(&seq));
166 CValidErrorFormat::SetSuppressionRules(seq, *errors);
167 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
168 imp.SetProgressCallback(m_PrgCallback, m_UserData);
169 if ( !imp.GetTSANStretchErrors(seq) ) {
170 errors.Reset();
171 }
172 return errors;
173 }
174
175
GetTSACDSOnMinusStrandErrors(const CSeq_feat & f,const CBioseq & seq)176 CConstRef<CValidError> CValidator::GetTSACDSOnMinusStrandErrors (const CSeq_feat& f, const CBioseq& seq)
177 {
178 CRef<CValidError> errors(new CValidError(&f));
179 CValidErrorFormat::SetSuppressionRules(seq, *errors);
180 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
181 imp.SetProgressCallback(m_PrgCallback, m_UserData);
182 if ( !imp.GetTSACDSOnMinusStrandErrors(f, seq) ) {
183 errors.Reset();
184 }
185 return errors;
186 }
187
188
GetTSAConflictingBiomolTechErrors(const CBioseq & seq)189 CConstRef<CValidError> CValidator::GetTSAConflictingBiomolTechErrors (const CBioseq& seq)
190 {
191 CRef<CValidError> errors(new CValidError(&seq));
192 CValidErrorFormat::SetSuppressionRules(seq, *errors);
193 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
194 imp.SetProgressCallback(m_PrgCallback, m_UserData);
195 if ( !imp.GetTSAConflictingBiomolTechErrors(seq) ) {
196 errors.Reset();
197 }
198 return errors;
199 }
200 //LCOV_EXCL_STOP
201
202
Validate(const CSeq_submit & ss,CScope * scope,Uint4 options)203 CConstRef<CValidError> CValidator::Validate
204 (const CSeq_submit& ss,
205 CScope* scope,
206 Uint4 options)
207 {
208 options |= CValidator::eVal_seqsubmit_parent;
209 CRef<CValidError> errors(new CValidError(&ss));
210 CValidErrorFormat::SetSuppressionRules(ss, *errors);
211 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
212 imp.Validate(ss, scope);
213 if (ss.IsSetSub() && ss.GetSub().IsSetContact() && ss.GetSub().GetContact().IsSetContact()
214 && ss.GetSub().GetContact().GetContact().IsSetAffil()
215 && ss.GetSub().GetContact().GetContact().GetAffil().IsStd()) {
216 imp.ValidateAffil(ss.GetSub().GetContact().GetContact().GetAffil().GetStd(),
217 ss, 0);
218 }
219
220 return errors;
221 }
222
223
Validate(const CSeq_annot_Handle & sah,Uint4 options)224 CConstRef<CValidError> CValidator::Validate
225 (const CSeq_annot_Handle& sah,
226 Uint4 options)
227 {
228 CConstRef<CSeq_annot> sar = sah.GetCompleteSeq_annot();
229 CRef<CValidError> errors(new CValidError(&*sar));
230 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
231 imp.Validate(sah);
232 return errors;
233 }
234
235
Validate(const CSeq_feat & feat,CScope * scope,Uint4 options)236 CConstRef<CValidError> CValidator::Validate
237 (const CSeq_feat& feat,
238 CScope *scope,
239 Uint4 options)
240 {
241 CRef<CValidError> errors(new CValidError(&feat));
242 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
243 imp.Validate(feat, scope);
244 return errors;
245 }
246
247
248 //LCOV_EXCL_START
249 //not used by asnvalidate
Validate(const CSeq_feat & feat,Uint4 options)250 CConstRef<CValidError> CValidator::Validate
251 (const CSeq_feat& feat,
252 Uint4 options)
253 {
254 return Validate(feat, NULL, options);
255 }
256 //LCOV_EXCL_STOP
Validate(const CBioSource & src,CScope * scope,Uint4 options)257 CConstRef<CValidError> CValidator::Validate
258 (const CBioSource& src,
259 CScope *scope,
260 Uint4 options)
261 {
262 CRef<CValidError> errors(new CValidError(&src));
263 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
264 imp.Validate(src, scope);
265 return errors;
266 }
267
268 //LCOV_EXCL_START
269 //not used by asnvalidate
Validate(const CBioSource & src,Uint4 options)270 CConstRef<CValidError> CValidator::Validate
271 (const CBioSource& src,
272 Uint4 options)
273 {
274 return Validate(src, NULL, options);
275 }
276 //LCOV_EXCL_STOP
277
Validate(const CPubdesc & pubdesc,CScope * scope,Uint4 options)278 CConstRef<CValidError> CValidator::Validate
279 (const CPubdesc& pubdesc,
280 CScope *scope,
281 Uint4 options)
282 {
283 CRef<CValidError> errors(new CValidError(&pubdesc));
284 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
285 imp.Validate(pubdesc, scope);
286 return errors;
287 }
288
289 //LCOV_EXCL_START
290 //not used by asnvalidate
Validate(const CPubdesc & pubdesc,Uint4 options)291 CConstRef<CValidError> CValidator::Validate
292 (const CPubdesc& pubdesc,
293 Uint4 options)
294 {
295 return Validate(pubdesc, NULL, options);
296 }
297 //LCOV_EXCL_STOP
298
Validate(const CSeqdesc & desc,const CSeq_entry & ctx,Uint4 options)299 CConstRef<CValidError> CValidator::Validate
300 (const CSeqdesc& desc,
301 const CSeq_entry& ctx,
302 Uint4 options)
303 {
304 CRef<CValidError> errors(new CValidError(&desc));
305 CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
306 imp.Validate(desc, ctx);
307 return errors;
308 }
309
SetProgressCallback(TProgressCallback callback,void * user_data)310 void CValidator::SetProgressCallback(TProgressCallback callback, void* user_data)
311 {
312 m_PrgCallback = callback;
313 m_UserData = user_data;
314 }
315
316
BadCharsInAuthorName(const string & str,bool allowcomma,bool allowperiod,bool last)317 bool CValidator::BadCharsInAuthorName(const string& str, bool allowcomma, bool allowperiod, bool last)
318 {
319 if (NStr::IsBlank(str)) {
320 return false;
321 }
322
323
324 size_t stp = string::npos;
325 if (last) {
326 if (NStr::StartsWith(str, "St.")) {
327 stp = 2;
328 }
329 else if (NStr::StartsWith(str, "de M.")) {
330 stp = 4;
331 }
332 }
333
334 size_t pos = 0;
335 const char *ptr = str.c_str();
336
337 while (*ptr != 0) {
338 if (isalpha(*ptr)
339 || *ptr == '-'
340 || *ptr == '\''
341 || *ptr == ' '
342 || (*ptr == ',' && allowcomma)
343 || (*ptr == '.' && (allowperiod || pos == stp))) {
344 // all these are ok
345 ptr++;
346 pos++;
347 } else {
348 string tail = str.substr(pos);
349 if (NStr::Equal(tail, "2nd") ||
350 NStr::Equal(tail, "3rd") ||
351 NStr::Equal(tail, "4th") ||
352 NStr::Equal(tail, "5th") ||
353 NStr::Equal(tail, "6th")) {
354 return false;
355 }
356 return true;
357 }
358 }
359 return false;
360 }
361
362
BadCharsInAuthorLastName(const string & str)363 bool CValidator::BadCharsInAuthorLastName(const string& str)
364 {
365 if (NStr::EqualNocase(str, "et al.")) {
366 // this is ok
367 return false;
368 } else {
369 return BadCharsInAuthorName(str, false, false, true);
370 }
371 }
372
BadCharsInAuthorFirstName(const string & str)373 bool CValidator::BadCharsInAuthorFirstName(const string& str)
374 {
375 return BadCharsInAuthorName(str, false, true, false);
376 }
377
378
BadCharsInAuthorInitials(const string & str)379 bool CValidator::BadCharsInAuthorInitials(const string& str)
380 {
381 return BadCharsInAuthorName(str, false, true, false);
382 }
383
384
BadCharsInAuthorSuffix(const string & str)385 bool CValidator::BadCharsInAuthorSuffix(const string& str)
386 {
387 return BadCharsInAuthorName(str, false, true, false);
388 }
389
390
BadCharsInAuthor(const CName_std & author,bool & last_is_bad)391 string CValidator::BadCharsInAuthor(const CName_std& author, bool& last_is_bad)
392 {
393 string badauthor;
394 last_is_bad = false;
395
396 if (author.IsSetLast() && BadCharsInAuthorLastName(author.GetLast())) {
397 last_is_bad = true;
398 badauthor = author.GetLast();
399 } else if (author.IsSetFirst() && BadCharsInAuthorFirstName(author.GetFirst())) {
400 badauthor = author.GetFirst();
401 }
402 else if (author.IsSetInitials() && BadCharsInAuthorInitials(author.GetInitials())) {
403 badauthor = author.GetInitials();
404 } else if (author.IsSetSuffix() && BadCharsInAuthorSuffix(author.GetSuffix())) {
405 badauthor = author.GetSuffix();
406 }
407 return badauthor;
408 }
409
410
BadCharsInAuthor(const CAuthor & author,bool & last_is_bad)411 string CValidator::BadCharsInAuthor(const CAuthor& author, bool& last_is_bad)
412 {
413 last_is_bad = false;
414 if (author.IsSetName() && author.GetName().IsName()) {
415 return BadCharsInAuthor(author.GetName().GetName(), last_is_bad);
416 } else {
417 return kEmptyStr;
418 }
419 }
420
421
422 typedef bool(*CompareConsecutiveIntervalProc) (const CSeq_interval& int1, const CSeq_interval& int2, CScope *scope);
423
x_CompareConsecutiveIntervals(const CPacked_seqint & packed_int,CConstRef<CSeq_interval> & int_cur,CConstRef<CSeq_interval> & int_prv,CScope * scope,CompareConsecutiveIntervalProc compar)424 bool x_CompareConsecutiveIntervals
425 (const CPacked_seqint& packed_int,
426 CConstRef<CSeq_interval>& int_cur,
427 CConstRef<CSeq_interval>& int_prv,
428 CScope* scope,
429 CompareConsecutiveIntervalProc compar)
430 {
431 bool ok = true;
432 ITERATE(CPacked_seqint::Tdata, it, packed_int.Get()) {
433 int_cur = (*it);
434 if (int_prv && !compar(*int_cur, *int_prv, scope)) {
435 ok = false;
436 break;
437 }
438
439 int_prv = int_cur;
440 }
441 return ok;
442 }
443
444
CheckConsecutiveIntervals(const CSeq_loc & loc,CScope & scope,CompareConsecutiveIntervalProc compar)445 bool CheckConsecutiveIntervals(const CSeq_loc& loc, CScope& scope, CompareConsecutiveIntervalProc compar)
446 {
447 bool ok = true;
448 const CSeq_interval *int_cur = 0, *int_prv = 0;
449
450 CTypeConstIterator<CSeq_loc> lit = ConstBegin(loc);
451 for (; lit && ok; ++lit) {
452 CSeq_loc::E_Choice loc_choice = lit->Which();
453 switch (loc_choice) {
454 case CSeq_loc::e_Int:
455 {{
456 int_cur = &lit->GetInt();
457 if (int_prv) {
458 ok = compar(*int_cur, *int_prv, &scope);
459 }
460 int_prv = int_cur;
461 }}
462 break;
463 case CSeq_loc::e_Pnt:
464 int_prv = 0;
465 break;
466 case CSeq_loc::e_Packed_pnt:
467 int_prv = 0;
468 break;
469 case CSeq_loc::e_Packed_int:
470 {{
471 CConstRef<CSeq_interval> this_int_cur(int_cur);
472 CConstRef<CSeq_interval> this_int_prv(int_prv);
473 ok = x_CompareConsecutiveIntervals
474 (lit->GetPacked_int(), this_int_cur, this_int_prv, &scope, compar);
475 }}
476 break;
477 case CSeq_loc::e_Null:
478 break;
479 default:
480 int_prv = 0;
481 break;
482 }
483
484 }
485 return ok;
486 }
487
488
489
x_IsCorrectlyOrdered(const CSeq_interval & int_cur,const CSeq_interval & int_prv,CScope * scope)490 bool x_IsCorrectlyOrdered
491 (const CSeq_interval& int_cur,
492 const CSeq_interval& int_prv,
493 CScope* scope)
494 {
495 ENa_strand strand_cur = int_cur.IsSetStrand() ?
496 int_cur.GetStrand() : eNa_strand_unknown;
497
498 if (IsSameBioseq(int_prv.GetId(), int_cur.GetId(), scope)) {
499 if (strand_cur == eNa_strand_minus) {
500 if (int_prv.GetTo() < int_cur.GetTo()) {
501 return false;
502 }
503 }
504 else {
505 if (int_prv.GetTo() > int_cur.GetTo()) {
506 return false;
507 }
508 }
509 }
510 return true;
511 }
512
513
IsSeqLocCorrectlyOrdered(const CSeq_loc & loc,CScope & scope)514 bool CValidator::IsSeqLocCorrectlyOrdered(const CSeq_loc& loc, CScope& scope)
515 {
516 CBioseq_Handle seq;
517 try {
518 CBioseq_Handle seq = scope.GetBioseqHandle(loc);
519 } catch (CObjMgrException& ) {
520 // no way to tell
521 return true;
522 } catch (const exception& ) {
523 // no way to tell
524 return true;
525 }
526 if (seq && seq.GetInst_Topology() == CSeq_inst::eTopology_circular) {
527 // no way to check if topology is circular
528 return true;
529 }
530
531 return CheckConsecutiveIntervals(loc, scope, x_IsCorrectlyOrdered);
532 }
533
534
x_IsNotAdjacent(const CSeq_interval & int_cur,const CSeq_interval & int_prv,CScope * scope)535 bool x_IsNotAdjacent
536 (const CSeq_interval& int_cur,
537 const CSeq_interval& int_prv,
538 CScope* scope)
539 {
540 ENa_strand strand_cur = int_cur.IsSetStrand() ?
541 int_cur.GetStrand() : eNa_strand_unknown;
542
543 bool ok = true;
544 if (IsSameBioseq(int_prv.GetId(), int_cur.GetId(), scope)) {
545 if (strand_cur == eNa_strand_minus) {
546 if (int_cur.GetTo() + 1 == int_prv.GetFrom()) {
547 ok = false;
548 }
549 }
550 else {
551 if (int_prv.GetTo() + 1 == int_cur.GetFrom()) {
552 ok = false;
553 }
554 }
555 }
556 return ok;
557 }
558
559
DoesSeqLocContainAdjacentIntervals(const CSeq_loc & loc,CScope & scope)560 bool CValidator::DoesSeqLocContainAdjacentIntervals
561 (const CSeq_loc& loc, CScope &scope)
562 {
563 return !CheckConsecutiveIntervals(loc, scope, x_IsNotAdjacent);
564 }
565
566
x_SameStrand(const CSeq_interval & int1,const CSeq_interval & int2)567 bool x_SameStrand(const CSeq_interval& int1, const CSeq_interval& int2)
568 {
569 ENa_strand strand1 = int1.IsSetStrand() ?
570 int1.GetStrand() : eNa_strand_unknown;
571 ENa_strand strand2 = int2.IsSetStrand() ?
572 int2.GetStrand() : eNa_strand_unknown;
573 return (strand1 == strand2);
574 }
575
576
IsNotDuplicateInterval(const CSeq_interval & int1,const CSeq_interval & int2,CScope * scope)577 bool IsNotDuplicateInterval(const CSeq_interval& int1, const CSeq_interval& int2, CScope* scope)
578 {
579 if (IsSameBioseq(int1.GetId(), int2.GetId(), scope) &&
580 x_SameStrand(int1, int2) &&
581 int1.GetFrom() == int2.GetFrom() &&
582 int1.GetTo() == int2.GetTo()) {
583 return false;
584 }
585 return true;
586 }
587
DoesSeqLocContainDuplicateIntervals(const CSeq_loc & loc,CScope & scope)588 bool CValidator::DoesSeqLocContainDuplicateIntervals(const CSeq_loc& loc, CScope& scope)
589 {
590 return !CheckConsecutiveIntervals(loc, scope, IsNotDuplicateInterval);
591 }
592
593
ConvertCode(CSubSource::ELatLonCountryErr errcode)594 EErrType CValidator::ConvertCode(CSubSource::ELatLonCountryErr errcode)
595 {
596 EErrType rval = eErr_UNKNOWN;
597 switch (errcode) {
598 case CSubSource::eLatLonCountryErr_Country:
599 rval = eErr_SEQ_DESCR_LatLonCountry;
600 break;
601 case CSubSource::eLatLonCountryErr_State:
602 rval = eErr_SEQ_DESCR_LatLonState;
603 break;
604 case CSubSource::eLatLonCountryErr_Water:
605 rval = eErr_SEQ_DESCR_LatLonWater;
606 break;
607 case CSubSource::eLatLonCountryErr_Value:
608 rval = eErr_SEQ_DESCR_LatLonValue;
609 break;
610 default:
611 break;
612 }
613 return rval;
614 }
615
616
IsValidDbxref(const CDbtag & xref,bool is_biosource,bool is_refseq_or_gps)617 CValidator::TDbxrefValidFlags CValidator::IsValidDbxref(const CDbtag& xref, bool is_biosource, bool is_refseq_or_gps)
618 {
619 TDbxrefValidFlags flags = eValid;
620
621 if (xref.IsSetTag() && xref.GetTag().IsStr()) {
622 if (ContainsSgml(xref.GetTag().GetStr())) {
623 flags |= eTagHasSgml;
624 }
625
626 if (xref.GetTag().GetStr().find(' ') != string::npos) {
627 flags |= eContainsSpace;
628 }
629 }
630
631 if (!xref.IsSetDb()) {
632 return flags;
633 }
634 const string& db = xref.GetDb();
635 string dbv;
636 if (xref.IsSetTag() && xref.GetTag().IsStr()) {
637 dbv = xref.GetTag().GetStr();
638 }
639 else if (xref.IsSetTag() && xref.GetTag().IsId()) {
640 dbv = NStr::NumericToString(xref.GetTag().GetId());
641 }
642
643 if (ContainsSgml(db)) {
644 flags |= eDbHasSgml;
645 }
646
647 bool src_db = false;
648 bool refseq_db = false;
649 string correct_caps;
650
651 if (xref.GetDBFlags(refseq_db, src_db, correct_caps)) {
652 if (!NStr::EqualCase(correct_caps, db)) {
653 // capitalization is bad
654 flags |= eBadCapitalization;
655 }
656
657 if (is_biosource && !src_db) {
658 flags |= eNotForSource;
659 if (refseq_db && is_refseq_or_gps) {
660 flags |= eRefSeqNotForSource;
661 }
662 } else if (!is_biosource && src_db && NStr::EqualNocase(db, "taxon")) {
663 flags |= eOnlyForSource;
664 }
665 if (refseq_db && !is_refseq_or_gps) {
666 flags |= eOnlyForRefSeq;
667 }
668 } else {
669 flags |= eUnrecognized;
670 }
671 return flags;
672 }
673
674
675 //LCOV_EXCL_START
676 //code is not used
CCache(void)677 CCache::CCache(void)
678 {
679 m_impl.reset(new CCacheImpl);
680 }
681
682 CRef<CCache>
MakeEmptyCache(void)683 CValidator::MakeEmptyCache(void)
684 {
685 return Ref(new CCache);
686 }
687 //LCOV_EXCL_STOP
688
689 END_SCOPE(validator)
690 END_SCOPE(objects)
691 END_NCBI_SCOPE
692