1 /* $Id: validator.cpp 632625 2021-06-03 17:38:33Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko.......
27  *
28  * File Description:
29  *   Validates CSeq_entries and CSeq_submits
30  *
31  */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <serial/serialbase.hpp>
35 #include <objects/submit/Seq_submit.hpp>
36 #include <objects/seq/Bioseq.hpp>
37 #include <objects/seq/Seqdesc.hpp>
38 #include <objmgr/object_manager.hpp>
39 #include <objmgr/util/sequence.hpp>
40 #include <objtools/validator/validator.hpp>
41 #include <util/static_map.hpp>
42 #include <util/sgml_entity.hpp>
43 #include <objects/taxon3/itaxon3.hpp>
44 #include <objects/taxon3/taxon3.hpp>
45 #include <objects/taxon3/cached_taxon3.hpp>
46 
47 #include <objtools/validator/validatorp.hpp>
48 #include <objtools/validator/validerror_format.hpp>
49 
50 
51 BEGIN_NCBI_SCOPE
52 BEGIN_SCOPE(objects)
53 BEGIN_SCOPE(validator)
54 USING_SCOPE(sequence);
55 
56 
57 // *********************** CValidator implementation **********************
58 
59 
CValidator(CObjectManager & objmgr,AutoPtr<ITaxon3> taxon)60 CValidator::CValidator(CObjectManager& objmgr,
61     AutoPtr<ITaxon3> taxon) :
62     m_ObjMgr(&objmgr),
63     m_PrgCallback(0),
64     m_UserData(0)
65 {
66     if (taxon.get() == NULL) {
67         AutoPtr<ITaxon3> taxon3(new CTaxon3);
68         taxon3->Init();
69         m_Taxon = taxon3;
70     } else {
71         m_Taxon = taxon;
72     }
73     m_Taxon->Init();
74 }
75 
76 
~CValidator(void)77 CValidator::~CValidator(void)
78 {
79 }
80 
81 
Validate(const CSeq_entry & se,CScope * scope,Uint4 options)82 CConstRef<CValidError> CValidator::Validate
83 (const CSeq_entry& se,
84  CScope* scope,
85  Uint4 options)
86 {
87     CRef<CValidError> errors(new CValidError(&se));
88     CValidErrorFormat::SetSuppressionRules(se, *errors);
89     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
90     imp.SetProgressCallback(m_PrgCallback, m_UserData);
91     if ( !imp.Validate(se, 0, scope) ) {
92         errors.Reset();
93     }
94     return errors;
95 }
96 
97 
98 //LCOV_EXCL_START
99 // not used by asnvalidate, used by external programs
Validate(const CSeq_entry_Handle & seh,Uint4 options)100 CConstRef<CValidError> CValidator::Validate
101 (const CSeq_entry_Handle& seh,
102  Uint4 options)
103 {
104     static unsigned int num_e = 0, mult = 0;
105 
106     num_e++;
107     if (num_e % 200 == 0) {
108         num_e = 0;
109         mult++;
110     }
111 
112     CRef<CValidError> errors(new CValidError(&*seh.GetCompleteSeq_entry()));
113     CValidErrorFormat::SetSuppressionRules(seh, *errors);
114     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
115     imp.SetProgressCallback(m_PrgCallback, m_UserData);
116     if ( !imp.Validate(seh, 0) ) {
117         errors.Reset();
118     }
119     return errors;
120 }
121 
122 
GetTSANStretchErrors(const CSeq_entry_Handle & se)123 CConstRef<CValidError> CValidator::GetTSANStretchErrors(const CSeq_entry_Handle& se)
124 {
125     CRef<CValidError> errors(new CValidError(&*se.GetCompleteSeq_entry()));
126     CValidErrorFormat::SetSuppressionRules(se, *errors);
127     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
128     imp.SetProgressCallback(m_PrgCallback, m_UserData);
129     if ( !imp.GetTSANStretchErrors(se) ) {
130         errors.Reset();
131     }
132     return errors;
133 }
134 
135 
GetTSACDSOnMinusStrandErrors(const CSeq_entry_Handle & se)136 CConstRef<CValidError> CValidator::GetTSACDSOnMinusStrandErrors (const CSeq_entry_Handle& se)
137 {
138     CRef<CValidError> errors(new CValidError(&*se.GetCompleteSeq_entry()));
139     CValidErrorFormat::SetSuppressionRules(se, *errors);
140     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
141     imp.SetProgressCallback(m_PrgCallback, m_UserData);
142     if ( !imp.GetTSACDSOnMinusStrandErrors(se) ) {
143         errors.Reset();
144     }
145     return errors;
146 }
147 
148 
GetTSAConflictingBiomolTechErrors(const CSeq_entry_Handle & se)149 CConstRef<CValidError> CValidator::GetTSAConflictingBiomolTechErrors (const CSeq_entry_Handle& se)
150 {
151     CRef<CValidError> errors(new CValidError(&*se.GetCompleteSeq_entry()));
152     CValidErrorFormat::SetSuppressionRules(se, *errors);
153     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
154     imp.SetProgressCallback(m_PrgCallback, m_UserData);
155     if ( !imp.GetTSAConflictingBiomolTechErrors(se) ) {
156         errors.Reset();
157     }
158     return errors;
159 }
160 
161 
GetTSANStretchErrors(const CBioseq & seq)162 CConstRef<CValidError> CValidator::GetTSANStretchErrors(const CBioseq& seq)
163 {
164 
165     CRef<CValidError> errors(new CValidError(&seq));
166     CValidErrorFormat::SetSuppressionRules(seq, *errors);
167     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
168     imp.SetProgressCallback(m_PrgCallback, m_UserData);
169     if ( !imp.GetTSANStretchErrors(seq) ) {
170         errors.Reset();
171     }
172     return errors;
173 }
174 
175 
GetTSACDSOnMinusStrandErrors(const CSeq_feat & f,const CBioseq & seq)176 CConstRef<CValidError> CValidator::GetTSACDSOnMinusStrandErrors (const CSeq_feat& f, const CBioseq& seq)
177 {
178     CRef<CValidError> errors(new CValidError(&f));
179     CValidErrorFormat::SetSuppressionRules(seq, *errors);
180     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
181     imp.SetProgressCallback(m_PrgCallback, m_UserData);
182     if ( !imp.GetTSACDSOnMinusStrandErrors(f, seq) ) {
183         errors.Reset();
184     }
185     return errors;
186 }
187 
188 
GetTSAConflictingBiomolTechErrors(const CBioseq & seq)189 CConstRef<CValidError> CValidator::GetTSAConflictingBiomolTechErrors (const CBioseq& seq)
190 {
191     CRef<CValidError> errors(new CValidError(&seq));
192     CValidErrorFormat::SetSuppressionRules(seq, *errors);
193     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), 0);
194     imp.SetProgressCallback(m_PrgCallback, m_UserData);
195     if ( !imp.GetTSAConflictingBiomolTechErrors(seq) ) {
196         errors.Reset();
197     }
198     return errors;
199 }
200 //LCOV_EXCL_STOP
201 
202 
Validate(const CSeq_submit & ss,CScope * scope,Uint4 options)203 CConstRef<CValidError> CValidator::Validate
204 (const CSeq_submit& ss,
205  CScope* scope,
206  Uint4 options)
207 {
208     options |= CValidator::eVal_seqsubmit_parent;
209     CRef<CValidError> errors(new CValidError(&ss));
210     CValidErrorFormat::SetSuppressionRules(ss, *errors);
211     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
212     imp.Validate(ss, scope);
213     if (ss.IsSetSub() && ss.GetSub().IsSetContact() && ss.GetSub().GetContact().IsSetContact()
214         && ss.GetSub().GetContact().GetContact().IsSetAffil()
215         && ss.GetSub().GetContact().GetContact().GetAffil().IsStd()) {
216         imp.ValidateAffil(ss.GetSub().GetContact().GetContact().GetAffil().GetStd(),
217                              ss, 0);
218     }
219 
220     return errors;
221 }
222 
223 
Validate(const CSeq_annot_Handle & sah,Uint4 options)224 CConstRef<CValidError> CValidator::Validate
225 (const CSeq_annot_Handle& sah,
226  Uint4 options)
227 {
228     CConstRef<CSeq_annot> sar = sah.GetCompleteSeq_annot();
229     CRef<CValidError> errors(new CValidError(&*sar));
230     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
231     imp.Validate(sah);
232     return errors;
233 }
234 
235 
Validate(const CSeq_feat & feat,CScope * scope,Uint4 options)236 CConstRef<CValidError> CValidator::Validate
237 (const CSeq_feat& feat,
238  CScope *scope,
239  Uint4 options)
240 {
241     CRef<CValidError> errors(new CValidError(&feat));
242     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
243     imp.Validate(feat, scope);
244     return errors;
245 }
246 
247 
248 //LCOV_EXCL_START
249 //not used by asnvalidate
Validate(const CSeq_feat & feat,Uint4 options)250 CConstRef<CValidError> CValidator::Validate
251 (const CSeq_feat& feat,
252  Uint4 options)
253 {
254     return Validate(feat, NULL, options);
255 }
256 //LCOV_EXCL_STOP
Validate(const CBioSource & src,CScope * scope,Uint4 options)257 CConstRef<CValidError> CValidator::Validate
258 (const CBioSource& src,
259  CScope *scope,
260  Uint4 options)
261 {
262     CRef<CValidError> errors(new CValidError(&src));
263     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
264     imp.Validate(src, scope);
265     return errors;
266 }
267 
268 //LCOV_EXCL_START
269 //not used by asnvalidate
Validate(const CBioSource & src,Uint4 options)270 CConstRef<CValidError> CValidator::Validate
271 (const CBioSource& src,
272  Uint4 options)
273 {
274     return Validate(src, NULL, options);
275 }
276 //LCOV_EXCL_STOP
277 
Validate(const CPubdesc & pubdesc,CScope * scope,Uint4 options)278 CConstRef<CValidError> CValidator::Validate
279 (const CPubdesc& pubdesc,
280  CScope *scope,
281  Uint4 options)
282 {
283     CRef<CValidError> errors(new CValidError(&pubdesc));
284     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
285     imp.Validate(pubdesc, scope);
286     return errors;
287 }
288 
289 //LCOV_EXCL_START
290 //not used by asnvalidate
Validate(const CPubdesc & pubdesc,Uint4 options)291 CConstRef<CValidError> CValidator::Validate
292 (const CPubdesc& pubdesc,
293  Uint4 options)
294 {
295     return Validate(pubdesc, NULL, options);
296 }
297 //LCOV_EXCL_STOP
298 
Validate(const CSeqdesc & desc,const CSeq_entry & ctx,Uint4 options)299 CConstRef<CValidError> CValidator::Validate
300 (const CSeqdesc& desc,
301  const CSeq_entry& ctx,
302  Uint4 options)
303 {
304     CRef<CValidError> errors(new CValidError(&desc));
305     CValidError_imp imp(*m_ObjMgr, &(*errors), m_Taxon.get(), options);
306     imp.Validate(desc, ctx);
307     return errors;
308 }
309 
SetProgressCallback(TProgressCallback callback,void * user_data)310 void CValidator::SetProgressCallback(TProgressCallback callback, void* user_data)
311 {
312     m_PrgCallback = callback;
313     m_UserData = user_data;
314 }
315 
316 
BadCharsInAuthorName(const string & str,bool allowcomma,bool allowperiod,bool last)317 bool CValidator::BadCharsInAuthorName(const string& str, bool allowcomma, bool allowperiod, bool last)
318 {
319     if (NStr::IsBlank(str)) {
320         return false;
321     }
322 
323 
324     size_t stp = string::npos;
325     if (last) {
326         if (NStr::StartsWith(str, "St.")) {
327             stp = 2;
328         }
329         else if (NStr::StartsWith(str, "de M.")) {
330             stp = 4;
331         }
332     }
333 
334     size_t pos = 0;
335     const char *ptr = str.c_str();
336 
337     while (*ptr != 0) {
338         if (isalpha(*ptr)
339             || *ptr == '-'
340             || *ptr == '\''
341             || *ptr == ' '
342             || (*ptr == ',' && allowcomma)
343             || (*ptr == '.' && (allowperiod || pos == stp))) {
344             // all these are ok
345             ptr++;
346             pos++;
347         } else {
348             string tail = str.substr(pos);
349             if (NStr::Equal(tail, "2nd") ||
350                 NStr::Equal(tail, "3rd") ||
351                 NStr::Equal(tail, "4th") ||
352                 NStr::Equal(tail, "5th") ||
353                 NStr::Equal(tail, "6th")) {
354                 return false;
355             }
356             return true;
357         }
358     }
359     return false;
360 }
361 
362 
BadCharsInAuthorLastName(const string & str)363 bool CValidator::BadCharsInAuthorLastName(const string& str)
364 {
365     if (NStr::EqualNocase(str, "et al.")) {
366         // this is ok
367         return false;
368     } else {
369         return BadCharsInAuthorName(str, false, false, true);
370     }
371 }
372 
BadCharsInAuthorFirstName(const string & str)373 bool CValidator::BadCharsInAuthorFirstName(const string& str)
374 {
375     return BadCharsInAuthorName(str, false, true, false);
376 }
377 
378 
BadCharsInAuthorInitials(const string & str)379 bool CValidator::BadCharsInAuthorInitials(const string& str)
380 {
381     return BadCharsInAuthorName(str, false, true, false);
382 }
383 
384 
BadCharsInAuthorSuffix(const string & str)385 bool CValidator::BadCharsInAuthorSuffix(const string& str)
386 {
387     return BadCharsInAuthorName(str, false, true, false);
388 }
389 
390 
BadCharsInAuthor(const CName_std & author,bool & last_is_bad)391 string CValidator::BadCharsInAuthor(const CName_std& author, bool& last_is_bad)
392 {
393     string badauthor;
394     last_is_bad = false;
395 
396     if (author.IsSetLast() && BadCharsInAuthorLastName(author.GetLast())) {
397         last_is_bad = true;
398         badauthor = author.GetLast();
399     } else if (author.IsSetFirst() && BadCharsInAuthorFirstName(author.GetFirst())) {
400         badauthor = author.GetFirst();
401     }
402     else if (author.IsSetInitials() && BadCharsInAuthorInitials(author.GetInitials())) {
403         badauthor = author.GetInitials();
404     } else if (author.IsSetSuffix() && BadCharsInAuthorSuffix(author.GetSuffix())) {
405         badauthor = author.GetSuffix();
406     }
407     return badauthor;
408 }
409 
410 
BadCharsInAuthor(const CAuthor & author,bool & last_is_bad)411 string CValidator::BadCharsInAuthor(const CAuthor& author, bool& last_is_bad)
412 {
413     last_is_bad = false;
414     if (author.IsSetName() && author.GetName().IsName()) {
415         return BadCharsInAuthor(author.GetName().GetName(), last_is_bad);
416     } else {
417         return kEmptyStr;
418     }
419 }
420 
421 
422 typedef bool(*CompareConsecutiveIntervalProc) (const CSeq_interval& int1, const CSeq_interval& int2, CScope *scope);
423 
x_CompareConsecutiveIntervals(const CPacked_seqint & packed_int,CConstRef<CSeq_interval> & int_cur,CConstRef<CSeq_interval> & int_prv,CScope * scope,CompareConsecutiveIntervalProc compar)424 bool x_CompareConsecutiveIntervals
425 (const CPacked_seqint& packed_int,
426 CConstRef<CSeq_interval>& int_cur,
427 CConstRef<CSeq_interval>& int_prv,
428 CScope* scope,
429 CompareConsecutiveIntervalProc compar)
430 {
431     bool ok = true;
432     ITERATE(CPacked_seqint::Tdata, it, packed_int.Get()) {
433         int_cur = (*it);
434         if (int_prv && !compar(*int_cur, *int_prv, scope)) {
435             ok = false;
436             break;
437         }
438 
439         int_prv = int_cur;
440     }
441     return ok;
442 }
443 
444 
CheckConsecutiveIntervals(const CSeq_loc & loc,CScope & scope,CompareConsecutiveIntervalProc compar)445 bool CheckConsecutiveIntervals(const CSeq_loc& loc, CScope& scope, CompareConsecutiveIntervalProc compar)
446 {
447     bool ok = true;
448     const CSeq_interval *int_cur = 0, *int_prv = 0;
449 
450     CTypeConstIterator<CSeq_loc> lit = ConstBegin(loc);
451     for (; lit && ok; ++lit) {
452         CSeq_loc::E_Choice loc_choice = lit->Which();
453         switch (loc_choice) {
454         case CSeq_loc::e_Int:
455             {{
456             int_cur = &lit->GetInt();
457             if (int_prv) {
458                 ok = compar(*int_cur, *int_prv, &scope);
459             }
460             int_prv = int_cur;
461             }}
462             break;
463         case CSeq_loc::e_Pnt:
464             int_prv = 0;
465             break;
466         case CSeq_loc::e_Packed_pnt:
467             int_prv = 0;
468             break;
469         case CSeq_loc::e_Packed_int:
470         {{
471             CConstRef<CSeq_interval> this_int_cur(int_cur);
472             CConstRef<CSeq_interval> this_int_prv(int_prv);
473             ok = x_CompareConsecutiveIntervals
474                 (lit->GetPacked_int(), this_int_cur, this_int_prv, &scope, compar);
475             }}
476             break;
477         case CSeq_loc::e_Null:
478             break;
479         default:
480             int_prv = 0;
481             break;
482         }
483 
484     }
485     return ok;
486 }
487 
488 
489 
x_IsCorrectlyOrdered(const CSeq_interval & int_cur,const CSeq_interval & int_prv,CScope * scope)490 bool x_IsCorrectlyOrdered
491 (const CSeq_interval& int_cur,
492  const CSeq_interval& int_prv,
493  CScope* scope)
494 {
495     ENa_strand strand_cur = int_cur.IsSetStrand() ?
496         int_cur.GetStrand() : eNa_strand_unknown;
497 
498     if (IsSameBioseq(int_prv.GetId(), int_cur.GetId(), scope)) {
499         if (strand_cur == eNa_strand_minus) {
500             if (int_prv.GetTo() < int_cur.GetTo()) {
501                 return false;
502             }
503         }
504         else {
505             if (int_prv.GetTo() > int_cur.GetTo()) {
506                 return false;
507             }
508         }
509     }
510     return true;
511 }
512 
513 
IsSeqLocCorrectlyOrdered(const CSeq_loc & loc,CScope & scope)514 bool CValidator::IsSeqLocCorrectlyOrdered(const CSeq_loc& loc, CScope& scope)
515 {
516     CBioseq_Handle seq;
517     try {
518         CBioseq_Handle seq = scope.GetBioseqHandle(loc);
519     } catch (CObjMgrException& ) {
520         // no way to tell
521         return true;
522     } catch (const exception& ) {
523         // no way to tell
524         return true;
525     }
526     if (seq  &&  seq.GetInst_Topology() == CSeq_inst::eTopology_circular) {
527         // no way to check if topology is circular
528         return true;
529     }
530 
531     return CheckConsecutiveIntervals(loc, scope, x_IsCorrectlyOrdered);
532 }
533 
534 
x_IsNotAdjacent(const CSeq_interval & int_cur,const CSeq_interval & int_prv,CScope * scope)535 bool x_IsNotAdjacent
536 (const CSeq_interval& int_cur,
537 const CSeq_interval& int_prv,
538 CScope* scope)
539 {
540     ENa_strand strand_cur = int_cur.IsSetStrand() ?
541         int_cur.GetStrand() : eNa_strand_unknown;
542 
543     bool ok = true;
544     if (IsSameBioseq(int_prv.GetId(), int_cur.GetId(), scope)) {
545         if (strand_cur == eNa_strand_minus) {
546             if (int_cur.GetTo() + 1 == int_prv.GetFrom()) {
547                 ok = false;
548             }
549         }
550         else {
551             if (int_prv.GetTo() + 1 == int_cur.GetFrom()) {
552                 ok = false;
553             }
554         }
555     }
556     return ok;
557 }
558 
559 
DoesSeqLocContainAdjacentIntervals(const CSeq_loc & loc,CScope & scope)560 bool CValidator::DoesSeqLocContainAdjacentIntervals
561 (const CSeq_loc& loc, CScope &scope)
562 {
563     return !CheckConsecutiveIntervals(loc, scope, x_IsNotAdjacent);
564 }
565 
566 
x_SameStrand(const CSeq_interval & int1,const CSeq_interval & int2)567 bool x_SameStrand(const CSeq_interval& int1, const CSeq_interval& int2)
568 {
569     ENa_strand strand1 = int1.IsSetStrand() ?
570         int1.GetStrand() : eNa_strand_unknown;
571     ENa_strand strand2 = int2.IsSetStrand() ?
572         int2.GetStrand() : eNa_strand_unknown;
573     return (strand1 == strand2);
574 }
575 
576 
IsNotDuplicateInterval(const CSeq_interval & int1,const CSeq_interval & int2,CScope * scope)577 bool IsNotDuplicateInterval(const CSeq_interval& int1, const CSeq_interval& int2, CScope* scope)
578 {
579     if (IsSameBioseq(int1.GetId(), int2.GetId(), scope) &&
580         x_SameStrand(int1, int2) &&
581         int1.GetFrom() == int2.GetFrom() &&
582         int1.GetTo() == int2.GetTo()) {
583         return false;
584     }
585     return true;
586 }
587 
DoesSeqLocContainDuplicateIntervals(const CSeq_loc & loc,CScope & scope)588 bool CValidator::DoesSeqLocContainDuplicateIntervals(const CSeq_loc& loc, CScope& scope)
589 {
590     return !CheckConsecutiveIntervals(loc, scope, IsNotDuplicateInterval);
591 }
592 
593 
ConvertCode(CSubSource::ELatLonCountryErr errcode)594 EErrType CValidator::ConvertCode(CSubSource::ELatLonCountryErr errcode)
595 {
596     EErrType rval = eErr_UNKNOWN;
597     switch (errcode) {
598     case CSubSource::eLatLonCountryErr_Country:
599         rval = eErr_SEQ_DESCR_LatLonCountry;
600         break;
601     case CSubSource::eLatLonCountryErr_State:
602         rval = eErr_SEQ_DESCR_LatLonState;
603         break;
604     case CSubSource::eLatLonCountryErr_Water:
605         rval = eErr_SEQ_DESCR_LatLonWater;
606         break;
607     case CSubSource::eLatLonCountryErr_Value:
608         rval = eErr_SEQ_DESCR_LatLonValue;
609         break;
610     default:
611         break;
612     }
613     return rval;
614 }
615 
616 
IsValidDbxref(const CDbtag & xref,bool is_biosource,bool is_refseq_or_gps)617 CValidator::TDbxrefValidFlags CValidator::IsValidDbxref(const CDbtag& xref, bool is_biosource, bool is_refseq_or_gps)
618 {
619     TDbxrefValidFlags flags = eValid;
620 
621     if (xref.IsSetTag() && xref.GetTag().IsStr()) {
622         if (ContainsSgml(xref.GetTag().GetStr())) {
623             flags |= eTagHasSgml;
624         }
625 
626         if (xref.GetTag().GetStr().find(' ') != string::npos) {
627             flags |= eContainsSpace;
628         }
629     }
630 
631     if (!xref.IsSetDb()) {
632         return flags;
633     }
634     const string& db = xref.GetDb();
635     string dbv;
636     if (xref.IsSetTag() && xref.GetTag().IsStr()) {
637         dbv = xref.GetTag().GetStr();
638     }
639     else if (xref.IsSetTag() && xref.GetTag().IsId()) {
640         dbv = NStr::NumericToString(xref.GetTag().GetId());
641     }
642 
643     if (ContainsSgml(db)) {
644         flags |= eDbHasSgml;
645     }
646 
647     bool src_db = false;
648     bool refseq_db = false;
649     string correct_caps;
650 
651     if (xref.GetDBFlags(refseq_db, src_db, correct_caps)) {
652         if (!NStr::EqualCase(correct_caps, db)) {
653             // capitalization is bad
654             flags |= eBadCapitalization;
655         }
656 
657         if (is_biosource && !src_db) {
658             flags |= eNotForSource;
659             if (refseq_db && is_refseq_or_gps) {
660                 flags |= eRefSeqNotForSource;
661             }
662         } else if (!is_biosource && src_db && NStr::EqualNocase(db, "taxon")) {
663             flags |= eOnlyForSource;
664         }
665         if (refseq_db && !is_refseq_or_gps) {
666             flags |= eOnlyForRefSeq;
667         }
668     } else {
669         flags |= eUnrecognized;
670     }
671     return flags;
672 }
673 
674 
675 //LCOV_EXCL_START
676 //code is not used
CCache(void)677 CCache::CCache(void)
678 {
679     m_impl.reset(new CCacheImpl);
680 }
681 
682 CRef<CCache>
MakeEmptyCache(void)683 CValidator::MakeEmptyCache(void)
684 {
685     return Ref(new CCache);
686 }
687 //LCOV_EXCL_STOP
688 
689 END_SCOPE(validator)
690 END_SCOPE(objects)
691 END_NCBI_SCOPE
692