1 /*  $Id: valid_pub.cpp 632625 2021-06-03 17:38:33Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko, Mati Shomrat, ....
27  *
28  * File Description:
29  *   Implementation of private parts of the validator
30  *   .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <objtools/validator/validerror_desc.hpp>
37 #include <objtools/validator/validerror_bioseq.hpp>
38 #include <objects/general/Person_id.hpp>
39 #include <objects/general/Name_std.hpp>
40 #include <objects/seq/Seqdesc.hpp>
41 #include <objects/seq/Seq_descr.hpp>
42 #include <objects/seq/Pubdesc.hpp>
43 #include <objects/seqfeat/Seq_feat.hpp>
44 #include <objmgr/bioseq_ci.hpp>
45 #include <objmgr/seqdesc_ci.hpp>
46 #include <objmgr/scope.hpp>
47 
48 #include <objects/pub/Pub.hpp>
49 #include <objects/pub/Pub_equiv.hpp>
50 
51 #include <objects/biblio/Author.hpp>
52 #include <objects/biblio/Auth_list.hpp>
53 #include <objects/biblio/Cit_art.hpp>
54 #include <objects/biblio/Cit_book.hpp>
55 #include <objects/biblio/Cit_gen.hpp>
56 #include <objects/biblio/Cit_jour.hpp>
57 #include <objects/biblio/Cit_let.hpp>
58 #include <objects/biblio/Cit_proc.hpp>
59 #include <objects/biblio/Cit_sub.hpp>
60 #include <objects/biblio/PubMedId.hpp>
61 #include <objects/biblio/PubStatus.hpp>
62 #include <objects/biblio/Title.hpp>
63 #include <objects/biblio/Imprint.hpp>
64 #include <objects/biblio/Affil.hpp>
65 #include <objects/misc/sequence_macros.hpp>
66 
67 #define NCBI_USE_ERRCODE_X   Objtools_Validator
68 
69 BEGIN_NCBI_SCOPE
70 BEGIN_SCOPE(objects)
71 BEGIN_SCOPE(validator)
72 using namespace sequence;
73 
74 
ValidatePubdesc(const CPubdesc & pubdesc,const CSerialObject & obj,const CSeq_entry * ctx)75 void CValidError_imp::ValidatePubdesc
76 (const CPubdesc& pubdesc,
77  const CSerialObject& obj,
78  const CSeq_entry *ctx)
79 {
80     if (!pubdesc.IsSetPub() || pubdesc.GetPub().Get().empty()) {
81         PostObjErr(eDiag_Fatal, eErr_SEQ_DESCR_NoPubFound,
82                 "Empty publication descriptor", obj, ctx);
83         return;
84     }
85     TEntrezId uid = ZERO_ENTREZ_ID, pmid = ZERO_ENTREZ_ID, muid = ZERO_ENTREZ_ID;
86     bool conflicting_pmids = false, redundant_pmids = false, conflicting_muids = false, redundant_muids = false;
87 
88     ValidatePubHasAuthor(pubdesc, obj, ctx);
89 
90     // need to get uid (pmid or muid) in first pass for ValidatePubArticle
91     FOR_EACH_PUB_ON_PUBDESC (pub_iter, pubdesc) {
92         const CPub& pub = **pub_iter;
93 
94         switch( pub.Which() ) {
95         case CPub::e_Muid:
96             if ( muid == ZERO_ENTREZ_ID ) {
97                 muid = pub.GetMuid();
98             } else if ( muid != pub.GetMuid() ) {
99                 conflicting_muids = true;
100             } else {
101                 redundant_muids = true;
102             }
103             if ( uid == ZERO_ENTREZ_ID ) {
104                 uid = pub.GetMuid();
105             }
106             break;
107 
108         case CPub::e_Pmid:
109             if ( pmid == ZERO_ENTREZ_ID ) {
110                 pmid = pub.GetPmid();
111             } else if ( pmid != pub.GetPmid() ) {
112                 conflicting_pmids = true;
113             } else {
114                 redundant_pmids = true;
115             }
116             if ( uid == ZERO_ENTREZ_ID ) {
117                 uid = pub.GetPmid();
118             }
119             break;
120 
121         default:
122             break;
123         }
124     }
125 
126     if ( conflicting_pmids ) {
127         PostObjErr(eDiag_Warning, eErr_SEQ_DESCR_CollidingPublications,
128                 "Multiple conflicting pmids in a single publication", obj, ctx);
129     }
130     if ( redundant_pmids ) {
131         PostObjErr(eDiag_Warning, eErr_SEQ_DESCR_CollidingPublications,
132                 "Multiple redundant pmids in a single publication", obj, ctx);
133     }
134     if ( conflicting_muids ) {
135         PostObjErr(eDiag_Warning, eErr_SEQ_DESCR_CollidingPublications,
136                 "Multiple conflicting muids in a single publication", obj, ctx);
137     }
138     if ( redundant_muids ) {
139         PostObjErr(eDiag_Warning, eErr_SEQ_DESCR_CollidingPublications,
140                 "Multiple redundant muids in a single publication", obj, ctx);
141     }
142 
143     // second pass for remaining (non-uid) types
144     FOR_EACH_PUB_ON_PUBDESC (pub_iter, pubdesc) {
145         const CPub& pub = **pub_iter;
146 
147         switch( pub.Which() ) {
148         case CPub::e_Gen:
149             ValidatePubGen(pub.GetGen(), obj, ctx);
150             break;
151 
152         case CPub::e_Sub:
153             ValidateCitSub(pub.GetSub(), obj, ctx);
154             break;
155 
156         case CPub::e_Medline:
157             PostObjErr(eDiag_Error, eErr_GENERIC_MedlineEntryPub,
158                 "Publication is medline entry", obj, ctx);
159             break;
160 
161         /*
162         case CPub::e_Muid:
163             if ( uid == 0 ) {
164                 uid = pub.GetMuid();
165             }
166             break;
167 
168         case CPub::e_Pmid:
169             if ( uid == 0 ) {
170                 uid = pub.GetPmid();
171             }
172             break;
173         */
174 
175         case CPub::e_Article:
176             ValidatePubArticle(pub.GetArticle(), uid, obj, ctx);
177             if (pubdesc.IsSetComment() && !NStr::IsBlank(pubdesc.GetComment())
178                 && pub.GetArticle().IsSetFrom() && pub.GetArticle().GetFrom().IsJournal()
179                 && pub.GetArticle().GetFrom().GetJournal().IsSetImp()
180                 && pub.GetArticle().GetFrom().GetJournal().GetImp().IsSetPubstatus()) {
181                 CImprint::TPubstatus pubstatus = pub.GetArticle().GetFrom().GetJournal().GetImp().GetPubstatus();
182                 const string& comment = pubdesc.GetComment();
183                 if ((pubstatus == ePubStatus_epublish
184                      || pubstatus == ePubStatus_ppublish
185                      || pubstatus == ePubStatus_aheadofprint)
186                     && (NStr::Find(comment, "Publication Status") != string::npos
187                         || NStr::Find(comment, "Publication-Status") != string::npos
188                         || NStr::Find(comment, "Publication_Status") != string::npos)) {
189                     PostObjErr(eDiag_Warning, eErr_GENERIC_UnexpectedPubStatusComment,
190                                "Publication status is in comment for pmid " + NStr::NumericToString (uid),
191                                obj, ctx);
192                 }
193             }
194             break;
195 
196         case CPub::e_Equiv:
197             PostObjErr(eDiag_Warning, eErr_GENERIC_UnnecessaryPubEquiv,
198                 "Publication has unexpected internal Pub-equiv", obj, ctx);
199             break;
200 
201         default:
202             break;
203         }
204     }
205     if (pubdesc.IsSetPub()) {
206         ValidateAuthorsInPubequiv (pubdesc.GetPub(), obj, ctx);
207     }
208 }
209 
210 
s_CitGenIsJustBackBoneIDNumber(const CCit_gen & gen)211 static bool s_CitGenIsJustBackBoneIDNumber (const CCit_gen& gen)
212 {
213     if (gen.IsSetCit()
214         && NStr::StartsWith (gen.GetCit(), "BackBone id_pub = ")
215         && !gen.IsSetJournal()
216         && !gen.IsSetDate()
217         && !gen.IsSetSerial_number()) {
218         return true;
219     } else {
220         return false;
221     }
222 }
223 
224 
ValidatePubGen(const CCit_gen & gen,const CSerialObject & obj,const CSeq_entry * ctx)225 void CValidError_imp::ValidatePubGen
226 (const CCit_gen& gen,
227  const CSerialObject& obj,
228  const CSeq_entry *ctx)
229 {
230     bool is_unpub = false;
231     if ( gen.IsSetCit()  &&  !gen.GetCit().empty() ) {
232         const string& cit = gen.GetCit();
233         // skip if just BackBone id number
234         if (s_CitGenIsJustBackBoneIDNumber(gen)) {
235             return;
236         }
237 
238         if (NStr::StartsWith (cit, "submitted", NStr::eNocase)
239             || NStr::StartsWith (cit, "unpublished", NStr::eNocase)
240             || NStr::StartsWith (cit, "Online Publication", NStr::eNocase)
241             || NStr::StartsWith (cit, "Published Only in DataBase", NStr::eNocase)
242             || NStr::StartsWith (cit, "(er) ", NStr::eNocase)) {
243             is_unpub = true;
244         } else {
245             PostObjErr(eDiag_Error, eErr_GENERIC_MissingPubRequirement,
246                 "Unpublished citation text invalid", obj, ctx);
247         }
248 
249         if (NStr::FindCase (cit, "Title=") != string::npos) {
250             PostObjErr(eDiag_Error, eErr_GENERIC_StructuredCitGenCit,
251                     "Unpublished citation has embedded Title", obj, ctx);
252         }
253         if (NStr::FindCase (cit, "Journal=") != string::npos) {
254             PostObjErr(eDiag_Error, eErr_GENERIC_StructuredCitGenCit,
255                     "Unpublished citation has embedded Journal", obj, ctx);
256         }
257 
258     }
259     if (gen.IsSetSerial_number()) {
260         m_PubSerialNumbers.push_back(gen.GetSerial_number());
261         /* date not required if just serial number */
262         if (!gen.IsSetCit() && !gen.IsSetJournal() && !gen.IsSetDate()) {
263             return;
264         }
265     }
266     if (!gen.IsSetDate()) {
267         if (!is_unpub) {
268             PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement, "Publication date missing", obj, ctx);
269         }
270     } else if (gen.GetDate().IsStr()) {
271         if (NStr::Equal(gen.GetDate().GetStr(), "?")) {
272             PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement, "Publication date marked as '?'", obj, ctx);
273         }
274     } else if (gen.GetDate().IsStd() && (!gen.GetDate().GetStd().IsSetYear() || gen.GetDate().GetStd().GetYear() == 0)) {
275         PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement, "Publication date not set", obj, ctx);
276     } else {
277         int rval = CheckDate (gen.GetDate());
278         if (rval != eDateValid_valid) {
279             PostBadDateError (eDiag_Error, "Publication date has error", rval, obj, ctx);
280         }
281     }
282 }
283 
284 
IsElectronicJournal(const CCit_jour & journal)285 bool IsElectronicJournal(const CCit_jour& journal)
286 {
287     bool is_electronic_journal = false;
288     if (journal.IsSetTitle()) {
289         ITERATE(CTitle::Tdata, item, journal.GetTitle().Get()) {
290             if ((*item)->Which() == CTitle::C_E::e_Name
291                 && NStr::StartsWith((*item)->GetName(), "(er)")) {
292                 is_electronic_journal = true;
293                 break;
294             }
295         }
296     }
297     if (journal.IsSetImp() && journal.GetImp().IsSetPubstatus()) {
298         CImprint::TPubstatus pubstatus = journal.GetImp().GetPubstatus();
299         if (pubstatus == ePubStatus_epublish || pubstatus == ePubStatus_aheadofprint) {
300             is_electronic_journal = true;
301         }
302     }
303     return is_electronic_journal;
304 }
305 
306 
IsInpress(const CCit_jour & jour)307 static bool IsInpress(const CCit_jour& jour)
308 {
309     if (jour.IsSetImp() &&
310         jour.GetImp().IsSetPrepub() &&
311         jour.GetImp().GetPrepub() == CImprint::ePrepub_in_press) {
312         return true;
313     } else {
314         return false;
315     }
316 }
317 
318 
ValidatePubArticle(const CCit_art & art,TEntrezId uid,const CSerialObject & obj,const CSeq_entry * ctx)319 void CValidError_imp::ValidatePubArticle
320 (const CCit_art& art,
321  TEntrezId uid,
322  const CSerialObject& obj,
323  const CSeq_entry *ctx)
324 {
325     if ( !art.IsSetTitle()  ||  !HasTitle(art.GetTitle()) ) {
326         PostObjErr(eDiag_Error, eErr_GENERIC_MissingPubRequirement,
327             "Publication has no title", obj, ctx);
328     }
329 
330     if (art.GetFrom().IsJournal()) {
331         const CCit_jour& jour = art.GetFrom().GetJournal();
332 
333         bool has_iso_jta = HasIsoJTA(jour.GetTitle());
334         bool is_electronic_journal = IsElectronicJournal(art.GetFrom().GetJournal());
335 
336         if (!HasTitle(jour.GetTitle())) {
337             PostObjErr(eDiag_Error, eErr_GENERIC_MissingPubRequirement,
338                 "Journal title missing", obj, ctx);
339         }
340 
341         if (uid == ZERO_ENTREZ_ID) {
342             ValidatePubArticleNoPMID(art, obj, ctx);
343         }
344 
345         if ( !has_iso_jta && !is_electronic_journal  &&
346             (uid > ZERO_ENTREZ_ID || IsRequireISOJTA() || IsInpress(jour))) {
347             PostObjErr(eDiag_Warning, eErr_GENERIC_MissingISOJTA,
348                 "ISO journal title abbreviation missing", obj, ctx);
349         }
350     }
351 }
352 
353 
ValidatePubArticleNoPMID(const CCit_art & art,const CSerialObject & obj,const CSeq_entry * ctx)354 void CValidError_imp::ValidatePubArticleNoPMID
355 (const CCit_art& art,
356 const CSerialObject& obj,
357 const CSeq_entry *ctx)
358 {
359     if (!art.GetFrom().IsJournal()) {
360         return;
361     }
362     const CCit_jour& jour = art.GetFrom().GetJournal();
363     if (!jour.IsSetImp()) {
364         return;
365     }
366 
367     bool in_press = false;
368     bool is_electronic_journal = IsElectronicJournal(jour);
369 
370     const CImprint& imp = jour.GetImp();
371 
372     if (imp.CanGetPrepub()) {
373         in_press = imp.GetPrepub() == CImprint::ePrepub_in_press;
374         if (in_press) {
375             if (imp.IsSetPages()) {
376                 if (!NStr::IsBlank(imp.GetPages())) {
377                     PostObjErr(eDiag_Warning, eErr_GENERIC_PublicationInconsistency,
378                         "In-press is not expected to have page numbers", obj, ctx);
379                 }
380             }
381             if ((!imp.IsSetDate()) || (imp.GetDate().IsStr() && NStr::Equal(imp.GetDate().GetStr(), "?"))) {
382                 PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
383                     "In-press is missing the date", obj, ctx);
384             }
385         }
386     }
387 
388     if (!imp.IsSetPrepub() &&
389         (!imp.CanGetPubstatus() ||
390         imp.GetPubstatus() != ePubStatus_aheadofprint)) {
391         bool no_vol = !imp.IsSetVolume() ||
392             NStr::IsBlank(imp.GetVolume());
393         bool no_pages = !imp.IsSetPages() ||
394             NStr::IsBlank(imp.GetPages());
395         if (no_vol) {
396             if (is_electronic_journal) {
397                 PostObjErr(eDiag_Info, eErr_GENERIC_MissingVolumeEpub,
398                     "Electronic journal volume missing", obj, ctx);
399             } else {
400                 PostObjErr(eDiag_Warning, eErr_GENERIC_MissingVolume,
401                     "Journal volume missing", obj, ctx);
402             }
403         }
404         if (no_pages) {
405             if (is_electronic_journal) {
406                 PostObjErr(eDiag_Info, eErr_GENERIC_MissingPagesEpub,
407                     "Electronic journal pages missing", obj, ctx);
408             } else {
409                 PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPages,
410                     "Journal pages missing", obj, ctx);
411             }
412         }
413 
414         if (!no_pages && !is_electronic_journal) {
415             x_ValidatePages(imp.GetPages(), obj, ctx);
416         }
417         if (imp.IsSetDate() && imp.GetDate().Which() != CDate::e_not_set) {
418             if (imp.GetDate().IsStr() && NStr::Equal(imp.GetDate().GetStr(), "?")) {
419                 PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
420                     "Publication date marked as '?'", obj, ctx);
421             } else if (imp.GetDate().IsStd()) {
422                 if (!imp.GetDate().GetStd().IsSetYear()) {
423                     PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
424                         "Publication date missing", obj, ctx);
425                 } else if (imp.GetDate().GetStd().GetYear() == 0) {
426                     PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
427                         "Publication date not set", obj, ctx);
428                 } else {
429                     int rval = CheckDate(imp.GetDate());
430                     if (rval != eDateValid_valid) {
431                         PostBadDateError(eDiag_Error, "Publication date has error", rval, obj, ctx);
432                     }
433                 }
434             }
435         } else {
436             PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
437                 "Publication date missing", obj, ctx);
438         }
439     }
440     if (imp.IsSetPubstatus()) {
441         CImprint::TPubstatus pubstatus = imp.GetPubstatus();
442         if (pubstatus == ePubStatus_aheadofprint
443             && (!imp.IsSetPrepub() || imp.GetPrepub() != CImprint::ePrepub_in_press)) {
444             bool noVol = !imp.IsSetVolume() || NStr::IsBlank(imp.GetVolume());
445             bool noPages = !imp.IsSetPages() || NStr::IsBlank(imp.GetPages());
446             if (!noVol && !noPages) {
447                 PostObjErr(eDiag_Warning, eErr_GENERIC_PublicationInconsistency,
448                     "Ahead-of-print without in-press", obj, ctx);
449             }
450         }
451         if (pubstatus == ePubStatus_epublish
452             && imp.IsSetPrepub() && imp.GetPrepub() == CImprint::ePrepub_in_press) {
453             PostObjErr(eDiag_Warning, eErr_GENERIC_PublicationInconsistency,
454                 "Electronic-only publication should not also be in-press", obj, ctx);
455         }
456     }
457 }
458 
459 
// Extract the single run of decimal digits contained in 'pages'.
//
// Leading non-digit characters are skipped and the digits that follow are
// collected.  If another digit appears after that run (i.e. the text holds
// more than one number) the function fails.
//
//   pages   text to scan
//   digits  receives the digit run; left empty on failure or when 'pages'
//           contains no digit at all
//
// Returns false when a second run of digits is present, true otherwise.
bool s_GetDigits(const string& pages, string& digits)
{
    const string::size_type len = pages.length();

    digits.erase();

    // skip everything in front of the first digit
    string::size_type run_begin = 0;
    while (run_begin < len  &&  !isdigit((unsigned char) pages[run_begin])) {
        ++run_begin;
    }

    // find the end of the digit run
    string::size_type run_end = run_begin;
    while (run_end < len  &&  isdigit((unsigned char) pages[run_end])) {
        ++run_end;
    }

    // a digit in the remaining text means there is more than one number
    for (string::size_type i = run_end;  i < len;  ++i) {
        if (isdigit((unsigned char) pages[i])) {
            return false;
        }
    }

    digits.assign(pages, run_begin, run_end - run_begin);
    return true;
}
489 
490 
x_ValidatePages(const string & pages,const CSerialObject & obj,const CSeq_entry * ctx)491 void CValidError_imp::x_ValidatePages
492 (const string& pages,
493  const CSerialObject& obj,
494  const CSeq_entry *ctx)
495 {
496     static const string kRoman = "IVXLCDM";
497 
498     if (pages.empty()) {
499         return;
500     }
501 
502     EDiagSev sev = eDiag_Warning;
503 
504     string start, stop;
505     if (!NStr::SplitInTwo(pages, "-", start, stop) || start.empty() || stop.empty()) {
506         if (!isdigit(pages.c_str()[0])) {
507             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering start looks strange", obj, ctx);
508         }
509         return;
510     }
511 
512     NStr::ReplaceInPlace(start, " ", "");
513     NStr::ReplaceInPlace(stop, " ", "");
514 
515     int p1 = 0, p2 = 0;
516     bool start_good = false, stop_good = false;
517     size_t num_digits = 0, num_chars = 0;
518 
519     if (start.c_str()[0] == '-') {
520         num_chars++;
521     }
522     while (isdigit (start.c_str()[num_chars])) {
523         num_digits++;
524         num_chars++;
525     }
526     if (num_digits == 0) {
527         if (!isalpha(start.c_str()[0])) {
528             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering start looks strange", obj, ctx);
529         }
530     } else {
531         start_good = true;
532         p1 = NStr::StringToInt (start.substr(0, num_digits), NStr::fConvErr_NoThrow);
533 
534         num_digits = 0;
535         num_chars = 0;
536         if (stop.c_str()[0] == '-') {
537             num_chars++;
538         }
539         while (isdigit (stop.c_str()[num_chars])) {
540           num_digits++;
541           num_chars++;
542         }
543         if (num_digits == 0) {
544             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering stop looks strange", obj, ctx);
545         } else {
546             stop_good = true;
547             p2 = NStr::StringToInt (stop.substr(0, num_digits), NStr::fConvErr_NoThrow);
548         }
549 
550         if ((start_good && p1 == 0) || (stop_good && p2 == 0)) {
551             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering has zero value", obj, ctx);
552         } else if ((start_good && p1 < 0) || (stop_good && p2 < 0)) {
553             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering has negative value", obj, ctx);
554         } else if (start_good && stop_good && p1 > p2) {
555             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering out of order", obj, ctx);
556         } else if (start_good && stop_good && p2 > p1 + 50) {
557             PostObjErr(sev, eErr_GENERIC_BadPageNumbering, "Page numbering greater than 50", obj, ctx);
558         }
559     }
560 }
561 
562 
HasTitle(const CTitle & title)563 bool CValidError_imp::HasTitle(const CTitle& title)
564 {
565     ITERATE (CTitle::Tdata, item, title.Get() ) {
566         const string *str = 0;
567         switch ( (*item)->Which() ) {
568         case CTitle::C_E::e_Name:
569             str = &(*item)->GetName();
570             break;
571 
572         case CTitle::C_E::e_Tsub:
573             str = &(*item)->GetTsub();
574             break;
575 
576         case CTitle::C_E::e_Trans:
577             str = &(*item)->GetTrans();
578             break;
579 
580         case CTitle::C_E::e_Jta:
581             str = &(*item)->GetJta();
582             break;
583 
584         case CTitle::C_E::e_Iso_jta:
585             str = &(*item)->GetIso_jta();
586             break;
587 
588         case CTitle::C_E::e_Ml_jta:
589             str = &(*item)->GetMl_jta();
590             break;
591 
592         case CTitle::C_E::e_Coden:
593             str = &(*item)->GetCoden();
594             break;
595 
596         case CTitle::C_E::e_Issn:
597             str = &(*item)->GetIssn();
598             break;
599 
600         case CTitle::C_E::e_Abr:
601             str = &(*item)->GetAbr();
602             break;
603 
604         case CTitle::C_E::e_Isbn:
605             str = &(*item)->GetIsbn();
606             break;
607 
608         default:
609             break;
610         };
611         if ( str != NULL && !NStr::IsBlank(*str) ) {
612             return true;
613         }
614     }
615     return false;
616 }
617 
618 
HasIsoJTA(const CTitle & title)619 bool CValidError_imp::HasIsoJTA(const CTitle& title)
620 {
621     ITERATE (CTitle::Tdata, item, title.Get() ) {
622         if ( (*item)->IsIso_jta() ) {
623             return true;
624         } else if ( (*item)->IsMl_jta() ) {
625             return true;
626         }
627     }
628     return false;
629 }
630 
631 
HasName(const CAuth_list & authors)632 bool CValidError_imp::HasName(const CAuth_list& authors)
633 {
634     if ( authors.CanGetNames() ) {
635         const CAuth_list::TNames& names = authors.GetNames();
636         switch ( names.Which() ) {
637             case CAuth_list::TNames::e_Std:
638                 ITERATE ( list< CRef< CAuthor > >, auth, names.GetStd() ) {
639                     const CPerson_id& pid = (*auth)->GetName();
640                     if ( pid.IsName() ) {
641                         if ( ! NStr::IsBlank(pid.GetName().GetLast()) ) {
642                             return true;
643                         }
644                     } else if ( pid.IsMl() ) {
645                         if ( ! NStr::IsBlank (pid.GetMl()) ) {
646                             return true;
647                         }
648                     } else if ( pid.IsStr() ) {
649                         if ( ! NStr::IsBlank (pid.GetStr()) ) {
650                             return true;
651                         }
652                     } else if ( pid.IsConsortium() ) {
653                         if ( ! NStr::IsBlank (pid.GetConsortium()) ) {
654                             return true;
655                         }
656                     }
657                 }
658                 break;
659             case CAuth_list::TNames::e_Ml:
660                 if ( ! IsBlankStringList(names.GetMl()) ) {
661                     return true;
662                 }
663                 break;
664             case CAuth_list::TNames::e_Str:
665                 if ( ! IsBlankStringList(names.GetStr()) ) {
666                     return true;
667                 }
668                 break;
669             default:
670                 break;
671         }
672     }
673     return false;
674 }
675 
676 
ValidatePubHasAuthor(const CPubdesc & pubdesc,const CSerialObject & obj,const CSeq_entry * ctx)677 void CValidError_imp::ValidatePubHasAuthor
678 (const CPubdesc& pubdesc,
679  const CSerialObject& obj,
680  const CSeq_entry *ctx)
681 {
682     bool has_name = false;
683     FOR_EACH_PUB_ON_PUBDESC (pub_iter, pubdesc) {
684         const CPub& pub = **pub_iter;
685         switch (pub.Which() ) {
686             case CPub::e_Gen:
687                 // don't check if just serial number
688                 if (!pub.GetGen().IsSetCit()
689                     && !pub.GetGen().IsSetJournal()
690                     && !pub.GetGen().IsSetDate()
691                     && pub.GetGen().IsSetSerial_number()
692                     && pub.GetGen().GetSerial_number() > -1) {
693                     // skip
694                 } else if (s_CitGenIsJustBackBoneIDNumber(pub.GetGen())) {
695                     // just BackBoneID, skip
696                 } else {
697                     has_name = false;
698                     if ( pub.GetGen().IsSetAuthors()
699                          && HasName(pub.GetGen().GetAuthors())) {
700                          has_name = true;
701                     }
702                     if (!has_name) {
703                         PostObjErr(IsRefSeq() ? eDiag_Warning : eDiag_Error,
704                                    eErr_GENERIC_MissingPubRequirement,
705                                    "Publication has no author names", obj, ctx);
706                     }
707                 }
708                 break;
709             case CPub::e_Article:
710                 has_name = false;
711                 if ( pub.GetArticle().IsSetAuthors()
712                     && HasName(pub.GetArticle().GetAuthors())) {
713                      has_name = true;
714                 }
715                 if (!has_name) {
716                     PostObjErr(IsRefSeq() ? eDiag_Warning : eDiag_Error,
717                                eErr_GENERIC_MissingPubRequirement,
718                                "Publication has no author names", obj, ctx);
719                 }
720                 break;
721             default:
722                 break;
723         }
724     }
725 }
726 
727 
ValidateAuthorList(const CAuth_list::C_Names & names,const CSerialObject & obj,const CSeq_entry * ctx)728 void CValidError_imp::ValidateAuthorList
729 (const CAuth_list::C_Names& names,
730  const CSerialObject& obj,
731  const CSeq_entry *ctx)
732 {
733     if (names.IsStd()) {
734         list<string> consortium_list;
735 
736         ITERATE ( CAuth_list::C_Names::TStd, name, names.GetStd() ) {
737             bool   last_is_bad = false;
738             string badauthor = CValidator::BadCharsInAuthor(**name, last_is_bad);
739             if (!NStr::IsBlank(badauthor)) {
740                 PostObjErr(eDiag_Warning,
741                     last_is_bad ? eErr_SEQ_FEAT_BadCharInAuthorLastName : eErr_SEQ_FEAT_BadCharInAuthorName,
742                           "Bad characters in author " + badauthor, obj, ctx);
743             }
744             if ( (*name)->GetName().IsName() ) {
745                 const CName_std& nstd = (*name)->GetName().GetName();
746                 string last;
747                 if (nstd.IsSetLast()) {
748                     last = nstd.GetLast();
749                     NStr::ReplaceInPlace (last, ".", " ");
750                     NStr::ReplaceInPlace (last, "  ", " ");
751                     NStr::TruncateSpacesInPlace (last);
752                 }
753                 string initials;
754                 if (nstd.IsSetInitials()) {
755                     initials = nstd.GetInitials();
756                     NStr::ReplaceInPlace (initials, ".", " ");
757                     NStr::ReplaceInPlace (initials, "  ", " ");
758                     NStr::TruncateSpacesInPlace (initials);
759                 }
760                 if ( (NStr::CompareNocase(last, "et al") == 0)  ||
761                      (NStr::CompareNocase(last, "et") == 0
762                       &&  NStr::CompareNocase(initials, "al") == 0
763                       &&  (!nstd.IsSetFirst() || nstd.GetFirst().empty()))) {
764                     CAuth_list::C_Names::TStd::const_iterator temp = name;
765                     if ( ++temp == names.GetStd().end() ) {
766                         PostObjErr(eDiag_Warning, eErr_GENERIC_AuthorListHasEtAl,
767                             "Author list ends in et al.", obj, ctx);
768                     } else {
769                         PostObjErr(eDiag_Warning, eErr_GENERIC_AuthorListHasEtAl,
770                             "Author list contains et al.", obj, ctx);
771                     }
772                 }
773                 // validate suffix, if set and nonempty
774                 if (nstd.IsSetSuffix() && !NStr::IsBlank (nstd.GetSuffix())) {
775                     string suffix = nstd.GetSuffix();
776 
777                     typedef CName_std::TSuffixes TSuffixes;
778                     const TSuffixes& suffixes = CName_std::GetStandardSuffixes();
779                     bool found = false;
780                     ITERATE (TSuffixes, it, suffixes) {
781                         if (NStr::EqualNocase (suffix, *it)) {
782                             found = true;
783                             break;
784                         }
785                     }
786                     if (!found) {
787                         PostObjErr (eDiag_Warning, eErr_SEQ_FEAT_BadAuthorSuffix,
788                                  "Bad author suffix " + suffix,
789                                  obj, ctx);
790                     }
791                 }
792             } else if ( (*name)->GetName().IsConsortium() ) {
793                 const string& consortium = (*name)->GetName().GetConsortium();
794                 if (NStr::IsBlank (consortium)) {
795                     PostObjErr (eDiag_Warning, eErr_GENERIC_PublicationInconsistency, "Empty consortium", obj, ctx);
796                 } else {
797                     bool found = false;
798                     ITERATE (list<string>, cons_str, consortium_list) {
799                         if (NStr::EqualNocase (consortium, *cons_str)) {
800                             found = true;
801                             break;
802                         }
803                     }
804                     if (found) {
805                         PostObjErr (eDiag_Warning, eErr_GENERIC_PublicationInconsistency,
806                                  "Duplicate consortium '" + consortium + "'", obj, ctx);
807                     } else {
808                         consortium_list.push_back(consortium);
809                     }
810                 }
811             }
812         }
813     } else if (names.IsMl()) {
814         ITERATE ( list< string >, str, names.GetMl()) {
815             if (CValidator::BadCharsInAuthorName(*str, true, true, false)) {
816                 PostObjErr (eDiag_Warning, eErr_SEQ_FEAT_BadCharInAuthorName,
817                             "Bad characters in author " + *str, obj, ctx);
818             }
819         }
820     } else if (names.IsStr()) {
821 
822         ITERATE ( list< string >, str, names.GetStr()) {
823             if (CValidator::BadCharsInAuthorName(*str, true, true, false)) {
824                 PostObjErr (eDiag_Warning, eErr_SEQ_FEAT_BadCharInAuthorName,
825                             "Bad characters in author " + *str, obj, ctx);
826             }
827         }
828     }
829 }
830 
831 
ValidateAuthorsInPubequiv(const CPub_equiv & pe,const CSerialObject & obj,const CSeq_entry * ctx)832 void CValidError_imp::ValidateAuthorsInPubequiv
833 (const CPub_equiv& pe,
834  const CSerialObject& obj,
835  const CSeq_entry* ctx)
836 {
837     // per VR-19, do not validate authors if PMID specified
838     FOR_EACH_PUB_ON_PUBEQUIV(pub_iter, pe) {
839         const CPub& pub = **pub_iter;
840         if (pub.IsPmid() && pub.GetPmid() > ZERO_ENTREZ_ID) {
841             return;
842         }
843     }
844 
845     FOR_EACH_PUB_ON_PUBEQUIV (pub_iter, pe) {
846         const CPub& pub = **pub_iter;
847         const CAuth_list* authors = 0;
848         switch ( pub.Which() ) {
849         case CPub::e_Gen:
850             if ( pub.GetGen().IsSetAuthors() ) {
851                 authors = &(pub.GetGen().GetAuthors());
852             }
853             break;
854         case CPub::e_Sub:
855             authors = &(pub.GetSub().GetAuthors());
856             break;
857         case CPub::e_Article:
858             if ( pub.GetArticle().IsSetAuthors() ) {
859                 authors = &(pub.GetArticle().GetAuthors());
860             }
861             break;
862         case CPub::e_Book:
863             authors = &(pub.GetBook().GetAuthors());
864             break;
865         case CPub::e_Proc:
866             authors = &(pub.GetProc().GetBook().GetAuthors());
867             break;
868         case CPub::e_Man:
869             authors = &(pub.GetMan().GetCit().GetAuthors());
870             break;
871         case CPub::e_Patent:
872             authors = &(pub.GetPatent().GetAuthors());
873             break;
874         case CPub::e_Equiv:
875             ValidateAuthorsInPubequiv (pub.GetEquiv(), obj, ctx);
876             break;
877         default:
878             break;
879         }
880 
881         if ( !authors ) {
882             continue;
883         }
884 
885         const CAuth_list::C_Names& names = authors->GetNames();
886         ValidateAuthorList (names, obj, ctx);
887     }
888 }
889 
890 
s_IsRefSeqInSep(const CSeq_entry & se,CScope & scope)891 static bool s_IsRefSeqInSep(const CSeq_entry& se, CScope& scope)
892 {
893     for (CBioseq_CI it(scope, se); it; ++it) {
894         FOR_EACH_SEQID_ON_BIOSEQ (id, *(it->GetCompleteBioseq())) {
895             if ((*id)->IsOther()) {
896                 const CTextseq_id* tsip = (*id)->GetTextseq_Id();
897                 if (tsip != NULL  &&  tsip->IsSetAccession()) {
898                     return true;
899                 }
900             }
901         }
902     }
903     return false;
904 }
905 
906 
s_IsHtgInSep(const CSeq_entry & se)907 static bool s_IsHtgInSep(const CSeq_entry& se)
908 {
909     FOR_EACH_DESCRIPTOR_ON_SEQENTRY (it, se) {
910         if ((*it)->Which() == CSeqdesc::e_Molinfo) {
911             CMolInfo::TTech tech = (*it)->GetMolinfo().GetTech();
912             if (tech == CMolInfo::eTech_htgs_0  ||
913                 tech == CMolInfo::eTech_htgs_1  ||
914                 tech == CMolInfo::eTech_htgs_2  ||
915                 tech == CMolInfo::eTech_htgs_3) {
916                 return true;
917             }
918         }
919     }
920     if (se.IsSet()) {
921         FOR_EACH_SEQENTRY_ON_SEQSET (it, se.GetSet()) {
922             if (s_IsHtgInSep(**it)) {
923                 return true;
924             }
925         }
926     }
927     return false;
928 }
929 
930 
IsHtg(void) const931 bool CValidError_imp::IsHtg(void) const
932 {
933     if (m_TSE) {
934         return s_IsHtgInSep(*m_TSE);
935     } else {
936         return false;
937     }
938 }
939 
940 
941 /*
942 static bool s_IsPDBInSep(const CSeq_entry& se, CScope& scope)
943 {
944     for (CBioseq_CI it(scope, se); it; ++it) {
945         FOR_EACH_SEQID_ON_BIOSEQ (id, *(it->GetCompleteBioseq())) {
946             if ((*id)->IsPdb()) {
947                 return true;
948             }
949         }
950     }
951     return false;
952 }
953 */
954 
955 
ValidateAffil(const CAffil::TStd & std,const CSerialObject & obj,const CSeq_entry * ctx)956 void CValidError_imp::ValidateAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx)
957 {
958     // ignore if everything is empty
959     if ((!std.IsSetAffil() || NStr::IsBlank(std.GetAffil())) &&
960         (!std.IsSetDiv() || NStr::IsBlank(std.GetDiv())) &&
961         (!std.IsSetStreet() || NStr::IsBlank(std.GetStreet())) &&
962         (!std.IsSetCity() || NStr::IsBlank(std.GetCity())) &&
963         (!std.IsSetSub() || NStr::IsBlank(std.GetSub())) &&
964         (!std.IsSetPostal_code() || NStr::IsBlank(std.GetPostal_code())) &&
965         (!std.IsSetPhone() || NStr::IsBlank(std.GetPhone())) &&
966         (!std.IsSetFax() || NStr::IsBlank(std.GetFax())) &&
967         (!std.IsSetEmail() || NStr::IsBlank(std.GetEmail()))) {
968         // do nothing, completely blank
969     } else {
970         if (!std.IsSetCountry() || NStr::IsBlank(std.GetCountry())) {
971             PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
972                 "Submission citation affiliation has no country",
973                 obj, ctx);
974         } else if (NStr::Equal(std.GetCountry(), "USA")) {
975             if (!std.IsSetSub() || NStr::IsBlank(std.GetSub())) {
976                 PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
977                     "Submission citation affiliation has no state",
978                     obj, ctx);
979             }
980         }
981     }
982 }
983 
984 
ValidateSubAffil(const CAffil::TStd & std,const CSerialObject & obj,const CSeq_entry * ctx)985 void CValidError_imp::ValidateSubAffil
986 (const CAffil::TStd& std,
987  const CSerialObject& obj,
988  const CSeq_entry *ctx)
989 {
990     EDiagSev sev = eDiag_Critical;
991 
992     if (m_IsINSDInSep || IsRefSeq() || IsHtg() || IsPDB()) {
993         sev = eDiag_Warning;
994     }
995     if (!std.IsSetCountry() || NStr::IsBlank(std.GetCountry())) {
996         PostObjErr(sev, eErr_GENERIC_MissingPubRequirement,
997                     "Submission citation affiliation has no country",
998                     obj, ctx);
999     } else if (NStr::EqualCase (std.GetCountry(), "USA")) {
1000         if (!std.IsSetSub() || NStr::IsBlank (std.GetSub())) {
1001             PostObjErr(eDiag_Warning, eErr_GENERIC_MissingPubRequirement,
1002                         "Submission citation affiliation has no state",
1003                         obj, ctx);
1004         }
1005     }
1006     if ((!std.IsSetDiv() || NStr::IsBlank(std.GetDiv())) && (!std.IsSetAffil() || NStr::IsBlank(std.GetAffil()))) {
1007         PostObjErr(sev, eErr_GENERIC_MissingPubRequirement,
1008                     "Submission citation affiliation has no institution",
1009                     obj, ctx);
1010     }
1011 }
1012 
1013 
x_DowngradeForMissingAffil(const CCit_sub & cs)1014 bool CValidError_imp::x_DowngradeForMissingAffil(const CCit_sub& cs)
1015 {
1016     if (IsRefSeq() || s_IsRefSeqInSep(GetTSE(), *m_Scope)  ||
1017         IsHtg()  || IsPDB()) {
1018         return true;
1019     }
1020     if (IsEmbl() || IsTPE()) {
1021         if (cs.IsSetDate() && cs.GetDate().IsStd() &&
1022             cs.GetDate().GetStd().IsSetYear() &&
1023             cs.GetDate().GetStd().GetYear() < 1995) {
1024             return true;
1025         }
1026         CBioseq_CI bi(GetTSEH(), CSeq_inst::eMol_na);
1027         while (bi) {
1028             CSeqdesc_CI block_i(*bi, CSeqdesc::e_Embl);
1029             while (block_i) {
1030                 if (block_i && block_i->GetEmbl().IsSetKeywords()) {
1031                     for (auto keyword : block_i->GetEmbl().GetKeywords()) {
1032                         if (NStr::EqualNocase(keyword, "TPA:specialist_db")) {
1033                             return true;
1034                         }
1035                     }
1036                 }
1037                 ++block_i;
1038             }
1039             ++bi;
1040         }
1041     }
1042 
1043     return false;
1044 }
1045 
ValidateCitSub(const CCit_sub & cs,const CSerialObject & obj,const CSeq_entry * ctx)1046 void CValidError_imp::ValidateCitSub
1047 (const CCit_sub& cs,
1048  const CSerialObject& obj,
1049  const CSeq_entry *ctx)
1050 {
1051     bool has_name  = false,
1052          has_affil = false;
1053 
1054     if ( cs.CanGetAuthors() ) {
1055         const CAuth_list& authors = cs.GetAuthors();
1056         has_name = HasName(authors);
1057 
1058         if ( authors.CanGetAffil() ) {
1059             const CAffil& affil = authors.GetAffil();
1060 
1061             switch ( affil.Which() ) {
1062             case CAffil::e_Str:
1063                 {{
1064                     if ( !NStr::IsBlank(affil.GetStr()) ) {
1065                         has_affil = true;
1066                     }
1067                 }}
1068                 break;
1069 
1070             case CAffil::e_Std:
1071                 {{
1072                     const CAffil::TStd& std = affil.GetStd();
1073 #define HAS_VALUE(x) (std.CanGet##x()  &&  !NStr::IsBlank(std.Get##x()))
1074                     if ( HAS_VALUE(Affil)    ||  HAS_VALUE(Div)      ||
1075                          HAS_VALUE(City)     ||  HAS_VALUE(Sub)      ||
1076                          HAS_VALUE(Country)  ||  HAS_VALUE(Street)   ||
1077                          HAS_VALUE(Email)    ||  HAS_VALUE(Fax)      ||
1078                          HAS_VALUE(Phone)    ||  HAS_VALUE(Postal_code) ) {
1079                         has_affil = true;
1080                         ValidateSubAffil (std, obj, ctx);
1081                     }
1082                 }}
1083 #undef HAS_VALUE
1084                 break;
1085 
1086             default:
1087                 break;
1088             }
1089         }
1090     }
1091 
1092     if ( !has_name ) {
1093         PostObjErr(eDiag_Critical, eErr_GENERIC_MissingPubRequirement,
1094             "Submission citation has no author names", obj, ctx);
1095     }
1096     if ( !has_affil ) {
1097         EDiagSev sev = x_DowngradeForMissingAffil(cs) ? eDiag_Warning : eDiag_Critical;
1098         PostObjErr(sev, eErr_GENERIC_MissingPubRequirement,
1099             "Submission citation has no affiliation", obj, ctx);
1100     }
1101 
1102     if (cs.IsSetDate()) {
1103         int rval = CheckDate (cs.GetDate());
1104         if (rval == eDateValid_valid) {
1105             time_t time_now = time(NULL);
1106             if (CSubSource::IsCollectionDateAfterTime(cs.GetDate(), time_now)) {
1107                 PostObjErr(eDiag_Warning, eErr_GENERIC_BadDate, "Submission citation date is in the future", obj, ctx);
1108             }
1109         } else {
1110             PostBadDateError (eDiag_Error, "Submission citation date has error", rval, obj, ctx);
1111         }
1112     } else {
1113         PostObjErr(eDiag_Error, eErr_GENERIC_MissingPubRequirement,
1114             "Submission citation has no date", obj, ctx);
1115     }
1116 }
1117 
1118 
s_IsNoncuratedRefSeq(const CBioseq & seq)1119 static bool s_IsNoncuratedRefSeq (const CBioseq& seq)
1120 {
1121     FOR_EACH_SEQID_ON_BIOSEQ (id_it, seq) {
1122         if ((*id_it)->IsOther()) {
1123             if ((*id_it)->GetOther().IsSetAccession()) {
1124                 string accession = (*id_it)->GetOther().GetAccession();
1125                 if (NStr::StartsWith(accession, "NM_")
1126                     || NStr::StartsWith(accession, "NP_")
1127                     || NStr::StartsWith(accession, "NG_")
1128                     || NStr::StartsWith(accession, "NR_")) {
1129                     return false;
1130                 } else {
1131                     return true;
1132                 }
1133             }
1134         }
1135     }
1136     return false;
1137 }
1138 
1139 
IsNoncuratedRefSeq(const CBioseq & seq,EDiagSev & sev)1140 bool CValidError_imp::IsNoncuratedRefSeq(const CBioseq& seq, EDiagSev& sev)
1141 {
1142     FOR_EACH_SEQID_ON_BIOSEQ (id_it, seq) {
1143         if ((*id_it)->IsOther()
1144             && (*id_it)->GetOther().IsSetAccession()) {
1145             const string& accession = (*id_it)->GetOther().GetAccession();
1146             if (NStr::StartsWith (accession, "NM_")
1147                 || NStr::StartsWith(accession, "NP_")
1148                 || NStr::StartsWith(accession, "NG_")
1149                 || NStr::StartsWith(accession, "NR_")) {
1150                 sev = eDiag_Warning;
1151                 return false;
1152             }
1153             return true;
1154         }
1155     }
1156 
1157     return false;
1158 }
1159 
1160 
AddBioseqWithNoPub(const CBioseq & seq)1161 void CValidError_imp::AddBioseqWithNoPub(const CBioseq& seq)
1162 {
1163     EDiagSev sev = eDiag_Error;
1164 
1165     if (!m_NoPubs && !m_IsSeqSubmit) {
1166         if (seq.IsAa()) {
1167             CBioseq_Handle bsh = m_Scope->GetBioseqHandle(seq);
1168             if (bsh) {
1169                 bsh = GetNucBioseq (bsh);
1170                 if (bsh) {
1171                     const CBioseq& nuc = *(bsh.GetCompleteBioseq());
1172                     if(!IsNoncuratedRefSeq (nuc, sev)
1173                         && !IsWGSIntermediate(nuc)
1174                         && !IsTSAIntermediate(nuc)) {
1175                         PostErr (sev, eErr_SEQ_DESCR_NoPubFound, "No publications refer to this Bioseq.", seq);
1176                     }
1177                     return;
1178                 }
1179             }
1180         }
1181         if (!IsNoncuratedRefSeq (seq, sev)
1182             && !IsWGSIntermediate(seq)
1183             && !IsTSAIntermediate(seq)) {
1184             PostErr (sev, eErr_SEQ_DESCR_NoPubFound, "No publications refer to this Bioseq.", seq);
1185         }
1186     }
1187 }
1188 
1189 
s_IsGpipe(const CBioseq & seq)1190 static bool s_IsGpipe (const CBioseq& seq)
1191 {
1192     bool is_gpipe = false;
1193 
1194     FOR_EACH_SEQID_ON_BIOSEQ (id_it, seq) {
1195         if ((*id_it)->IsGpipe()) {
1196             is_gpipe = true;
1197             break;
1198         }
1199     }
1200     return is_gpipe;
1201 }
1202 
1203 
s_CuratedRefSeqLowerToWarning(const CBioseq & seq)1204 static bool s_CuratedRefSeqLowerToWarning (const CBioseq& seq)
1205 {
1206     FOR_EACH_SEQID_ON_BIOSEQ (id_it, seq) {
1207         if ((*id_it)->IsOther() && (*id_it)->GetOther().IsSetAccession()) {
1208             const string& str = (*id_it)->GetOther().GetAccession();
1209             if (NStr::StartsWith(str, "NM_")
1210                 || NStr::StartsWith(str, "NP_")
1211                 || NStr::StartsWith(str, "NG_")
1212                 || NStr::StartsWith(str, "NR_")) {
1213                 return true;
1214             }
1215         }
1216     }
1217     return false;
1218 }
1219 
s_IsWgs_Contig(const CBioseq & seq)1220 static bool s_IsWgs_Contig (const CBioseq& seq)
1221 {
1222     CSeq_inst::ERepr rp = seq.GetInst().GetRepr();
1223     if (rp == CSeq_inst::eRepr_virtual) return false;
1224     IF_EXISTS_CLOSEST_MOLINFO (mi_ref, seq, NULL) {
1225         const CMolInfo& molinf = (*mi_ref).GetMolinfo();
1226         if (molinf.GetTech() == NCBI_TECH(wgs)) return true;
1227         if (molinf.GetTech() == NCBI_TECH(tsa)) return true;
1228     }
1229     return false;
1230 }
1231 
s_IsTSA_Contig(const CBioseq & seq)1232 static bool s_IsTSA_Contig (const CBioseq& seq)
1233 {
1234     /*
1235     CSeq_inst::ERepr rp = seq.GetInst().GetRepr();
1236     if (rp == CSeq_inst::eRepr_virtual) return false;
1237     */
1238     IF_EXISTS_CLOSEST_MOLINFO (mi_ref, seq, NULL) {
1239         const CMolInfo& molinf = (*mi_ref).GetMolinfo();
1240         if (molinf.GetTech() == NCBI_TECH(wgs)) return true;
1241         if (molinf.GetTech() == NCBI_TECH(tsa)) return true;
1242     }
1243     return false;
1244 }
1245 
1246 
ReportMissingPubs(const CSeq_entry & se,const CCit_sub * cs)1247 void CValidError_imp::ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs)
1248 {
1249      if ( m_NoPubs && !IsSeqSubmitParent() ) {
1250         if ( !m_IsGPS  &&  !cs) {
1251             CBioseq_CI b_it(m_Scope->GetSeq_entryHandle(se));
1252             if (b_it)
1253             {
1254                 CConstRef<CBioseq> bioseq = b_it->GetCompleteBioseq();
1255                 if (   !s_IsNoncuratedRefSeq(*bioseq)
1256                     && !s_IsGpipe(*bioseq)
1257                     && !s_IsWgs_Contig(*bioseq)
1258                     && !s_IsTSA_Contig(*bioseq) ) {
1259                         EDiagSev sev = eDiag_Error;
1260                         if (s_CuratedRefSeqLowerToWarning(*bioseq)) {
1261                             sev = eDiag_Warning;
1262                         }
1263                         PostErr(sev, eErr_SEQ_DESCR_NoPubFound,
1264                             "No publications anywhere on this entire record.", se);
1265                 }
1266             }
1267         }
1268     }
1269     if ( m_NoCitSubPubs && !cs && !IsSeqSubmitParent() ) {
1270         CBioseq_CI b_it(m_Scope->GetSeq_entryHandle(se));
1271         if (b_it) {
1272             CConstRef<CBioseq> bioseq = b_it->GetCompleteBioseq();
1273             if (CValidError_bioseq::IsWGSMaster(*bioseq, *m_Scope) ||
1274                 (!IsRefSeq() &&
1275                  !CValidError_bioseq::IsWGSAccession(*bioseq) &&
1276                  !CValidError_bioseq::IsTSAAccession(*bioseq))) {
1277                   EDiagSev sev = eDiag_Info;
1278                   if (m_genomeSubmission) {
1279                       sev = eDiag_Error;
1280                   }
1281                   PostErr(sev, eErr_GENERIC_MissingPubRequirement,
1282                        "No submission citation anywhere on this entire record.", se);
1283             }
1284         }
1285     }
1286 }
1287 
1288 
FindCollidingSerialNumbers(const CSerialObject & obj)1289 void CValidError_imp::FindCollidingSerialNumbers (const CSerialObject& obj)
1290 {
1291     if (m_PubSerialNumbers.size() < 2) {
1292         return;
1293     }
1294     sort (m_PubSerialNumbers.begin(), m_PubSerialNumbers.end());
1295 
1296     vector<int>::iterator it1 = m_PubSerialNumbers.begin();
1297     vector<int>::iterator it2 = it1;
1298     ++it2;
1299     while (it2 != m_PubSerialNumbers.end()) {
1300         if (*it1 == *it2) {
1301             PostErr (eDiag_Warning, eErr_GENERIC_CollidingSerialNumbers,
1302               "Multiple publications have serial number " + NStr::IntToString(*it1),
1303               obj);
1304             while (it2 != m_PubSerialNumbers.end() && *it2 == *it1) {
1305                 ++it2;
1306             }
1307             if (it2 != m_PubSerialNumbers.end()) {
1308                 it1 = it2;
1309                 ++it2;
1310             }
1311         } else {
1312             it1 = it2;
1313             ++it2;
1314         }
1315     }
1316 }
1317 
1318 
1319 END_SCOPE(validator)
1320 END_SCOPE(objects)
1321 END_NCBI_SCOPE
1322