1 #ifndef _PUB_FIX_HPP_ 2 #define _PUB_FIX_HPP_ 3 4 /* $Id: pub_fix.hpp 632623 2021-06-03 17:38:11Z ivanov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Alexey Dobronadezhdin 30 * 31 * File Description: 32 * Code for fixing up publications. 33 * 34 * =========================================================================== 35 */ 36 #include <corelib/ncbistd.hpp> 37 #include <corelib/ncbiobj.hpp> 38 39 BEGIN_NCBI_SCOPE 40 41 class IMessageListener; 42 43 BEGIN_SCOPE(objects) 44 45 class CPub; 46 class CPub_equiv; 47 class CCit_art; 48 49 BEGIN_SCOPE(edit) 50 51 /*------------------------------------------------------------------------------- 52 https://jira.ncbi.nlm.nih.gov/browse/ID-6514?focusedCommentId=6241819&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-6241819 53 As requested by Mark Cavanaugh: 54 So here's how I imagine things working Leonid: 55 56 1) PubMed Cit-art pub has a year value > 1999 57 58 Accept the Auth-list of the PubMed article, as-is 59 60 Consider generating a warning if the PubMed article author count is significantly less than the original author count. 61 62 "Significant" ? Hmmmmm..... Let's try: Auth-Count-Diff >= 1/3 * Orig-Auth-Count 63 64 2) PubMed Cit-art pub has a year value ranging from 1996 to 1999 65 66 If the original author count is > 25, preserve the Auth-list of the original article, discarding PubMed's author list 67 68 Log the author name counts : Original vs PubMed 69 Log the author lists: Original vs Pubmed 70 71 3) PubMed Cit-art pub has a year value < 1996 72 73 If the original author count is > 10, preserve the Auth-list of the original article, discarding PubMed's author list 74 75 Log the author name counts : Original vs PubMed 76 Log the author lists: Original vs Pubmed 77 78 We may have to tweak things a bit further, but this is a good start. 79 -------------------------------------------------------------------------------*/ 80 81 class NCBI_XOBJEDIT_EXPORT CAuthListValidator 82 { 83 public: 84 enum EOutcome { 85 eNotSet = 0, 86 eFailed_validation, 87 eAccept_pubmed, 88 eKeep_genbank 89 }; 90 static void Configure(const CNcbiRegistry& cfg, const string& section); 91 // If true, FixPubEquiv() will use this class to validate authors list 92 static bool enabled; 93 CAuthListValidator(IMessageListener* err_log); 94 EOutcome validate(const CCit_art& gb_art, const CCit_art& pm_art); 95 void DebugDump(CNcbiOstream& out) const; 96 // utility method 97 static void get_lastnames(const CAuth_list& authors, list<string>& lastnames, string& auth_string); 98 99 // public vars 100 EOutcome outcome; 101 int pub_year; 102 int cnt_gb; 103 int cnt_pm; 104 int cnt_matched; 105 int cnt_added; // new from pubmed list 106 int cnt_removed; // not matched in genbank list 107 int cnt_min; // minimum # in GB/PM list, use as a base for ration 108 list<string> matched; 109 list<string> removed; 110 list<string> added; 111 string gb_type; 112 string pm_type; 113 string gb_auth_string; 114 string pm_auth_string; 115 // for DebugDump() 116 string reported_limit; 117 double actual_matched_to_min; 118 double actual_removed_to_gb; 119 120 private: 121 void compare_lastnames(); 122 void dumplist(const char* hdr, const list<string>& lst, CNcbiOstream& out) const; 123 static void get_lastnames(const CAuth_list::C_Names::TStd& authors, list<string>& lastnames); 124 static void get_lastnames(const CAuth_list::C_Names::TStr& authors, list<string>& lastnames); 125 // vars 126 IMessageListener* m_err_log; 127 static bool configured; 128 static double cfg_matched_to_min; 129 static double cfg_removed_to_gb; 130 }; 131 132 class NCBI_XOBJEDIT_EXPORT CPubFix 133 { 134 public: 135 CPubFix(bool always_lookup,bool replace_cit,bool merge_ids,IMessageListener * err_log)136 CPubFix(bool always_lookup, bool replace_cit, bool merge_ids, IMessageListener* err_log) : 137 m_always_lookup(always_lookup), 138 m_replace_cit(replace_cit), 139 m_merge_ids(merge_ids), 140 m_err_log(err_log), 141 m_authlist_validator(err_log) 142 { 143 } 144 145 void FixPub(CPub& pub); 146 void FixPubEquiv(CPub_equiv& pub_equiv); GetValidator() const147 const CAuthListValidator& GetValidator() const { return m_authlist_validator; }; 148 149 static CRef<CCit_art> FetchPubPmId(TEntrezId pmid); 150 static string GetErrorId(int code, int subcode); 151 152 private: 153 bool m_always_lookup, 154 m_replace_cit, 155 m_merge_ids; 156 157 IMessageListener* m_err_log; 158 CAuthListValidator m_authlist_validator; 159 }; 160 161 END_SCOPE(edit) 162 END_SCOPE(objects) 163 END_NCBI_SCOPE 164 165 #endif // MISC_FIX_PUB__HPP 166