1 #ifndef _PUB_FIX_HPP_
2 #define _PUB_FIX_HPP_
3 
4 /* $Id: pub_fix.hpp 632623 2021-06-03 17:38:11Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Alexey Dobronadezhdin
30  *
31  * File Description:
32  *   Code for fixing up publications.
33  *
34  * ===========================================================================
35  */
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
38 
39 BEGIN_NCBI_SCOPE
40 
41 class IMessageListener;
42 
43 BEGIN_SCOPE(objects)
44 
45 class CPub;
46 class CPub_equiv;
47 class CCit_art;
48 
49 BEGIN_SCOPE(edit)
50 
51 /*-------------------------------------------------------------------------------
52 https://jira.ncbi.nlm.nih.gov/browse/ID-6514?focusedCommentId=6241819&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-6241819
53 As requested by Mark Cavanaugh:
54 So here's how I imagine things working Leonid:
55 
56 1) PubMed Cit-art pub has a year value > 1999
57 
58 Accept the Auth-list of the PubMed article, as-is
59 
60 Consider generating a warning if the PubMed article author count is significantly less than the original author count.
61 
62 "Significant" ? Hmmmmm..... Let's try: Auth-Count-Diff >= 1/3 * Orig-Auth-Count
63 
64 2) PubMed Cit-art pub has a year value ranging from 1996 to 1999
65 
66 If the original author count is > 25, preserve the Auth-list of the original article, discarding PubMed's author list
67 
68 Log the author name counts : Original vs PubMed
69 Log the author lists: Original vs Pubmed
70 
71 3) PubMed Cit-art pub has a year value < 1996
72 
73 If the original author count is > 10, preserve the Auth-list of the original article, discarding PubMed's author list
74 
75 Log the author name counts : Original vs PubMed
76 Log the author lists: Original vs Pubmed
77 
78 We may have to tweak things a bit further, but this is a good start.
79 -------------------------------------------------------------------------------*/
80 
// Validates the author list of a PubMed ("pm") Cit-art against the author
// list of the original GenBank ("gb") Cit-art.
//
// Only declarations are visible in this header; every member function is
// defined elsewhere.
// NOTE(review): the decision rules are presumably the ones sketched in the
// ID-6514 comment that precedes this class -- confirm against the
// implementation.
// NOTE(review): CAuth_list and CNcbiRegistry are used below but are neither
// forward-declared nor obviously provided by the two headers included here;
// confirm that every includer makes them available first.
class NCBI_XOBJEDIT_EXPORT CAuthListValidator
{
public:
    // Result type of validate(); eNotSet has the value 0.
    // NOTE(review): judging by the names, eAccept_pubmed presumably means
    // "use PubMed's author list" and eKeep_genbank "keep the original list"
    // -- confirm.
    enum EOutcome {
        eNotSet = 0,
        eFailed_validation,
        eAccept_pubmed,
        eKeep_genbank
    };
    // Takes a registry and the name of a section in it.
    // NOTE(review): presumably loads `enabled` and the cfg_* thresholds
    // declared below from that section -- confirm.
    static void Configure(const CNcbiRegistry& cfg, const string& section);
    // If true, FixPubEquiv() will use this class to validate authors list
    static bool enabled;
    // NOTE(review): err_log is a raw pointer; presumably non-owning and
    // possibly null -- confirm. The constructor is not `explicit`, so an
    // IMessageListener* converts implicitly to a CAuthListValidator.
    CAuthListValidator(IMessageListener* err_log);
    // Compares the GenBank article (gb_art) with the PubMed article (pm_art)
    // and returns an EOutcome.
    // NOTE(review): presumably also fills in the public data members below.
    EOutcome validate(const CCit_art& gb_art, const CCit_art& pm_art);
    // Writes to `out`; const, so it does not modify this object's
    // non-mutable state.
    void DebugDump(CNcbiOstream& out) const;
    // utility method
    // `lastnames` and `auth_string` are non-const references, i.e. outputs.
    static void get_lastnames(const CAuth_list& authors, list<string>& lastnames, string& auth_string);

    // public vars
    EOutcome outcome;
    int pub_year;
    // Author counts; "gb" = GenBank list, "pm" = PubMed list.
    int cnt_gb;
    int cnt_pm;
    int cnt_matched;
    int cnt_added;      // new from pubmed list
    int cnt_removed;    // not matched in genbank list
    int cnt_min;        // minimum # in GB/PM list, use as a base for ratio
    // NOTE(review): presumably the names behind cnt_matched / cnt_removed /
    // cnt_added respectively -- confirm.
    list<string> matched;
    list<string> removed;
    list<string> added;
    string gb_type;
    string pm_type;
    string gb_auth_string;
    string pm_auth_string;
    // for DebugDump()
    string reported_limit;
    double actual_matched_to_min;
    double actual_removed_to_gb;

private:
    void compare_lastnames();
    // Helper taking a header string, a list of strings and an output stream.
    void dumplist(const char* hdr, const list<string>& lst, CNcbiOstream& out) const;
    // Overloads for the two author-name representations (TStd and TStr) of
    // CAuth_list::C_Names; `lastnames` is the output.
    static void get_lastnames(const CAuth_list::C_Names::TStd& authors, list<string>& lastnames);
    static void get_lastnames(const CAuth_list::C_Names::TStr& authors, list<string>& lastnames);
    // vars
    IMessageListener* m_err_log;
    // Class-wide configuration state.
    // NOTE(review): presumably set by Configure() and compared against the
    // actual_* ratios above -- confirm.
    static bool configured;
    static double cfg_matched_to_min;
    static double cfg_removed_to_gb;
};
131 
132 class NCBI_XOBJEDIT_EXPORT CPubFix
133 {
134 public:
135 
CPubFix(bool always_lookup,bool replace_cit,bool merge_ids,IMessageListener * err_log)136     CPubFix(bool always_lookup, bool replace_cit, bool merge_ids, IMessageListener* err_log) :
137         m_always_lookup(always_lookup),
138         m_replace_cit(replace_cit),
139         m_merge_ids(merge_ids),
140         m_err_log(err_log),
141         m_authlist_validator(err_log)
142     {
143     }
144 
145     void FixPub(CPub& pub);
146     void FixPubEquiv(CPub_equiv& pub_equiv);
GetValidator() const147     const CAuthListValidator& GetValidator() const { return m_authlist_validator; };
148 
149     static CRef<CCit_art> FetchPubPmId(TEntrezId pmid);
150     static string GetErrorId(int code, int subcode);
151 
152 private:
153     bool m_always_lookup,
154         m_replace_cit,
155         m_merge_ids;
156 
157     IMessageListener* m_err_log;
158     CAuthListValidator m_authlist_validator;
159 };
160 
161 END_SCOPE(edit)
162 END_SCOPE(objects)
163 END_NCBI_SCOPE
164 
#endif  // _PUB_FIX_HPP_
166