1 /*  $Id: discrepancy.hpp 627092 2021-03-09 14:28:00Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Sema
27  * Created:  01/29/2015
28  */
29 
30 #ifndef _MISC_DISCREPANCY_DISCREPANCY_H_
31 #define _MISC_DISCREPANCY_DISCREPANCY_H_
32 
33 #include <serial/iterator.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <serial/serialbase.hpp>
36 #include <objmgr/scope.hpp>
37 #include <objects/macro/Suspect_rule.hpp>
38 #include <objects/macro/Suspect_rule_set.hpp>
39 
40 BEGIN_NCBI_SCOPE
41 BEGIN_SCOPE(NDiscrepancy)
42 
43 class NCBI_DISCREPANCY_EXPORT CReportObj : public CObject
44 {
45 public:
46     enum EType {
47         eType_feature,
48         eType_descriptor,
49         eType_sequence,
50         eType_seq_set,
51         eType_submit_block,
52         eType_string
53     };
~CReportObj()54     virtual ~CReportObj(){}
55     virtual string GetText() const = 0;
56     virtual string GetPath() const = 0;
57     virtual string GetFeatureType() const = 0;
58     virtual string GetProductName() const = 0;
59     virtual string GetLocation() const = 0;
60     virtual string GetLocusTag() const = 0;
61     virtual string GetShort() const = 0;
62     virtual EType GetType() const = 0;
63     virtual bool CanAutofix() const = 0;
64     virtual bool IsFixed() const = 0;
65     virtual void SetMoreInfo(CObject* data) = 0;
66 };
67 typedef vector<CRef<CReportObj> > TReportObjectList;
68 
69 
70 class NCBI_DISCREPANCY_EXPORT CAutofixReport : public CObject
71 {
72 public:
CAutofixReport(const string & s,unsigned int n)73     CAutofixReport(const string&s, unsigned int n) : S(s), N(n) {}
AddSubitems(const vector<CRef<CAutofixReport>> & v)74     void AddSubitems(const vector<CRef<CAutofixReport>>& v) { copy(v.begin(), v.end(), back_inserter(V)); }
GetS() const75     string GetS() const { return S; }
GetN() const76     unsigned int GetN() const { return N; }
GetSubitems()77     const vector<CRef<CAutofixReport>>& GetSubitems() { return V; }
78 protected:
79     string S;
80     unsigned int N;
81     vector<CRef<CAutofixReport>> V;
82 };
83 
84 
85 class NCBI_DISCREPANCY_EXPORT CReportItem : public CObject
86 {
87 public:
88     enum ESeverity {
89         eSeverity_info    = 0,
90         eSeverity_warning = 1,
91         eSeverity_error   = 2
92     };
~CReportItem()93     virtual ~CReportItem(){}
94     virtual string GetTitle() const = 0;
95     virtual string GetStr() const = 0;
96     virtual string GetMsg() const = 0;
97     virtual string GetXml() const = 0;
98     virtual string GetUnit() const = 0;
99     virtual size_t GetCount() const = 0;
100     virtual TReportObjectList GetDetails() const = 0;
101     virtual vector<CRef<CReportItem> > GetSubitems() const = 0;
102     virtual bool CanAutofix() const = 0;
103     virtual ESeverity GetSeverity() const = 0;
104     virtual bool IsFatal() const = 0;
105     virtual bool IsInfo() const = 0;
106     virtual bool IsExtended() const = 0;
107     virtual bool IsSummary() const = 0;
108     virtual bool IsReal() const = 0;
109     static CRef<CReportItem> CreateReportItem(const string& test, const CReportObj& obj, const string& msg, bool autofix = false);
110 };
111 typedef vector<CRef<CReportItem> > TReportItemList;
112 
113 
114 class NCBI_DISCREPANCY_EXPORT CDiscrepancyCase : public CObject
115 {
116 public:
~CDiscrepancyCase()117     virtual ~CDiscrepancyCase(){}
118     virtual string GetName() const = 0;
119     virtual string GetType() const = 0;
120     virtual TReportItemList GetReport() const = 0;
121     virtual TReportObjectList GetObjects() const = 0;
122 };
123 typedef map<string, CRef<CDiscrepancyCase> > TDiscrepancyCaseMap;
124 
125 
126 class NCBI_DISCREPANCY_EXPORT CDiscrepancySet : public CObject
127 {
128 public:
CDiscrepancySet()129     CDiscrepancySet() : m_SesameStreetCutoff(0.75), /*m_Eucariote(false),*/ m_Gui(false), m_UserData(nullptr) {}
~CDiscrepancySet()130     virtual ~CDiscrepancySet(){}
131 
132     template<typename Container>
AddTests(const Container & cont)133     bool AddTests(const Container& cont)
134     {
135         bool success = true;
136         for_each(cont.begin(), cont.end(), [this, &success](const string& test_name) { success &= this->AddTest(test_name); });
137         return success;
138     }
139 
140     virtual bool AddTest(const string& name) = 0;
141     virtual void Push(const CSerialObject& root, const string& fname = kEmptyStr) = 0;
142     virtual void Parse() = 0;
Parse(const CSerialObject & root,const string & fname=kEmptyStr)143     virtual void Parse(const CSerialObject& root, const string& fname = kEmptyStr) { Push(root, fname); Parse(); }
144     virtual void ParseStream(CObjectIStream& stream, const string& fname, bool skip, const string& default_header = kEmptyStr) = 0;
145     virtual void ParseStrings(const string& fname) = 0;
146     virtual void TestString(const string& str) = 0;
147     virtual unsigned Summarize() = 0;
148     virtual void Autofix(TReportObjectList& tofix, map<string, size_t>& rep, const string& default_header = kEmptyStr) = 0;
149     virtual const TDiscrepancyCaseMap& GetTests() const = 0;
150 
151     enum EOutput {
152         eOutput_Summary = 1 << 0,   // only summary
153         eOutput_Fatal   = 1 << 1,   // print FATAL
154         eOutput_Ext     = 1 << 2,   // extended output
155         eOutput_Files   = 1 << 3    // print file name
156     };
157     virtual void OutputText(CNcbiOstream& out, unsigned short flags, char group = 0) = 0;
158     virtual void OutputXML(CNcbiOstream& out, unsigned short flags) = 0;
159 
IsGui() const160     bool IsGui() const { return m_Gui; }
GetLineage() const161     const string& GetLineage() const { return m_Lineage; }
GetSesameStreetCutoff() const162     float GetSesameStreetCutoff() const { return m_SesameStreetCutoff; }
GetUserData() const163     void* GetUserData() const { return m_UserData; }
164     //virtual void SetFile(const string& fname) = 0;
SetLineage(const string & s)165     void SetLineage(const string& s) { m_Lineage = s; }
166     //void SetEucariote(bool b){ m_Eucariote = b; }
SetSesameStreetCutoff(float f)167     void SetSesameStreetCutoff(float f){ m_SesameStreetCutoff = f; }
168     virtual void SetSuspectRules(const string& name, bool read = true) = 0;
SetGui(bool b)169     void SetGui(bool b){ m_Gui = b; }
SetUserData(void * p)170     void SetUserData(void* p){ m_UserData = p; }
171     static CRef<CDiscrepancySet> New(objects::CScope& scope);
172     static string Format(const string& str, unsigned int count);
173     virtual const CSerialObject* FindObject(CReportObj& obj, bool alt = false) = 0;
174 
175 protected:
176     string m_Lineage;
177     float m_SesameStreetCutoff;
178     //bool m_Eucariote;
179     bool m_Gui;
180     void* m_UserData;
181 };
182 
183 
184 class NCBI_DISCREPANCY_EXPORT CDiscrepancyGroup : public CObject
185 {
186 public:
CDiscrepancyGroup(const string & name="",const string & test="")187     CDiscrepancyGroup(const string& name = "", const string& test = "") : m_Name(name), m_Test(test) {}
Add(CRef<CDiscrepancyGroup> child)188     void Add(CRef<CDiscrepancyGroup> child) { m_List.push_back(child); }
189     TReportItemList Collect(TDiscrepancyCaseMap& tests, bool all = true) const;
operator [](size_t n) const190     const CDiscrepancyGroup& operator[](size_t n) const { return *m_List[n]; }
191 
192 protected:
193     string m_Name;
194     string m_Test;
195     vector<CRef<CDiscrepancyGroup> > m_List;
196 };
197 
198 
/// Group bit flags.  eNone is zero; every other enumerator is a distinct
/// power of two, written as a shift in the same way as
/// CDiscrepancySet::EOutput.
enum EGroup {
    eNone      = 0,
    eDisc      = 1 << 0,
    eOncaller  = 1 << 1,
    eSubmitter = 1 << 2,
    eSmart     = 1 << 3,
    eBig       = 1 << 4,
    eTSA       = 1 << 5,
    eFatal     = 1 << 6,
    eAutofix   = 1 << 7
};

/// Integral type used for group values in the GetDiscrepancyGroup() and
/// GetDiscrepancyNames() signatures.
using TGroup = unsigned short;
211 
212 
213 NCBI_DISCREPANCY_EXPORT string GetDiscrepancyCaseName(const string&);
214 NCBI_DISCREPANCY_EXPORT string GetDiscrepancyDescr(const string&);
215 NCBI_DISCREPANCY_EXPORT TGroup GetDiscrepancyGroup(const string&);
216 NCBI_DISCREPANCY_EXPORT vector<string> GetDiscrepancyNames(TGroup group = 0);
217 NCBI_DISCREPANCY_EXPORT vector<string> GetDiscrepancyAliases(const string&);
218 NCBI_DISCREPANCY_EXPORT bool IsShortrRNA(const objects::CSeq_feat& f, objects::CScope* scope);
219 
220 typedef std::function < CRef<objects::CSeq_feat>() > GetFeatureFunc;
221 NCBI_DISCREPANCY_EXPORT string FixProductName(const objects::CSuspect_rule* rule, objects::CScope& scope, string& prot_name, GetFeatureFunc get_mrna, GetFeatureFunc get_cds);
222 
223 NCBI_DISCREPANCY_EXPORT CConstRef<objects::CSuspect_rule_set> GetOrganelleProductRules(const string& name = "");
224 NCBI_DISCREPANCY_EXPORT CConstRef<objects::CSuspect_rule_set> GetProductRules(const string& name = "");
225 
226 END_SCOPE(NDiscrepancy)
227 END_NCBI_SCOPE
228 
229 #endif  // _MISC_DISCREPANCY_DISCREPANCY_H_
230