1 /* $Id: discrepancy.hpp 627092 2021-03-09 14:28:00Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Authors: Sema 27 * Created: 01/29/2015 28 */ 29 30 #ifndef _MISC_DISCREPANCY_DISCREPANCY_H_ 31 #define _MISC_DISCREPANCY_DISCREPANCY_H_ 32 33 #include <serial/iterator.hpp> 34 #include <corelib/ncbistd.hpp> 35 #include <serial/serialbase.hpp> 36 #include <objmgr/scope.hpp> 37 #include <objects/macro/Suspect_rule.hpp> 38 #include <objects/macro/Suspect_rule_set.hpp> 39 40 BEGIN_NCBI_SCOPE 41 BEGIN_SCOPE(NDiscrepancy) 42 43 class NCBI_DISCREPANCY_EXPORT CReportObj : public CObject 44 { 45 public: 46 enum EType { 47 eType_feature, 48 eType_descriptor, 49 eType_sequence, 50 eType_seq_set, 51 eType_submit_block, 52 eType_string 53 }; ~CReportObj()54 virtual ~CReportObj(){} 55 virtual string GetText() const = 0; 56 virtual string GetPath() const = 0; 57 virtual string GetFeatureType() const = 0; 58 virtual string GetProductName() const = 0; 59 virtual string GetLocation() const = 0; 60 virtual string GetLocusTag() const = 0; 61 virtual string GetShort() const = 0; 62 virtual EType GetType() const = 0; 63 virtual bool CanAutofix() const = 0; 64 virtual bool IsFixed() const = 0; 65 virtual void SetMoreInfo(CObject* data) = 0; 66 }; 67 typedef vector<CRef<CReportObj> > TReportObjectList; 68 69 70 class NCBI_DISCREPANCY_EXPORT CAutofixReport : public CObject 71 { 72 public: CAutofixReport(const string & s,unsigned int n)73 CAutofixReport(const string&s, unsigned int n) : S(s), N(n) {} AddSubitems(const vector<CRef<CAutofixReport>> & v)74 void AddSubitems(const vector<CRef<CAutofixReport>>& v) { copy(v.begin(), v.end(), back_inserter(V)); } GetS() const75 string GetS() const { return S; } GetN() const76 unsigned int GetN() const { return N; } GetSubitems()77 const vector<CRef<CAutofixReport>>& GetSubitems() { return V; } 78 protected: 79 string S; 80 unsigned int N; 81 vector<CRef<CAutofixReport>> V; 82 }; 83 84 85 class NCBI_DISCREPANCY_EXPORT CReportItem : public CObject 86 { 87 public: 88 enum ESeverity { 89 eSeverity_info = 0, 90 eSeverity_warning = 1, 91 eSeverity_error = 2 92 }; ~CReportItem()93 virtual ~CReportItem(){} 94 virtual string GetTitle() const = 0; 95 virtual string GetStr() const = 0; 96 virtual string GetMsg() const = 0; 97 virtual string GetXml() const = 0; 98 virtual string GetUnit() const = 0; 99 virtual size_t GetCount() const = 0; 100 virtual TReportObjectList GetDetails() const = 0; 101 virtual vector<CRef<CReportItem> > GetSubitems() const = 0; 102 virtual bool CanAutofix() const = 0; 103 virtual ESeverity GetSeverity() const = 0; 104 virtual bool IsFatal() const = 0; 105 virtual bool IsInfo() const = 0; 106 virtual bool IsExtended() const = 0; 107 virtual bool IsSummary() const = 0; 108 virtual bool IsReal() const = 0; 109 static CRef<CReportItem> CreateReportItem(const string& test, const CReportObj& obj, const string& msg, bool autofix = false); 110 }; 111 typedef vector<CRef<CReportItem> > TReportItemList; 112 113 114 class NCBI_DISCREPANCY_EXPORT CDiscrepancyCase : public CObject 115 { 116 public: ~CDiscrepancyCase()117 virtual ~CDiscrepancyCase(){} 118 virtual string GetName() const = 0; 119 virtual string GetType() const = 0; 120 virtual TReportItemList GetReport() const = 0; 121 virtual TReportObjectList GetObjects() const = 0; 122 }; 123 typedef map<string, CRef<CDiscrepancyCase> > TDiscrepancyCaseMap; 124 125 126 class NCBI_DISCREPANCY_EXPORT CDiscrepancySet : public CObject 127 { 128 public: CDiscrepancySet()129 CDiscrepancySet() : m_SesameStreetCutoff(0.75), /*m_Eucariote(false),*/ m_Gui(false), m_UserData(nullptr) {} ~CDiscrepancySet()130 virtual ~CDiscrepancySet(){} 131 132 template<typename Container> AddTests(const Container & cont)133 bool AddTests(const Container& cont) 134 { 135 bool success = true; 136 for_each(cont.begin(), cont.end(), [this, &success](const string& test_name) { success &= this->AddTest(test_name); }); 137 return success; 138 } 139 140 virtual bool AddTest(const string& name) = 0; 141 virtual void Push(const CSerialObject& root, const string& fname = kEmptyStr) = 0; 142 virtual void Parse() = 0; Parse(const CSerialObject & root,const string & fname=kEmptyStr)143 virtual void Parse(const CSerialObject& root, const string& fname = kEmptyStr) { Push(root, fname); Parse(); } 144 virtual void ParseStream(CObjectIStream& stream, const string& fname, bool skip, const string& default_header = kEmptyStr) = 0; 145 virtual void ParseStrings(const string& fname) = 0; 146 virtual void TestString(const string& str) = 0; 147 virtual unsigned Summarize() = 0; 148 virtual void Autofix(TReportObjectList& tofix, map<string, size_t>& rep, const string& default_header = kEmptyStr) = 0; 149 virtual const TDiscrepancyCaseMap& GetTests() const = 0; 150 151 enum EOutput { 152 eOutput_Summary = 1 << 0, // only summary 153 eOutput_Fatal = 1 << 1, // print FATAL 154 eOutput_Ext = 1 << 2, // extended output 155 eOutput_Files = 1 << 3 // print file name 156 }; 157 virtual void OutputText(CNcbiOstream& out, unsigned short flags, char group = 0) = 0; 158 virtual void OutputXML(CNcbiOstream& out, unsigned short flags) = 0; 159 IsGui() const160 bool IsGui() const { return m_Gui; } GetLineage() const161 const string& GetLineage() const { return m_Lineage; } GetSesameStreetCutoff() const162 float GetSesameStreetCutoff() const { return m_SesameStreetCutoff; } GetUserData() const163 void* GetUserData() const { return m_UserData; } 164 //virtual void SetFile(const string& fname) = 0; SetLineage(const string & s)165 void SetLineage(const string& s) { m_Lineage = s; } 166 //void SetEucariote(bool b){ m_Eucariote = b; } SetSesameStreetCutoff(float f)167 void SetSesameStreetCutoff(float f){ m_SesameStreetCutoff = f; } 168 virtual void SetSuspectRules(const string& name, bool read = true) = 0; SetGui(bool b)169 void SetGui(bool b){ m_Gui = b; } SetUserData(void * p)170 void SetUserData(void* p){ m_UserData = p; } 171 static CRef<CDiscrepancySet> New(objects::CScope& scope); 172 static string Format(const string& str, unsigned int count); 173 virtual const CSerialObject* FindObject(CReportObj& obj, bool alt = false) = 0; 174 175 protected: 176 string m_Lineage; 177 float m_SesameStreetCutoff; 178 //bool m_Eucariote; 179 bool m_Gui; 180 void* m_UserData; 181 }; 182 183 184 class NCBI_DISCREPANCY_EXPORT CDiscrepancyGroup : public CObject 185 { 186 public: CDiscrepancyGroup(const string & name="",const string & test="")187 CDiscrepancyGroup(const string& name = "", const string& test = "") : m_Name(name), m_Test(test) {} Add(CRef<CDiscrepancyGroup> child)188 void Add(CRef<CDiscrepancyGroup> child) { m_List.push_back(child); } 189 TReportItemList Collect(TDiscrepancyCaseMap& tests, bool all = true) const; operator [](size_t n) const190 const CDiscrepancyGroup& operator[](size_t n) const { return *m_List[n]; } 191 192 protected: 193 string m_Name; 194 string m_Test; 195 vector<CRef<CDiscrepancyGroup> > m_List; 196 }; 197 198 199 enum EGroup { 200 eNone = 0, 201 eDisc = 1, 202 eOncaller = 2, 203 eSubmitter = 4, 204 eSmart = 8, 205 eBig = 16, 206 eTSA = 32, 207 eFatal = 64, 208 eAutofix = 128 209 }; 210 typedef unsigned short TGroup; 211 212 213 NCBI_DISCREPANCY_EXPORT string GetDiscrepancyCaseName(const string&); 214 NCBI_DISCREPANCY_EXPORT string GetDiscrepancyDescr(const string&); 215 NCBI_DISCREPANCY_EXPORT TGroup GetDiscrepancyGroup(const string&); 216 NCBI_DISCREPANCY_EXPORT vector<string> GetDiscrepancyNames(TGroup group = 0); 217 NCBI_DISCREPANCY_EXPORT vector<string> GetDiscrepancyAliases(const string&); 218 NCBI_DISCREPANCY_EXPORT bool IsShortrRNA(const objects::CSeq_feat& f, objects::CScope* scope); 219 220 typedef std::function < CRef<objects::CSeq_feat>() > GetFeatureFunc; 221 NCBI_DISCREPANCY_EXPORT string FixProductName(const objects::CSuspect_rule* rule, objects::CScope& scope, string& prot_name, GetFeatureFunc get_mrna, GetFeatureFunc get_cds); 222 223 NCBI_DISCREPANCY_EXPORT CConstRef<objects::CSuspect_rule_set> GetOrganelleProductRules(const string& name = ""); 224 NCBI_DISCREPANCY_EXPORT CConstRef<objects::CSuspect_rule_set> GetProductRules(const string& name = ""); 225 226 END_SCOPE(NDiscrepancy) 227 END_NCBI_SCOPE 228 229 #endif // _MISC_DISCREPANCY_DISCREPANCY_H_ 230