1 /*  $Id: mod_reader.hpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Justin Foley
27  *
28  */
29 #ifndef _MOD_READER_HPP_
30 #define _MOD_READER_HPP_
31 #include <corelib/ncbistd.hpp>
32 #include <map>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <objtools/readers/reader_error_codes.hpp>
36 
37 BEGIN_NCBI_SCOPE
38 BEGIN_SCOPE(objects)
39 
40 
/// Plain value holder for a single modifier: a name, a value and an
/// optional attribute, each stored as a string.
///
/// The two-argument constructor and the setters are templates taking
/// forwarding references, so lvalues are copied and rvalues are moved into
/// the corresponding member.
class NCBI_XOBJREAD_EXPORT CModData
{
public:
    /// Creates a modifier whose name, value and attribute are all empty.
    CModData() = default;

    /// Creates a modifier from a name and a value; the attribute is left
    /// empty.
    ///
    /// @param name   Forwarded into the brace-initializer of m_name.
    /// @param value  Forwarded into the brace-initializer of m_value.
    // Template parameter names deliberately avoid the "_Upper" pattern,
    // which is reserved for the implementation.
    template<typename TName, typename TValue>
    CModData(TName&& name, TValue&& value)
        : m_name{std::forward<TName>(name)},
          m_value{std::forward<TValue>(value)}
    {
    }

    /// Replaces the modifier name.
    /// @param name  Anything assignable to std::string; forwarded as-is.
    template<typename TName>
    void SetName(TName&& name)
    {
        m_name = std::forward<TName>(name);
    }

    /// Replaces the modifier value.
    /// @param value  Anything assignable to std::string; forwarded as-is.
    template<typename TValue>
    void SetValue(TValue&& value)
    {
        m_value = std::forward<TValue>(value);
    }

    /// Replaces the modifier attribute.
    /// @param attrib  Anything assignable to std::string; forwarded as-is.
    template<typename TAttrib>
    void SetAttrib(TAttrib&& attrib)
    {
        m_attrib = std::forward<TAttrib>(attrib);
    }

    /// @return true when the attribute string is non-empty.
    bool IsSetAttrib() const
    {
        return !m_attrib.empty();
    }

    /// @return The stored name (reference valid while this object lives).
    const std::string& GetName() const
    {
        return m_name;
    }

    /// @return The stored value (reference valid while this object lives).
    const std::string& GetValue() const
    {
        return m_value;
    }

    /// @return The stored attribute; empty when none has been set.
    const std::string& GetAttrib() const
    {
        return m_attrib;
    }

    // Public data members are part of the existing interface; declaration
    // order is kept unchanged.
    std::string m_name;
    std::string m_value;
    std::string m_attrib;
};
87 
88 
89 
90 class NCBI_XOBJREAD_EXPORT CModHandler
91 {
92 public:
93 
94     using TModList = list<CModData>;
95 
96     enum EHandleExisting {
97         eReplace        = 0,
98         ePreserve       = 1,
99         eAppendReplace  = 2,
100         eAppendPreserve = 3
101     };
102 
103     using TMods = map<string, list<CModData>>;
104     using TModEntry = TMods::value_type;
105     using FReportError = function<void(const CModData& mod, const string& message, EDiagSev severity, EModSubcode subcode)>;
106 
107     CModHandler();
108     void SetExcludedMods(const vector<string>& excluded_mods);
109 
110     void AddMods(const TModList& mods,
111                  EHandleExisting handle_existing,
112                  TModList& rejected_mods,
113                  FReportError fReportError=nullptr);
114 
115     void SetMods(const TMods& mods);
116 
117     const TMods& GetMods(void) const;
118 
119     void Clear(void);
120 
121     static const string& GetCanonicalName(const TModEntry& mod_entry);
122     static const string& AssertReturnSingleValue(const TModEntry& mod_entry);
123     static string GetCanonicalName(const string& name);
124 
125 private:
126     static string x_GetNormalizedString(const string& name);
127     static bool x_MultipleValuesAllowed(const string& canonical_name);
128     static bool x_IsDeprecated(const string& canonical_name);
129     void x_SaveMods(TMods&& mods, EHandleExisting handle_existing, TMods& dest);
130 
131     TMods m_Mods;
132 
133     using TNameMap = unordered_map<string, string>;
134     using TNameSet = unordered_set<string>;
135     static const TNameMap sm_NameMap;
136     static const TNameSet sm_MultipleValuesForbidden;
137     static const TNameSet sm_DeprecatedModifiers;
138     TNameSet m_ExcludedModifiers;
139 };
140 
141 
// Forward declarations only; the definitions are not part of this header.
// CBioseq and CSeq_inst appear below purely as reference parameters.
class CBioseq;
class CSeq_inst;
class CSeq_loc;
class CModReaderException;
146 
147 
148 class NCBI_XOBJREAD_EXPORT CModAdder
149 {
150 public:
151     using TMods = CModHandler::TMods;
152     using TModEntry = CModHandler::TModEntry;
153     using TSkippedMods = list<CModData>;
154     using FReportError = CModHandler::FReportError;
155     using FPostMessage = FReportError;
156 
157     static void Apply(const CModHandler& mod_handler,
158             CBioseq& bioseq,
159             TSkippedMods& skipped_mods,
160             FPostMessage fPostMessage=nullptr);
161 
162     static void Apply(const CModHandler& mod_handler,
163             CBioseq& bioseq,
164             TSkippedMods& skipped_mods,
165             bool logInfo,
166             FPostMessage fPostMessage=nullptr);
167 private:
168 
169     static const string& x_GetModName(const TModEntry& mod_entry);
170     static const string& x_GetModValue(const TModEntry& mod_entry);
171 
172     static bool x_TrySeqInstMod(const TModEntry& mod_entry,
173             CSeq_inst& seq_inst,
174             TSkippedMods& skipped_mods,
175             FPostMessage fPostMessage);
176 
177     static void x_SetStrand(const TModEntry& mod_entry,
178             CSeq_inst& seq_inst,
179             TSkippedMods& skipped_mods,
180             FPostMessage fPostMessage);
181 
182     static void x_SetMolecule(const TModEntry& mod_entry,
183             CSeq_inst& seq_inst,
184             TSkippedMods& skipped_mods,
185             FPostMessage fPostMessage);
186 
187     static void x_SetMoleculeFromMolType(const TModEntry& mod_entry,
188             CSeq_inst& seq_inst);
189 
190     static void x_SetTopology(const TModEntry& mod_entry,
191             CSeq_inst& seq_inst,
192             TSkippedMods& skipped_mods,
193             FPostMessage fPostMessage);
194 
195     static void x_SetHist(const TModEntry& mod_entry,
196             CSeq_inst& seq_inst);
197 
198     static void x_ReportInvalidValue(const CModData& mod_data,
199                                      TSkippedMods& skipped_mods,
200                                      FPostMessage fPostMessage);
201 };
202 
203 
204 class IObjtoolsListener;
205 class NCBI_XOBJREAD_EXPORT CDefaultModErrorReporter
206 {
207 public:
208     using TModList = list<CModData>;
209 
210     CDefaultModErrorReporter(
211             const string& seqId,
212             int lineNum,
213             IObjtoolsListener* pMessageListener);
214 
215     void operator()(
216             const CModData& mod,
217             const string& msg,
218             EDiagSev sev,
219             EModSubcode subcode);
220 
221 private:
222     string m_SeqId;
223     int m_LineNum;
224     IObjtoolsListener* m_pMessageListener;
225 };
226 
227 
228 class NCBI_XOBJREAD_EXPORT CTitleParser
229 {
230 public:
231     using TModList = CModHandler::TModList;
232     static void Apply(const CTempString& title, TModList& mods, string& remainder);
233     static bool HasMods(const CTempString& title);
234 private:
235     static bool x_FindBrackets(const CTempString& line, size_t& start, size_t& stop, size_t& eq_pos);
236 };
237 
238 END_SCOPE(objects)
239 END_NCBI_SCOPE
240 
241 #endif // _MOD_READER_HPP_
242