/*  $Id: reader_base.hpp 632526 2021-06-02 17:25:01Z ivanov $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Frank Ludwig
 *
 * File Description:
 *   Basic reader interface
 *
 */

#ifndef OBJTOOLS_READERS___READERBASE__HPP
#define OBJTOOLS_READERS___READERBASE__HPP

#include <corelib/ncbistd.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <util/format_guess.hpp>
#include <util/line_reader.hpp>
#include <util/icanceled.hpp>
#include <objtools/readers/track_data.hpp>
#include <objtools/readers/line_error.hpp>
#include <objtools/readers/reader_message.hpp>
#include <objtools/readers/read_util.hpp>

#include <utility>   // std::move

46 BEGIN_NCBI_SCOPE
47 BEGIN_objects_SCOPE
48 
49 class CSeq_entry;
50 class ILineErrorListener;
51 class CObjReaderLineException;
52 class CTrackData;
53 class CReaderListener;
54 class CReaderMessageHandler;
55 
56 //  ----------------------------------------------------------------------------
57 /// Defines and provides stubs for a general interface to a variety of file
58 /// readers. These readers are assumed to read information in some foreign
59 /// format from an input stream, and render it as an NCBI Genbank object.
60 ///
61 class NCBI_XOBJREAD_EXPORT CReaderBase
62 //  ----------------------------------------------------------------------------
63 {
64 public:
65     using TReaderLine = struct SReaderLine {
66         SReaderLine(unsigned int line, string data): mLine(line), mData(data) {};
67         unsigned int mLine;
68         string mData;
69     };
70     using TReaderData = vector<TReaderLine>;
71     /// Customization flags that are relevant to all CReaderBase derived readers.
72     ///
73     enum EFlags {
74         fNormal = 0,
75         /// numeric identifiers are local IDs
76         fNumericIdsAsLocal  = 1<<0,
77         /// all identifiers are local IDs
78         fAllIdsAsLocal      = 1<<1,
79 
80         fNextInLine = 1<<2,
81 
82         fAsRaw = 1<<3,
83     };
84     typedef unsigned int TReaderFlags;
85     enum ObjectType {
86         OT_UNKNOWN,
87         OT_SEQANNOT,
88         OT_SEQENTRY
89     };
90     typedef list< CRef< CSeq_annot > > TAnnotList;
91     typedef TAnnotList TAnnots;
92     typedef TAnnots::iterator TAnnotIt;
93     typedef TAnnots::const_iterator TAnnotCit;
94 
95     using SeqIdResolver = CRef<CSeq_id> (*)(const string&, unsigned int, bool);
96 
97 protected:
98     /// Protected constructor. Use GetReader() to get an actual reader object.
99     CReaderBase(
100         TReaderFlags flags = 0,     //flags
101         const string& name = "",    //annot name
102         const string& title = "",   //annot title
103         SeqIdResolver seqresolver = CReadUtil::AsSeqId,
104         CReaderListener* pListener = nullptr);
105 
106     CReaderBase(
107         const CReaderBase&) = delete;
108 
109     CReaderBase(
110         CReaderBase&&) = delete;
111 
112 public:
113     virtual ~CReaderBase();
114 
115     /// Allocate a CReaderBase derived reader object based on the given
116     /// file format.
117     /// @param format
118     ///   format specifier as defined in the class CFormatGuess
119     /// @param flags
120     ///   bit flags as defined in EFlags
121     ///
122     static CReaderBase* GetReader(
123         CFormatGuess::EFormat format,
124         TReaderFlags flags = 0,
125         CReaderListener* = nullptr );
126 
127     /// Read an object from a given input stream, render it as the most
128     /// appropriate Genbank object.
129     /// @param istr
130     ///   input stream to read from.
131     /// @param pErrors
132     ///   pointer to optional error container object.
133     ///
134     virtual CRef< CSerialObject >
135     ReadObject(
136         CNcbiIstream& istr,
137         ILineErrorListener* pErrors=0 );
138 
139     /// Read an object from a given line reader, render it as the most
140     /// appropriate Genbank object. This will be Seq-annot by default
141     /// but may be something else (Bioseq, Seq-entry, ...) in derived
142     /// classes.
143     /// This is the only function that does not come with a default
144     /// implementation. That is, an implementation must be provided in the
145     /// derived class.
146     /// @param lr
147     ///   line reader to read from.
148     /// @param pErrors
149     ///   pointer to optional error container object.
150     ///
151     virtual CRef< CSerialObject >
152     ReadObject(
153         ILineReader& lr,
154         ILineErrorListener* pErrors=0 );
155 
156     /// Read an object from a given input stream, render it as a single
157     /// Seq-annot. Return empty Seq-annot otherwise.
158     /// @param istr
159     ///   input stream to read from.
160     /// @param pErrors
161     ///   pointer to optional error container object.
162     ///
163     virtual CRef< CSeq_annot >
164     ReadSeqAnnot(
165         CNcbiIstream& istr,
166         ILineErrorListener* pErrors=0 );
167 
168     /// Read an object from a given line reader, render it as a single
169     /// Seq-annot, if possible. Return empty Seq-annot otherwise.
170     /// @param lr
171     ///   line reader to read from.
172     /// @param pErrors
173     ///   pointer to optional error container object.
174     ///
175     virtual CRef< CSeq_annot >
176     ReadSeqAnnot(
177         ILineReader& lr,
178         ILineErrorListener* pErrors=0 );
179 
180     /// Read all objects from given insput stream, returning them as a vector of
181     /// Seq-annots.
182     /// @param annots
183     ///   (out) vector containing read Seq-annots
184     /// @param istr
185     ///   input stream to read from.
186     /// @param pErrors
187     ///   pointer to optional error container object.
188     ///
189     virtual void
190     ReadSeqAnnots(
191         TAnnots& annots,
192         CNcbiIstream& istr,
193         ILineErrorListener* pErrors=0 );
194 
195     /// Read all objects from given insput stream, returning them as a vector of
196     /// Seq-annots.
197     /// @param annots
198     ///   (out) vector containing read Seq-annots
199     /// @param lr
200     ///   line reader to read from.
201     /// @param pErrors
202     ///   pointer to optional error container object.
203     ///
204     virtual void
205     ReadSeqAnnots(
206         TAnnots& annots,
207         ILineReader& lr,
208         ILineErrorListener* pErrors=0 );
209 
210     /// Read an object from a given input stream, render it as a single
211     /// Seq-entry, if possible. Return empty Seq-entry otherwise.
212     /// @param istr
213     ///   input stream to read from.
214     /// @param pErrors
215     ///   pointer to optional error container object.
216     ///
217     virtual CRef< CSeq_entry >
218     ReadSeqEntry(
219         CNcbiIstream& istr,
220         ILineErrorListener* pErrors=0 );
221 
222     /// Read an object from a given line reader, render it as a single
223     /// Seq-entry, if possible. Return empty Seq-entry otherwise.
224     /// @param lr
225     ///   line reader to read from.
226     /// @param pErrors
227     ///   pointer to optional error container object.
228     ///
229     virtual CRef< CSeq_entry >
230     ReadSeqEntry(
231         ILineReader& lr,
232         ILineErrorListener* pErrors=0 );
233 
234     void
235     SetProgressReportInterval(
236         unsigned int intv );
237 
238     void
239     SetCanceler(
240         ICanceled* =0);
241 
242     bool
IsCanceled() const243     IsCanceled() const { return m_pCanceler && m_pCanceler->IsCanceled(); };
244 
245 protected:
246     void xGuardedGetData(
247         ILineReader&,
248         TReaderData&,
249         ILineErrorListener*);
250 
251     virtual void xGuardedProcessData(
252         const TReaderData&,
253         CSeq_annot&,
254         ILineErrorListener*);
255 
256     virtual CRef<CSeq_annot> xCreateSeqAnnot();
257 
258     virtual void xGetData(
259         ILineReader&,
260         TReaderData&);
261 
262     virtual void xProcessData(
263         const TReaderData&,
264         CSeq_annot&);
265 
266     virtual bool xGetLine(
267         ILineReader&,
268         string&);
269 
270     virtual bool xUngetLine(
271         ILineReader&);
272 
273     virtual bool xIsCommentLine(
274         const CTempString& );
275 
276     virtual bool xIsTrackLine(
277         const CTempString& );
278 
279     virtual bool xIsBrowserLine(
280         const CTempString& );
281 
282     virtual bool xIsTrackTerminator(
283         const CTempString& );
284 
285     virtual void xAssignTrackData(
286         CSeq_annot& );
287 
288     virtual bool xParseBrowserLine(
289         const string&,
290         CSeq_annot&);
291 
292     virtual bool xParseTrackLine(
293         const string&);
294 
295     virtual bool xParseBrowserLine(
296         const string&);
297 
298     virtual void xSetBrowserRegion(
299         const string&,
300         CAnnot_descr&);
301 
302     virtual void xPostProcessAnnot(
303         CSeq_annot&);
304 
305     virtual void xAddConversionInfo(
306         CSeq_annot&,
307         ILineErrorListener*);
308 
309     bool xParseComment(
310         const CTempString&,
311         CRef<CSeq_annot>&);
312 
313     virtual bool xReadInit();
314 
315     virtual bool xProgressInit(
316         ILineReader& istr);
317 
318     bool xIsReportingProgress() const;
319 
320     bool xIsOperationCanceled() const;
321     void xReportProgress(
322         ILineErrorListener* = nullptr );
323 
324     void
325     ProcessError(
326         CObjReaderLineException&,
327         ILineErrorListener* );
328 
329     void
330     ProcessError(
331         CLineError&,
332         ILineErrorListener* );
333 
334     void
335     ProcessWarning(
336         CObjReaderLineException&,
337         ILineErrorListener* );
338 
339     void
340     ProcessWarning(
341         CLineError&,
342         ILineErrorListener* );
343 
344     void
345     xProcessReaderMessage(
346         CReaderMessage&,
347         ILineErrorListener*);
348 
349     void
350     xProcessLineError(
351         const ILineError&,
352         ILineErrorListener*);
353 
354     void
355     xProcessUnknownException(
356         const CException&);
357 
358     //
359     //  Data:
360     //
361     unsigned int m_uLineNumber;
362     unsigned int m_uDataCount = 0;
363     unsigned int m_uProgressReportInterval;
364     unsigned int m_uNextProgressReport;
365 
366     TReaderFlags m_iFlags;
367     string m_AnnotName;
368     string m_AnnotTitle;
369     string m_PendingLine;
370 
371     unique_ptr<CTrackData>  m_pTrackDefaults;
372     ILineReader* m_pReader;
373     ICanceled* m_pCanceler;
374     SeqIdResolver mSeqIdResolve;
375     unique_ptr<CReaderMessageHandler> m_pMessageHandler;
376 };
377 
378 END_objects_SCOPE
379 END_NCBI_SCOPE
380 
381 #endif // OBJTOOLS_READERS___READERBASE__HPP
382