1 /* $Id: reader_base.hpp 632526 2021-06-02 17:25:01Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Frank Ludwig 27 * 28 * File Description: 29 * Basic reader interface 30 * 31 */ 32 33 #ifndef OBJTOOLS_READERS___READERBASE__HPP 34 #define OBJTOOLS_READERS___READERBASE__HPP 35 36 #include <corelib/ncbistd.hpp> 37 #include <objects/seq/Seq_annot.hpp> 38 #include <util/format_guess.hpp> 39 #include <util/line_reader.hpp> 40 #include <util/icanceled.hpp> 41 #include <objtools/readers/track_data.hpp> 42 #include <objtools/readers/line_error.hpp> 43 #include <objtools/readers/reader_message.hpp> 44 #include <objtools/readers/read_util.hpp> 45 46 BEGIN_NCBI_SCOPE 47 BEGIN_objects_SCOPE 48 49 class CSeq_entry; 50 class ILineErrorListener; 51 class CObjReaderLineException; 52 class CTrackData; 53 class CReaderListener; 54 class CReaderMessageHandler; 55 56 // ---------------------------------------------------------------------------- 57 /// Defines and provides stubs for a general interface to a variety of file 58 /// readers. These readers are assumed to read information in some foreign 59 /// format from an input stream, and render it as an NCBI Genbank object. 60 /// 61 class NCBI_XOBJREAD_EXPORT CReaderBase 62 // ---------------------------------------------------------------------------- 63 { 64 public: 65 using TReaderLine = struct SReaderLine { 66 SReaderLine(unsigned int line, string data): mLine(line), mData(data) {}; 67 unsigned int mLine; 68 string mData; 69 }; 70 using TReaderData = vector<TReaderLine>; 71 /// Customization flags that are relevant to all CReaderBase derived readers. 72 /// 73 enum EFlags { 74 fNormal = 0, 75 /// numeric identifiers are local IDs 76 fNumericIdsAsLocal = 1<<0, 77 /// all identifiers are local IDs 78 fAllIdsAsLocal = 1<<1, 79 80 fNextInLine = 1<<2, 81 82 fAsRaw = 1<<3, 83 }; 84 typedef unsigned int TReaderFlags; 85 enum ObjectType { 86 OT_UNKNOWN, 87 OT_SEQANNOT, 88 OT_SEQENTRY 89 }; 90 typedef list< CRef< CSeq_annot > > TAnnotList; 91 typedef TAnnotList TAnnots; 92 typedef TAnnots::iterator TAnnotIt; 93 typedef TAnnots::const_iterator TAnnotCit; 94 95 using SeqIdResolver = CRef<CSeq_id> (*)(const string&, unsigned int, bool); 96 97 protected: 98 /// Protected constructor. Use GetReader() to get an actual reader object. 99 CReaderBase( 100 TReaderFlags flags = 0, //flags 101 const string& name = "", //annot name 102 const string& title = "", //annot title 103 SeqIdResolver seqresolver = CReadUtil::AsSeqId, 104 CReaderListener* pListener = nullptr); 105 106 CReaderBase( 107 const CReaderBase&) = delete; 108 109 CReaderBase( 110 CReaderBase&&) = delete; 111 112 public: 113 virtual ~CReaderBase(); 114 115 /// Allocate a CReaderBase derived reader object based on the given 116 /// file format. 117 /// @param format 118 /// format specifier as defined in the class CFormatGuess 119 /// @param flags 120 /// bit flags as defined in EFlags 121 /// 122 static CReaderBase* GetReader( 123 CFormatGuess::EFormat format, 124 TReaderFlags flags = 0, 125 CReaderListener* = nullptr ); 126 127 /// Read an object from a given input stream, render it as the most 128 /// appropriate Genbank object. 129 /// @param istr 130 /// input stream to read from. 131 /// @param pErrors 132 /// pointer to optional error container object. 133 /// 134 virtual CRef< CSerialObject > 135 ReadObject( 136 CNcbiIstream& istr, 137 ILineErrorListener* pErrors=0 ); 138 139 /// Read an object from a given line reader, render it as the most 140 /// appropriate Genbank object. This will be Seq-annot by default 141 /// but may be something else (Bioseq, Seq-entry, ...) in derived 142 /// classes. 143 /// This is the only function that does not come with a default 144 /// implementation. That is, an implementation must be provided in the 145 /// derived class. 146 /// @param lr 147 /// line reader to read from. 148 /// @param pErrors 149 /// pointer to optional error container object. 150 /// 151 virtual CRef< CSerialObject > 152 ReadObject( 153 ILineReader& lr, 154 ILineErrorListener* pErrors=0 ); 155 156 /// Read an object from a given input stream, render it as a single 157 /// Seq-annot. Return empty Seq-annot otherwise. 158 /// @param istr 159 /// input stream to read from. 160 /// @param pErrors 161 /// pointer to optional error container object. 162 /// 163 virtual CRef< CSeq_annot > 164 ReadSeqAnnot( 165 CNcbiIstream& istr, 166 ILineErrorListener* pErrors=0 ); 167 168 /// Read an object from a given line reader, render it as a single 169 /// Seq-annot, if possible. Return empty Seq-annot otherwise. 170 /// @param lr 171 /// line reader to read from. 172 /// @param pErrors 173 /// pointer to optional error container object. 174 /// 175 virtual CRef< CSeq_annot > 176 ReadSeqAnnot( 177 ILineReader& lr, 178 ILineErrorListener* pErrors=0 ); 179 180 /// Read all objects from given insput stream, returning them as a vector of 181 /// Seq-annots. 182 /// @param annots 183 /// (out) vector containing read Seq-annots 184 /// @param istr 185 /// input stream to read from. 186 /// @param pErrors 187 /// pointer to optional error container object. 188 /// 189 virtual void 190 ReadSeqAnnots( 191 TAnnots& annots, 192 CNcbiIstream& istr, 193 ILineErrorListener* pErrors=0 ); 194 195 /// Read all objects from given insput stream, returning them as a vector of 196 /// Seq-annots. 197 /// @param annots 198 /// (out) vector containing read Seq-annots 199 /// @param lr 200 /// line reader to read from. 201 /// @param pErrors 202 /// pointer to optional error container object. 203 /// 204 virtual void 205 ReadSeqAnnots( 206 TAnnots& annots, 207 ILineReader& lr, 208 ILineErrorListener* pErrors=0 ); 209 210 /// Read an object from a given input stream, render it as a single 211 /// Seq-entry, if possible. Return empty Seq-entry otherwise. 212 /// @param istr 213 /// input stream to read from. 214 /// @param pErrors 215 /// pointer to optional error container object. 216 /// 217 virtual CRef< CSeq_entry > 218 ReadSeqEntry( 219 CNcbiIstream& istr, 220 ILineErrorListener* pErrors=0 ); 221 222 /// Read an object from a given line reader, render it as a single 223 /// Seq-entry, if possible. Return empty Seq-entry otherwise. 224 /// @param lr 225 /// line reader to read from. 226 /// @param pErrors 227 /// pointer to optional error container object. 228 /// 229 virtual CRef< CSeq_entry > 230 ReadSeqEntry( 231 ILineReader& lr, 232 ILineErrorListener* pErrors=0 ); 233 234 void 235 SetProgressReportInterval( 236 unsigned int intv ); 237 238 void 239 SetCanceler( 240 ICanceled* =0); 241 242 bool IsCanceled() const243 IsCanceled() const { return m_pCanceler && m_pCanceler->IsCanceled(); }; 244 245 protected: 246 void xGuardedGetData( 247 ILineReader&, 248 TReaderData&, 249 ILineErrorListener*); 250 251 virtual void xGuardedProcessData( 252 const TReaderData&, 253 CSeq_annot&, 254 ILineErrorListener*); 255 256 virtual CRef<CSeq_annot> xCreateSeqAnnot(); 257 258 virtual void xGetData( 259 ILineReader&, 260 TReaderData&); 261 262 virtual void xProcessData( 263 const TReaderData&, 264 CSeq_annot&); 265 266 virtual bool xGetLine( 267 ILineReader&, 268 string&); 269 270 virtual bool xUngetLine( 271 ILineReader&); 272 273 virtual bool xIsCommentLine( 274 const CTempString& ); 275 276 virtual bool xIsTrackLine( 277 const CTempString& ); 278 279 virtual bool xIsBrowserLine( 280 const CTempString& ); 281 282 virtual bool xIsTrackTerminator( 283 const CTempString& ); 284 285 virtual void xAssignTrackData( 286 CSeq_annot& ); 287 288 virtual bool xParseBrowserLine( 289 const string&, 290 CSeq_annot&); 291 292 virtual bool xParseTrackLine( 293 const string&); 294 295 virtual bool xParseBrowserLine( 296 const string&); 297 298 virtual void xSetBrowserRegion( 299 const string&, 300 CAnnot_descr&); 301 302 virtual void xPostProcessAnnot( 303 CSeq_annot&); 304 305 virtual void xAddConversionInfo( 306 CSeq_annot&, 307 ILineErrorListener*); 308 309 bool xParseComment( 310 const CTempString&, 311 CRef<CSeq_annot>&); 312 313 virtual bool xReadInit(); 314 315 virtual bool xProgressInit( 316 ILineReader& istr); 317 318 bool xIsReportingProgress() const; 319 320 bool xIsOperationCanceled() const; 321 void xReportProgress( 322 ILineErrorListener* = nullptr ); 323 324 void 325 ProcessError( 326 CObjReaderLineException&, 327 ILineErrorListener* ); 328 329 void 330 ProcessError( 331 CLineError&, 332 ILineErrorListener* ); 333 334 void 335 ProcessWarning( 336 CObjReaderLineException&, 337 ILineErrorListener* ); 338 339 void 340 ProcessWarning( 341 CLineError&, 342 ILineErrorListener* ); 343 344 void 345 xProcessReaderMessage( 346 CReaderMessage&, 347 ILineErrorListener*); 348 349 void 350 xProcessLineError( 351 const ILineError&, 352 ILineErrorListener*); 353 354 void 355 xProcessUnknownException( 356 const CException&); 357 358 // 359 // Data: 360 // 361 unsigned int m_uLineNumber; 362 unsigned int m_uDataCount = 0; 363 unsigned int m_uProgressReportInterval; 364 unsigned int m_uNextProgressReport; 365 366 TReaderFlags m_iFlags; 367 string m_AnnotName; 368 string m_AnnotTitle; 369 string m_PendingLine; 370 371 unique_ptr<CTrackData> m_pTrackDefaults; 372 ILineReader* m_pReader; 373 ICanceled* m_pCanceler; 374 SeqIdResolver mSeqIdResolve; 375 unique_ptr<CReaderMessageHandler> m_pMessageHandler; 376 }; 377 378 END_objects_SCOPE 379 END_NCBI_SCOPE 380 381 #endif // OBJTOOLS_READERS___READERBASE__HPP 382