1 /*  $Id: reader_base.cpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Frank Ludwig
27  *
28  * File Description:
29  *   Basic reader interface.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbithr.hpp>
37 #include <corelib/ncbiutil.hpp>
38 #include <corelib/ncbiexpt.hpp>
39 #include <corelib/stream_utils.hpp>
40 
41 #include <util/static_map.hpp>
42 #include <util/line_reader.hpp>
43 
44 #include <serial/iterator.hpp>
45 #include <serial/objistrasn.hpp>
46 
47 // Objects includes
48 #include <objects/general/Int_fuzz.hpp>
49 #include <objects/general/Object_id.hpp>
50 #include <objects/general/User_object.hpp>
51 #include <objects/general/User_field.hpp>
52 #include <objects/general/Dbtag.hpp>
53 
54 #include <objects/seqloc/Seq_id.hpp>
55 #include <objects/seqloc/Seq_loc.hpp>
56 #include <objects/seqloc/Seq_interval.hpp>
57 #include <objects/seqloc/Seq_point.hpp>
58 
59 #include <objects/seqset/Seq_entry.hpp>
60 #include <objects/seq/Seq_annot.hpp>
61 #include <objects/seq/Annotdesc.hpp>
62 #include <objects/seq/Annot_descr.hpp>
63 #include <objects/seq/Seq_descr.hpp>
64 #include <objects/seqfeat/SeqFeatData.hpp>
65 
66 #include <objects/seqfeat/Seq_feat.hpp>
67 #include <objects/seqfeat/BioSource.hpp>
68 #include <objects/seqfeat/Org_ref.hpp>
69 #include <objects/seqfeat/OrgName.hpp>
70 #include <objects/seqfeat/SubSource.hpp>
71 #include <objects/seqfeat/OrgMod.hpp>
72 #include <objects/seqfeat/Gene_ref.hpp>
73 #include <objects/seqfeat/Cdregion.hpp>
74 #include <objects/seqfeat/Code_break.hpp>
75 #include <objects/seqfeat/Genetic_code.hpp>
76 #include <objects/seqfeat/Genetic_code_table.hpp>
77 #include <objects/seqfeat/RNA_ref.hpp>
78 #include <objects/seqfeat/Trna_ext.hpp>
79 #include <objects/seqfeat/Imp_feat.hpp>
80 #include <objects/seqfeat/Gb_qual.hpp>
81 #include <objects/seqfeat/Feat_id.hpp>
82 
83 #include <objtools/readers/read_util.hpp>
84 #include <objtools/readers/reader_exception.hpp>
85 #include <objtools/readers/line_error.hpp>
86 #include <objtools/readers/message_listener.hpp>
87 #include <objtools/readers/track_data.hpp>
88 #include <objtools/readers/reader_base.hpp>
89 #include <objtools/readers/reader_message.hpp>
90 #include <objtools/readers/bed_reader.hpp>
91 #include <objtools/readers/microarray_reader.hpp>
92 #include <objtools/readers/wiggle_reader.hpp>
93 #include <objtools/readers/gff3_reader.hpp>
94 #include <objtools/readers/gtf_reader.hpp>
95 #include <objtools/readers/gvf_reader.hpp>
96 #include <objtools/readers/vcf_reader.hpp>
97 #include <objtools/readers/rm_reader.hpp>
98 #include <objtools/readers/psl_reader.hpp>
99 #include <objtools/readers/fasta.hpp>
100 #include <objtools/readers/readfeat.hpp>
101 #include <objtools/error_codes.hpp>
102 #include <objtools/readers/ucscregion_reader.hpp>
103 
#include <algorithm>
#include <ctime>
#include <exception>
106 
107 #include "reader_data.hpp"
108 #include "reader_message_handler.hpp"
109 
110 #define NCBI_USE_ERRCODE_X   Objtools_Rd_RepMask
111 
112 BEGIN_NCBI_SCOPE
113 BEGIN_objects_SCOPE // namespace ncbi::objects::
114 
115 //  ----------------------------------------------------------------------------
116 CReaderBase*
GetReader(CFormatGuess::EFormat format,TReaderFlags flags,CReaderListener * pRL)117 CReaderBase::GetReader(
118     CFormatGuess::EFormat format,
119     TReaderFlags flags,
120     CReaderListener* pRL )
121 //  ----------------------------------------------------------------------------
122 {
123     switch ( format ) {
124     default:
125         return 0;
126     case CFormatGuess::eBed:
127         return new CBedReader(flags);
128     case CFormatGuess::eBed15:
129         return new CMicroArrayReader(flags, pRL);
130     case CFormatGuess::eWiggle:
131         return new CWiggleReader(flags);
132     case CFormatGuess::eGtf:
133     case CFormatGuess::eGtf_POISENED:
134         return new CGtfReader(flags);
135     case CFormatGuess::eGff3:
136         return new CGff3Reader(flags);
137     case CFormatGuess::eGvf:
138         return new CGvfReader(flags);
139     case CFormatGuess::eVcf:
140         return new CVcfReader(flags, pRL);
141     case CFormatGuess::eRmo:
142         return new CRepeatMaskerReader(flags);
143     case CFormatGuess::eFasta:
144         return new CFastaReader(flags);
145     case CFormatGuess::eFiveColFeatureTable:
146         return new CFeature_table_reader(flags);
147     case CFormatGuess::eUCSCRegion:
148         return new CUCSCRegionReader(flags);
149     case CFormatGuess::ePsl:
150         return new CPslReader(flags, pRL);
151     }
152 }
153 
154 //  ----------------------------------------------------------------------------
CReaderBase(TReaderFlags flags,const string & annotName,const string & annotTitle,SeqIdResolver seqidresolver,CReaderListener * pListener)155 CReaderBase::CReaderBase(
156     TReaderFlags flags,
157     const string& annotName,
158     const string& annotTitle,
159     SeqIdResolver seqidresolver,
160     CReaderListener* pListener) :
161 //  ----------------------------------------------------------------------------
162     m_uLineNumber(0),
163     m_uProgressReportInterval(0),
164     m_uNextProgressReport(0),
165     m_iFlags(flags),
166     m_AnnotName(annotName),
167     m_AnnotTitle(annotTitle),
168     m_pTrackDefaults(new CTrackData),
169     m_pReader(nullptr),
170     m_pCanceler(nullptr),
171     mSeqIdResolve(seqidresolver),
172     m_pMessageHandler(new CReaderMessageHandler(pListener))
173 {
174 }
175 
176 
177 //  ----------------------------------------------------------------------------
~CReaderBase()178 CReaderBase::~CReaderBase()
179 //  ----------------------------------------------------------------------------
180 {
181 }
182 
183 //  ----------------------------------------------------------------------------
184 CRef< CSerialObject >
ReadObject(CNcbiIstream & istr,ILineErrorListener * pMessageListener)185 CReaderBase::ReadObject(
186     CNcbiIstream& istr,
187     ILineErrorListener* pMessageListener )
188 //  ----------------------------------------------------------------------------
189 {
190     CStreamLineReader lr( istr );
191     return ReadObject( lr, pMessageListener );
192 }
193 
194 //  ----------------------------------------------------------------------------
195 CRef<CSerialObject>
ReadObject(ILineReader & lr,ILineErrorListener * pMessageListener)196 CReaderBase::ReadObject(
197     ILineReader& lr,
198     ILineErrorListener* pMessageListener )
199 //  ----------------------------------------------------------------------------
200 {
201     CRef<CSerialObject> object(
202         ReadSeqAnnot( lr, pMessageListener ).ReleaseOrNull() );
203     return object;
204 }
205 
206 //  ----------------------------------------------------------------------------
207 CRef< CSeq_annot >
ReadSeqAnnot(CNcbiIstream & istr,ILineErrorListener * pMessageListener)208 CReaderBase::ReadSeqAnnot(
209     CNcbiIstream& istr,
210     ILineErrorListener* pMessageListener )
211 //  ----------------------------------------------------------------------------
212 {
213     CStreamLineReader lr( istr );
214     return ReadSeqAnnot( lr, pMessageListener );
215 }
216 
217 //  ----------------------------------------------------------------------------
218 CRef< CSeq_annot >
ReadSeqAnnot(ILineReader & lr,ILineErrorListener * pEL)219 CReaderBase::ReadSeqAnnot(
220     ILineReader& lr,
221     ILineErrorListener* pEL)
222 //  ----------------------------------------------------------------------------
223 {
224     xProgressInit(lr);
225 
226     m_uDataCount = 0;
227     CRef<CSeq_annot> pAnnot = xCreateSeqAnnot();
228 
229     TReaderData readerData;
230     xGuardedGetData(lr, readerData, pEL);
231     if (readerData.empty()) {
232         pAnnot.Reset();
233         return pAnnot;
234     }
235     while (!readerData.empty()) {
236         if (IsCanceled()) {
237             CReaderMessage cancelled(
238                 eDiag_Fatal,
239                 m_uLineNumber,
240                 "Data import interrupted by user.");
241             xProcessReaderMessage(cancelled, pEL);
242         }
243         xReportProgress();
244 
245         xGuardedProcessData(readerData, *pAnnot, pEL);
246         xGuardedGetData(lr, readerData, pEL);
247     }
248     xPostProcessAnnot(*pAnnot);
249     return pAnnot;
250 }
251 
252 //  ----------------------------------------------------------------------------
253 CRef<CSeq_annot>
xCreateSeqAnnot()254 CReaderBase::xCreateSeqAnnot()
255 //  ----------------------------------------------------------------------------
256 {
257     CRef<CSeq_annot> pAnnot(new CSeq_annot);
258     if (!m_AnnotName.empty()) {
259         pAnnot->SetNameDesc(m_AnnotName);
260     }
261     if (!m_AnnotTitle.empty()) {
262         pAnnot->SetTitleDesc(m_AnnotTitle);
263     }
264     return pAnnot;
265 }
266 
267 //  ----------------------------------------------------------------------------
268 void
xGuardedGetData(ILineReader & lr,TReaderData & readerData,ILineErrorListener * pEL)269 CReaderBase::xGuardedGetData(
270     ILineReader& lr,
271     TReaderData& readerData,
272     ILineErrorListener* pEL)
273 //  ----------------------------------------------------------------------------
274 {
275     try {
276         xGetData(lr, readerData);
277     }
278     catch (CReaderMessage& err) {
279         xProcessReaderMessage(err, pEL);
280     }
281     catch (ILineError& err) {
282         xProcessLineError(err, pEL);
283     }
284     catch (CException& err) {
285         xProcessUnknownException(err);
286     }
287 }
288 
289 //  ----------------------------------------------------------------------------
290 void
xGuardedProcessData(const TReaderData & readerData,CSeq_annot & annot,ILineErrorListener * pEL)291 CReaderBase::xGuardedProcessData(
292     const TReaderData& readerData,
293     CSeq_annot& annot,
294     ILineErrorListener* pEL)
295 //  ----------------------------------------------------------------------------
296 {
297     try {
298         xProcessData(readerData, annot);
299     }
300     catch (CReaderMessage& err) {
301         xProcessReaderMessage(err, pEL);
302     }
303     catch (ILineError& err) {
304         xProcessLineError(err, pEL);
305     }
306     catch (CException& err) {
307         xProcessUnknownException(err);
308     }
309 }
310 
311 //  ----------------------------------------------------------------------------
312 void
xGetData(ILineReader & lr,TReaderData & readerData)313 CReaderBase::xGetData(
314     ILineReader& lr,
315     TReaderData& readerData)
316 //  ----------------------------------------------------------------------------
317 {
318     readerData.clear();
319     string line;
320     if (xGetLine(lr, line)) {
321         readerData.push_back(TReaderLine{m_uLineNumber, line});
322     }
323     ++m_uDataCount;
324 }
325 
326 //  ----------------------------------------------------------------------------
327 void
xProcessData(const TReaderData & readerData,CSeq_annot & annot)328 CReaderBase::xProcessData(
329     const TReaderData& readerData,
330     CSeq_annot& annot)
331 //  ----------------------------------------------------------------------------
332 {
333 }
334 
335 //  ---------------------------------------------------------------------------
336 void
ReadSeqAnnots(TAnnots & annots,CNcbiIstream & istr,ILineErrorListener * pMessageListener)337 CReaderBase::ReadSeqAnnots(
338     TAnnots& annots,
339     CNcbiIstream& istr,
340     ILineErrorListener* pMessageListener )
341 //  ---------------------------------------------------------------------------
342 {
343     CStreamLineReader lr( istr );
344     ReadSeqAnnots( annots, lr, pMessageListener );
345 }
346 
347 //  ---------------------------------------------------------------------------
348 void
ReadSeqAnnots(TAnnots & annots,ILineReader & lr,ILineErrorListener * pMessageListener)349 CReaderBase::ReadSeqAnnots(
350     TAnnots& annots,
351     ILineReader& lr,
352     ILineErrorListener* pMessageListener )
353 //  ----------------------------------------------------------------------------
354 {
355     xReadInit();
356     xProgressInit(lr);
357     CRef<CSeq_annot> annot = ReadSeqAnnot(lr, pMessageListener);
358     while (annot) {
359         annots.push_back(annot);
360         annot = ReadSeqAnnot(lr, pMessageListener);
361     }
362 }
363 
364 //  ----------------------------------------------------------------------------
365 CRef< CSeq_entry >
ReadSeqEntry(CNcbiIstream & istr,ILineErrorListener * pMessageListener)366 CReaderBase::ReadSeqEntry(
367     CNcbiIstream& istr,
368     ILineErrorListener* pMessageListener )
369 //  ----------------------------------------------------------------------------
370 {
371     CStreamLineReader lr( istr );
372     CRef<CSeq_entry> pResult = ReadSeqEntry( lr, pMessageListener );
373     return pResult;
374 }
375 
376 //  ----------------------------------------------------------------------------
377 CRef< CSeq_entry >
ReadSeqEntry(ILineReader & lr,ILineErrorListener *)378 CReaderBase::ReadSeqEntry(
379     ILineReader& lr,
380     ILineErrorListener* )
381 //  ----------------------------------------------------------------------------
382 {
383     xProgressInit(lr);
384     return CRef<CSeq_entry>();
385 }
386 
387 //  ----------------------------------------------------------------------------
388 void
ProcessError(CObjReaderLineException & err,ILineErrorListener * pContainer)389 CReaderBase::ProcessError(
390     CObjReaderLineException& err,
391     ILineErrorListener* pContainer )
392 //  ----------------------------------------------------------------------------
393 {
394     err.SetLineNumber( m_uLineNumber );
395     if (!pContainer) {
396         err.Throw();
397     }
398     if (!pContainer->PutError(err)) {
399         AutoPtr<CObjReaderLineException> pErr(
400             CObjReaderLineException::Create(
401             eDiag_Critical,
402             0,
403             "Error allowance exceeded",
404             ILineError::eProblem_GeneralParsingError) );
405         pErr->Throw();
406     }
407 }
408 
409 //  ----------------------------------------------------------------------------
410 void
ProcessWarning(CObjReaderLineException & err,ILineErrorListener * pContainer)411 CReaderBase::ProcessWarning(
412     CObjReaderLineException& err,
413     ILineErrorListener* pContainer )
414 //  ----------------------------------------------------------------------------
415 {
416     err.SetLineNumber( m_uLineNumber );
417     if (!pContainer) {
418         cerr << m_uLineNumber << ": " << err.SeverityStr() << err.Message()
419             << endl;
420         return;
421     }
422     if (!pContainer->PutError(err)) {
423         err.Throw();
424     }
425 }
426 
427 //  ----------------------------------------------------------------------------
428 void
ProcessError(CLineError & err,ILineErrorListener * pContainer)429 CReaderBase::ProcessError(
430     CLineError& err,
431     ILineErrorListener* pContainer )
432 //  ----------------------------------------------------------------------------
433 {
434     if (!pContainer  ||  !pContainer->PutError(err)) {
435         err.Throw();
436     }
437  }
438 
439 //  ----------------------------------------------------------------------------
440 void
ProcessWarning(CLineError & err,ILineErrorListener * pContainer)441 CReaderBase::ProcessWarning(
442     CLineError& err,
443     ILineErrorListener* pContainer )
444 //  ----------------------------------------------------------------------------
445 {
446     if (!pContainer) {
447         cerr << m_uLineNumber << ": " << err.SeverityStr() << err.Message()
448             << endl;
449         return;
450     }
451     if (!pContainer->PutError(err)) {
452         err.Throw();
453     }
454  }
455 
456 //  ----------------------------------------------------------------------------
xSetBrowserRegion(const string & strRaw,CAnnot_descr & desc)457 void CReaderBase::xSetBrowserRegion(
458     const string& strRaw,
459     CAnnot_descr& desc)
460 //  ----------------------------------------------------------------------------
461 {
462     CReaderMessage error(
463         eDiag_Error,
464         m_uLineNumber,
465         "Bad browser line: cannot parse browser position.");
466 
467     CRef<CSeq_loc> location( new CSeq_loc );
468 
469     string strChrom;
470     string strInterval;
471     if ( ! NStr::SplitInTwo( strRaw, ":", strChrom, strInterval ) ) {
472         throw error;
473     }
474     CRef<CSeq_id> id( new CSeq_id( CSeq_id::e_Local, strChrom ) );
475 
476     if (NStr::Compare(strInterval, "start-stop") == 0 )
477     {
478         location->SetWhole(*id);
479     }
480     else
481     {
482         string strFrom;
483         string strTo;
484         if ( ! NStr::SplitInTwo( strInterval, "-", strFrom, strTo ) ) {
485             throw error;
486         }
487         try
488         {
489             int n_from,n_to;
490 
491             n_from = NStr::StringToInt(strFrom, NStr::fAllowCommas);
492             n_to   = NStr::StringToInt(strTo, NStr::fAllowCommas);
493 
494             CSeq_interval& interval = location->SetInt();
495             interval.SetFrom(n_from-1);
496             interval.SetTo(n_to-1);
497             interval.SetStrand( eNa_strand_unknown );
498             location->SetId( *id );
499 
500         }
501         catch (const CStringException&) {
502             location.Reset();
503             throw error;
504         }
505     }
506 
507     if (location.NotEmpty())
508     {
509         CRef<CAnnotdesc> region( new CAnnotdesc() );
510         region->SetRegion( *location );
511         desc.Set().push_back( region );
512     }
513 }
514 
515 //  ----------------------------------------------------------------------------
xParseBrowserLine(const string & strLine,CSeq_annot & annot)516 bool CReaderBase::xParseBrowserLine(
517     const string& strLine,
518     CSeq_annot& annot)
519 //  ----------------------------------------------------------------------------
520 {
521     CReaderMessage error(
522         eDiag_Error,
523         m_uLineNumber,
524         "Bad browser line: incomplete position directive.");
525 
526     if ( ! NStr::StartsWith( strLine, "browser" ) ) {
527         return false;
528     }
529     CAnnot_descr& desc = annot.SetDesc();
530 
531     vector<string> fields;
532     NStr::Split(strLine, " \t", fields, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
533     for ( vector<string>::iterator it = fields.begin(); it != fields.end(); ++it ) {
534         if ( *it == "position" ) {
535             ++it;
536             if ( it == fields.end() ) {
537                throw error;
538             }
539             xSetBrowserRegion(*it, desc);
540         }
541     }
542     return true;
543 }
544 
545 //  ----------------------------------------------------------------------------
xAssignTrackData(CSeq_annot & annot)546 void CReaderBase::xAssignTrackData(
547     CSeq_annot& annot )
548 //  ----------------------------------------------------------------------------
549 {
550     if (!m_AnnotName.empty()) {
551         annot.SetNameDesc(m_AnnotName);
552     }
553     if (!m_AnnotTitle.empty()) {
554         annot.SetTitleDesc(m_AnnotTitle);
555     }
556     m_pTrackDefaults->WriteToAnnot(annot);
557 }
558 
559 //  ----------------------------------------------------------------------------
xParseTrackLine(const string & strLine)560 bool CReaderBase::xParseTrackLine(
561     const string& strLine)
562 //  ----------------------------------------------------------------------------
563 {
564     vector<string> parts;
565     CReadUtil::Tokenize( strLine, " \t", parts );
566     if ( !CTrackData::IsTrackData( parts ) ) {
567         return false;
568     }
569     m_pTrackDefaults->ParseLine( parts );
570     return true;
571 }
572 
573 //  ----------------------------------------------------------------------------
xParseBrowserLine(const string & strLine)574 bool CReaderBase::xParseBrowserLine(
575     const string& strLine)
576 //  ----------------------------------------------------------------------------
577 {
578     return true;
579 }
580 
581 //  ----------------------------------------------------------------------------
xParseComment(const CTempString & record,CRef<CSeq_annot> & annot)582 bool CReaderBase::xParseComment(
583     const CTempString& record,
584     CRef<CSeq_annot>& annot )
585 //  ----------------------------------------------------------------------------
586 {
587     if (NStr::StartsWith(record, "#")) {
588         return true;
589     }
590     return false;
591 }
592 
593 //  ----------------------------------------------------------------------------
xPostProcessAnnot(CSeq_annot &)594 void CReaderBase::xPostProcessAnnot(
595     CSeq_annot&)
596 //  ----------------------------------------------------------------------------
597 {
598 }
599 
600 //  ----------------------------------------------------------------------------
xAddConversionInfo(CSeq_annot & annot,ILineErrorListener * pML)601 void CReaderBase::xAddConversionInfo(
602     CSeq_annot& annot,
603     ILineErrorListener* pML)
604 //  ----------------------------------------------------------------------------
605 {
606     size_t countInfos = m_pMessageHandler->LevelCount(eDiag_Info);
607     size_t countWarnings = m_pMessageHandler->LevelCount(eDiag_Warning);
608     size_t countErrors = m_pMessageHandler->LevelCount(eDiag_Error);
609     size_t countCritical = m_pMessageHandler->LevelCount(eDiag_Critical);
610     if (pML) {
611         countCritical += pML->LevelCount(eDiag_Critical);
612         countErrors += pML->LevelCount(eDiag_Error);
613         countWarnings += pML->LevelCount(eDiag_Warning);
614         countInfos += pML->LevelCount(eDiag_Info);
615     }
616     if (countInfos + countWarnings + countErrors + countCritical == 0) {
617         return;
618     }
619     CRef<CUser_object> conversioninfo(new CUser_object());
620     conversioninfo->SetType().SetStr("Conversion Info");
621     conversioninfo->AddField( "critical errors", static_cast<int>(countCritical));
622     conversioninfo->AddField( "errors", static_cast<int>(countErrors));
623     conversioninfo->AddField( "warnings", static_cast<int>(countWarnings));
624     conversioninfo->AddField( "notes", static_cast<int>(countInfos));
625 
626     CRef<CAnnotdesc> user(new CAnnotdesc());
627     user->SetUser(*conversioninfo);
628     annot.SetDesc().Set().push_back(user);
629 }
630 
631 //  ----------------------------------------------------------------------------
SetProgressReportInterval(unsigned int intv)632 void CReaderBase::SetProgressReportInterval(
633     unsigned int intv)
634 //  ----------------------------------------------------------------------------
635 {
636     m_uProgressReportInterval = intv;
637     m_uNextProgressReport = (unsigned int)time(0) + intv;
638 }
639 
640 //  ----------------------------------------------------------------------------
xIsReportingProgress() const641 bool CReaderBase::xIsReportingProgress() const
642 //  ----------------------------------------------------------------------------
643 {
644     if (0 == m_uProgressReportInterval) {
645         return false;
646     }
647     if (0 == m_pReader) {
648         return false;
649     }
650     return true;
651 }
652 
653 //  ----------------------------------------------------------------------------
xReportProgress(ILineErrorListener * pProgress)654 void CReaderBase::xReportProgress(
655     ILineErrorListener* pProgress)
656 //  ----------------------------------------------------------------------------
657 {
658     if (!xIsReportingProgress()) { // progress reports disabled
659         return;
660     }
661     unsigned int uCurrentTime = (unsigned int)time(0);
662     if (uCurrentTime < m_uNextProgressReport) { // not time yet
663         return;
664     }
665 
666     // report something
667     int curPos = static_cast<int>(m_pReader->GetPosition());
668     m_pMessageHandler->Progress(CReaderProgress(curPos, 0));
669     m_uNextProgressReport += m_uProgressReportInterval;
670 }
671 
672 //  ============================================================================
xReadInit()673 bool CReaderBase::xReadInit()
674 //  ============================================================================
675 {
676     return true;
677 }
678 
679 //  ============================================================================
xProgressInit(ILineReader & lr)680 bool CReaderBase::xProgressInit(
681     ILineReader& lr)
682 //  ============================================================================
683 {
684     if (0 == m_uProgressReportInterval) {
685         return true;
686     }
687     m_pReader = &lr;
688     return true;
689 }
690 
691 //  ============================================================================
SetCanceler(ICanceled * pCanceler)692 void CReaderBase::SetCanceler(
693     ICanceled* pCanceler)
694 //  ============================================================================
695 {
696     m_pCanceler = pCanceler;
697 }
698 
699 //  ============================================================================
xIsOperationCanceled() const700 bool CReaderBase::xIsOperationCanceled() const
701 //  ============================================================================
702 {
703     if (!m_pCanceler) {
704         return false;
705     }
706     return m_pCanceler->IsCanceled();
707 }
708 
709 //  ----------------------------------------------------------------------------
xIsCommentLine(const CTempString & strLine)710 bool CReaderBase::xIsCommentLine(
711     const CTempString& strLine)
712 //  ----------------------------------------------------------------------------
713 {
714     if (strLine.empty()) {
715         return true;
716     }
717     return (strLine[0] == '#' && strLine[1] != '#');
718 }
719 
720 //  ----------------------------------------------------------------------------
xIsTrackLine(const CTempString & strLine)721 bool CReaderBase::xIsTrackLine(
722     const CTempString& strLine)
723 //  ----------------------------------------------------------------------------
724 {
725     if (strLine == "track") {
726         return true;
727     }
728     if (NStr::StartsWith(strLine, "track ")) {
729         return true;
730     }
731     return NStr::StartsWith(strLine, "track\t");
732 }
733 
734 //  ----------------------------------------------------------------------------
xIsTrackTerminator(const CTempString & strLine)735 bool CReaderBase::xIsTrackTerminator(
736     const CTempString& strLine)
737 //  ----------------------------------------------------------------------------
738 {
739     auto line = NStr::TruncateSpaces_Unsafe(strLine);
740     return (line == "###");
741 }
742 
743 //  ----------------------------------------------------------------------------
xIsBrowserLine(const CTempString & strLine)744 bool CReaderBase::xIsBrowserLine(
745     const CTempString& strLine)
746 //  ----------------------------------------------------------------------------
747 {
748     return NStr::StartsWith(strLine, "browser ");
749 }
750 
751 //  ----------------------------------------------------------------------------
xGetLine(ILineReader & lr,string & line)752 bool CReaderBase::xGetLine(
753     ILineReader& lr,
754     string& line)
755 //  ----------------------------------------------------------------------------
756 {
757     if (!m_PendingLine.empty()) {
758         line = m_PendingLine;
759         m_PendingLine.clear();
760         return true;
761     }
762     CTempString temp;
763     while (!lr.AtEOF()) {
764         temp = *++lr;
765         ++m_uLineNumber;
766         temp = NStr::TruncateSpaces_Unsafe(temp);
767         if (!xIsCommentLine(temp)) {
768             line = temp;
769             return true;
770         }
771     }
772     return false;
773 }
774 
775 //  ----------------------------------------------------------------------------
xUngetLine(ILineReader & lr)776 bool CReaderBase::xUngetLine(
777     ILineReader& lr)
778 //  ----------------------------------------------------------------------------
779 {
780     lr.UngetLine();
781     --m_uLineNumber;
782     return true;
783 }
784 
785 //  ----------------------------------------------------------------------------
786 void
xProcessReaderMessage(CReaderMessage & readerMessage,ILineErrorListener * pEL)787 CReaderBase::xProcessReaderMessage(
788     CReaderMessage& readerMessage,
789     ILineErrorListener* pEL)
790 //
791 //  Strategy:
792 //  (0) Above all, don't swallow FATAl errors as they are guaranteed to stop
793 //      the program on the spot.
794 //  (1) Give readerMessage to internal message handler. If configured properly
795 //      it will handle readerMessage.Otherwise, it will emit an ILineError.
796 //  (2) If an ILineError is emitted and we have an actual ILineErrorListener
797 //      then give it a shot.
798 //  (3) If we don't have an ILineErrorListener or it the ILineErrorListener
799 //      does not want the ILineError then rethrow the ILineError and hope for
800 //      the best.
801 //  ----------------------------------------------------------------------------
802 {
803     readerMessage.SetLineNumber(m_uLineNumber);
804     try {
805         m_pMessageHandler->Report(readerMessage);
806         if (readerMessage.Severity() == eDiag_Fatal) {
807             throw;
808         }
809     }
810     catch(ILineError& lineError) {
811         xProcessLineError(lineError, pEL);
812     }
813 };
814 
815 //  ----------------------------------------------------------------------------
816 void
xProcessLineError(const ILineError & lineError,ILineErrorListener * pEL)817 CReaderBase::xProcessLineError(
818     const ILineError& lineError,
819     ILineErrorListener* pEL)
820 //
821 //  This is to deal with legacy format readers that may throw ILIneError instead
822 //  of the preferred CReaderMessage.
823 //
824 //  Strategy:
825 //  (1) If pEL is good, then give the lineError to pEL.
826 //  (2) If pEL doesn't accept the lineError then throw it (and hope some upper
827 //      layer knows what to do with it).
828 //  ----------------------------------------------------------------------------
829 {
830     if (!pEL  ||  !pEL->PutMessage(lineError)) {
831         throw;
832     }
833 }
834 
835 //  ----------------------------------------------------------------------------
836 void
xProcessUnknownException(const CException & error)837 CReaderBase::xProcessUnknownException(
838     const CException& error)
839 //
840 //  If we get errors outside of the established type system then there is no way
841 //  of knowing what happened, how it happened, how much data is bad, or even
842 //  whether there is any way of continuing at all.
843 //  We therefore turn all such errors into a FATAL CReaderMessage and rethrow
844 //  it.
845 //  ----------------------------------------------------------------------------
846 {
847     CReaderMessage terminator(
848         eDiag_Fatal,
849         m_uLineNumber,
850         "Exception: " + error.GetMsg());
851     throw(terminator);
852 }
853 
854 END_objects_SCOPE
855 END_NCBI_SCOPE
856