1 /*  $Id: bed_reader.cpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Frank Ludwig
27  *
28  * File Description:
29  *   BED file reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #include <objects/general/Object_id.hpp>
37 #include <objects/general/User_object.hpp>
38 #include <objects/seqloc/Seq_point.hpp>
39 #include <objects/seqfeat/SeqFeatXref.hpp>
40 #include <objects/seq/Annotdesc.hpp>
41 #include <objects/seq/Annot_descr.hpp>
42 #include <objects/seqfeat/SeqFeatData.hpp>
43 #include <objects/seqfeat/Seq_feat.hpp>
44 #include <objects/seqfeat/Feat_id.hpp>
45 
46 #include <objtools/readers/bed_reader.hpp>
47 #include "bed_autosql.hpp"
48 #include "reader_message_handler.hpp"
49 #include "bed_column_data.hpp"
50 
51 #include <algorithm>
52 #include <deque>
53 
54 BEGIN_NCBI_SCOPE
55 BEGIN_objects_SCOPE // namespace ncbi::objects::
56 
57 //  ============================================================================
58 class CLinePreBuffer
59 //  ============================================================================
60 {
61 public:
62     using LinePreIt = deque<string>::const_iterator;
63 
CLinePreBuffer(ILineReader & lineReader)64     CLinePreBuffer(
65         ILineReader& lineReader):
66         mLineReader(lineReader),
67         mLineNumber(0)
68     {};
69 
~CLinePreBuffer()70     virtual ~CLinePreBuffer() {};
71 
FillBuffer(size_t numLines)72     bool FillBuffer(
73         size_t numLines)
74     {
75         string line;
76         while (numLines  &&  !mLineReader.AtEOF()) {
77             line = *++mLineReader;
78             CLinePreBuffer::StripSpaceCharsInPlace(line);
79             mBuffer.push_back(line);
80             if (!IsCommentLine(line)) {
81                 --numLines;
82             }
83         }
84         return true;
85     }
86 
IsCommentLine(const CTempString & line)87     virtual bool IsCommentLine(
88         const CTempString& line)
89     {
90         if (NStr::StartsWith(line, "#")) {
91             return true;
92         }
93         if (NStr::IsBlank(line)) {
94             return true;
95         }
96         return false;
97     };
98 
GetLine(string & line)99     bool GetLine(
100         string& line)
101     {
102         while (!mBuffer.empty()  ||  !mLineReader.AtEOF()) {
103             string temp;
104             if (!mBuffer.empty()) {
105                 temp = mBuffer.front();
106                 mBuffer.pop_front();
107             }
108             else {
109                 temp = *++mLineReader;
110                 CLinePreBuffer::StripSpaceCharsInPlace(temp);
111             }
112             if (!IsCommentLine(temp)) {
113                 line = temp;
114                 ++mLineNumber;
115                 return true;
116             }
117         }
118         return false;
119     };
120 
UngetLine(const string & line)121     bool UngetLine(
122         const string& line)
123     {
124         mBuffer.push_front(line);
125         --mLineNumber;
126         return true;
127     }
128 
LineNumber() const129     int LineNumber() const
130     {
131         return mLineNumber;
132     };
133 
begin()134     LinePreIt begin()
135     {
136         return mBuffer.begin();
137     };
138 
end()139     LinePreIt end()
140     {
141         return mBuffer.end();
142     };
143 
144     void
AssignReader(ILineReader & lineReader)145     AssignReader(
146         ILineReader& lineReader) {
147         if (&mLineReader != &lineReader) {
148             mLineReader = lineReader;
149             mBuffer.clear();
150             mLineNumber = 0;
151         }
152     };
153 
154     static void
StripSpaceCharsInPlace(string & str)155     StripSpaceCharsInPlace(
156         string& str)
157     {
158         if (str.empty()) {
159             return;
160         }
161         auto newFirst = 0;
162         while (str[newFirst] == ' ') {
163             ++newFirst;
164         }
165         auto newLast = str.length() - 1;
166         while (str[newLast] == ' ') {
167             --newLast;
168         }
169         str = str.substr(newFirst, newLast - newFirst + 1);
170     };
171 
172 protected:
173     ILineReader& mLineReader;
174     deque<string> mBuffer;
175     int mLineNumber;
176 };
177 
178 
179 //  ----------------------------------------------------------------------------
180 void
SetInterval(CSeq_id & id,unsigned int start,unsigned int stop,ENa_strand strand)181 CRawBedRecord::SetInterval(
182 //  ----------------------------------------------------------------------------
183     CSeq_id& id,
184     unsigned int start,
185     unsigned int stop,
186     ENa_strand strand)
187 {
188     m_pInterval.Reset(new CSeq_interval());
189     m_pInterval->SetId(id);
190     m_pInterval->SetFrom(start);
191     m_pInterval->SetTo(stop-1);
192     m_pInterval->SetStrand(strand);
193 };
194 
195 //  ----------------------------------------------------------------------------
196 void
SetScore(unsigned int score)197 CRawBedRecord::SetScore(
198     unsigned int score)
199 //  ----------------------------------------------------------------------------
200 {
201     m_score = score;
202 };
203 
204 //  ----------------------------------------------------------------------------
205 void
Dump(CNcbiOstream & ostr) const206 CRawBedRecord::Dump(
207     CNcbiOstream& ostr) const
208 //  ----------------------------------------------------------------------------
209 {
210     ostr << "  [CRawBedRecord" << endl;
211     ostr << "id=\"" << m_pInterval->GetId().AsFastaString() << "\" ";
212     ostr << "start=" << m_pInterval->GetFrom() << " ";
213     ostr << "stop=" << m_pInterval->GetTo() << " ";
214     ostr << "strand=" <<
215         (m_pInterval->GetStrand() == eNa_strand_minus ? "-" : "+") << " ";
216     if (m_score >= 0) {
217         ostr << "score=" << m_score << " ";
218     }
219     ostr << "]" << endl;
220 };
221 
222 //  ----------------------------------------------------------------------------
223 void
Dump(CNcbiOstream & ostr) const224 CRawBedTrack::Dump(
225     CNcbiOstream& ostr) const
226 //  ----------------------------------------------------------------------------
227 {
228     ostr << "[CRawBedTrack" << endl;
229     for (vector<CRawBedRecord>::const_iterator it = m_Records.begin();
230             it != m_Records.end(); ++it) {
231         it->Dump(ostr);
232     }
233     ostr << "]" << std::endl;
234 }
235 
//  ----------------------------------------------------------------------------
//  Constructs a BED reader.
//
//  flags, annotName, annotTitle and pRL are passed on to CReaderBase together
//  with CReadUtil::AsSeqId; flags is also handed to the CBedAutoSql helper.
//  The column counts start at zero, which makes xGetData() run column
//  detection on its first call, and the line pre-buffer starts out null and
//  is created by xGetData() when first needed.
//  ----------------------------------------------------------------------------
CBedReader::CBedReader(
    int flags,
    const string& annotName,
    const string& annotTitle,
    CReaderListener* pRL ) :
    CReaderBase(flags, annotName, annotTitle, CReadUtil::AsSeqId, pRL),
    m_currentId(""),
    mColumnSeparator(""),
    mColumnSplitFlags(0),
    mRealColumnCount(0),
    mValidColumnCount(0),
    mAssumeErrorsAreRecordLevel(true),
    m_CurrentFeatureCount(0),
    m_usescore(false),
    m_CurBatchSize(0),
    m_MaxBatchSize(10000),
    mLinePreBuffer(nullptr),
    mpAutoSql(new CBedAutoSql(flags))
{
}
258 
259 //  ----------------------------------------------------------------------------
~CBedReader()260 CBedReader::~CBedReader()
261 //  ----------------------------------------------------------------------------
262 {
263 }
264 
265 //  ----------------------------------------------------------------------------
266 CRef< CSeq_annot >
ReadSeqAnnot(ILineReader & lineReader,ILineErrorListener * pEC)267 CBedReader::ReadSeqAnnot(
268     ILineReader& lineReader,
269     ILineErrorListener* pEC )
270 //  ----------------------------------------------------------------------------
271 {
272     m_CurrentFeatureCount = 0;
273     return CReaderBase::ReadSeqAnnot(lineReader, pEC);
274 }
275 
276 //  ----------------------------------------------------------------------------
277 bool
SetAutoSql(const string & fileName)278 CBedReader::SetAutoSql(
279     const string& fileName)
280 //  ----------------------------------------------------------------------------
281 {
282     CNcbiIfstream istr;
283     try {
284         auto origExceptions = istr.exceptions();
285         istr.exceptions(std::istream::failbit);
286         istr.open(fileName);
287         istr.exceptions(origExceptions);
288     }
289     catch (CException& e) {
290         cerr << e.GetMsg() << endl;
291         return false;
292     }
293     m_iFlags |= CBedReader::fAutoSql;
294     return SetAutoSql(istr);
295 }
296 
297 //  ----------------------------------------------------------------------------
298 bool
SetAutoSql(CNcbiIstream & istr)299 CBedReader::SetAutoSql(
300     CNcbiIstream& istr)
301 //  ----------------------------------------------------------------------------
302 {
303    return  mpAutoSql->Load(istr, *m_pMessageHandler);
304 }
305 
306 //  ----------------------------------------------------------------------------
307 CRef<CSeq_annot>
xCreateSeqAnnot()308 CBedReader::xCreateSeqAnnot()
309 //  ----------------------------------------------------------------------------
310 {
311     CRef<CSeq_annot> pAnnot(new CSeq_annot);
312     if (!m_AnnotName.empty()) {
313         pAnnot->SetNameDesc(m_AnnotName);
314     }
315     if (!m_AnnotTitle.empty()) {
316         pAnnot->SetTitleDesc(m_AnnotTitle);
317     }
318     CRef<CAnnot_descr> pDescr(new CAnnot_descr);
319     pAnnot->SetDesc(*pDescr);
320     return pAnnot;
321 }
322 
323 //  ----------------------------------------------------------------------------
324 void
xGetData(ILineReader & lr,TReaderData & readerData)325 CBedReader::xGetData(
326     ILineReader& lr,
327     TReaderData& readerData)
328 //  ----------------------------------------------------------------------------
329 {
330     if (!mLinePreBuffer) {
331         mLinePreBuffer.reset(new CLinePreBuffer(lr));
332     }
333     if (mRealColumnCount == 0) {
334         xDetermineLikelyColumnCount(*mLinePreBuffer, nullptr);
335     }
336 
337     readerData.clear();
338     string line;
339     if (!mLinePreBuffer->GetLine(line)) {
340         return;
341     }
342     bool isBrowserLine = NStr::StartsWith(line, "browser ");
343     bool isTrackLine = NStr::StartsWith(line, "track ");
344     if (xIsTrackLine(line)  && m_uDataCount != 0) {
345         mLinePreBuffer->UngetLine(line);
346         return;
347     }
348     m_uLineNumber = mLinePreBuffer->LineNumber();
349     readerData.push_back(TReaderLine{m_uLineNumber, line});
350     if (!isBrowserLine  &&  !isTrackLine) {
351         ++m_uDataCount;
352     }
353 }
354 
355 //  ----------------------------------------------------------------------------
356 void
xProcessData(const TReaderData & readerData,CSeq_annot & annot)357 CBedReader::xProcessData(
358     const TReaderData& readerData,
359     CSeq_annot& annot)
360 //  ----------------------------------------------------------------------------
361 {
362     for (const auto& lineData: readerData) {
363         string line = lineData.mData;
364         if (xParseTrackLine(line)) {
365             return;
366         }
367         if (xParseBrowserLine(line, annot)) {
368             return;
369         }
370         xParseFeature(lineData, annot, nullptr);
371         ++m_CurrentFeatureCount;
372     }
373 }
374 
375 //  ----------------------------------------------------------------------------
xDetermineLikelyColumnCount(CLinePreBuffer & preBuffer,ILineErrorListener * pEc)376 bool CBedReader::xDetermineLikelyColumnCount(
377     CLinePreBuffer& preBuffer,
378     ILineErrorListener* pEc)
379 //  ----------------------------------------------------------------------------
380 {
381     if (this->m_iFlags & fAutoSql) {
382         mValidColumnCount = mRealColumnCount = mpAutoSql->ColumnCount();;
383         return true;
384     }
385 
386     using LineIt = CLinePreBuffer::LinePreIt;
387     int bufferLineNumber = 0;
388     CReaderMessage fatalColumns(
389         eDiag_Fatal,
390         0,
391         "Bad data line: Inconsistent column count.");
392 
393     CReaderMessage fatalChroms(
394         eDiag_Fatal,
395         0,
396         "Bad data line: Invalid chrom boundaries.");
397 
398     const size_t MIN_SAMPLE_SIZE = 50;
399     preBuffer.FillBuffer(MIN_SAMPLE_SIZE);
400 
401     mRealColumnCount = mValidColumnCount = 0;
402     vector<string>::size_type realColumnCount = 0;
403     vector<string>::size_type validColumnCount = 0;
404     for (LineIt lineIt = preBuffer.begin(); lineIt != preBuffer.end(); ++lineIt) {
405         bufferLineNumber++;
406         const auto& line = *lineIt;
407         if (preBuffer.IsCommentLine(line)) {
408             continue;
409         }
410         if (this->xIsTrackLine(line)) {
411             continue;
412         }
413         if (this->xIsBrowserLine(line)) {
414             continue;
415         }
416 
417         CBedColumnData columnData(SReaderLine(bufferLineNumber, line));
418         if (realColumnCount == 0 ) {
419             realColumnCount = columnData.ColumnCount();
420         }
421         if (realColumnCount !=  columnData.ColumnCount()) {
422             fatalColumns.SetLineNumber(bufferLineNumber);
423             throw(fatalColumns);
424         }
425 
426         if (validColumnCount == 0) {
427             validColumnCount = realColumnCount;
428             if (validColumnCount > 12) {
429                 validColumnCount = 12;
430             }
431         }
432         unsigned long chromStart = 0, chromEnd = 0;
433         try {
434             chromStart = NStr::StringToULong(columnData[1]);
435             chromEnd = NStr::StringToULong(columnData[2]);
436         }
437         catch (CException&) {
438             fatalChroms.SetLineNumber(bufferLineNumber);
439             throw(fatalChroms);
440         }
441         if (validColumnCount >= 7) {
442             try {
443                 auto thickStart = NStr::StringToULong(columnData[6]);
444                 if (thickStart < chromStart  ||  chromEnd < thickStart) {
445                     validColumnCount = 6;
446                 }
447             }
448             catch(CException&) {
449                 validColumnCount = 6;
450             }
451         }
452         if (validColumnCount >= 8) {
453             try {
454                 auto thickEnd = NStr::StringToULong(columnData[7]);
455                 if (thickEnd < chromStart  ||  chromEnd < thickEnd) {
456                     validColumnCount = 6;
457                 }
458             }
459             catch(CException&) {
460                 validColumnCount = 6;
461             }
462         }
463 
464         int blockCount;
465         if (validColumnCount >= 10) {
466             try {
467                 blockCount = NStr::StringToInt(
468                     columnData[9], NStr::fDS_ProhibitFractions);
469                 if (blockCount < 1) {
470                     validColumnCount = 9;
471                 }
472             }
473             catch(CException&) {
474                 validColumnCount = 9;
475             }
476         }
477         if (validColumnCount >= 11) {
478             vector<string> blockSizes;
479             auto col10 = columnData[10];
480             if (NStr::EndsWith(col10, ",")) {
481                 col10 = col10.substr(0, col10.size()-1);
482             }
483             NStr::Split(col10, ",", blockSizes, NStr::fSplit_MergeDelimiters);
484             if (blockSizes.size() != blockCount) {
485                 validColumnCount = 9;
486             }
487             else {
488                 try {
489                     for (auto blockSize: blockSizes) {
490                         NStr::StringToULong(blockSize);
491                     }
492                 }
493                 catch(CException&) {
494                     validColumnCount = 9;
495                 }
496             }
497         }
498         if (validColumnCount >= 12) {
499             vector<string> blockStarts;
500             auto col11 = columnData[11];
501             if (NStr::EndsWith(col11, ",")) {
502                 col11 = col11.substr(0, col11.size()-1);
503             }
504             NStr::Split(col11, ",", blockStarts, NStr::fSplit_MergeDelimiters);
505             if (blockStarts.size() != blockCount) {
506                 validColumnCount = 9;
507             }
508             else {
509                 try {
510                     for (auto blockStart: blockStarts) {
511                         NStr::StringToULong(blockStart);
512                     }
513                 }
514                 catch(CException&) {
515                     validColumnCount = 9;
516                 }
517             }
518         }
519     }
520     mRealColumnCount = realColumnCount;
521     mValidColumnCount = validColumnCount;
522     mAssumeErrorsAreRecordLevel = (
523         validColumnCount == realColumnCount  &&
524         validColumnCount != 7  &&
525         validColumnCount != 10  &&
526         validColumnCount != 11);
527 
528     return true;
529 }
530 
531 //  ----------------------------------------------------------------------------
xPostProcessAnnot(CSeq_annot & annot)532 void CBedReader::xPostProcessAnnot(
533     CSeq_annot& annot)
534 //  ----------------------------------------------------------------------------
535 {
536     xAddConversionInfo(annot, nullptr);
537     xAssignTrackData(annot);
538     xAssignBedColumnCount(annot);
539 }
540 
541 //  ----------------------------------------------------------------------------
542 bool
xParseTrackLine(const string & strLine)543 CBedReader::xParseTrackLine(
544     const string& strLine)
545 //  ----------------------------------------------------------------------------
546 {
547     CReaderMessage warning(
548         eDiag_Warning,
549         m_uLineNumber,
550         "Bad track line: Expected \"track key1=value1 key2=value2 ...\". Ignored.");
551 
552     if ( ! NStr::StartsWith( strLine, "track" ) ) {
553         return false;
554     }
555     vector<string> parts;
556     CReadUtil::Tokenize( strLine, " \t", parts );
557     if (parts.size() >= 3) {
558         const string digits("0123456789");
559         bool col2_is_numeric =
560             (string::npos == parts[1].find_first_not_of(digits));
561         bool col3_is_numeric =
562             (string::npos == parts[2].find_first_not_of(digits));
563         if (col2_is_numeric  &&  col3_is_numeric) {
564             return false;
565         }
566     }
567     m_currentId.clear();
568     if (!CReaderBase::xParseTrackLine(strLine)) {
569         m_pMessageHandler->Report(warning);
570     }
571     return true;
572 }
573 
574 //  ----------------------------------------------------------------------------
575 bool
xParseFeature(const SReaderLine & lineData,CSeq_annot & annot,ILineErrorListener * pEC)576 CBedReader::xParseFeature(
577     const SReaderLine& lineData,
578     CSeq_annot& annot,
579     ILineErrorListener* pEC)
580 //  ----------------------------------------------------------------------------
581 {
582     CBedColumnData columnData(lineData);
583     if (columnData.ColumnCount()!= mRealColumnCount) {
584         CReaderMessage error(
585             eDiag_Error,
586             m_uLineNumber,
587             "Bad data line: Inconsistent column count.");
588         throw error;
589     }
590 
591     if (m_iFlags & CBedReader::fThreeFeatFormat) {
592         return xParseFeatureThreeFeatFormat(columnData, annot, pEC);
593     }
594     else if (m_iFlags & CBedReader::fDirectedFeatureModel) {
595         return xParseFeatureGeneModelFormat(columnData, annot, pEC);
596     }
597     else if (m_iFlags & CBedReader::fAutoSql) {
598         return xParseFeatureAutoSql(columnData, annot, pEC);
599     }
600     else {
601         return xParseFeatureUserFormat(columnData, annot, pEC);
602     }
603     return false;
604 }
605 
606 //  ----------------------------------------------------------------------------
xParseFeatureThreeFeatFormat(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)607 bool CBedReader::xParseFeatureThreeFeatFormat(
608     const CBedColumnData& columnData,
609     CSeq_annot& annot,
610     ILineErrorListener* pEC)
611 //  ----------------------------------------------------------------------------
612 {
613      unsigned int baseId = 3*m_CurrentFeatureCount;
614 
615     if (!xAppendFeatureChrom(columnData, annot, baseId, pEC)) {
616         return false;
617     }
618     if (xContainsThickFeature(columnData)  &&
619             !xAppendFeatureThick(columnData, annot, baseId, pEC)) {
620         return false;
621     }
622     if (xContainsBlockFeature(columnData)  &&
623             !xAppendFeatureBlock(columnData, annot, baseId, pEC)) {
624         return false;
625     }
626     return true;
627 }
628 
629 //  ----------------------------------------------------------------------------
xParseFeatureGeneModelFormat(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)630 bool CBedReader::xParseFeatureGeneModelFormat(
631     const CBedColumnData& columnData,
632     CSeq_annot& annot,
633     ILineErrorListener* pEC)
634 //  ----------------------------------------------------------------------------
635 {
636     unsigned int baseId = 3*m_CurrentFeatureCount;
637 
638     CRef<CSeq_feat> pGene = xAppendFeatureGene(columnData, annot, baseId, pEC);
639     if (!pGene) {
640         return false;
641     }
642 
643     CRef<CSeq_feat> pRna;
644     if (xContainsRnaFeature(columnData)) {//blocks
645         pRna = xAppendFeatureRna(columnData, annot, baseId, pEC);
646         if (!pRna) {
647             return false;
648         }
649     }
650 
651     CRef<CSeq_feat> pCds;
652     if (xContainsCdsFeature(columnData)) {//thick
653         pCds = xAppendFeatureCds(columnData, annot, baseId, pEC);
654         if (!pCds) {
655             return false;
656         }
657     }
658 
659     if (pRna  &&  pCds) {
660         CRef<CSeq_loc> pRnaLoc(new CSeq_loc);
661         CRef<CSeq_loc> pClippedLoc = pRna->GetLocation().Intersect(pCds->GetLocation(), 0, 0);
662         pCds->SetLocation(*pClippedLoc);
663     }
664     return true;
665 }
666 
667 //  ----------------------------------------------------------------------------
xAppendFeatureChrom(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)668 bool CBedReader::xAppendFeatureChrom(
669     const CBedColumnData& columnData,
670     CSeq_annot& annot,
671     unsigned int baseId,
672     ILineErrorListener* pEC)
673 //  ----------------------------------------------------------------------------
674 {
675     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
676     CRef<CSeq_feat> feature;
677     feature.Reset(new CSeq_feat);
678 
679     xSetFeatureLocationChrom(feature, columnData);
680     xSetFeatureIdsChrom(feature, columnData, baseId);
681     xSetFeatureBedData(feature, columnData, pEC);
682 
683     ftable.push_back(feature);
684     m_currentId = columnData[0];
685     return true;
686 }
687 
688 //  ----------------------------------------------------------------------------
xAppendFeatureGene(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)689 CRef<CSeq_feat> CBedReader::xAppendFeatureGene(
690     const CBedColumnData& columnData,
691     CSeq_annot& annot,
692     unsigned int baseId,
693     ILineErrorListener* pEC)
694 //  ----------------------------------------------------------------------------
695 {
696     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
697     CRef<CSeq_feat> feature;
698     feature.Reset(new CSeq_feat);
699 
700     xSetFeatureLocationGene(feature, columnData);
701     xSetFeatureIdsGene(feature, columnData, baseId);
702     xSetFeatureBedData(feature, columnData, pEC);
703 
704     ftable.push_back(feature);
705     m_currentId = columnData[0];
706     return feature;
707 }
708 
709 //  ----------------------------------------------------------------------------
xAppendFeatureThick(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)710 bool CBedReader::xAppendFeatureThick(
711     const CBedColumnData& columnData,
712     CSeq_annot& annot,
713     unsigned int baseId,
714     ILineErrorListener* pEC)
715 //  ----------------------------------------------------------------------------
716 {
717     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
718     CRef<CSeq_feat> feature;
719     feature.Reset(new CSeq_feat);
720 
721     xSetFeatureLocationThick(feature, columnData);
722     xSetFeatureIdsThick(feature, columnData, baseId);
723     xSetFeatureBedData(feature, columnData, pEC);
724 
725     ftable.push_back(feature);
726     return true;
727 }
728 
729 //  ----------------------------------------------------------------------------
xAppendFeatureCds(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)730 CRef<CSeq_feat> CBedReader::xAppendFeatureCds(
731     const CBedColumnData& columnData,
732     CSeq_annot& annot,
733     unsigned int baseId,
734     ILineErrorListener* pEC)
735 //  ----------------------------------------------------------------------------
736 {
737     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
738     CRef<CSeq_feat> feature;
739     feature.Reset(new CSeq_feat);
740 
741     xSetFeatureLocationCds(feature, columnData);
742     xSetFeatureIdsCds(feature, columnData, baseId);
743     xSetFeatureBedData(feature, columnData, pEC);
744 
745     ftable.push_back(feature);
746     return feature;
747 }
748 
749 //  ----------------------------------------------------------------------------
xAppendFeatureBlock(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)750 bool CBedReader::xAppendFeatureBlock(
751     const CBedColumnData& columnData,
752     CSeq_annot& annot,
753     unsigned int baseId,
754     ILineErrorListener* pEC)
755 //  ----------------------------------------------------------------------------
756 {
757     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
758     CRef<CSeq_feat> feature;
759     feature.Reset(new CSeq_feat);
760 
761     xSetFeatureLocationBlock(feature, columnData);
762     xSetFeatureIdsBlock(feature, columnData, baseId);
763     xSetFeatureBedData(feature, columnData, pEC);
764 
765     ftable.push_back(feature);
766     return true;
767 }
768 
769 //  ----------------------------------------------------------------------------
xAppendFeatureRna(const CBedColumnData & columnData,CSeq_annot & annot,unsigned int baseId,ILineErrorListener * pEC)770 CRef<CSeq_feat> CBedReader::xAppendFeatureRna(
771     const CBedColumnData& columnData,
772     CSeq_annot& annot,
773     unsigned int baseId,
774     ILineErrorListener* pEC)
775 //  ----------------------------------------------------------------------------
776 {
777     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
778     CRef<CSeq_feat> feature;
779     feature.Reset(new CSeq_feat);
780 
781     xSetFeatureLocationRna(feature, columnData);
782     xSetFeatureIdsRna(feature, columnData, baseId);
783     xSetFeatureBedData(feature, columnData, pEC);
784 
785     ftable.push_back(feature);
786     return feature;
787 }
788 
789 
790 //  ----------------------------------------------------------------------------
xParseFeatureUserFormat(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)791 bool CBedReader::xParseFeatureUserFormat(
792     const CBedColumnData& columnData,
793     CSeq_annot& annot,
794     ILineErrorListener* pEC)
795 //  ----------------------------------------------------------------------------
796 {
797     //  assign
798     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
799     CRef<CSeq_feat> feature;
800     feature.Reset( new CSeq_feat );
801 
802     xSetFeatureTitle(feature, columnData);
803     xSetFeatureLocation(feature, columnData);
804     xSetFeatureDisplayData(feature, columnData);
805 
806     ftable.push_back( feature );
807     m_currentId = columnData[0];
808     return true;
809 }
810 
811 //  ----------------------------------------------------------------------------
xParseFeatureAutoSql(const CBedColumnData & columnData,CSeq_annot & annot,ILineErrorListener * pEC)812 bool CBedReader::xParseFeatureAutoSql(
813     const CBedColumnData& columnData,
814     CSeq_annot& annot,
815     ILineErrorListener* pEC)
816 //  ----------------------------------------------------------------------------
817 {
818     CRef<CSeq_feat> pFeat(new CSeq_feat);;
819     if (!mpAutoSql->ReadSeqFeat(columnData, *pFeat, *m_pMessageHandler)) {
820         return false;
821     }
822     CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
823     ftable.push_back(pFeat);
824     m_currentId = columnData[0];
825     return true;
826 }
827 
828 
829 //  ----------------------------------------------------------------------------
xSetFeatureDisplayData(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)830 void CBedReader::xSetFeatureDisplayData(
831     CRef<CSeq_feat>& feature,
832     const CBedColumnData& columnData)
833 //  ----------------------------------------------------------------------------
834 {
835     CRef<CUser_object> display_data( new CUser_object );
836     display_data->SetType().SetStr( "Display Data" );
837     if (mValidColumnCount >= 4) {
838         display_data->AddField( "name", columnData[3] );
839     }
840     else {
841         display_data->AddField( "name", string("") );
842         feature->SetData().SetUser( *display_data );
843         return;
844     }
845     if (mValidColumnCount >= 5) {
846         if ( !m_usescore ) {
847             display_data->AddField(
848                 "score",
849                 NStr::StringToInt(columnData[4],
850                     NStr::fConvErr_NoThrow|NStr::fAllowTrailingSymbols) );
851         }
852         else {
853             display_data->AddField(
854                 "greylevel",
855                 NStr::StringToInt(columnData[4],
856                     NStr::fConvErr_NoThrow|NStr::fAllowTrailingSymbols) );
857         }
858     }
859     if (mValidColumnCount >= 7) {
860         display_data->AddField(
861             "thickStart",
862             NStr::StringToInt(columnData[6], NStr::fDS_ProhibitFractions) );
863     }
864     if (mValidColumnCount >= 8) {
865         display_data->AddField(
866             "thickEnd",
867             NStr::StringToInt(columnData[7], NStr::fDS_ProhibitFractions) - 1 );
868     }
869     if (mValidColumnCount >= 9) {
870         display_data->AddField(
871             "itemRGB",
872             columnData[8]);
873     }
874     if (mValidColumnCount >= 10) {
875         display_data->AddField(
876             "blockCount",
877             NStr::StringToInt(columnData[9], NStr::fDS_ProhibitFractions) );
878     }
879     if (mValidColumnCount >= 11) {
880         display_data->AddField( "blockSizes", columnData[10] );
881     }
882     if (mValidColumnCount >= 12) {
883         display_data->AddField( "blockStarts", columnData[11] );
884     }
885     feature->SetData().SetUser( *display_data );
886 }
887 
888 //  ----------------------------------------------------------------------------
xSetFeatureLocationChrom(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)889 void CBedReader::xSetFeatureLocationChrom(
890     CRef<CSeq_feat>& feature,
891     const CBedColumnData& columnData)
892 //  ----------------------------------------------------------------------------
893 {
894     xSetFeatureLocation(feature, columnData);
895 
896     CRef<CUser_object> pBed(new CUser_object());
897     pBed->SetType().SetStr("BED");
898     pBed->AddField("location", "chrom");
899     CSeq_feat::TExts& exts = feature->SetExts();
900     exts.push_back(pBed);
901 }
902 
903 //  ----------------------------------------------------------------------------
xSetFeatureLocationGene(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)904 void CBedReader::xSetFeatureLocationGene(
905     CRef<CSeq_feat>& feature,
906     const CBedColumnData& columnData)
907 //  ----------------------------------------------------------------------------
908 {
909     xSetFeatureLocation(feature, columnData);
910 
911     CRef<CUser_object> pBed(new CUser_object());
912     pBed->SetType().SetStr("BED");
913     pBed->AddField("location", "chrom");
914     CSeq_feat::TExts& exts = feature->SetExts();
915     exts.push_back(pBed);
916 }
917 
918 //  ----------------------------------------------------------------------------
xSetFeatureLocationThick(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)919 void CBedReader::xSetFeatureLocationThick(
920     CRef<CSeq_feat>& feature,
921     const CBedColumnData& columnData)
922 //  ----------------------------------------------------------------------------
923 {
924     CRef<CSeq_loc> location(new CSeq_loc);
925     int from, to;
926     from = to = -1;
927 
928     //already established: We got at least three columns
929     try {
930         from = NStr::StringToInt(columnData[6]);
931     }
932     catch (std::exception&) {
933         CReaderMessage error(
934             eDiag_Error,
935             m_uLineNumber,
936             "Invalid data line: Bad \"ThickStart\" value.");
937         throw error;
938     }
939     try {
940         to = NStr::StringToInt(columnData[7]) - 1;
941     }
942     catch (std::exception&) {
943         CReaderMessage error(
944             eDiag_Error,
945             m_uLineNumber,
946             "Invalid data line: Bad \"ThickStop\" value.");
947         throw error;
948     }
949     if (from == to) {
950         location->SetPnt().SetPoint(from);
951     }
952     else if (from < to) {
953         location->SetInt().SetFrom(from);
954         location->SetInt().SetTo(to);
955     }
956     else if (from > to) {
957         //below: flip commenting to switch from null locations to impossible
958         // intervals
959         //location->SetInt().SetFrom(from);
960         //location->SetInt().SetTo(to);
961         location->SetNull();
962     }
963 
964     if (!location->IsNull()) {
965         location->SetStrand(xGetStrand(columnData));
966     }
967     CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
968     location->SetId(*id);
969     feature->SetLocation(*location);
970 
971     CRef<CUser_object> pBed(new CUser_object());
972     pBed->SetType().SetStr("BED");
973     pBed->AddField("location", "thick");
974     CSeq_feat::TExts& exts = feature->SetExts();
975     exts.push_back(pBed);
976 }
977 
978 //  ----------------------------------------------------------------------------
xSetFeatureLocationCds(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)979 void CBedReader::xSetFeatureLocationCds(
980     CRef<CSeq_feat>& feature,
981     const CBedColumnData& columnData)
982 //  ----------------------------------------------------------------------------
983 {
984     CRef<CSeq_loc> location(new CSeq_loc);
985     int from, to;
986     from = to = -1;
987 
988     //already established: We got at least three columns
989     try {
990         from = NStr::StringToInt(columnData[6]);
991     }
992     catch (std::exception&) {
993         CReaderMessage error(
994             eDiag_Error,
995             m_uLineNumber,
996             "Invalid data line: Bad \"ThickStart\" value.");
997         throw error;
998     }
999     try {
1000         to = NStr::StringToInt(columnData[7]) - 1;
1001     }
1002     catch (std::exception&) {
1003         CReaderMessage error(
1004             eDiag_Error,
1005             m_uLineNumber,
1006             "Invalid data line: Bad \"ThickStop\" value.");
1007         throw error;
1008     }
1009     if (from == to) {
1010         location->SetPnt().SetPoint(from);
1011     }
1012     else if (from < to) {
1013         location->SetInt().SetFrom(from);
1014         location->SetInt().SetTo(to);
1015     }
1016     else if (from > to) {
1017         //below: flip commenting to switch from null locations to impossible
1018         // intervals
1019         //location->SetInt().SetFrom(from);
1020         //location->SetInt().SetTo(to);
1021         location->SetNull();
1022     }
1023 
1024     if (!location->IsNull()) {
1025         location->SetStrand(xGetStrand(columnData));
1026     }
1027     CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1028     location->SetId(*id);
1029     feature->SetLocation(*location);
1030 
1031     CRef<CUser_object> pBed(new CUser_object());
1032     pBed->SetType().SetStr("BED");
1033     pBed->AddField("location", "thick");
1034     CSeq_feat::TExts& exts = feature->SetExts();
1035     exts.push_back(pBed);
1036 }
1037 
1038 //  ----------------------------------------------------------------------------
xGetStrand(const CBedColumnData & columnData) const1039 ENa_strand CBedReader::xGetStrand(
1040     const CBedColumnData& columnData) const
1041 //  ----------------------------------------------------------------------------
1042 {
1043     size_t strand_field = 5;
1044     if (columnData.ColumnCount() == 5  &&
1045             (columnData[4] == "-"  ||  columnData[4] == "+")) {
1046         strand_field = 4;
1047     }
1048     if (strand_field < columnData.ColumnCount()) {
1049         string strand = columnData[strand_field];
1050         if (strand != "+"  &&  strand != "-"  &&  strand != ".") {
1051             CReaderMessage error(
1052                 eDiag_Error,
1053                 m_uLineNumber,
1054                 "Invalid data line: Invalid strand character.");
1055             throw error;
1056         }
1057     }
1058     return (columnData[strand_field] == "-" ? eNa_strand_minus : eNa_strand_plus);
1059 }
1060 
1061 //  ----------------------------------------------------------------------------
xSetFeatureLocationBlock(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1062 void CBedReader::xSetFeatureLocationBlock(
1063     CRef<CSeq_feat>& feature,
1064     const CBedColumnData& columnData)
1065 //  ----------------------------------------------------------------------------
1066 {
1067     //already established: there are sufficient columns to do this
1068     size_t blockCount = NStr::StringToUInt(columnData[9]);
1069     vector<size_t> blockSizes;
1070     vector<size_t> blockStarts;
1071     {{
1072         blockSizes.reserve(blockCount);
1073         vector<string> vals;
1074         NStr::Split(columnData[10], ",", vals);
1075         if (vals.back() == "") {
1076             vals.erase(vals.end()-1);
1077         }
1078         if (vals.size() != blockCount) {
1079             CReaderMessage error(
1080                 eDiag_Error,
1081                 columnData.LineNo(),
1082                 "Invalid data line: Bad value count in \"blockSizes\".");
1083             throw error;
1084         }
1085         try {
1086             for (size_t i=0; i < blockCount; ++i) {
1087                 blockSizes.push_back(NStr::StringToUInt(vals[i]));
1088             }
1089         }
1090         catch (std::exception&) {
1091             CReaderMessage error(
1092                 eDiag_Error,
1093                 columnData.LineNo(),
1094                 "Invalid data line: Malformed \"blockSizes\" column.");
1095             throw error;
1096         }
1097     }}
1098     {{
1099         blockStarts.reserve(blockCount);
1100         vector<string> vals;
1101         size_t baseStart = NStr::StringToUInt(columnData[1]);
1102         NStr::Split(columnData[11], ",", vals);
1103         if (vals.back() == "") {
1104             vals.erase(vals.end()-1);
1105         }
1106         if (vals.size() != blockCount) {
1107             CReaderMessage error(
1108                 eDiag_Error,
1109                 columnData.LineNo(),
1110                 "Invalid data line: Bad value count in \"blockStarts\".");
1111             throw error;
1112         }
1113         try {
1114             for (size_t i=0; i < blockCount; ++i) {
1115                 blockStarts.push_back(baseStart + NStr::StringToUInt(vals[i]));
1116             }
1117         }
1118         catch (std::exception&) {
1119             CReaderMessage error(
1120                 eDiag_Error,
1121                 columnData.LineNo(),
1122                 "Invalid data line: Malformed \"blockStarts\" column.");
1123             throw error;
1124         }
1125     }}
1126 
1127     CPacked_seqint& location = feature->SetLocation().SetPacked_int();
1128     ENa_strand strand = xGetStrand(columnData);
1129     CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1130 
1131     bool negative = columnData[5] == "-";
1132 
1133     CPacked_seqint::Tdata& blocks = location.Set();
1134 
1135     for (size_t i=0; i < blockCount; ++i) {
1136         CRef<CSeq_interval> pInterval(new CSeq_interval);
1137         pInterval->SetId(*pId);
1138         pInterval->SetFrom(static_cast<CSeq_interval::TFrom>(blockStarts[i]));
1139         pInterval->SetTo(static_cast<CSeq_interval::TTo>(
1140             blockStarts[i] + blockSizes[i] - 1));
1141         pInterval->SetStrand(strand);
1142         if (negative)
1143             blocks.insert(blocks.begin(), pInterval);
1144         else
1145             blocks.push_back(pInterval);
1146     }
1147 
1148     CRef<CUser_object> pBed(new CUser_object());
1149     pBed->SetType().SetStr("BED");
1150     pBed->AddField("location", "block");
1151     CSeq_feat::TExts& exts = feature->SetExts();
1152     exts.push_back(pBed);
1153 }
1154 
1155 //  ----------------------------------------------------------------------------
xSetFeatureLocationRna(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1156 void CBedReader::xSetFeatureLocationRna(
1157     CRef<CSeq_feat>& feature,
1158     const CBedColumnData& columnData)
1159 //  ----------------------------------------------------------------------------
1160 {
1161     //already established: there are sufficient columns to do this
1162     size_t blockCount = NStr::StringToUInt(columnData[9]);
1163     vector<size_t> blockSizes;
1164     vector<size_t> blockStarts;
1165     {{
1166         blockSizes.reserve(blockCount);
1167         vector<string> vals;
1168         NStr::Split(columnData[10], ",", vals);
1169         if (vals.back() == "") {
1170             vals.erase(vals.end()-1);
1171         }
1172         if (vals.size() != blockCount) {
1173             CReaderMessage error(
1174                 eDiag_Error,
1175                 columnData.LineNo(),
1176                 "Invalid data line: Bad value count in \"blockSizes\".");
1177             throw error;
1178         }
1179         try {
1180             for (size_t i=0; i < blockCount; ++i) {
1181                 blockSizes.push_back(NStr::StringToUInt(vals[i]));
1182             }
1183         }
1184         catch (std::exception&) {
1185             CReaderMessage error(
1186                 eDiag_Error,
1187                 columnData.LineNo(),
1188                 "Invalid data line: Malformed \"blockSizes\" column.");
1189             throw error;
1190         }
1191     }}
1192     {{
1193         blockStarts.reserve(blockCount);
1194         vector<string> vals;
1195         size_t baseStart = NStr::StringToUInt(columnData[1]);
1196         NStr::Split(columnData[11], ",", vals);
1197         if (vals.back() == "") {
1198             vals.erase(vals.end()-1);
1199         }
1200         if (vals.size() != blockCount) {
1201             CReaderMessage error(
1202                 eDiag_Error,
1203                 columnData.LineNo(),
1204                 "Invalid data line: Bad value count in \"blockStarts\".");
1205             throw error;
1206         }
1207         try {
1208             for (size_t i=0; i < blockCount; ++i) {
1209                 blockStarts.push_back(baseStart + NStr::StringToUInt(vals[i]));
1210             }
1211         }
1212         catch (std::exception&) {
1213             CReaderMessage error(
1214                 eDiag_Error,
1215                 columnData.LineNo(),
1216                 "Invalid data line: Malformed \"blockStarts\" column.");
1217             throw error;
1218         }
1219     }}
1220 
1221     CPacked_seqint& location = feature->SetLocation().SetPacked_int();
1222     ENa_strand strand = xGetStrand(columnData);
1223     CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1224 
1225     bool negative = columnData[5] == "-";
1226 
1227     CPacked_seqint::Tdata& blocks = location.Set();
1228 
1229     for (size_t i=0; i < blockCount; ++i) {
1230         CRef<CSeq_interval> pInterval(new CSeq_interval);
1231         pInterval->SetId(*pId);
1232         pInterval->SetFrom(static_cast<CSeq_interval::TFrom>(blockStarts[i]));
1233         pInterval->SetTo(static_cast<CSeq_interval::TTo>(
1234             blockStarts[i] + blockSizes[i] -1));
1235         pInterval->SetStrand(strand);
1236         if (negative)
1237             blocks.insert(blocks.begin(), pInterval);
1238         else
1239             blocks.push_back(pInterval);
1240     }
1241 
1242     CRef<CUser_object> pBed(new CUser_object());
1243     pBed->SetType().SetStr("BED");
1244     pBed->AddField("location", "block");
1245     CSeq_feat::TExts& exts = feature->SetExts();
1246     exts.push_back(pBed);
1247 }
1248 
1249 //  ----------------------------------------------------------------------------
xSetFeatureIdsChrom(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1250 void CBedReader::xSetFeatureIdsChrom(
1251     CRef<CSeq_feat>& feature,
1252     const CBedColumnData& columnData,
1253     unsigned int baseId)
1254 //  ----------------------------------------------------------------------------
1255 {
1256     baseId++; //0-based to 1-based
1257     feature->SetId().SetLocal().SetId(baseId);
1258 
1259     if (xContainsThickFeature(columnData)) {
1260         CRef<CFeat_id> pIdThick(new CFeat_id);
1261         pIdThick->SetLocal().SetId(baseId+1);
1262         CRef<CSeqFeatXref> pXrefThick(new CSeqFeatXref);
1263         pXrefThick->SetId(*pIdThick);
1264         feature->SetXref().push_back(pXrefThick);
1265     }
1266 
1267     if (xContainsBlockFeature(columnData)) {
1268         CRef<CFeat_id> pIdBlock(new CFeat_id);
1269         pIdBlock->SetLocal().SetId(baseId+2);
1270         CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1271         pXrefBlock->SetId(*pIdBlock);
1272         feature->SetXref().push_back(pXrefBlock);
1273     }
1274 }
1275 
1276 //  ----------------------------------------------------------------------------
xSetFeatureIdsGene(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1277 void CBedReader::xSetFeatureIdsGene(
1278     CRef<CSeq_feat>& feature,
1279     const CBedColumnData& columnData,
1280     unsigned int baseId)
1281 //  ----------------------------------------------------------------------------
1282 {
1283     baseId++; //0-based to 1-based
1284     feature->SetId().SetLocal().SetId(baseId);
1285 }
1286 
1287 //  ----------------------------------------------------------------------------
xSetFeatureIdsThick(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1288 void CBedReader::xSetFeatureIdsThick(
1289     CRef<CSeq_feat>& feature,
1290     const CBedColumnData& columnData,
1291     unsigned int baseId)
1292 //  ----------------------------------------------------------------------------
1293 {
1294     baseId++; //0-based to 1-based
1295     feature->SetId().SetLocal().SetId(baseId+1);
1296 
1297     CRef<CFeat_id> pIdChrom(new CFeat_id);
1298     pIdChrom->SetLocal().SetId(baseId);
1299     CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1300     pXrefChrom->SetId(*pIdChrom);
1301     feature->SetXref().push_back(pXrefChrom);
1302 
1303     if (xContainsBlockFeature(columnData)) {
1304         CRef<CFeat_id> pIdBlock(new CFeat_id);
1305         pIdBlock->SetLocal().SetId(baseId+2);
1306         CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1307         pXrefBlock->SetId(*pIdBlock);
1308         feature->SetXref().push_back(pXrefBlock);
1309     }
1310 }
1311 
1312 //  ----------------------------------------------------------------------------
xSetFeatureIdsCds(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1313 void CBedReader::xSetFeatureIdsCds(
1314     CRef<CSeq_feat>& feature,
1315     const CBedColumnData& columnData,
1316    unsigned int baseId)
1317 //  ----------------------------------------------------------------------------
1318 {
1319     baseId++; //0-based to 1-based
1320     feature->SetId().SetLocal().SetId(baseId+1);
1321 
1322     if (xContainsBlockFeature(columnData)) {
1323         CRef<CFeat_id> pIdBlock(new CFeat_id);
1324         pIdBlock->SetLocal().SetId(baseId+2);
1325         CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1326         pXrefBlock->SetId(*pIdBlock);
1327         feature->SetXref().push_back(pXrefBlock);
1328     }
1329     else {
1330         CRef<CFeat_id> pIdChrom(new CFeat_id);
1331         pIdChrom->SetLocal().SetId(baseId);
1332         CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1333         pXrefChrom->SetId(*pIdChrom);
1334         feature->SetXref().push_back(pXrefChrom);
1335     }
1336 }
1337 
1338 //  ----------------------------------------------------------------------------
xSetFeatureIdsBlock(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1339 void CBedReader::xSetFeatureIdsBlock(
1340     CRef<CSeq_feat>& feature,
1341     const CBedColumnData& columnData,
1342     unsigned int baseId)
1343 //  ----------------------------------------------------------------------------
1344 {
1345     baseId++; //0-based to 1-based
1346     feature->SetId().SetLocal().SetId(baseId+2);
1347 
1348     CRef<CFeat_id> pIdChrom(new CFeat_id);
1349     pIdChrom->SetLocal().SetId(baseId);
1350     CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1351     pXrefChrom->SetId(*pIdChrom);
1352     feature->SetXref().push_back(pXrefChrom);
1353 
1354     if (xContainsThickFeature(columnData)) {
1355         CRef<CFeat_id> pIdThick(new CFeat_id);
1356         pIdThick->SetLocal().SetId(baseId+1);
1357         CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1358         pXrefBlock->SetId(*pIdThick);
1359         feature->SetXref().push_back(pXrefBlock);
1360     }
1361 }
1362 
1363 //  ----------------------------------------------------------------------------
xSetFeatureIdsRna(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,unsigned int baseId)1364 void CBedReader::xSetFeatureIdsRna(
1365     CRef<CSeq_feat>& feature,
1366     const CBedColumnData& columnData,
1367     unsigned int baseId)
1368 //  ----------------------------------------------------------------------------
1369 {
1370     baseId++; //0-based to 1-based
1371     feature->SetId().SetLocal().SetId(baseId+2);
1372 
1373     CRef<CFeat_id> pIdChrom(new CFeat_id);
1374     pIdChrom->SetLocal().SetId(baseId);
1375     CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1376     pXrefChrom->SetId(*pIdChrom);
1377     feature->SetXref().push_back(pXrefChrom);
1378 }
1379 
1380 //  ----------------------------------------------------------------------------
xSetFeatureTitle(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1381 void CBedReader::xSetFeatureTitle(
1382     CRef<CSeq_feat>& feature,
1383     const CBedColumnData& columnData)
1384 //  ----------------------------------------------------------------------------
1385 {
1386     if (columnData.ColumnCount() >= 4  &&
1387             !columnData[3].empty()  &&  columnData[3] != ".") {
1388         feature->SetTitle(columnData[0]);
1389     }
1390     else {
1391         feature->SetTitle(string("line_") + NStr::IntToString(m_uLineNumber));
1392     }
1393 }
1394 
1395 
1396 //  ----------------------------------------------------------------------------
xSetFeatureScore(CRef<CUser_object> pDisplayData,const CBedColumnData & columnData)1397 void CBedReader::xSetFeatureScore(
1398     CRef<CUser_object> pDisplayData,
1399     const CBedColumnData& columnData)
1400 //  ----------------------------------------------------------------------------
1401 {
1402     CReaderMessage error(
1403         eDiag_Error,
1404         columnData.LineNo(),
1405         "Invalid data line: Bad \"score\" value.");
1406 
1407     string trackUseScore = m_pTrackDefaults->ValueOf("useScore");
1408     if (columnData.ColumnCount() < 5  || trackUseScore == "1") {
1409         //record does not carry score information
1410         return;
1411     }
1412 
1413     int int_score = NStr::StringToInt(columnData[4], NStr::fConvErr_NoThrow );
1414     double d_score = 0;
1415 
1416     if (int_score == 0 && columnData[4].compare("0") != 0) {
1417         try {
1418             d_score = NStr::StringToDouble(columnData[4]);
1419         }
1420         catch(std::exception&) {
1421             throw error;
1422         }
1423     }
1424 
1425     if (d_score < 0 || int_score < 0) {
1426             throw error;
1427     }
1428     else if (d_score > 0) {
1429         pDisplayData->AddField("score", d_score);
1430     }
1431     else {
1432         pDisplayData->AddField("score", int_score);
1433     }
1434 }
1435 
1436 
1437 //  ----------------------------------------------------------------------------
xSetFeatureColor(CRef<CUser_object> pDisplayData,const CBedColumnData & columnData,ILineErrorListener * pEC)1438 void CBedReader::xSetFeatureColor(
1439     CRef<CUser_object> pDisplayData,
1440     const CBedColumnData& columnData,
1441     ILineErrorListener* pEC )
1442 //  ----------------------------------------------------------------------------
1443 {
1444     //1: if track line itemRgb is set, try that first:
1445     string trackItemRgb = m_pTrackDefaults->ValueOf("itemRgb");
1446     if (trackItemRgb == "On"  &&  columnData.ColumnCount() >= 9) {
1447         string featItemRgb = columnData[8];
1448         if (featItemRgb != ".") {
1449             xSetFeatureColorFromItemRgb(pDisplayData, featItemRgb, pEC);
1450             return;
1451         }
1452     }
1453 
1454     //2: if track useScore is set, try that next:
1455     string trackUseScore = m_pTrackDefaults->ValueOf("useScore");
1456     if (trackUseScore == "1"  && columnData.ColumnCount() >= 5) {
1457         string featScore = columnData[4];
1458         if (featScore != ".") {
1459             xSetFeatureColorFromScore(pDisplayData, featScore);
1460             return;
1461         }
1462     }
1463 
1464     //3: if track colorByStrand is set, try that next:
1465     string trackColorByStrand = m_pTrackDefaults->ValueOf("colorByStrand");
1466     if (!trackColorByStrand.empty()  && columnData.ColumnCount() >= 6) {
1467         ENa_strand strand =
1468             (columnData[5] == "-") ? eNa_strand_minus : eNa_strand_plus;
1469         xSetFeatureColorByStrand(pDisplayData, trackColorByStrand, strand, pEC);
1470         return;
1471     }
1472     //4: if none of the track color attributes are set, attempt feature itemRgb:
1473     if (columnData.ColumnCount() >= 9) {
1474         string featItemRgb = columnData[8];
1475         if (featItemRgb != ".") {
1476             xSetFeatureColorFromItemRgb(pDisplayData, featItemRgb, pEC);
1477             return;
1478         }
1479     }
1480 
1481     //5: if still here, assign default color:
1482     xSetFeatureColorDefault(pDisplayData);
1483 }
1484 
1485 //  ----------------------------------------------------------------------------
xSetFeatureColorDefault(CRef<CUser_object> pDisplayData)1486 void CBedReader::xSetFeatureColorDefault(
1487     CRef<CUser_object> pDisplayData)
1488 //  ----------------------------------------------------------------------------
1489 {
1490     const string colorDefault("0 0 0");
1491     pDisplayData->AddField("color", colorDefault);
1492 }
1493 
1494 //  ----------------------------------------------------------------------------
xSetFeatureColorByStrand(CRef<CUser_object> pDisplayData,const string & trackColorByStrand,ENa_strand strand,ILineErrorListener * pEC)1495 void CBedReader::xSetFeatureColorByStrand(
1496     CRef<CUser_object> pDisplayData,
1497     const string& trackColorByStrand,
1498     ENa_strand strand,
1499     ILineErrorListener* pEC)
1500 //  ----------------------------------------------------------------------------
1501 {
1502     try {
1503         string colorPlus, colorMinus;
1504         NStr::SplitInTwo(trackColorByStrand, " ", colorPlus, colorMinus);
1505         string useColor = (strand == eNa_strand_minus) ? colorMinus : colorPlus;
1506         xSetFeatureColorFromItemRgb(pDisplayData, useColor, pEC);
1507     }
1508     catch (std::exception&) {
1509         CReaderMessage error(
1510             eDiag_Error,
1511             m_uLineNumber,
1512             "Invalid track line: Bad colorByStrand value.");
1513         throw error;
1514     }
1515 }
1516 
1517 //  ----------------------------------------------------------------------------
xSetFeatureColorFromScore(CRef<CUser_object> pDisplayData,const string & featScore)1518 void CBedReader::xSetFeatureColorFromScore(
1519     CRef<CUser_object> pDisplayData,
1520     const string& featScore )
1521 //  ----------------------------------------------------------------------------
1522 {
1523     CReaderMessage error(
1524         eDiag_Error,
1525         m_uLineNumber,
1526         "Invalid data line: Bad score value to be used for color.");
1527 
1528     int score = 0;
1529     try {
1530         score = static_cast<int>(NStr::StringToDouble(featScore));
1531     }
1532     catch (const std::exception&) {
1533         throw error;
1534     }
1535     if (score < 0  ||  1000 < score) {
1536         throw error;
1537     }
1538     string greyValue  = NStr::DoubleToString(255 - (score/4));
1539     vector<string> srgb{ greyValue, greyValue, greyValue};
1540     string rgbValue = NStr::Join(srgb, " ");
1541     pDisplayData->AddField("color", rgbValue);
1542 }
1543 
1544 //  ----------------------------------------------------------------------------
xSetFeatureColorFromItemRgb(CRef<CUser_object> pDisplayData,const string & itemRgb,ILineErrorListener * pEC)1545 void CBedReader::xSetFeatureColorFromItemRgb(
1546     CRef<CUser_object> pDisplayData,
1547     const string& itemRgb,
1548     ILineErrorListener* pEC )
1549 //  ----------------------------------------------------------------------------
1550 {
1551     CReaderMessage warning(
1552         eDiag_Warning,
1553         m_uLineNumber,
1554         "Bad color value - converted to BLACK.");
1555     const string rgbDefault = "0 0 0";
1556 
1557     //optimization for common case:
1558     if (itemRgb == "0") {
1559         pDisplayData->AddField("color", rgbDefault);
1560         return;
1561     }
1562 
1563     vector<string> srgb;
1564     NStr::Split(itemRgb, ",", srgb);
1565 
1566     if (srgb.size() == 3) {
1567         auto valuesOk = true;
1568         for (auto i=0; i<3; ++i) {
1569             int test;
1570             try {
1571                 test = NStr::StringToInt(srgb[i], NStr::fDS_ProhibitFractions);
1572             }
1573             catch(CException&) {
1574                 valuesOk = false;
1575                 break;
1576             }
1577             if ((test < 0)  ||  (256 <= test)) {
1578                 valuesOk = false;
1579                 break;
1580             }
1581         }
1582         if (!valuesOk) {
1583             m_pMessageHandler->Report(warning);
1584             pDisplayData->AddField("color", rgbDefault);
1585             return;
1586         }
1587         auto outValue = srgb[0] + " " + srgb[1] + " " + srgb[2];
1588         pDisplayData->AddField("color", outValue);
1589         return;
1590     }
1591 
1592     if (srgb.size() == 1) {
1593         auto assumeHex = false;
1594         string itemRgbCopy(itemRgb);
1595         if (NStr::StartsWith(itemRgbCopy, "0x")) {
1596             assumeHex = true;
1597             itemRgbCopy = itemRgb.substr(2);
1598         }
1599         else if (NStr::StartsWith(itemRgbCopy, "#")) {
1600             assumeHex = true;
1601             itemRgbCopy = itemRgbCopy.substr(1);
1602         }
1603         unsigned long colorValue;
1604         int radix = (assumeHex ? 16 : 10);
1605         try {
1606             colorValue = NStr::StringToULong(
1607                 itemRgbCopy, NStr::fDS_ProhibitFractions, radix);
1608         }
1609         catch (CStringException&) {
1610             m_pMessageHandler->Report(warning);
1611             pDisplayData->AddField("color", rgbDefault);
1612             return;
1613         }
1614         int blue = colorValue & 0xFF;
1615         colorValue >>= 8;
1616         int green = colorValue & 0xFF;
1617         colorValue >>= 8;
1618         int red = colorValue & 0xFF;
1619         auto outValue = NStr::IntToString(red) + " " + NStr::IntToString(green) +
1620             " " + NStr::IntToString(blue);
1621         pDisplayData->AddField("color", outValue);
1622         return;
1623     }
1624 
1625     m_pMessageHandler->Report(warning);
1626     pDisplayData->AddField("color", rgbDefault);
1627     return;
1628 }
1629 
1630 //  ----------------------------------------------------------------------------
xSetFeatureBedData(CRef<CSeq_feat> & feature,const CBedColumnData & columnData,ILineErrorListener * pEc)1631 void CBedReader::xSetFeatureBedData(
1632     CRef<CSeq_feat>& feature,
1633     const CBedColumnData& columnData,
1634     ILineErrorListener* pEc )
1635 //  ----------------------------------------------------------------------------
1636 {
1637     CSeqFeatData& data = feature->SetData();
1638     if (columnData.ColumnCount() >= 4  &&  columnData[3] != ".") {
1639         data.SetRegion() = columnData[3];
1640     }
1641     else {
1642         data.SetRegion() = columnData[0];
1643     }
1644 
1645     CRef<CUser_object> pDisplayData(new CUser_object());
1646 
1647     CSeq_feat::TExts& exts = feature->SetExts();
1648     pDisplayData->SetType().SetStr("DisplaySettings");
1649     exts.push_front(pDisplayData);
1650 
1651     xSetFeatureScore(pDisplayData, columnData);
1652     xSetFeatureColor(pDisplayData, columnData, pEc);
1653 }
1654 
1655 //  ----------------------------------------------------------------------------
xSetFeatureLocation(CRef<CSeq_feat> & feature,const CBedColumnData & columnData)1656 void CBedReader::xSetFeatureLocation(
1657     CRef<CSeq_feat>& feature,
1658     const CBedColumnData& columnData )
1659 //  ----------------------------------------------------------------------------
1660 {
1661     //
1662     //  Note:
1663     //  BED convention for specifying intervals is 0-based, first in, first out.
1664     //  ASN convention for specifying intervals is 0-based, first in, last in.
1665     //  Hence, conversion BED->ASN  leaves the first leaves the "from" coordinate
1666     //  unchanged, and decrements the "to" coordinate by one.
1667     //
1668 
1669     CRef<CSeq_loc> location(new CSeq_loc);
1670     int from, to;
1671     from = to = -1;
1672 
1673     //already established: We got at least three columns
1674     try {
1675         from = NStr::StringToInt(columnData[1]);
1676     }
1677     catch(std::exception&) {
1678         CReaderMessage error(
1679             eDiag_Error,
1680             columnData.LineNo(),
1681             "Invalid data line: Bad \"SeqStart\" value.");
1682         throw error;
1683     }
1684     try {
1685         to = NStr::StringToInt(columnData[2]) - 1;
1686     }
1687     catch(std::exception&) {
1688         CReaderMessage error(
1689             eDiag_Error,
1690             columnData.LineNo(),
1691             "Invalid data line: Bad \"SeqStop\" value.");
1692         throw error;
1693     }
1694     if (from == to) {
1695         location->SetPnt().SetPoint(from);
1696     }
1697     else if (from < to) {
1698         location->SetInt().SetFrom(from);
1699         location->SetInt().SetTo(to);
1700     }
1701     else {
1702         CReaderMessage error(
1703             eDiag_Error,
1704             columnData.LineNo(),
1705             "Invalid data line: \"SeqStop\" less than \"SeqStart\".");
1706         throw error;
1707     }
1708 
1709     size_t strand_field = 5;
1710     if (columnData.ColumnCount() == 5  &&
1711             (columnData[4] == "-"  ||  columnData[4] == "+")) {
1712         strand_field = 4;
1713     }
1714     if (strand_field < columnData.ColumnCount()) {
1715         string strand = columnData[strand_field];
1716         if (strand != "+"  &&  strand != "-"  &&  strand != ".") {
1717             CReaderMessage error(
1718                 eDiag_Error,
1719                 columnData.LineNo(),
1720                 "Invalid data line: Invalid strand character.");
1721             throw error;
1722         }
1723         location->SetStrand(( columnData[strand_field] == "+" ) ?
1724                            eNa_strand_plus : eNa_strand_minus );
1725     }
1726 
1727     CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1728     location->SetId(*id);
1729     feature->SetLocation(*location);
1730 }
1731 
1732 //  ----------------------------------------------------------------------------
1733 bool
ReadTrackData(ILineReader & lr,CRawBedTrack & rawdata,ILineErrorListener * pMessageListener)1734 CBedReader::ReadTrackData(
1735     ILineReader& lr,
1736     CRawBedTrack& rawdata,
1737     ILineErrorListener* pMessageListener)
1738 //  ----------------------------------------------------------------------------
1739 {
1740     if (m_CurBatchSize == m_MaxBatchSize) {
1741         m_CurBatchSize = 0;
1742         return xReadBedDataRaw(lr, rawdata, pMessageListener);
1743     }
1744 
1745     string line;
1746     while (xGetLine(lr, line)) {
1747         m_CurBatchSize = 0;
1748         if (line == "browser"  ||  NStr::StartsWith(line, "browser ")) {
1749             continue;
1750         }
1751         if (line == "track"  ||  NStr::StartsWith(line, "track ")) {
1752             continue;
1753         }
1754         //data line
1755         lr.UngetLine();
1756         return xReadBedDataRaw(lr, rawdata, pMessageListener);
1757     }
1758     return false;
1759 }
1760 
1761 //  ----------------------------------------------------------------------------
1762 bool
xReadBedRecordRaw(const string & line,CRawBedRecord & record,ILineErrorListener * pMessageListener)1763 CBedReader::xReadBedRecordRaw(
1764     const string& line,
1765     CRawBedRecord& record,
1766     ILineErrorListener* pMessageListener)
1767 //  ----------------------------------------------------------------------------
1768 {
1769     if (line == "browser"  || NStr::StartsWith(line, "browser ")
1770             || NStr::StartsWith(line, "browser\t")) {
1771         return false;
1772     }
1773     if (line == "track"  || NStr::StartsWith(line, "track ")
1774             || NStr::StartsWith(line, "track\t")) {
1775         return false;
1776     }
1777 
1778     vector<string> columns;
1779     string linecopy = line;
1780     NStr::TruncateSpacesInPlace(linecopy);
1781 
1782     //  parse
1783     NStr::Split(linecopy, " \t", columns, NStr::fSplit_MergeDelimiters);
1784     xCleanColumnValues(columns);
1785     if (columns.size() != mRealColumnCount) {
1786         CReaderMessage error(
1787             eDiag_Error,
1788             m_uLineNumber,
1789             "Invalid data line: Inconsistent column count.");
1790         m_pMessageHandler->Report(error);
1791         return false;
1792     }
1793 
1794     //assign columns to record:
1795     CRef<CSeq_id> id = CReadUtil::AsSeqId(columns[0], m_iFlags, false);
1796 
1797     unsigned int start;
1798     try {
1799         start = NStr::StringToInt(columns[1]);
1800     }
1801     catch(std::exception&) {
1802         CReaderMessage error(
1803             eDiag_Error,
1804             m_uLineNumber,
1805             "Invalid data line: Invalid \"SeqStart\" (column 2) value.");
1806         m_pMessageHandler->Report(error);
1807         return false;
1808     }
1809 
1810     unsigned int stop;
1811     try {
1812         stop = NStr::StringToInt(columns[2]);
1813     }
1814     catch(std::exception&) {
1815         CReaderMessage error(
1816             eDiag_Error,
1817             m_uLineNumber,
1818             "Invalid data line: Invalid \"SeqStop\" (column 3) value.");
1819         m_pMessageHandler->Report(error);
1820         return false;
1821     }
1822 
1823     int score(-1);
1824     if (mValidColumnCount >= 5  &&  columns[4] != ".") {
1825         try {
1826             score = NStr::StringToInt(columns[4],
1827                 NStr::fConvErr_NoThrow|NStr::fAllowTrailingSymbols);
1828         }
1829         catch(std::exception&) {
1830             CReaderMessage error(
1831                 eDiag_Error,
1832                 m_uLineNumber,
1833                 "Invalid data line: Invalid \"Score\" (column 5) value.");
1834             m_pMessageHandler->Report(error);
1835             return false;
1836         }
1837     }
1838     ENa_strand strand = eNa_strand_plus;
1839     if (mValidColumnCount >= 6) {
1840         if (columns[5] == "-") {
1841             strand = eNa_strand_minus;
1842         }
1843     }
1844     record.SetInterval(*id, start, stop, strand);
1845     if (score >= 0) {
1846         record.SetScore(score);
1847     }
1848     return true;
1849 }
1850 
1851 //  ----------------------------------------------------------------------------
1852 bool
xContainsThickFeature(const CBedColumnData & columnData) const1853 CBedReader::xContainsThickFeature(
1854     const CBedColumnData& columnData) const
1855 //  ----------------------------------------------------------------------------
1856 {
1857     if (columnData.ColumnCount() < 8  ||  mValidColumnCount < 8) {
1858         return false;
1859     }
1860 
1861     int start = -1, from = -1, to = -1;
1862     try {
1863         start = NStr::StringToInt(columnData[1]);
1864         from = NStr::StringToInt(columnData[6]);
1865         to = NStr::StringToInt(columnData[7]);
1866     }
1867     catch (std::exception&) {
1868         CReaderMessage error(
1869             eDiag_Error,
1870             columnData.LineNo(),
1871             "Invalid data line: Bad \"Start/ThickStart/ThickStop\" values.");
1872         throw error;
1873     }
1874     if (start == from  &&  from == to) {
1875         return false;
1876     }
1877     return true;
1878 }
1879 
1880 
1881 //  ----------------------------------------------------------------------------
1882 bool
xContainsRnaFeature(const CBedColumnData & columnData) const1883 CBedReader::xContainsRnaFeature(
1884     const CBedColumnData& columnData) const
1885 //  ----------------------------------------------------------------------------
1886 {
1887     if (columnData.ColumnCount() < 12  ||  mValidColumnCount < 12) {
1888         return false;
1889     }
1890 
1891     int start = -1, from = -1, to = -1;
1892     try {
1893         start = NStr::StringToInt(columnData[1]);
1894         from = NStr::StringToInt(columnData[6]);
1895         to = NStr::StringToInt(columnData[7]);
1896     }
1897     catch (std::exception&) {
1898         CReaderMessage error(
1899             eDiag_Error,
1900             columnData.LineNo(),
1901             "Invalid data line: Bad \"Start/ThickStart/ThickStop\" values.");
1902         throw error;
1903     }
1904     if (start == from  &&  from == to) {
1905         return false;
1906     }
1907     return true;
1908 }
1909 
1910 
1911 //  ----------------------------------------------------------------------------
1912 bool
xContainsBlockFeature(const CBedColumnData & columnData) const1913 CBedReader::xContainsBlockFeature(
1914     const CBedColumnData& columnData) const
1915 //  ----------------------------------------------------------------------------
1916 {
1917     return (columnData.ColumnCount() >= 12  &&  mValidColumnCount >= 12);
1918 }
1919 
1920 
1921 //  ----------------------------------------------------------------------------
1922 bool
xContainsCdsFeature(const CBedColumnData & columnData) const1923 CBedReader::xContainsCdsFeature(
1924     const CBedColumnData& columnData) const
1925 //  ----------------------------------------------------------------------------
1926 {
1927     return (columnData.ColumnCount() >= 8  &&  mValidColumnCount >= 8);
1928 }
1929 
1930 
1931 //  ----------------------------------------------------------------------------
1932 bool
xReadBedDataRaw(ILineReader & lr,CRawBedTrack & rawdata,ILineErrorListener * pMessageListener)1933 CBedReader::xReadBedDataRaw(
1934     ILineReader& lr,
1935     CRawBedTrack& rawdata,
1936     ILineErrorListener* pMessageListener)
1937 //  ----------------------------------------------------------------------------
1938 {
1939     rawdata.Reset();
1940     string line;
1941     while (xGetLine(lr, line)) {
1942         CRawBedRecord record;
1943         if (!xReadBedRecordRaw(line, record, pMessageListener)) {
1944             lr.UngetLine();
1945             break;
1946         }
1947         rawdata.AddRecord(record);
1948         ++m_CurBatchSize;
1949         if (m_CurBatchSize == m_MaxBatchSize) {
1950             return rawdata.HasData();
1951         }
1952     }
1953 
1954     return rawdata.HasData();
1955 }
1956 
1957 //  ----------------------------------------------------------------------------
1958 void
xCleanColumnValues(vector<string> & columns)1959 CBedReader::xCleanColumnValues(
1960    vector<string>& columns)
1961 //  ----------------------------------------------------------------------------
1962 {
1963     string fixup;
1964 
1965     if (NStr::EqualNocase(columns[0], "chr")  &&  columns.size() > 1) {
1966         columns[1] = columns[0] + columns[1];
1967         columns.erase(columns.begin());
1968     }
1969     if (columns.size() < 3) {
1970         CReaderMessage error(
1971             eDiag_Error,
1972             0,
1973             "Invalid data line: Insufficient column count.");
1974         throw error;
1975     }
1976 
1977     try {
1978         NStr::Replace(columns[1], ",", "", fixup);
1979         columns[1] = fixup;
1980     }
1981     catch(std::exception&) {
1982         CReaderMessage error(
1983             eDiag_Error,
1984             0,
1985             "Invalid data line: Invalid \"SeqStart\" (column 2) value.");
1986         throw error;
1987     }
1988 
1989     try {
1990         NStr::Replace(columns[2], ",", "", fixup);
1991         columns[2] = fixup;
1992     }
1993     catch(std::exception&) {
1994         CReaderMessage error(
1995             eDiag_Error,
1996             0,
1997             "Invalid data line: Invalid \"SeqStop\" (column 3) value.");
1998         throw error;
1999     }
2000 }
2001 
2002 //  ----------------------------------------------------------------------------
2003 void
xAssignBedColumnCount(CSeq_annot & annot)2004 CBedReader::xAssignBedColumnCount(
2005     CSeq_annot& annot)
2006 //  ----------------------------------------------------------------------------
2007 {
2008     if(mValidColumnCount < 3) {
2009         return;
2010     }
2011     CRef<CUser_object> columnCountUser(new CUser_object());
2012     columnCountUser->SetType().SetStr("NCBI_BED_COLUMN_COUNT");
2013     columnCountUser->AddField("NCBI_BED_COLUMN_COUNT", int (mValidColumnCount));
2014 
2015     CRef<CAnnotdesc> userDesc(new CAnnotdesc());
2016     userDesc->SetUser().Assign(*columnCountUser);
2017     annot.SetDesc().Set().push_back(userDesc);
2018 }
2019 
2020 END_objects_SCOPE
2021 END_NCBI_SCOPE
2022