1 /*  $Id: wiggle_reader.cpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Frank Ludwig, leaning heavily on code lifted from the the wig2table
27  *          project, by Aaron Ucko.
28  *
29  * File Description:
30  *   WIGGLE file reader
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 
37 #include <util/line_reader.hpp>
38 
39 #include <objects/seq/Annotdesc.hpp>
40 #include <objects/seqtable/Seq_table.hpp>
41 #include <objects/seqtable/SeqTable_column.hpp>
42 #include <objects/seqres/Seq_graph.hpp>
43 #include <objects/seqres/Byte_graph.hpp>
44 
45 #include <objtools/readers/wiggle_reader.hpp>
46 #include "reader_message_handler.hpp"
47 
48 BEGIN_NCBI_SCOPE
49 BEGIN_objects_SCOPE
50 
51 //  ----------------------------------------------------------------------------
52 bool
xValuesAreFromSingleSequence() const53 CWiggleReader::xValuesAreFromSingleSequence() const
54 //  ----------------------------------------------------------------------------
55 {
56     if (m_Values.empty()) {
57         return false;
58     }
59     TValues::const_iterator cit = m_Values.begin();
60     string first = cit->m_Chrom;
61     for (cit++; cit != m_Values.end(); ++cit) {
62         if (cit->m_Chrom != first) {
63             return false;
64         }
65     }
66     return true;
67 }
68 
69 //  ----------------------------------------------------------------------------
CWiggleReader(int iFlags,const string & name,const string & title,CReaderListener * pRl)70 CWiggleReader::CWiggleReader(
71     int iFlags,
72     const string& name,
73     const string& title,
74     CReaderListener* pRl):
75 //  ----------------------------------------------------------------------------
76     CReaderBase(iFlags, name, title, CReadUtil::AsSeqId, pRl),
77     m_OmitZeros(false),
78     m_TrackType(eTrackType_invalid)
79 {
80     m_uLineNumber = 0;
81     m_GapValue = 0.0;
82 }
83 
84 //  ----------------------------------------------------------------------------
~CWiggleReader()85 CWiggleReader::~CWiggleReader()
86 //  ----------------------------------------------------------------------------
87 {
88 }
89 
90 //  ----------------------------------------------------------------------------
91 CRef< CSeq_annot >
ReadSeqAnnot(ILineReader & lr,ILineErrorListener * pEL)92 CWiggleReader::ReadSeqAnnot(
93     ILineReader& lr,
94     ILineErrorListener* pEL)
95 //  ----------------------------------------------------------------------------
96 {
97     m_ChromId.clear();
98     m_Values.clear();
99     if (!(m_iFlags & CWiggleReader::fAsGraph)) {
100         m_ChromId.clear();
101         m_Values.clear();
102         xParseTrackLine("track type=wiggle_0");
103     }
104 
105     xProgressInit(lr);
106 
107     m_uDataCount = 0;
108     CRef<CSeq_annot> pAnnot = xCreateSeqAnnot();
109 
110     TReaderData readerData;
111     xGuardedGetData(lr, readerData, pEL);
112     if (readerData.empty()) {
113         pAnnot.Reset();
114         return pAnnot;
115     }
116     xGuardedProcessData(readerData, *pAnnot, pEL);
117     xPostProcessAnnot(*pAnnot);
118     return pAnnot;
119 }
120 
121 //  ----------------------------------------------------------------------------
122 void
xGetData(ILineReader & lr,TReaderData & readerData)123 CWiggleReader::xGetData(
124     ILineReader& lr,
125     TReaderData& readerData)
126 //  ----------------------------------------------------------------------------
127 {
128     // Goal: Extract one wiggle graph object (including all its data lines) at
129     //  once
130     bool haveData = false;
131     readerData.clear();
132     string line;
133     while (xGetLine(lr, line)) {
134         bool isMeta = NStr::StartsWith(line, "fixedStep")  ||
135             NStr::StartsWith(line, "variableStep")  ||
136             xIsTrackLine(line)  ||
137             xIsBrowserLine(line);
138         if (isMeta) {
139             if (haveData) {
140                 xUngetLine(lr);
141                 break;
142             }
143         }
144         readerData.push_back(TReaderLine{m_uLineNumber, line});
145         if (!isMeta) {
146             haveData = true;
147         }
148         ++m_uDataCount;
149     }
150 }
151 
152 //  ----------------------------------------------------------------------------
153 void
xProcessData(const TReaderData & readerData,CSeq_annot &)154 CWiggleReader::xProcessData(
155     const TReaderData& readerData,
156     CSeq_annot&)
157 //  ----------------------------------------------------------------------------
158 {
159     for (auto curData = readerData.begin(); curData != readerData.end(); curData++) {
160         auto line = curData->mData;
161         if (xParseBrowserLine(line)) {
162             continue;
163         }
164         if (xParseTrackLine(line)) {
165             continue;
166         }
167 
168         if (xProcessFixedStepData(curData, readerData)) {
169             break;
170         }
171         if (xProcessVariableStepData(curData, readerData)) {
172             break;
173         }
174         xProcessBedData(curData, readerData);
175     }
176 }
177 
178 
179 //  ----------------------------------------------------------------------------
180 bool
ReadTrackData(ILineReader & lr,CRawWiggleTrack & rawdata,ILineErrorListener * pMessageListener)181 CWiggleReader::ReadTrackData(
182     ILineReader& lr,
183     CRawWiggleTrack& rawdata,
184     ILineErrorListener* pMessageListener)
185 //  ----------------------------------------------------------------------------
186 {
187     TReaderData readerData;
188     xGuardedGetData(lr, readerData, pMessageListener);
189     auto curIt = readerData.cbegin();
190     while (curIt != readerData.end()) {
191         auto firstLine(curIt->mData);
192         if (NStr::StartsWith(firstLine, "fixedStep")) {
193             SFixedStepInfo fixedStepInfo;
194             xGetFixedStepInfo(firstLine, fixedStepInfo);
195             return xReadFixedStepDataRaw(
196                 fixedStepInfo, ++curIt, readerData, rawdata);
197         }
198         if (NStr::StartsWith(firstLine, "variableStep")) {
199             SVarStepInfo varStepInfo;
200             xGetVariableStepInfo(firstLine, varStepInfo);
201             return xReadVariableStepDataRaw(
202                 varStepInfo, ++curIt, readerData, rawdata);
203         }
204         ++curIt;
205     }
206     return false;
207 }
208 
209 //  ----------------------------------------------------------------------------
210 bool
xReadFixedStepDataRaw(const SFixedStepInfo & fixedStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData,CRawWiggleTrack & rawdata)211 CWiggleReader::xReadFixedStepDataRaw(
212     const SFixedStepInfo& fixedStepInfo,
213     TReaderData::const_iterator& curIt,
214     const TReaderData& readerData,
215     CRawWiggleTrack& rawdata)
216 //  ----------------------------------------------------------------------------
217 {
218     rawdata.Reset();
219 
220     CRef<CSeq_id> id =
221         CReadUtil::AsSeqId(fixedStepInfo.mChrom, m_iFlags);
222 
223     unsigned int pos(fixedStepInfo.mStart);
224     while (curIt != readerData.end()) {
225         auto line(curIt->mData);
226         double value(0);
227         xGetDouble(line, value);
228         rawdata.AddRecord(
229             CRawWiggleRecord(*id, pos, fixedStepInfo.mSpan, value));
230         pos += fixedStepInfo.mStep;
231         curIt++;
232     }
233     return rawdata.HasData();
234 }
235 
236 //  ----------------------------------------------------------------------------
237 bool
xReadVariableStepDataRaw(const SVarStepInfo & varStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData,CRawWiggleTrack & rawdata)238 CWiggleReader::xReadVariableStepDataRaw(
239     const SVarStepInfo& varStepInfo,
240     TReaderData::const_iterator& curIt,
241     const TReaderData& readerData,
242     CRawWiggleTrack& rawdata)
243 //  ----------------------------------------------------------------------------
244 {
245     rawdata.Reset();
246 
247     CRef<CSeq_id> id =
248         CReadUtil::AsSeqId(varStepInfo.mChrom, m_iFlags);
249 
250     while (curIt != readerData.end()) {
251         auto line(curIt->mData);
252         unsigned int pos(0);
253         xGetPos(line, pos);
254         xSkipWS(line);
255         double value(0);
256         xGetDouble(line, value);
257         rawdata.AddRecord(
258             CRawWiggleRecord(*id, pos, varStepInfo.mSpan, value));
259         curIt++;
260     }
261     return rawdata.HasData();
262 }
263 
264 
265 //  ----------------------------------------------------------------------------
xEstimateSize(size_t rows,bool fixed_span) const266 double CWiggleReader::xEstimateSize(size_t rows, bool fixed_span) const
267 //  ----------------------------------------------------------------------------
268 {
269     double ret = 0;
270     ret += rows*4;
271     if ( !fixed_span )
272         ret += rows*4;
273     (m_iFlags & fAsByte) ? (ret += rows) : (ret += 8*rows);
274     return ret;
275 }
276 
277 //  ----------------------------------------------------------------------------
xPreprocessValues(SWiggleStat & stat)278 void CWiggleReader::xPreprocessValues(SWiggleStat& stat)
279 //  ----------------------------------------------------------------------------
280 {
281     bool sorted = true;
282     size_t size = m_Values.size();
283     if ( size ) {
284         stat.SetFirstSpan(m_Values[0].m_Span);
285         stat.SetFirstValue(m_Values[0].m_Value);
286 
287         for ( size_t i = 1; i < size; ++i ) {
288             stat.AddSpan(m_Values[i].m_Span);
289             stat.AddValue(m_Values[i].m_Value);
290             if ( sorted ) {
291                 if ( m_Values[i].m_Pos < m_Values[i-1].m_Pos ) {
292                     sorted = false;
293                 }
294                 if ( m_Values[i].m_Pos != m_Values[i-1].GetEnd() ) {
295                     stat.m_HaveGaps = true;
296                 }
297             }
298         }
299     }
300     if ( !sorted ) {
301         sort(m_Values.begin(), m_Values.end());
302         stat.m_HaveGaps = false;
303         for ( size_t i = 1; i < size; ++i ) {
304             if ( m_Values[i].m_Pos != m_Values[i-1].GetEnd() ) {
305                 stat.m_HaveGaps = true;
306                 break;
307             }
308         }
309     }
310     if ( (m_iFlags & fAsGraph) && stat.m_HaveGaps ) {
311         stat.AddValue(m_GapValue);
312     }
313 
314     const int range = 255;
315     if ( stat.m_Max > stat.m_Min &&
316             (!stat.m_IntValues || stat.m_Max-stat.m_Min > range) ) {
317         stat.m_Step = (stat.m_Max-stat.m_Min)/range;
318         stat.m_StepMul = 1/stat.m_Step;
319     }
320 
321     if ( !(m_iFlags & fAsGraph) && (m_iFlags & fJoinSame) && size ) {
322         TValues nv;
323         nv.reserve(size);
324         nv.push_back(m_Values[0]);
325         for ( size_t i = 1; i < size; ++i ) {
326             if ( m_Values[i].m_Pos == nv.back().GetEnd() &&
327                  m_Values[i].m_Value == nv.back().m_Value ) {
328                 nv.back().m_Span += m_Values[i].m_Span;
329             }
330             else {
331                 nv.push_back(m_Values[i]);
332             }
333         }
334         if ( nv.size() != size ) {
335             double s = xEstimateSize(size, stat.m_FixedSpan);
336             double ns = xEstimateSize(nv.size(), false);
337             if ( ns < s*.75 ) {
338                 m_Values.swap(nv);
339                 size = m_Values.size();
340                 LOG_POST("Joined size: "<<size);
341                 stat.m_FixedSpan = false;
342             }
343         }
344     }
345 
346     if ( (m_iFlags & fAsGraph) && !stat.m_FixedSpan ) {
347         stat.m_Span = 1;
348         stat.m_FixedSpan = true;
349     }
350 }
351 
352 //  ----------------------------------------------------------------------------
xMakeChromId()353 CRef<CSeq_id> CWiggleReader::xMakeChromId()
354 //  ----------------------------------------------------------------------------
355 {
356     CRef<CSeq_id> id =
357         CReadUtil::AsSeqId(m_ChromId, m_iFlags);
358     return id;
359 }
360 
361 //  ----------------------------------------------------------------------------
xSetTotalLoc(CSeq_loc & loc,CSeq_id & chrom_id)362 void CWiggleReader::xSetTotalLoc(CSeq_loc& loc, CSeq_id& chrom_id)
363 //  ----------------------------------------------------------------------------
364 {
365     if ( m_Values.empty() ) {
366         loc.SetEmpty(chrom_id);
367     }
368     else {
369         CSeq_interval& interval = loc.SetInt();
370         interval.SetId(chrom_id);
371         interval.SetFrom(m_Values.front().m_Pos);
372         interval.SetTo(m_Values.back().GetEnd()-1);
373     }
374 }
375 
376 //  ----------------------------------------------------------------------------
xMakeTable()377 CRef<CSeq_table> CWiggleReader::xMakeTable()
378 //  ----------------------------------------------------------------------------
379 {
380     size_t size = m_Values.size();
381 
382     CRef<CSeq_table> table(new CSeq_table);
383     table->SetFeat_type(0);
384 
385     CRef<CSeq_id> chrom_id = xMakeChromId();
386 
387     CRef<CSeq_loc> table_loc(new CSeq_loc);
388     { // Seq-table location
389         CRef<CSeqTable_column> col_id(new CSeqTable_column);
390         table->SetColumns().push_back(col_id);
391         col_id->SetHeader().SetField_name("Seq-table location");
392         col_id->SetDefault().SetLoc(*table_loc);
393     }
394 
395     { // Seq-id
396         CRef<CSeqTable_column> col_id(new CSeqTable_column);
397         table->SetColumns().push_back(col_id);
398         col_id->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_id);
399         //col_id->SetDefault().SetId(*chrom_id);
400         if (this->xValuesAreFromSingleSequence()) {
401             CRef<CSeq_id> pId = CReadUtil::AsSeqId(m_Values.front().m_Chrom, m_iFlags);
402             col_id->SetDefault().SetId(*pId);
403         }
404         else {
405             CSeqTable_multi_data::TId& seq_id = col_id->SetData().SetId();
406             seq_id.reserve(size);
407             for (TValues::const_iterator cit = m_Values.begin(); cit != m_Values.end(); ++cit) {
408                 CRef<CSeq_id> pId = CReadUtil::AsSeqId(cit->m_Chrom, m_iFlags);
409                 seq_id.push_back(pId);
410             }
411         }
412     }
413 
414     // position
415     CRef<CSeqTable_column> col_pos(new CSeqTable_column);
416     table->SetColumns().push_back(col_pos);
417     col_pos->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_from);
418     CSeqTable_multi_data::TInt& pos = col_pos->SetData().SetInt();
419 
420     SWiggleStat stat;
421     xPreprocessValues(stat);
422 
423     xSetTotalLoc(*table_loc, *chrom_id);
424 
425     table->SetNum_rows(static_cast<TSeqPos>(size));
426     pos.reserve(size);
427 
428     CSeqTable_multi_data::TInt* span_ptr = 0;
429     { // span
430         CRef<CSeqTable_column> col_span(new CSeqTable_column);
431         table->SetColumns().push_back(col_span);
432         col_span->SetHeader().SetField_name("span");
433         if ( stat.m_FixedSpan ) {
434             col_span->SetDefault().SetInt(stat.m_Span);
435         }
436         else {
437             span_ptr = &col_span->SetData().SetInt();
438             span_ptr->reserve(size);
439         }
440     }
441 
442     if ( stat.m_HaveGaps ) {
443         CRef<CSeqTable_column> col_step(new CSeqTable_column);
444         table->SetColumns().push_back(col_step);
445         col_step->SetHeader().SetField_name("value_gap");
446         col_step->SetDefault().SetReal(m_GapValue);
447     }
448 
449     if (m_iFlags & fAsByte) { // values
450         CRef<CSeqTable_column> col_min(new CSeqTable_column);
451         table->SetColumns().push_back(col_min);
452         col_min->SetHeader().SetField_name("value_min");
453         col_min->SetDefault().SetReal(stat.m_Min);
454 
455         CRef<CSeqTable_column> col_step(new CSeqTable_column);
456         table->SetColumns().push_back(col_step);
457         col_step->SetHeader().SetField_name("value_step");
458         col_step->SetDefault().SetReal(stat.m_Step);
459 
460         CRef<CSeqTable_column> col_val(new CSeqTable_column);
461         table->SetColumns().push_back(col_val);
462         col_val->SetHeader().SetField_name("values");
463 
464         if ( 1 ) {
465             AutoPtr< vector<char> > values(new vector<char>());
466             values->reserve(size);
467             ITERATE ( TValues, it, m_Values ) {
468                 pos.push_back(it->m_Pos);
469                 if ( span_ptr ) {
470                     span_ptr->push_back(it->m_Span);
471                 }
472                 values->push_back(stat.AsByte(it->m_Value));
473             }
474             col_val->SetData().SetBytes().push_back(values.release());
475         }
476         else {
477             CSeqTable_multi_data::TInt& values = col_val->SetData().SetInt();
478             values.reserve(size);
479 
480             ITERATE ( TValues, it, m_Values ) {
481                 pos.push_back(it->m_Pos);
482                 if ( span_ptr ) {
483                     span_ptr->push_back(it->m_Span);
484                 }
485                 values.push_back(stat.AsByte(it->m_Value));
486             }
487         }
488     }
489     else {
490         CRef<CSeqTable_column> col_val(new CSeqTable_column);
491         table->SetColumns().push_back(col_val);
492         col_val->SetHeader().SetField_name("values");
493         CSeqTable_multi_data::TReal& values = col_val->SetData().SetReal();
494         values.reserve(size);
495 
496         ITERATE ( TValues, it, m_Values ) {
497             pos.push_back(it->m_Pos);
498             if ( span_ptr ) {
499                 span_ptr->push_back(it->m_Span);
500             }
501             values.push_back(it->m_Value);
502         }
503     }
504     return table;
505 }
506 
507 //  ----------------------------------------------------------------------------
xMakeGraph()508 CRef<CSeq_graph> CWiggleReader::xMakeGraph()
509 //  ----------------------------------------------------------------------------
510 {
511     CRef<CSeq_graph> graph(new CSeq_graph);
512 
513     CRef<CSeq_id> chrom_id = xMakeChromId();
514 
515     CRef<CSeq_loc> graph_loc(new CSeq_loc);
516     graph->SetLoc(*graph_loc);
517 
518     SWiggleStat stat;
519     xPreprocessValues(stat);
520 
521     xSetTotalLoc(*graph_loc, *chrom_id);
522 
523     string trackName = m_pTrackDefaults->Name();
524     if (!trackName.empty()) {
525         graph->SetTitle(trackName);
526     }
527 
528     graph->SetComp(stat.m_Span);
529     graph->SetA(stat.m_Step);
530     graph->SetB(stat.m_Min);
531 
532     CByte_graph& b_graph = graph->SetGraph().SetByte();
533     b_graph.SetMin(stat.AsByte(stat.m_Min));
534     b_graph.SetMax(stat.AsByte(stat.m_Max));
535     b_graph.SetAxis(0);
536     vector<char>& bytes = b_graph.SetValues();
537 
538     if ( m_Values.empty() ) {
539         graph->SetNumval(0);
540     }
541     else {
542         _ASSERT(stat.m_FixedSpan);
543         TSeqPos start = m_Values[0].m_Pos;
544         TSeqPos end = m_Values.back().GetEnd();
545         size_t size = (end-start)/stat.m_Span;
546         graph->SetNumval(static_cast<TSeqPos>(size));
547         bytes.resize(size, stat.AsByte(m_GapValue));
548         ITERATE ( TValues, it, m_Values ) {
549             TSeqPos pos = it->m_Pos - start;
550             TSeqPos span = it->m_Span;
551             _ASSERT(pos % stat.m_Span == 0);
552             _ASSERT(span % stat.m_Span == 0);
553             size_t i = pos / stat.m_Span;
554             int v = stat.AsByte(it->m_Value);
555             for ( ; span > 0; span -= stat.m_Span, ++i ) {
556                 bytes[i] = v;
557             }
558         }
559     }
560     return graph;
561 }
562 
563 //  ----------------------------------------------------------------------------
xSkipWS(string & line)564 bool CWiggleReader::xSkipWS(
565     string& line)
566 //  ----------------------------------------------------------------------------
567 {
568     const char* ptr = line.c_str();
569     size_t skip = 0;
570     for ( size_t len = line.size(); skip < len; ++skip ) {
571         char c = ptr[skip];
572         if ( c != ' ' && c != '\t' ) {
573             break;
574         }
575     }
576     line = line.substr(skip);
577     return !line.empty();
578 }
579 
580 //  ----------------------------------------------------------------------------
xGetWord(string & line)581 string CWiggleReader::xGetWord(
582     string& line)
583 //  ----------------------------------------------------------------------------
584 {
585     const char* ptr = line.c_str();
586     size_t skip = 0;
587     for ( size_t len = line.size(); skip < len; ++skip ) {
588         char c = ptr[skip];
589         if ( c == ' ' || c == '\t' ) {
590             break;
591         }
592     }
593     if ( skip == 0 ) {
594         CReaderMessage error(eDiag_Error,
595             m_uLineNumber,
596             "Identifier expected");
597         throw error;
598     }
599     string word(ptr, skip);
600     line = line.substr(skip);
601     return word;
602 }
603 
604 //  ----------------------------------------------------------------------------
xGetParamName(string & line)605 string CWiggleReader::xGetParamName(
606     string& line)
607 //  ----------------------------------------------------------------------------
608 {
609     const char* ptr = line.c_str();
610     size_t skip = 0;
611     for ( size_t len = line.size(); skip < len; ++skip ) {
612         char c = ptr[skip];
613         if ( c == '=' ) {
614             string name(ptr, skip);
615             line = line.substr(skip+1);
616             return name;
617         }
618         if ( c == ' ' || c == '\t' ) {
619             break;
620         }
621     }
622     CReaderMessage error(eDiag_Error,
623         m_uLineNumber,
624         "\"=\" expected");
625     throw error;
626 }
627 
628 //  ----------------------------------------------------------------------------
xGetParamValue(string & line)629 string CWiggleReader::xGetParamValue(
630     string& line)
631 //  ----------------------------------------------------------------------------
632 {
633     const char* ptr = line.c_str();
634     size_t len = line.size();
635     if ( len && *ptr == '"' ) {
636         size_t pos = 1;
637         for ( ; pos < len; ++pos ) {
638             char c = ptr[pos];
639             if ( c == '"' ) {
640                 string value(ptr, pos);
641                 line = line.substr(pos+1);
642                 return value;
643             }
644         }
645         CReaderMessage error(eDiag_Error,
646             m_uLineNumber,
647             "Open quotes");
648         throw error;
649     }
650     return xGetWord(line);
651 }
652 
653 //  ----------------------------------------------------------------------------
xGetPos(string & line,TSeqPos & v)654 void CWiggleReader::xGetPos(
655     string& line,
656     TSeqPos& v)
657 //  ----------------------------------------------------------------------------
658 {
659     CReaderMessage error(eDiag_Error,
660         m_uLineNumber,
661         "Integer value expected");
662 
663     char c = line.c_str()[0];
664     if ( c < '0' || c > '9' ) {
665         throw error;
666     }
667 
668     TSeqPos ret = 0;
669     const char* ptr = line.c_str();
670     for ( size_t skip = 0; ; ++skip ) {
671         char c = ptr[skip];
672         if ( c >= '0' && c <= '9' ) {
673             ret = ret*10 + (c-'0');
674         }
675         else if ( (c == ' ' || c == '\t' || c == '\0') && skip ) {
676             line = line.substr(skip);
677             v = ret;
678             return;
679         }
680         else {
681             throw error;
682         }
683     }
684 }
685 
686 //  ----------------------------------------------------------------------------
xTryGetDoubleSimple(string & line,double & v)687 bool CWiggleReader::xTryGetDoubleSimple(
688     string& line,
689     double& v)
690 //  ----------------------------------------------------------------------------
691 {
692     double ret = 0;
693     const char* ptr = line.c_str();
694     size_t skip = 0;
695     bool negate = false, digits = false;
696     for ( ; ; ++skip ) {
697         char c = ptr[skip];
698         if ( !skip ) {
699             if ( c == '-' ) {
700                 negate = true;
701                 continue;
702             }
703             if ( c == '+' ) {
704                 continue;
705             }
706         }
707         if ( c >= '0' && c <= '9' ) {
708             digits = true;
709             ret = ret*10 + (c-'0');
710         }
711         else if ( c == '.' ) {
712             ++skip;
713             break;
714         }
715         else if ( c == '\0' ) {
716             if ( !digits ) {
717                 return false;
718             }
719             line.clear();
720             if ( negate ) {
721                 ret = -ret;
722             }
723             v = ret;
724             return true;
725         }
726         else {
727             return false;
728         }
729     }
730     double digit_mul = 1;
731     for ( ; ; ++skip ) {
732         char c = ptr[skip];
733         if ( c >= '0' && c <= '9' ) {
734             digits = true;
735             digit_mul *= .1;
736             ret += (c-'0')*digit_mul;
737         }
738         else if ( (c == ' ' || c == '\t' || c == '\0') && digits ) {
739             line.clear();
740             v = (negate ? -ret : ret);
741             return true;
742         }
743         else {
744             return false;
745         }
746     }
747 }
748 
749 //  ----------------------------------------------------------------------------
xGetDouble(string & line,double & v)750 inline void CWiggleReader::xGetDouble(
751     string& line,
752     double& v)
753 //  ----------------------------------------------------------------------------
754 {
755     if (xTryGetDoubleSimple(line, v)) {
756         return;
757     }
758     const char* ptr = line.c_str();
759     char* endptr = 0;
760     v = strtod(ptr, &endptr);
761     if ( endptr == ptr ) {
762         CReaderMessage error(eDiag_Error,
763             m_uLineNumber,
764             "Floating point value expected");
765         throw error;
766     }
767     if ( *endptr ) {
768         CReaderMessage error(eDiag_Error,
769             m_uLineNumber,
770             "Extra text on line");
771         throw error;
772     }
773     line.clear();
774 }
775 
776 //  ----------------------------------------------------------------------------
xPostProcessAnnot(CSeq_annot & annot)777 void CWiggleReader::xPostProcessAnnot(
778     CSeq_annot& annot)
779 //  ----------------------------------------------------------------------------
780 {
781     if ( m_ChromId.empty() ) {
782         return;
783     }
784     if (m_iFlags & fAsGraph) {
785         annot.SetData().SetGraph().push_back(xMakeGraph());
786     }
787     else {
788         annot.SetData().SetSeq_table(*xMakeTable());
789     }
790     if (annot.GetData().Which() != CSeq_annot::TData::e_not_set) {
791         xAssignTrackData(annot);
792     }
793     m_ChromId.clear();
794 }
795 
796 //  ----------------------------------------------------------------------------
xDumpChromValues(void)797 void CWiggleReader::xDumpChromValues(void)
798 //  ----------------------------------------------------------------------------
799 {
800     if ( m_ChromId.empty() ) {
801         return;
802     }
803     if ( !m_Annot ) {
804         m_Annot = xCreateSeqAnnot();
805     }
806     if (m_iFlags & fAsGraph) {
807         m_Annot->SetData().SetGraph().push_back(xMakeGraph());
808     }
809     else {
810         m_Annot->SetData().SetSeq_table(*xMakeTable());
811     }
812 }
813 
814 //  ----------------------------------------------------------------------------
xSetChrom(const string & chrom)815 void CWiggleReader::xSetChrom(
816     const string& chrom)
817 //  ----------------------------------------------------------------------------
818 {
819     if ( chrom != m_ChromId ) {
820         xDumpChromValues();
821         if (m_iFlags & CWiggleReader::fAsGraph) {
822             m_Values.clear();
823         }
824         m_ChromId = chrom;
825     }
826 }
827 
828 //  ----------------------------------------------------------------------------
xParseBrowserLine(const string & line)829 bool CWiggleReader::xParseBrowserLine(
830     const string& line)
831 //  ----------------------------------------------------------------------------
832 {
833     if (!NStr::StartsWith(line, "browser")) {
834         return false;
835     }
836     return true;
837 }
838 
839 //  ----------------------------------------------------------------------------
xParseTrackLine(const string & line)840 bool CWiggleReader::xParseTrackLine(
841     const string& line)
842 //  ----------------------------------------------------------------------------
843 {
844     if (!xIsTrackLine(line)) {
845         return false;
846     }
847     CReaderBase::xParseTrackLine(line);
848 
849     m_TrackType = eTrackType_invalid;
850     if (m_pTrackDefaults->Type() == "wiggle_0") {
851         m_TrackType = eTrackType_wiggle_0;
852         return true;
853     }
854     if (m_pTrackDefaults->Type() == "bedGraph") {
855         m_TrackType = eTrackType_bedGraph;
856         return true;
857     }
858     CReaderMessage error(eDiag_Error,
859         m_uLineNumber,
860         "Invalid track type");
861     throw error;
862 }
863 
864 //  ----------------------------------------------------------------------------
865 bool
xProcessFixedStepData(TReaderData::const_iterator & curIt,const TReaderData & readerData)866 CWiggleReader::xProcessFixedStepData(
867     TReaderData::const_iterator& curIt,
868     const TReaderData& readerData)
869 //  ----------------------------------------------------------------------------
870 {
871     auto firstLine(curIt->mData);
872     if (!NStr::StartsWith(firstLine, "fixedStep")) {
873         return false;
874     }
875 
876     SFixedStepInfo fixedStepInfo;
877     xGetFixedStepInfo(firstLine, fixedStepInfo);
878     ++curIt;
879     xReadFixedStepData(fixedStepInfo, curIt, readerData);
880     return true;
881 }
882 
883 
884 //  ----------------------------------------------------------------------------
885 bool
xProcessVariableStepData(TReaderData::const_iterator & curIt,const TReaderData & readerData)886 CWiggleReader::xProcessVariableStepData(
887     TReaderData::const_iterator& curIt,
888     const TReaderData& readerData)
889 //  ----------------------------------------------------------------------------
890 {
891     auto firstLine(curIt->mData);
892     if (!NStr::StartsWith(firstLine, "variableStep")) {
893         return false;
894     }
895 
896     SVarStepInfo variableStepInfo;
897     xGetVariableStepInfo(firstLine, variableStepInfo);
898     ++curIt;
899     xReadVariableStepData(variableStepInfo, curIt, readerData);
900     return true;
901 }
902 
903 
904 //  ----------------------------------------------------------------------------
xGetFixedStepInfo(const string & directive,SFixedStepInfo & fixedStepInfo)905 void CWiggleReader::xGetFixedStepInfo(
906     const string& directive,
907     SFixedStepInfo& fixedStepInfo)
908 //  ----------------------------------------------------------------------------
909 {
910     if ( m_TrackType != eTrackType_wiggle_0 ) {
911         if ( m_TrackType != eTrackType_invalid ) {
912             CReaderMessage error(eDiag_Error,
913                 m_uLineNumber,
914                 "Track \"type=wiggle_0\" is required");
915             throw error;
916         }
917         else {
918             m_TrackType = eTrackType_wiggle_0;
919         }
920     }
921 
922     auto line(directive.substr(string("fixedStep").size() + 1));
923     NStr::TruncateSpacesInPlace(line);
924     fixedStepInfo.Reset();
925     while (xSkipWS(line)) {
926         string name = xGetParamName(line);
927         string value = xGetParamValue(line);
928         if (name == "chrom") {
929             fixedStepInfo.mChrom = value;
930             continue;
931         }
932         if (name == "start") {
933             fixedStepInfo.mStart = NStr::StringToUInt(value);
934             if (0 == fixedStepInfo.mStart) {
935                 CReaderMessage warning(eDiag_Warning,
936                     m_uLineNumber,
937                     "Bad start value: must be positive. Assuming \"start=1\"");
938                 m_pMessageHandler->Report(warning);
939                 fixedStepInfo.mStart = 1;
940             }
941             continue;
942         }
943         if (name == "step") {
944             fixedStepInfo.mStep = NStr::StringToUInt(value);
945             continue;
946         }
947         if (name == "span") {
948             fixedStepInfo.mSpan = NStr::StringToUInt(value);
949             continue;
950         }
951         CReaderMessage warning(eDiag_Warning,
952             m_uLineNumber,
953             "Bad parameter name. Ignored");
954         m_pMessageHandler->Report(warning);
955     }
956     if (fixedStepInfo.mChrom.empty()) {
957         CReaderMessage error(eDiag_Error,
958             m_uLineNumber,
959             "Missing chrom parameter");
960         throw error;
961     }
962     if (fixedStepInfo.mStart == 0) {
963         CReaderMessage error(eDiag_Error,
964             m_uLineNumber,
965             "Missing start parameter");
966         throw error;
967     }
968     if (fixedStepInfo.mStep == 0) {
969         CReaderMessage error(eDiag_Error,
970             m_uLineNumber,
971             "Missing step parameter");
972         throw error;
973     }
974 }
975 
976 //  ----------------------------------------------------------------------------
xReadFixedStepData(const SFixedStepInfo & fixedStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData)977 void CWiggleReader::xReadFixedStepData(
978     const SFixedStepInfo& fixedStepInfo,
979     TReaderData::const_iterator& curIt,
980     const TReaderData& readerData)
981 //  ----------------------------------------------------------------------------
982 {
983     xSetChrom(fixedStepInfo.mChrom);
984     SValueInfo value;
985     value.m_Chrom = fixedStepInfo.mChrom;
986     value.m_Pos = fixedStepInfo.mStart-1;
987     value.m_Span = fixedStepInfo.mSpan;
988     while (curIt != readerData.end()) {
989         auto line(curIt->mData);
990         xGetDouble(line, value.m_Value);
991         xAddValue(value);
992         value.m_Pos += fixedStepInfo.mStep;
993         curIt++;
994     }
995 }
996 
997 //  ----------------------------------------------------------------------------
xGetVariableStepInfo(const string & directive,SVarStepInfo & varStepInfo)998 void CWiggleReader::xGetVariableStepInfo(
999     const string& directive,
1000     SVarStepInfo& varStepInfo)
1001 //  ----------------------------------------------------------------------------
1002 {
1003     if ( m_TrackType != eTrackType_wiggle_0 ) {
1004         if ( m_TrackType != eTrackType_invalid ) {
1005             CReaderMessage error(eDiag_Error,
1006                 m_uLineNumber,
1007                 "Track \"type=wiggle_0\" is required");
1008             throw error;
1009         }
1010         else {
1011             m_TrackType = eTrackType_wiggle_0;
1012         }
1013     }
1014 
1015     varStepInfo.Reset();
1016     auto line(directive.substr(string("variableStep").size() + 1));
1017     while (xSkipWS(line)) {
1018         string name = xGetParamName(line);
1019         string value = xGetParamValue(line);
1020         if ( name == "chrom" ) {
1021             varStepInfo.mChrom = value;
1022         }
1023         else if ( name == "span" ) {
1024             varStepInfo.mSpan = NStr::StringToUInt(value);
1025         }
1026         else {
1027             CReaderMessage warning(eDiag_Warning,
1028                 m_uLineNumber,
1029                 "Bad parameter name. Ignored");
1030             m_pMessageHandler->Report(warning);
1031         }
1032     }
1033     if ( varStepInfo.mChrom.empty() ) {
1034         CReaderMessage error(eDiag_Error,
1035             m_uLineNumber,
1036             "Missing chrom parameter");
1037         throw error;
1038     }
1039 }
1040 
1041 //  ----------------------------------------------------------------------------
xReadVariableStepData(const SVarStepInfo & varStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData)1042 void CWiggleReader::xReadVariableStepData(
1043     const SVarStepInfo& varStepInfo,
1044     TReaderData::const_iterator& curIt,
1045     const TReaderData& readerData)
1046 //  ----------------------------------------------------------------------------
1047 {
1048     xSetChrom(varStepInfo.mChrom);
1049     SValueInfo value;
1050     value.m_Chrom = varStepInfo.mChrom;
1051     value.m_Span = varStepInfo.mSpan;
1052     while (curIt != readerData.end()) {
1053         string line(curIt->mData);
1054         xGetPos(line, value.m_Pos);
1055         xSkipWS(line);
1056         xGetDouble(line, value.m_Value);
1057         value.m_Pos -= 1;
1058         xAddValue(value);
1059         curIt++;
1060     }
1061 }
1062 
1063 //  =========================================================================
xProcessBedData(TReaderData::const_iterator & curIt,const TReaderData & readerData)1064 bool CWiggleReader::xProcessBedData(
1065     TReaderData::const_iterator& curIt,
1066     const TReaderData& readerData)
1067 //  =========================================================================
1068 {
1069     while (curIt != readerData.end()) {
1070         auto line(curIt->mData);
1071         auto chrom = xGetWord(line);
1072         xSetChrom(chrom);
1073 
1074         SValueInfo value;
1075         xSkipWS(line);
1076         xGetPos(line, value.m_Pos);
1077         xSkipWS(line);
1078         xGetPos(line, value.m_Span);
1079         xSkipWS(line);
1080         xGetDouble(line, value.m_Value);
1081         value.m_Span -= value.m_Pos;
1082         xAddValue(value);
1083 
1084         curIt++;
1085     }
1086     return true;
1087 }
1088 
1089 END_objects_SCOPE
1090 END_NCBI_SCOPE
1091