1 /* $Id: wiggle_reader.cpp 632526 2021-06-02 17:25:01Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig, leaning heavily on code lifted from the the wig2table
27 * project, by Aaron Ucko.
28 *
29 * File Description:
30 * WIGGLE file reader
31 *
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36
37 #include <util/line_reader.hpp>
38
39 #include <objects/seq/Annotdesc.hpp>
40 #include <objects/seqtable/Seq_table.hpp>
41 #include <objects/seqtable/SeqTable_column.hpp>
42 #include <objects/seqres/Seq_graph.hpp>
43 #include <objects/seqres/Byte_graph.hpp>
44
45 #include <objtools/readers/wiggle_reader.hpp>
46 #include "reader_message_handler.hpp"
47
48 BEGIN_NCBI_SCOPE
49 BEGIN_objects_SCOPE
50
51 // ----------------------------------------------------------------------------
52 bool
xValuesAreFromSingleSequence() const53 CWiggleReader::xValuesAreFromSingleSequence() const
54 // ----------------------------------------------------------------------------
55 {
56 if (m_Values.empty()) {
57 return false;
58 }
59 TValues::const_iterator cit = m_Values.begin();
60 string first = cit->m_Chrom;
61 for (cit++; cit != m_Values.end(); ++cit) {
62 if (cit->m_Chrom != first) {
63 return false;
64 }
65 }
66 return true;
67 }
68
69 // ----------------------------------------------------------------------------
CWiggleReader(int iFlags,const string & name,const string & title,CReaderListener * pRl)70 CWiggleReader::CWiggleReader(
71 int iFlags,
72 const string& name,
73 const string& title,
74 CReaderListener* pRl):
75 // ----------------------------------------------------------------------------
76 CReaderBase(iFlags, name, title, CReadUtil::AsSeqId, pRl),
77 m_OmitZeros(false),
78 m_TrackType(eTrackType_invalid)
79 {
80 m_uLineNumber = 0;
81 m_GapValue = 0.0;
82 }
83
84 // ----------------------------------------------------------------------------
~CWiggleReader()85 CWiggleReader::~CWiggleReader()
86 // ----------------------------------------------------------------------------
87 {
88 }
89
90 // ----------------------------------------------------------------------------
91 CRef< CSeq_annot >
ReadSeqAnnot(ILineReader & lr,ILineErrorListener * pEL)92 CWiggleReader::ReadSeqAnnot(
93 ILineReader& lr,
94 ILineErrorListener* pEL)
95 // ----------------------------------------------------------------------------
96 {
97 m_ChromId.clear();
98 m_Values.clear();
99 if (!(m_iFlags & CWiggleReader::fAsGraph)) {
100 m_ChromId.clear();
101 m_Values.clear();
102 xParseTrackLine("track type=wiggle_0");
103 }
104
105 xProgressInit(lr);
106
107 m_uDataCount = 0;
108 CRef<CSeq_annot> pAnnot = xCreateSeqAnnot();
109
110 TReaderData readerData;
111 xGuardedGetData(lr, readerData, pEL);
112 if (readerData.empty()) {
113 pAnnot.Reset();
114 return pAnnot;
115 }
116 xGuardedProcessData(readerData, *pAnnot, pEL);
117 xPostProcessAnnot(*pAnnot);
118 return pAnnot;
119 }
120
121 // ----------------------------------------------------------------------------
122 void
xGetData(ILineReader & lr,TReaderData & readerData)123 CWiggleReader::xGetData(
124 ILineReader& lr,
125 TReaderData& readerData)
126 // ----------------------------------------------------------------------------
127 {
128 // Goal: Extract one wiggle graph object (including all its data lines) at
129 // once
130 bool haveData = false;
131 readerData.clear();
132 string line;
133 while (xGetLine(lr, line)) {
134 bool isMeta = NStr::StartsWith(line, "fixedStep") ||
135 NStr::StartsWith(line, "variableStep") ||
136 xIsTrackLine(line) ||
137 xIsBrowserLine(line);
138 if (isMeta) {
139 if (haveData) {
140 xUngetLine(lr);
141 break;
142 }
143 }
144 readerData.push_back(TReaderLine{m_uLineNumber, line});
145 if (!isMeta) {
146 haveData = true;
147 }
148 ++m_uDataCount;
149 }
150 }
151
152 // ----------------------------------------------------------------------------
153 void
xProcessData(const TReaderData & readerData,CSeq_annot &)154 CWiggleReader::xProcessData(
155 const TReaderData& readerData,
156 CSeq_annot&)
157 // ----------------------------------------------------------------------------
158 {
159 for (auto curData = readerData.begin(); curData != readerData.end(); curData++) {
160 auto line = curData->mData;
161 if (xParseBrowserLine(line)) {
162 continue;
163 }
164 if (xParseTrackLine(line)) {
165 continue;
166 }
167
168 if (xProcessFixedStepData(curData, readerData)) {
169 break;
170 }
171 if (xProcessVariableStepData(curData, readerData)) {
172 break;
173 }
174 xProcessBedData(curData, readerData);
175 }
176 }
177
178
179 // ----------------------------------------------------------------------------
180 bool
ReadTrackData(ILineReader & lr,CRawWiggleTrack & rawdata,ILineErrorListener * pMessageListener)181 CWiggleReader::ReadTrackData(
182 ILineReader& lr,
183 CRawWiggleTrack& rawdata,
184 ILineErrorListener* pMessageListener)
185 // ----------------------------------------------------------------------------
186 {
187 TReaderData readerData;
188 xGuardedGetData(lr, readerData, pMessageListener);
189 auto curIt = readerData.cbegin();
190 while (curIt != readerData.end()) {
191 auto firstLine(curIt->mData);
192 if (NStr::StartsWith(firstLine, "fixedStep")) {
193 SFixedStepInfo fixedStepInfo;
194 xGetFixedStepInfo(firstLine, fixedStepInfo);
195 return xReadFixedStepDataRaw(
196 fixedStepInfo, ++curIt, readerData, rawdata);
197 }
198 if (NStr::StartsWith(firstLine, "variableStep")) {
199 SVarStepInfo varStepInfo;
200 xGetVariableStepInfo(firstLine, varStepInfo);
201 return xReadVariableStepDataRaw(
202 varStepInfo, ++curIt, readerData, rawdata);
203 }
204 ++curIt;
205 }
206 return false;
207 }
208
209 // ----------------------------------------------------------------------------
210 bool
xReadFixedStepDataRaw(const SFixedStepInfo & fixedStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData,CRawWiggleTrack & rawdata)211 CWiggleReader::xReadFixedStepDataRaw(
212 const SFixedStepInfo& fixedStepInfo,
213 TReaderData::const_iterator& curIt,
214 const TReaderData& readerData,
215 CRawWiggleTrack& rawdata)
216 // ----------------------------------------------------------------------------
217 {
218 rawdata.Reset();
219
220 CRef<CSeq_id> id =
221 CReadUtil::AsSeqId(fixedStepInfo.mChrom, m_iFlags);
222
223 unsigned int pos(fixedStepInfo.mStart);
224 while (curIt != readerData.end()) {
225 auto line(curIt->mData);
226 double value(0);
227 xGetDouble(line, value);
228 rawdata.AddRecord(
229 CRawWiggleRecord(*id, pos, fixedStepInfo.mSpan, value));
230 pos += fixedStepInfo.mStep;
231 curIt++;
232 }
233 return rawdata.HasData();
234 }
235
236 // ----------------------------------------------------------------------------
237 bool
xReadVariableStepDataRaw(const SVarStepInfo & varStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData,CRawWiggleTrack & rawdata)238 CWiggleReader::xReadVariableStepDataRaw(
239 const SVarStepInfo& varStepInfo,
240 TReaderData::const_iterator& curIt,
241 const TReaderData& readerData,
242 CRawWiggleTrack& rawdata)
243 // ----------------------------------------------------------------------------
244 {
245 rawdata.Reset();
246
247 CRef<CSeq_id> id =
248 CReadUtil::AsSeqId(varStepInfo.mChrom, m_iFlags);
249
250 while (curIt != readerData.end()) {
251 auto line(curIt->mData);
252 unsigned int pos(0);
253 xGetPos(line, pos);
254 xSkipWS(line);
255 double value(0);
256 xGetDouble(line, value);
257 rawdata.AddRecord(
258 CRawWiggleRecord(*id, pos, varStepInfo.mSpan, value));
259 curIt++;
260 }
261 return rawdata.HasData();
262 }
263
264
265 // ----------------------------------------------------------------------------
xEstimateSize(size_t rows,bool fixed_span) const266 double CWiggleReader::xEstimateSize(size_t rows, bool fixed_span) const
267 // ----------------------------------------------------------------------------
268 {
269 double ret = 0;
270 ret += rows*4;
271 if ( !fixed_span )
272 ret += rows*4;
273 (m_iFlags & fAsByte) ? (ret += rows) : (ret += 8*rows);
274 return ret;
275 }
276
277 // ----------------------------------------------------------------------------
xPreprocessValues(SWiggleStat & stat)278 void CWiggleReader::xPreprocessValues(SWiggleStat& stat)
279 // ----------------------------------------------------------------------------
280 {
281 bool sorted = true;
282 size_t size = m_Values.size();
283 if ( size ) {
284 stat.SetFirstSpan(m_Values[0].m_Span);
285 stat.SetFirstValue(m_Values[0].m_Value);
286
287 for ( size_t i = 1; i < size; ++i ) {
288 stat.AddSpan(m_Values[i].m_Span);
289 stat.AddValue(m_Values[i].m_Value);
290 if ( sorted ) {
291 if ( m_Values[i].m_Pos < m_Values[i-1].m_Pos ) {
292 sorted = false;
293 }
294 if ( m_Values[i].m_Pos != m_Values[i-1].GetEnd() ) {
295 stat.m_HaveGaps = true;
296 }
297 }
298 }
299 }
300 if ( !sorted ) {
301 sort(m_Values.begin(), m_Values.end());
302 stat.m_HaveGaps = false;
303 for ( size_t i = 1; i < size; ++i ) {
304 if ( m_Values[i].m_Pos != m_Values[i-1].GetEnd() ) {
305 stat.m_HaveGaps = true;
306 break;
307 }
308 }
309 }
310 if ( (m_iFlags & fAsGraph) && stat.m_HaveGaps ) {
311 stat.AddValue(m_GapValue);
312 }
313
314 const int range = 255;
315 if ( stat.m_Max > stat.m_Min &&
316 (!stat.m_IntValues || stat.m_Max-stat.m_Min > range) ) {
317 stat.m_Step = (stat.m_Max-stat.m_Min)/range;
318 stat.m_StepMul = 1/stat.m_Step;
319 }
320
321 if ( !(m_iFlags & fAsGraph) && (m_iFlags & fJoinSame) && size ) {
322 TValues nv;
323 nv.reserve(size);
324 nv.push_back(m_Values[0]);
325 for ( size_t i = 1; i < size; ++i ) {
326 if ( m_Values[i].m_Pos == nv.back().GetEnd() &&
327 m_Values[i].m_Value == nv.back().m_Value ) {
328 nv.back().m_Span += m_Values[i].m_Span;
329 }
330 else {
331 nv.push_back(m_Values[i]);
332 }
333 }
334 if ( nv.size() != size ) {
335 double s = xEstimateSize(size, stat.m_FixedSpan);
336 double ns = xEstimateSize(nv.size(), false);
337 if ( ns < s*.75 ) {
338 m_Values.swap(nv);
339 size = m_Values.size();
340 LOG_POST("Joined size: "<<size);
341 stat.m_FixedSpan = false;
342 }
343 }
344 }
345
346 if ( (m_iFlags & fAsGraph) && !stat.m_FixedSpan ) {
347 stat.m_Span = 1;
348 stat.m_FixedSpan = true;
349 }
350 }
351
352 // ----------------------------------------------------------------------------
xMakeChromId()353 CRef<CSeq_id> CWiggleReader::xMakeChromId()
354 // ----------------------------------------------------------------------------
355 {
356 CRef<CSeq_id> id =
357 CReadUtil::AsSeqId(m_ChromId, m_iFlags);
358 return id;
359 }
360
361 // ----------------------------------------------------------------------------
xSetTotalLoc(CSeq_loc & loc,CSeq_id & chrom_id)362 void CWiggleReader::xSetTotalLoc(CSeq_loc& loc, CSeq_id& chrom_id)
363 // ----------------------------------------------------------------------------
364 {
365 if ( m_Values.empty() ) {
366 loc.SetEmpty(chrom_id);
367 }
368 else {
369 CSeq_interval& interval = loc.SetInt();
370 interval.SetId(chrom_id);
371 interval.SetFrom(m_Values.front().m_Pos);
372 interval.SetTo(m_Values.back().GetEnd()-1);
373 }
374 }
375
376 // ----------------------------------------------------------------------------
xMakeTable()377 CRef<CSeq_table> CWiggleReader::xMakeTable()
378 // ----------------------------------------------------------------------------
379 {
380 size_t size = m_Values.size();
381
382 CRef<CSeq_table> table(new CSeq_table);
383 table->SetFeat_type(0);
384
385 CRef<CSeq_id> chrom_id = xMakeChromId();
386
387 CRef<CSeq_loc> table_loc(new CSeq_loc);
388 { // Seq-table location
389 CRef<CSeqTable_column> col_id(new CSeqTable_column);
390 table->SetColumns().push_back(col_id);
391 col_id->SetHeader().SetField_name("Seq-table location");
392 col_id->SetDefault().SetLoc(*table_loc);
393 }
394
395 { // Seq-id
396 CRef<CSeqTable_column> col_id(new CSeqTable_column);
397 table->SetColumns().push_back(col_id);
398 col_id->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_id);
399 //col_id->SetDefault().SetId(*chrom_id);
400 if (this->xValuesAreFromSingleSequence()) {
401 CRef<CSeq_id> pId = CReadUtil::AsSeqId(m_Values.front().m_Chrom, m_iFlags);
402 col_id->SetDefault().SetId(*pId);
403 }
404 else {
405 CSeqTable_multi_data::TId& seq_id = col_id->SetData().SetId();
406 seq_id.reserve(size);
407 for (TValues::const_iterator cit = m_Values.begin(); cit != m_Values.end(); ++cit) {
408 CRef<CSeq_id> pId = CReadUtil::AsSeqId(cit->m_Chrom, m_iFlags);
409 seq_id.push_back(pId);
410 }
411 }
412 }
413
414 // position
415 CRef<CSeqTable_column> col_pos(new CSeqTable_column);
416 table->SetColumns().push_back(col_pos);
417 col_pos->SetHeader().SetField_id(CSeqTable_column_info::eField_id_location_from);
418 CSeqTable_multi_data::TInt& pos = col_pos->SetData().SetInt();
419
420 SWiggleStat stat;
421 xPreprocessValues(stat);
422
423 xSetTotalLoc(*table_loc, *chrom_id);
424
425 table->SetNum_rows(static_cast<TSeqPos>(size));
426 pos.reserve(size);
427
428 CSeqTable_multi_data::TInt* span_ptr = 0;
429 { // span
430 CRef<CSeqTable_column> col_span(new CSeqTable_column);
431 table->SetColumns().push_back(col_span);
432 col_span->SetHeader().SetField_name("span");
433 if ( stat.m_FixedSpan ) {
434 col_span->SetDefault().SetInt(stat.m_Span);
435 }
436 else {
437 span_ptr = &col_span->SetData().SetInt();
438 span_ptr->reserve(size);
439 }
440 }
441
442 if ( stat.m_HaveGaps ) {
443 CRef<CSeqTable_column> col_step(new CSeqTable_column);
444 table->SetColumns().push_back(col_step);
445 col_step->SetHeader().SetField_name("value_gap");
446 col_step->SetDefault().SetReal(m_GapValue);
447 }
448
449 if (m_iFlags & fAsByte) { // values
450 CRef<CSeqTable_column> col_min(new CSeqTable_column);
451 table->SetColumns().push_back(col_min);
452 col_min->SetHeader().SetField_name("value_min");
453 col_min->SetDefault().SetReal(stat.m_Min);
454
455 CRef<CSeqTable_column> col_step(new CSeqTable_column);
456 table->SetColumns().push_back(col_step);
457 col_step->SetHeader().SetField_name("value_step");
458 col_step->SetDefault().SetReal(stat.m_Step);
459
460 CRef<CSeqTable_column> col_val(new CSeqTable_column);
461 table->SetColumns().push_back(col_val);
462 col_val->SetHeader().SetField_name("values");
463
464 if ( 1 ) {
465 AutoPtr< vector<char> > values(new vector<char>());
466 values->reserve(size);
467 ITERATE ( TValues, it, m_Values ) {
468 pos.push_back(it->m_Pos);
469 if ( span_ptr ) {
470 span_ptr->push_back(it->m_Span);
471 }
472 values->push_back(stat.AsByte(it->m_Value));
473 }
474 col_val->SetData().SetBytes().push_back(values.release());
475 }
476 else {
477 CSeqTable_multi_data::TInt& values = col_val->SetData().SetInt();
478 values.reserve(size);
479
480 ITERATE ( TValues, it, m_Values ) {
481 pos.push_back(it->m_Pos);
482 if ( span_ptr ) {
483 span_ptr->push_back(it->m_Span);
484 }
485 values.push_back(stat.AsByte(it->m_Value));
486 }
487 }
488 }
489 else {
490 CRef<CSeqTable_column> col_val(new CSeqTable_column);
491 table->SetColumns().push_back(col_val);
492 col_val->SetHeader().SetField_name("values");
493 CSeqTable_multi_data::TReal& values = col_val->SetData().SetReal();
494 values.reserve(size);
495
496 ITERATE ( TValues, it, m_Values ) {
497 pos.push_back(it->m_Pos);
498 if ( span_ptr ) {
499 span_ptr->push_back(it->m_Span);
500 }
501 values.push_back(it->m_Value);
502 }
503 }
504 return table;
505 }
506
507 // ----------------------------------------------------------------------------
xMakeGraph()508 CRef<CSeq_graph> CWiggleReader::xMakeGraph()
509 // ----------------------------------------------------------------------------
510 {
511 CRef<CSeq_graph> graph(new CSeq_graph);
512
513 CRef<CSeq_id> chrom_id = xMakeChromId();
514
515 CRef<CSeq_loc> graph_loc(new CSeq_loc);
516 graph->SetLoc(*graph_loc);
517
518 SWiggleStat stat;
519 xPreprocessValues(stat);
520
521 xSetTotalLoc(*graph_loc, *chrom_id);
522
523 string trackName = m_pTrackDefaults->Name();
524 if (!trackName.empty()) {
525 graph->SetTitle(trackName);
526 }
527
528 graph->SetComp(stat.m_Span);
529 graph->SetA(stat.m_Step);
530 graph->SetB(stat.m_Min);
531
532 CByte_graph& b_graph = graph->SetGraph().SetByte();
533 b_graph.SetMin(stat.AsByte(stat.m_Min));
534 b_graph.SetMax(stat.AsByte(stat.m_Max));
535 b_graph.SetAxis(0);
536 vector<char>& bytes = b_graph.SetValues();
537
538 if ( m_Values.empty() ) {
539 graph->SetNumval(0);
540 }
541 else {
542 _ASSERT(stat.m_FixedSpan);
543 TSeqPos start = m_Values[0].m_Pos;
544 TSeqPos end = m_Values.back().GetEnd();
545 size_t size = (end-start)/stat.m_Span;
546 graph->SetNumval(static_cast<TSeqPos>(size));
547 bytes.resize(size, stat.AsByte(m_GapValue));
548 ITERATE ( TValues, it, m_Values ) {
549 TSeqPos pos = it->m_Pos - start;
550 TSeqPos span = it->m_Span;
551 _ASSERT(pos % stat.m_Span == 0);
552 _ASSERT(span % stat.m_Span == 0);
553 size_t i = pos / stat.m_Span;
554 int v = stat.AsByte(it->m_Value);
555 for ( ; span > 0; span -= stat.m_Span, ++i ) {
556 bytes[i] = v;
557 }
558 }
559 }
560 return graph;
561 }
562
563 // ----------------------------------------------------------------------------
xSkipWS(string & line)564 bool CWiggleReader::xSkipWS(
565 string& line)
566 // ----------------------------------------------------------------------------
567 {
568 const char* ptr = line.c_str();
569 size_t skip = 0;
570 for ( size_t len = line.size(); skip < len; ++skip ) {
571 char c = ptr[skip];
572 if ( c != ' ' && c != '\t' ) {
573 break;
574 }
575 }
576 line = line.substr(skip);
577 return !line.empty();
578 }
579
580 // ----------------------------------------------------------------------------
xGetWord(string & line)581 string CWiggleReader::xGetWord(
582 string& line)
583 // ----------------------------------------------------------------------------
584 {
585 const char* ptr = line.c_str();
586 size_t skip = 0;
587 for ( size_t len = line.size(); skip < len; ++skip ) {
588 char c = ptr[skip];
589 if ( c == ' ' || c == '\t' ) {
590 break;
591 }
592 }
593 if ( skip == 0 ) {
594 CReaderMessage error(eDiag_Error,
595 m_uLineNumber,
596 "Identifier expected");
597 throw error;
598 }
599 string word(ptr, skip);
600 line = line.substr(skip);
601 return word;
602 }
603
604 // ----------------------------------------------------------------------------
xGetParamName(string & line)605 string CWiggleReader::xGetParamName(
606 string& line)
607 // ----------------------------------------------------------------------------
608 {
609 const char* ptr = line.c_str();
610 size_t skip = 0;
611 for ( size_t len = line.size(); skip < len; ++skip ) {
612 char c = ptr[skip];
613 if ( c == '=' ) {
614 string name(ptr, skip);
615 line = line.substr(skip+1);
616 return name;
617 }
618 if ( c == ' ' || c == '\t' ) {
619 break;
620 }
621 }
622 CReaderMessage error(eDiag_Error,
623 m_uLineNumber,
624 "\"=\" expected");
625 throw error;
626 }
627
628 // ----------------------------------------------------------------------------
xGetParamValue(string & line)629 string CWiggleReader::xGetParamValue(
630 string& line)
631 // ----------------------------------------------------------------------------
632 {
633 const char* ptr = line.c_str();
634 size_t len = line.size();
635 if ( len && *ptr == '"' ) {
636 size_t pos = 1;
637 for ( ; pos < len; ++pos ) {
638 char c = ptr[pos];
639 if ( c == '"' ) {
640 string value(ptr, pos);
641 line = line.substr(pos+1);
642 return value;
643 }
644 }
645 CReaderMessage error(eDiag_Error,
646 m_uLineNumber,
647 "Open quotes");
648 throw error;
649 }
650 return xGetWord(line);
651 }
652
653 // ----------------------------------------------------------------------------
xGetPos(string & line,TSeqPos & v)654 void CWiggleReader::xGetPos(
655 string& line,
656 TSeqPos& v)
657 // ----------------------------------------------------------------------------
658 {
659 CReaderMessage error(eDiag_Error,
660 m_uLineNumber,
661 "Integer value expected");
662
663 char c = line.c_str()[0];
664 if ( c < '0' || c > '9' ) {
665 throw error;
666 }
667
668 TSeqPos ret = 0;
669 const char* ptr = line.c_str();
670 for ( size_t skip = 0; ; ++skip ) {
671 char c = ptr[skip];
672 if ( c >= '0' && c <= '9' ) {
673 ret = ret*10 + (c-'0');
674 }
675 else if ( (c == ' ' || c == '\t' || c == '\0') && skip ) {
676 line = line.substr(skip);
677 v = ret;
678 return;
679 }
680 else {
681 throw error;
682 }
683 }
684 }
685
686 // ----------------------------------------------------------------------------
xTryGetDoubleSimple(string & line,double & v)687 bool CWiggleReader::xTryGetDoubleSimple(
688 string& line,
689 double& v)
690 // ----------------------------------------------------------------------------
691 {
692 double ret = 0;
693 const char* ptr = line.c_str();
694 size_t skip = 0;
695 bool negate = false, digits = false;
696 for ( ; ; ++skip ) {
697 char c = ptr[skip];
698 if ( !skip ) {
699 if ( c == '-' ) {
700 negate = true;
701 continue;
702 }
703 if ( c == '+' ) {
704 continue;
705 }
706 }
707 if ( c >= '0' && c <= '9' ) {
708 digits = true;
709 ret = ret*10 + (c-'0');
710 }
711 else if ( c == '.' ) {
712 ++skip;
713 break;
714 }
715 else if ( c == '\0' ) {
716 if ( !digits ) {
717 return false;
718 }
719 line.clear();
720 if ( negate ) {
721 ret = -ret;
722 }
723 v = ret;
724 return true;
725 }
726 else {
727 return false;
728 }
729 }
730 double digit_mul = 1;
731 for ( ; ; ++skip ) {
732 char c = ptr[skip];
733 if ( c >= '0' && c <= '9' ) {
734 digits = true;
735 digit_mul *= .1;
736 ret += (c-'0')*digit_mul;
737 }
738 else if ( (c == ' ' || c == '\t' || c == '\0') && digits ) {
739 line.clear();
740 v = (negate ? -ret : ret);
741 return true;
742 }
743 else {
744 return false;
745 }
746 }
747 }
748
749 // ----------------------------------------------------------------------------
xGetDouble(string & line,double & v)750 inline void CWiggleReader::xGetDouble(
751 string& line,
752 double& v)
753 // ----------------------------------------------------------------------------
754 {
755 if (xTryGetDoubleSimple(line, v)) {
756 return;
757 }
758 const char* ptr = line.c_str();
759 char* endptr = 0;
760 v = strtod(ptr, &endptr);
761 if ( endptr == ptr ) {
762 CReaderMessage error(eDiag_Error,
763 m_uLineNumber,
764 "Floating point value expected");
765 throw error;
766 }
767 if ( *endptr ) {
768 CReaderMessage error(eDiag_Error,
769 m_uLineNumber,
770 "Extra text on line");
771 throw error;
772 }
773 line.clear();
774 }
775
776 // ----------------------------------------------------------------------------
xPostProcessAnnot(CSeq_annot & annot)777 void CWiggleReader::xPostProcessAnnot(
778 CSeq_annot& annot)
779 // ----------------------------------------------------------------------------
780 {
781 if ( m_ChromId.empty() ) {
782 return;
783 }
784 if (m_iFlags & fAsGraph) {
785 annot.SetData().SetGraph().push_back(xMakeGraph());
786 }
787 else {
788 annot.SetData().SetSeq_table(*xMakeTable());
789 }
790 if (annot.GetData().Which() != CSeq_annot::TData::e_not_set) {
791 xAssignTrackData(annot);
792 }
793 m_ChromId.clear();
794 }
795
796 // ----------------------------------------------------------------------------
xDumpChromValues(void)797 void CWiggleReader::xDumpChromValues(void)
798 // ----------------------------------------------------------------------------
799 {
800 if ( m_ChromId.empty() ) {
801 return;
802 }
803 if ( !m_Annot ) {
804 m_Annot = xCreateSeqAnnot();
805 }
806 if (m_iFlags & fAsGraph) {
807 m_Annot->SetData().SetGraph().push_back(xMakeGraph());
808 }
809 else {
810 m_Annot->SetData().SetSeq_table(*xMakeTable());
811 }
812 }
813
814 // ----------------------------------------------------------------------------
xSetChrom(const string & chrom)815 void CWiggleReader::xSetChrom(
816 const string& chrom)
817 // ----------------------------------------------------------------------------
818 {
819 if ( chrom != m_ChromId ) {
820 xDumpChromValues();
821 if (m_iFlags & CWiggleReader::fAsGraph) {
822 m_Values.clear();
823 }
824 m_ChromId = chrom;
825 }
826 }
827
828 // ----------------------------------------------------------------------------
xParseBrowserLine(const string & line)829 bool CWiggleReader::xParseBrowserLine(
830 const string& line)
831 // ----------------------------------------------------------------------------
832 {
833 if (!NStr::StartsWith(line, "browser")) {
834 return false;
835 }
836 return true;
837 }
838
839 // ----------------------------------------------------------------------------
xParseTrackLine(const string & line)840 bool CWiggleReader::xParseTrackLine(
841 const string& line)
842 // ----------------------------------------------------------------------------
843 {
844 if (!xIsTrackLine(line)) {
845 return false;
846 }
847 CReaderBase::xParseTrackLine(line);
848
849 m_TrackType = eTrackType_invalid;
850 if (m_pTrackDefaults->Type() == "wiggle_0") {
851 m_TrackType = eTrackType_wiggle_0;
852 return true;
853 }
854 if (m_pTrackDefaults->Type() == "bedGraph") {
855 m_TrackType = eTrackType_bedGraph;
856 return true;
857 }
858 CReaderMessage error(eDiag_Error,
859 m_uLineNumber,
860 "Invalid track type");
861 throw error;
862 }
863
864 // ----------------------------------------------------------------------------
865 bool
xProcessFixedStepData(TReaderData::const_iterator & curIt,const TReaderData & readerData)866 CWiggleReader::xProcessFixedStepData(
867 TReaderData::const_iterator& curIt,
868 const TReaderData& readerData)
869 // ----------------------------------------------------------------------------
870 {
871 auto firstLine(curIt->mData);
872 if (!NStr::StartsWith(firstLine, "fixedStep")) {
873 return false;
874 }
875
876 SFixedStepInfo fixedStepInfo;
877 xGetFixedStepInfo(firstLine, fixedStepInfo);
878 ++curIt;
879 xReadFixedStepData(fixedStepInfo, curIt, readerData);
880 return true;
881 }
882
883
884 // ----------------------------------------------------------------------------
885 bool
xProcessVariableStepData(TReaderData::const_iterator & curIt,const TReaderData & readerData)886 CWiggleReader::xProcessVariableStepData(
887 TReaderData::const_iterator& curIt,
888 const TReaderData& readerData)
889 // ----------------------------------------------------------------------------
890 {
891 auto firstLine(curIt->mData);
892 if (!NStr::StartsWith(firstLine, "variableStep")) {
893 return false;
894 }
895
896 SVarStepInfo variableStepInfo;
897 xGetVariableStepInfo(firstLine, variableStepInfo);
898 ++curIt;
899 xReadVariableStepData(variableStepInfo, curIt, readerData);
900 return true;
901 }
902
903
904 // ----------------------------------------------------------------------------
xGetFixedStepInfo(const string & directive,SFixedStepInfo & fixedStepInfo)905 void CWiggleReader::xGetFixedStepInfo(
906 const string& directive,
907 SFixedStepInfo& fixedStepInfo)
908 // ----------------------------------------------------------------------------
909 {
910 if ( m_TrackType != eTrackType_wiggle_0 ) {
911 if ( m_TrackType != eTrackType_invalid ) {
912 CReaderMessage error(eDiag_Error,
913 m_uLineNumber,
914 "Track \"type=wiggle_0\" is required");
915 throw error;
916 }
917 else {
918 m_TrackType = eTrackType_wiggle_0;
919 }
920 }
921
922 auto line(directive.substr(string("fixedStep").size() + 1));
923 NStr::TruncateSpacesInPlace(line);
924 fixedStepInfo.Reset();
925 while (xSkipWS(line)) {
926 string name = xGetParamName(line);
927 string value = xGetParamValue(line);
928 if (name == "chrom") {
929 fixedStepInfo.mChrom = value;
930 continue;
931 }
932 if (name == "start") {
933 fixedStepInfo.mStart = NStr::StringToUInt(value);
934 if (0 == fixedStepInfo.mStart) {
935 CReaderMessage warning(eDiag_Warning,
936 m_uLineNumber,
937 "Bad start value: must be positive. Assuming \"start=1\"");
938 m_pMessageHandler->Report(warning);
939 fixedStepInfo.mStart = 1;
940 }
941 continue;
942 }
943 if (name == "step") {
944 fixedStepInfo.mStep = NStr::StringToUInt(value);
945 continue;
946 }
947 if (name == "span") {
948 fixedStepInfo.mSpan = NStr::StringToUInt(value);
949 continue;
950 }
951 CReaderMessage warning(eDiag_Warning,
952 m_uLineNumber,
953 "Bad parameter name. Ignored");
954 m_pMessageHandler->Report(warning);
955 }
956 if (fixedStepInfo.mChrom.empty()) {
957 CReaderMessage error(eDiag_Error,
958 m_uLineNumber,
959 "Missing chrom parameter");
960 throw error;
961 }
962 if (fixedStepInfo.mStart == 0) {
963 CReaderMessage error(eDiag_Error,
964 m_uLineNumber,
965 "Missing start parameter");
966 throw error;
967 }
968 if (fixedStepInfo.mStep == 0) {
969 CReaderMessage error(eDiag_Error,
970 m_uLineNumber,
971 "Missing step parameter");
972 throw error;
973 }
974 }
975
976 // ----------------------------------------------------------------------------
xReadFixedStepData(const SFixedStepInfo & fixedStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData)977 void CWiggleReader::xReadFixedStepData(
978 const SFixedStepInfo& fixedStepInfo,
979 TReaderData::const_iterator& curIt,
980 const TReaderData& readerData)
981 // ----------------------------------------------------------------------------
982 {
983 xSetChrom(fixedStepInfo.mChrom);
984 SValueInfo value;
985 value.m_Chrom = fixedStepInfo.mChrom;
986 value.m_Pos = fixedStepInfo.mStart-1;
987 value.m_Span = fixedStepInfo.mSpan;
988 while (curIt != readerData.end()) {
989 auto line(curIt->mData);
990 xGetDouble(line, value.m_Value);
991 xAddValue(value);
992 value.m_Pos += fixedStepInfo.mStep;
993 curIt++;
994 }
995 }
996
997 // ----------------------------------------------------------------------------
xGetVariableStepInfo(const string & directive,SVarStepInfo & varStepInfo)998 void CWiggleReader::xGetVariableStepInfo(
999 const string& directive,
1000 SVarStepInfo& varStepInfo)
1001 // ----------------------------------------------------------------------------
1002 {
1003 if ( m_TrackType != eTrackType_wiggle_0 ) {
1004 if ( m_TrackType != eTrackType_invalid ) {
1005 CReaderMessage error(eDiag_Error,
1006 m_uLineNumber,
1007 "Track \"type=wiggle_0\" is required");
1008 throw error;
1009 }
1010 else {
1011 m_TrackType = eTrackType_wiggle_0;
1012 }
1013 }
1014
1015 varStepInfo.Reset();
1016 auto line(directive.substr(string("variableStep").size() + 1));
1017 while (xSkipWS(line)) {
1018 string name = xGetParamName(line);
1019 string value = xGetParamValue(line);
1020 if ( name == "chrom" ) {
1021 varStepInfo.mChrom = value;
1022 }
1023 else if ( name == "span" ) {
1024 varStepInfo.mSpan = NStr::StringToUInt(value);
1025 }
1026 else {
1027 CReaderMessage warning(eDiag_Warning,
1028 m_uLineNumber,
1029 "Bad parameter name. Ignored");
1030 m_pMessageHandler->Report(warning);
1031 }
1032 }
1033 if ( varStepInfo.mChrom.empty() ) {
1034 CReaderMessage error(eDiag_Error,
1035 m_uLineNumber,
1036 "Missing chrom parameter");
1037 throw error;
1038 }
1039 }
1040
1041 // ----------------------------------------------------------------------------
xReadVariableStepData(const SVarStepInfo & varStepInfo,TReaderData::const_iterator & curIt,const TReaderData & readerData)1042 void CWiggleReader::xReadVariableStepData(
1043 const SVarStepInfo& varStepInfo,
1044 TReaderData::const_iterator& curIt,
1045 const TReaderData& readerData)
1046 // ----------------------------------------------------------------------------
1047 {
1048 xSetChrom(varStepInfo.mChrom);
1049 SValueInfo value;
1050 value.m_Chrom = varStepInfo.mChrom;
1051 value.m_Span = varStepInfo.mSpan;
1052 while (curIt != readerData.end()) {
1053 string line(curIt->mData);
1054 xGetPos(line, value.m_Pos);
1055 xSkipWS(line);
1056 xGetDouble(line, value.m_Value);
1057 value.m_Pos -= 1;
1058 xAddValue(value);
1059 curIt++;
1060 }
1061 }
1062
1063 // =========================================================================
xProcessBedData(TReaderData::const_iterator & curIt,const TReaderData & readerData)1064 bool CWiggleReader::xProcessBedData(
1065 TReaderData::const_iterator& curIt,
1066 const TReaderData& readerData)
1067 // =========================================================================
1068 {
1069 while (curIt != readerData.end()) {
1070 auto line(curIt->mData);
1071 auto chrom = xGetWord(line);
1072 xSetChrom(chrom);
1073
1074 SValueInfo value;
1075 xSkipWS(line);
1076 xGetPos(line, value.m_Pos);
1077 xSkipWS(line);
1078 xGetPos(line, value.m_Span);
1079 xSkipWS(line);
1080 xGetDouble(line, value.m_Value);
1081 value.m_Span -= value.m_Pos;
1082 xAddValue(value);
1083
1084 curIt++;
1085 }
1086 return true;
1087 }
1088
1089 END_objects_SCOPE
1090 END_NCBI_SCOPE
1091