1 /*  $Id: wiggle_reader.hpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  *   WIGGLE file reader
30  *
31  */
32 
33 #ifndef OBJTOOLS_READERS___WIGGLEREADER__HPP
34 #define OBJTOOLS_READERS___WIGGLEREADER__HPP
35 
#include <corelib/ncbistd.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Seq_interval.hpp>

#include <objtools/readers/reader_base.hpp>

#include <limits>
42 
43 BEGIN_NCBI_SCOPE
44 
45 BEGIN_objects_SCOPE // namespace ncbi::objects::
46 
47 //  ============================================================================
48 struct SFixedStepInfo {
49 //  ============================================================================
50     string mChrom;
51     TSeqPos mStart;
52     TSeqPos mStep;
53     TSeqPos mSpan;
54 
ResetSFixedStepInfo55     void Reset() {
56         mChrom.clear();
57         mStart = mStep = 0;
58         mSpan = 1;
59     }
SFixedStepInfoSFixedStepInfo60     SFixedStepInfo() {
61         Reset();
62     }
63 };
64 
65 //  ============================================================================
66 struct SVarStepInfo {
67 //  ============================================================================
68     string mChrom;
69     TSeqPos mSpan;
70 
ResetSVarStepInfo71     void Reset() {
72         mChrom.clear();
73         mSpan =1;
74     }
SVarStepInfoSVarStepInfo75     SVarStepInfo() {
76         Reset();
77     }
78 };
79 
80 //  ============================================================================
81 struct SValueInfo {
82 //  ============================================================================
83     string m_Chrom;
84     TSeqPos m_Pos;
85     TSeqPos m_Span;
86     double m_Value;
87 
SValueInfoSValueInfo88     SValueInfo(): m_Pos(0), m_Span(1), m_Value(0.0) {};
89 
GetEndSValueInfo90     TSeqPos GetEnd(void) const {
91         return m_Pos + m_Span;
92     }
operator <SValueInfo93     bool operator<(const SValueInfo& v) const {
94         if (m_Chrom != v.m_Chrom) {
95             return m_Chrom < v.m_Chrom;
96         }
97         return m_Pos < v.m_Pos;
98     }
99 };
100 
101 //  ============================================================================
102 struct SWiggleStat {
103 //  ============================================================================
104     bool m_FixedSpan;
105     bool m_HaveGaps;
106     bool m_IntValues;
107     TSeqPos m_Span;
108     double m_Min, m_Max, m_Step, m_StepMul;
109 
SWiggleStatSWiggleStat110     SWiggleStat()
111         : m_FixedSpan(true),
112           m_HaveGaps(false),
113           m_IntValues(true),
114           m_Span(1),
115           m_Min(0),
116           m_Max(0),
117           m_Step(1),
118           m_StepMul(1)
119         {
120         }
SetFirstSpanSWiggleStat121     void SetFirstSpan(TSeqPos span)
122         {
123             m_FixedSpan = true;
124             m_Span = span;
125         }
AddSpanSWiggleStat126     void AddSpan(TSeqPos span)
127         {
128             if ( span != m_Span ) {
129                 m_FixedSpan = false;
130             }
131         }
SetFirstValueSWiggleStat132     void SetFirstValue(double v)
133         {
134             m_Min = m_Max = v;
135             m_IntValues = v == int(v);
136         }
AddValueSWiggleStat137     void AddValue(double v)
138         {
139             if ( v < m_Min ) {
140                 m_Min = v;
141             }
142             if ( v > m_Max ) {
143                 m_Max = v;
144             }
145             if ( m_IntValues && v != int(v) ) {
146                 m_IntValues = false;
147             }
148         }
AsByteSWiggleStat149     int AsByte(double v) const
150         {
151             return int((v-m_Min)*m_StepMul+.5);
152         }
153 };
154 
155 //  ----------------------------------------------------------------------------
156 class CRawWiggleRecord
157 //  ----------------------------------------------------------------------------
158 {
159 public:
CRawWiggleRecord(CSeq_id & id,unsigned int start,unsigned int span,double value)160     CRawWiggleRecord(
161         CSeq_id& id,
162         unsigned int start,
163         unsigned int span,
164         double value)
165     {
166         m_pInterval.Reset(new CSeq_interval());
167         m_pInterval->SetId(id);
168         m_pInterval->SetFrom(start-1);
169         m_pInterval->SetTo(start-1+span-1);
170         m_value = value;
171     };
172 
~CRawWiggleRecord()173     ~CRawWiggleRecord() {};
174 
Dump(CNcbiOstream & ostr) const175     void Dump(
176         CNcbiOstream& ostr) const
177     {
178         ostr << "  [CRawWiggleRecord ";
179         ostr << "id=\"" << m_pInterval->GetId().AsFastaString() << "\" ";
180         ostr << "start=" << m_pInterval->GetFrom() << " ";
181         ostr << "stop=" << m_pInterval->GetTo() << " ";
182         ostr << "value=" << m_value << "]" << endl;
183     }
184 
185 public:
186     CRef<CSeq_interval> m_pInterval;
187     double m_value;
188 };
189 
190 //  ----------------------------------------------------------------------------
191 class CRawWiggleTrack
192 //  ----------------------------------------------------------------------------
193 {
194 public:
CRawWiggleTrack()195     CRawWiggleTrack() {};
~CRawWiggleTrack()196     ~CRawWiggleTrack() {};
197 
198 public:
Reset()199     void Reset()
200     {
201         m_pId.Reset();
202         m_Records.clear();
203     }
204 
Dump(CNcbiOstream & ostr) const205     void Dump(
206         CNcbiOstream& ostr) const
207     {
208         ostr << "[CRawWiggleTrack" << endl;
209         for (vector<CRawWiggleRecord>::const_iterator it = m_Records.begin();
210                 it != m_Records.end(); ++it) {
211             it->Dump(ostr);
212         }
213         ostr << "]" << std::endl;
214     }
215 
AddRecord(CRawWiggleRecord record)216     void AddRecord(
217         CRawWiggleRecord record)
218     {
219         m_Records.push_back(record);
220     }
221 
Records() const222     const vector<CRawWiggleRecord>& Records() const
223     {
224         return m_Records;
225     }
226 
HasData() const227     bool HasData() const
228     {
229         return (!m_Records.empty());
230     }
231 
232 public:
233     CRef<CSeq_id> m_pId;
234     vector<CRawWiggleRecord> m_Records;
235 };
236 
237 //  ----------------------------------------------------------------------------
238 class NCBI_XOBJREAD_EXPORT CWiggleReader
239 //  ----------------------------------------------------------------------------
240     : public CReaderBase
241 {
242 public:
243     typedef vector<SValueInfo> TValues;
244 
245 public:
246     CWiggleReader(
247         int = fDefaults,
248         const string& = "",
249         const string& = "",
250         CReaderListener* = nullptr);
251 
252     virtual ~CWiggleReader();
253 
254     //
255     //  object interface:
256     //
257 public:
258     enum EWiggleFlags {
259         fDefaults = 0,
260         fJoinSame = 1<<8,
261         fAsByte = 1<<9,
262         fAsGraph = 1<<10,
263         fDumpStats = 1<<11,
264         fAsRaw = 1<<12,
265     };
266     typedef int TFlags;
267 
268     virtual CRef< CSeq_annot >
269     ReadSeqAnnot(
270         ILineReader&,
271         ILineErrorListener* =0 );
272 
273     virtual bool
274     ReadTrackData(
275         ILineReader&,
276         CRawWiggleTrack&,
277         ILineErrorListener* =0 );
278 
279     //
280     //  helpers:
281     //
282 protected:
283     void xGetData(
284         ILineReader&,
285         TReaderData&);
286 
287     void xProcessData(
288         const TReaderData&,
289         CSeq_annot&);
290 
291     void xPostProcessAnnot(
292         CSeq_annot&);
293 
294     bool
295     xParseBrowserLine(
296         const string&);
297 
298     bool
299     xParseTrackLine(
300         const string&);
301 
302     bool
303     xProcessFixedStepData(
304         TReaderData::const_iterator&,
305         const TReaderData&);
306 
307     void
308     xGetFixedStepInfo(
309         const string&,
310         SFixedStepInfo&);
311 
312     void
313     xReadFixedStepData(
314         const SFixedStepInfo&,
315         TReaderData::const_iterator&,
316         const TReaderData&);
317 
318     bool
319     xReadFixedStepDataRaw(
320         const SFixedStepInfo&,
321         TReaderData::const_iterator&,
322         const TReaderData&,
323         CRawWiggleTrack&);
324 
325     bool
326     xProcessVariableStepData(
327         TReaderData::const_iterator&,
328         const TReaderData&);
329 
330     bool
331     xProcessBedData(
332         TReaderData::const_iterator&,
333         const TReaderData&);
334 
335     void
336     xGetVariableStepInfo(
337         const string&,
338         SVarStepInfo&);
339 
340     void
341     xReadVariableStepData(
342         const SVarStepInfo&,
343         TReaderData::const_iterator&,
344         const TReaderData&);
345 
346     bool
347     xReadVariableStepDataRaw(
348         const SVarStepInfo&,
349         TReaderData::const_iterator&,
350         const TReaderData&,
351         CRawWiggleTrack&);
352 
353     string
354     xGetWord(
355         string&);
356 
357     bool
358     xSkipWS(
359         string&);
360 
361     string
362     xGetParamName(
363         string&);
364 
365     string
366     xGetParamValue(
367         string&);
368 
369     void
370     xGetPos(
371         string&,
372         TSeqPos& v);
373 
374     bool
375     xTryGetDoubleSimple(
376         string&,
377         double& v);
378 
379     void
380     xGetDouble(
381         string& line,
382         double& v);
383 
384     CRef<CSeq_id>
385     xMakeChromId();
386 
387     CRef<CSeq_table>
388     xMakeTable();
389 
390     CRef<CSeq_graph>
391     xMakeGraph();
392 
393     void
394     xPreprocessValues(
395         SWiggleStat&);
396 
397     void
xAddValue(const SValueInfo & value)398     xAddValue(const SValueInfo& value) {
399         if ( !m_OmitZeros || value.m_Value != 0 ) {
400             m_Values.push_back(value);
401         }
402     }
403 
404     double
405     xEstimateSize(
406         size_t rows,
407         bool fixed_span) const;
408 
409     void
410     xSetTotalLoc(
411         CSeq_loc& loc,
412         CSeq_id& chrom_id);
413 
414     void
415     xDumpChromValues();
416 
417     void
418     xSetChrom(
419         const string& chrom);
420 
421     bool
422     xValuesAreFromSingleSequence() const;
423 
424     //
425     //  data:
426     //
427 protected:
428     string m_ChromId;
429     TValues m_Values;
430     double m_GapValue;
431     bool m_SingleAnnot;
432     bool m_OmitZeros;
433 
434     enum ETrackType {
435         eTrackType_invalid,
436         eTrackType_wiggle_0,
437         eTrackType_bedGraph
438     };
439     ETrackType m_TrackType;
440     CRef<CSeq_annot> m_Annot;
441 };
442 
443 END_objects_SCOPE
444 END_NCBI_SCOPE
445 
446 #endif // OBJTOOLS_READERS___WIGGLEREADER__HPP
447