1 /* $Id: wiggle_reader.hpp 632526 2021-06-02 17:25:01Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Frank Ludwig 27 * 28 * File Description: 29 * WIGGLE file reader 30 * 31 */ 32 33 #ifndef OBJTOOLS_READERS___WIGGLEREADER__HPP 34 #define OBJTOOLS_READERS___WIGGLEREADER__HPP 35 36 #include <corelib/ncbistd.hpp> 37 #include <objects/seq/Seq_annot.hpp> 38 #include <objects/seqloc/Seq_id.hpp> 39 #include <objects/seqloc/Seq_interval.hpp> 40 41 #include <objtools/readers/reader_base.hpp> 42 43 BEGIN_NCBI_SCOPE 44 45 BEGIN_objects_SCOPE // namespace ncbi::objects:: 46 47 // ============================================================================ 48 struct SFixedStepInfo { 49 // ============================================================================ 50 string mChrom; 51 TSeqPos mStart; 52 TSeqPos mStep; 53 TSeqPos mSpan; 54 ResetSFixedStepInfo55 void Reset() { 56 mChrom.clear(); 57 mStart = mStep = 0; 58 mSpan = 1; 59 } SFixedStepInfoSFixedStepInfo60 SFixedStepInfo() { 61 Reset(); 62 } 63 }; 64 65 // ============================================================================ 66 struct SVarStepInfo { 67 // ============================================================================ 68 string mChrom; 69 TSeqPos mSpan; 70 ResetSVarStepInfo71 void Reset() { 72 mChrom.clear(); 73 mSpan =1; 74 } SVarStepInfoSVarStepInfo75 SVarStepInfo() { 76 Reset(); 77 } 78 }; 79 80 // ============================================================================ 81 struct SValueInfo { 82 // ============================================================================ 83 string m_Chrom; 84 TSeqPos m_Pos; 85 TSeqPos m_Span; 86 double m_Value; 87 SValueInfoSValueInfo88 SValueInfo(): m_Pos(0), m_Span(1), m_Value(0.0) {}; 89 GetEndSValueInfo90 TSeqPos GetEnd(void) const { 91 return m_Pos + m_Span; 92 } operator <SValueInfo93 bool operator<(const SValueInfo& v) const { 94 if (m_Chrom != v.m_Chrom) { 95 return m_Chrom < v.m_Chrom; 96 } 97 return m_Pos < v.m_Pos; 98 } 99 }; 100 101 // ============================================================================ 102 struct SWiggleStat { 103 // ============================================================================ 104 bool m_FixedSpan; 105 bool m_HaveGaps; 106 bool m_IntValues; 107 TSeqPos m_Span; 108 double m_Min, m_Max, m_Step, m_StepMul; 109 SWiggleStatSWiggleStat110 SWiggleStat() 111 : m_FixedSpan(true), 112 m_HaveGaps(false), 113 m_IntValues(true), 114 m_Span(1), 115 m_Min(0), 116 m_Max(0), 117 m_Step(1), 118 m_StepMul(1) 119 { 120 } SetFirstSpanSWiggleStat121 void SetFirstSpan(TSeqPos span) 122 { 123 m_FixedSpan = true; 124 m_Span = span; 125 } AddSpanSWiggleStat126 void AddSpan(TSeqPos span) 127 { 128 if ( span != m_Span ) { 129 m_FixedSpan = false; 130 } 131 } SetFirstValueSWiggleStat132 void SetFirstValue(double v) 133 { 134 m_Min = m_Max = v; 135 m_IntValues = v == int(v); 136 } AddValueSWiggleStat137 void AddValue(double v) 138 { 139 if ( v < m_Min ) { 140 m_Min = v; 141 } 142 if ( v > m_Max ) { 143 m_Max = v; 144 } 145 if ( m_IntValues && v != int(v) ) { 146 m_IntValues = false; 147 } 148 } AsByteSWiggleStat149 int AsByte(double v) const 150 { 151 return int((v-m_Min)*m_StepMul+.5); 152 } 153 }; 154 155 // ---------------------------------------------------------------------------- 156 class CRawWiggleRecord 157 // ---------------------------------------------------------------------------- 158 { 159 public: CRawWiggleRecord(CSeq_id & id,unsigned int start,unsigned int span,double value)160 CRawWiggleRecord( 161 CSeq_id& id, 162 unsigned int start, 163 unsigned int span, 164 double value) 165 { 166 m_pInterval.Reset(new CSeq_interval()); 167 m_pInterval->SetId(id); 168 m_pInterval->SetFrom(start-1); 169 m_pInterval->SetTo(start-1+span-1); 170 m_value = value; 171 }; 172 ~CRawWiggleRecord()173 ~CRawWiggleRecord() {}; 174 Dump(CNcbiOstream & ostr) const175 void Dump( 176 CNcbiOstream& ostr) const 177 { 178 ostr << " [CRawWiggleRecord "; 179 ostr << "id=\"" << m_pInterval->GetId().AsFastaString() << "\" "; 180 ostr << "start=" << m_pInterval->GetFrom() << " "; 181 ostr << "stop=" << m_pInterval->GetTo() << " "; 182 ostr << "value=" << m_value << "]" << endl; 183 } 184 185 public: 186 CRef<CSeq_interval> m_pInterval; 187 double m_value; 188 }; 189 190 // ---------------------------------------------------------------------------- 191 class CRawWiggleTrack 192 // ---------------------------------------------------------------------------- 193 { 194 public: CRawWiggleTrack()195 CRawWiggleTrack() {}; ~CRawWiggleTrack()196 ~CRawWiggleTrack() {}; 197 198 public: Reset()199 void Reset() 200 { 201 m_pId.Reset(); 202 m_Records.clear(); 203 } 204 Dump(CNcbiOstream & ostr) const205 void Dump( 206 CNcbiOstream& ostr) const 207 { 208 ostr << "[CRawWiggleTrack" << endl; 209 for (vector<CRawWiggleRecord>::const_iterator it = m_Records.begin(); 210 it != m_Records.end(); ++it) { 211 it->Dump(ostr); 212 } 213 ostr << "]" << std::endl; 214 } 215 AddRecord(CRawWiggleRecord record)216 void AddRecord( 217 CRawWiggleRecord record) 218 { 219 m_Records.push_back(record); 220 } 221 Records() const222 const vector<CRawWiggleRecord>& Records() const 223 { 224 return m_Records; 225 } 226 HasData() const227 bool HasData() const 228 { 229 return (!m_Records.empty()); 230 } 231 232 public: 233 CRef<CSeq_id> m_pId; 234 vector<CRawWiggleRecord> m_Records; 235 }; 236 237 // ---------------------------------------------------------------------------- 238 class NCBI_XOBJREAD_EXPORT CWiggleReader 239 // ---------------------------------------------------------------------------- 240 : public CReaderBase 241 { 242 public: 243 typedef vector<SValueInfo> TValues; 244 245 public: 246 CWiggleReader( 247 int = fDefaults, 248 const string& = "", 249 const string& = "", 250 CReaderListener* = nullptr); 251 252 virtual ~CWiggleReader(); 253 254 // 255 // object interface: 256 // 257 public: 258 enum EWiggleFlags { 259 fDefaults = 0, 260 fJoinSame = 1<<8, 261 fAsByte = 1<<9, 262 fAsGraph = 1<<10, 263 fDumpStats = 1<<11, 264 fAsRaw = 1<<12, 265 }; 266 typedef int TFlags; 267 268 virtual CRef< CSeq_annot > 269 ReadSeqAnnot( 270 ILineReader&, 271 ILineErrorListener* =0 ); 272 273 virtual bool 274 ReadTrackData( 275 ILineReader&, 276 CRawWiggleTrack&, 277 ILineErrorListener* =0 ); 278 279 // 280 // helpers: 281 // 282 protected: 283 void xGetData( 284 ILineReader&, 285 TReaderData&); 286 287 void xProcessData( 288 const TReaderData&, 289 CSeq_annot&); 290 291 void xPostProcessAnnot( 292 CSeq_annot&); 293 294 bool 295 xParseBrowserLine( 296 const string&); 297 298 bool 299 xParseTrackLine( 300 const string&); 301 302 bool 303 xProcessFixedStepData( 304 TReaderData::const_iterator&, 305 const TReaderData&); 306 307 void 308 xGetFixedStepInfo( 309 const string&, 310 SFixedStepInfo&); 311 312 void 313 xReadFixedStepData( 314 const SFixedStepInfo&, 315 TReaderData::const_iterator&, 316 const TReaderData&); 317 318 bool 319 xReadFixedStepDataRaw( 320 const SFixedStepInfo&, 321 TReaderData::const_iterator&, 322 const TReaderData&, 323 CRawWiggleTrack&); 324 325 bool 326 xProcessVariableStepData( 327 TReaderData::const_iterator&, 328 const TReaderData&); 329 330 bool 331 xProcessBedData( 332 TReaderData::const_iterator&, 333 const TReaderData&); 334 335 void 336 xGetVariableStepInfo( 337 const string&, 338 SVarStepInfo&); 339 340 void 341 xReadVariableStepData( 342 const SVarStepInfo&, 343 TReaderData::const_iterator&, 344 const TReaderData&); 345 346 bool 347 xReadVariableStepDataRaw( 348 const SVarStepInfo&, 349 TReaderData::const_iterator&, 350 const TReaderData&, 351 CRawWiggleTrack&); 352 353 string 354 xGetWord( 355 string&); 356 357 bool 358 xSkipWS( 359 string&); 360 361 string 362 xGetParamName( 363 string&); 364 365 string 366 xGetParamValue( 367 string&); 368 369 void 370 xGetPos( 371 string&, 372 TSeqPos& v); 373 374 bool 375 xTryGetDoubleSimple( 376 string&, 377 double& v); 378 379 void 380 xGetDouble( 381 string& line, 382 double& v); 383 384 CRef<CSeq_id> 385 xMakeChromId(); 386 387 CRef<CSeq_table> 388 xMakeTable(); 389 390 CRef<CSeq_graph> 391 xMakeGraph(); 392 393 void 394 xPreprocessValues( 395 SWiggleStat&); 396 397 void xAddValue(const SValueInfo & value)398 xAddValue(const SValueInfo& value) { 399 if ( !m_OmitZeros || value.m_Value != 0 ) { 400 m_Values.push_back(value); 401 } 402 } 403 404 double 405 xEstimateSize( 406 size_t rows, 407 bool fixed_span) const; 408 409 void 410 xSetTotalLoc( 411 CSeq_loc& loc, 412 CSeq_id& chrom_id); 413 414 void 415 xDumpChromValues(); 416 417 void 418 xSetChrom( 419 const string& chrom); 420 421 bool 422 xValuesAreFromSingleSequence() const; 423 424 // 425 // data: 426 // 427 protected: 428 string m_ChromId; 429 TValues m_Values; 430 double m_GapValue; 431 bool m_SingleAnnot; 432 bool m_OmitZeros; 433 434 enum ETrackType { 435 eTrackType_invalid, 436 eTrackType_wiggle_0, 437 eTrackType_bedGraph 438 }; 439 ETrackType m_TrackType; 440 CRef<CSeq_annot> m_Annot; 441 }; 442 443 END_objects_SCOPE 444 END_NCBI_SCOPE 445 446 #endif // OBJTOOLS_READERS___WIGGLEREADER__HPP 447