1 #ifndef SRA__READER__SRA__WGSREAD__HPP 2 #define SRA__READER__SRA__WGSREAD__HPP 3 /* $Id: graphread.hpp 562530 2018-04-23 20:00:53Z vasilche $ 4 * =========================================================================== 5 * 6 * PUBLIC DOMAIN NOTICE 7 * National Center for Biotechnology Information 8 * 9 * This software/database is a "United States Government Work" under the 10 * terms of the United States Copyright Act. It was written as part of 11 * the author's official duties as a United States Government employee and 12 * thus cannot be copyrighted. This software/database is freely available 13 * to the public for use. The National Library of Medicine and the U.S. 14 * Government have not placed any restriction on its use or reproduction. 15 * 16 * Although all reasonable efforts have been taken to ensure the accuracy 17 * and reliability of the software and data, the NLM and the U.S. 18 * Government do not and cannot warrant the performance or results that 19 * may be obtained by using this software or data. The NLM and the U.S. 20 * Government disclaim all warranties, express or implied, including 21 * warranties of performance, merchantability or fitness for any particular 22 * purpose. 23 * 24 * Please cite the author in any work or product based on this material. 25 * 26 * =========================================================================== 27 * 28 * Authors: Eugene Vasilchenko 29 * 30 * File Description: 31 * Access to VDB Graph files 32 * 33 */ 34 35 #include <corelib/ncbistd.hpp> 36 #include <corelib/ncbimtx.hpp> 37 #include <util/range.hpp> 38 #include <sra/readers/sra/vdbread.hpp> 39 #include <objects/seq/seq_id_handle.hpp> 40 #include <objects/seqres/Seq_graph.hpp> 41 #include <map> 42 #include <list> 43 44 BEGIN_NCBI_NAMESPACE; 45 BEGIN_NAMESPACE(objects); 46 47 class CSeq_entry; 48 class CSeq_graph; 49 class CSeq_table; 50 class CUser_object; 51 class CUser_field; 52 class CVDBGraphSeqIterator; 53 54 struct SVDBGraphDb_Base { 55 enum ELookupType { 56 eLookupDefault, 57 eLookupInMemory, 58 eLookupInVDB 59 }; 60 }; 61 62 class NCBI_SRAREAD_EXPORT CVDBGraphDb_Impl : public CObject, public SVDBGraphDb_Base 63 { 64 public: 65 CVDBGraphDb_Impl(CVDBMgr& mgr, CTempString path, ELookupType lookup_type = eLookupDefault); 66 virtual ~CVDBGraphDb_Impl(void); 67 68 // check if there are graph track of intermediate zoom level 69 bool HasMidZoomGraphs(void); 70 LookupIsInVDB() const71 bool LookupIsInVDB() const 72 { 73 return m_LookupIndex; 74 } LookupIsInMemory() const75 bool LookupIsInMemory() const 76 { 77 return !m_LookupIndex; 78 } 79 static bool LookupIsInMemory(ELookupType lookup_type); 80 81 protected: 82 friend class CVDBGraphSeqIterator; 83 84 // SSeqTableCursor is helper accessor structure for SEQUENCE table 85 struct SGraphTableCursor : public CObject { 86 SGraphTableCursor(const CVDBTable& table); 87 88 CVDBCursor m_Cursor; 89 90 typedef int64_t TGraphV; 91 typedef int64_t TGraphQ; 92 93 DECLARE_VDB_COLUMN_AS_STRING(SID); 94 DECLARE_VDB_COLUMN_AS(int64_t, START); 95 DECLARE_VDB_COLUMN_AS(uint32_t, LEN); 96 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q0); 97 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q10); 98 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q50); 99 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q90); 100 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q100); 101 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q0); 102 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q10); 103 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q50); 104 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q90); 105 DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q100); 106 DECLARE_VDB_COLUMN_AS(TGraphV, GRAPH); 107 DECLARE_VDB_COLUMN_AS(uint32_t, SCALE); 108 DECLARE_VDB_COLUMN_AS(int64_t, NUM_SWITCHES); 109 }; 110 GetPath(void) const111 const string& GetPath(void) const { 112 return m_Path; 113 } 114 115 // SSeqInfo holds cached refseq information - ids, len, rows 116 struct SSeqInfo { SSeqInfoCVDBGraphDb_Impl::SSeqInfo117 SSeqInfo() 118 : m_SeqLength(0), 119 m_RowSize(0), 120 m_RowFirst(0), 121 m_RowLast(0) 122 { 123 } 124 125 string m_SeqId; 126 CSeq_id_Handle m_Seq_id_Handle; 127 TSeqPos m_SeqLength; 128 TSeqPos m_RowSize; 129 TVDBRowId m_RowFirst, m_RowLast; 130 }; 131 typedef list<SSeqInfo> TSeqInfoList; 132 typedef map<CSeq_id_Handle, TSeqInfoList::iterator> TSeqInfoMapBySeq_id; 133 typedef map<TVDBRowId, TSeqInfoList::iterator> TSeqInfoMapByFirstRow; 134 GetSeqInfoList(void) const135 const TSeqInfoList& GetSeqInfoList(void) const { 136 return m_SeqList; 137 } GetSeqInfoMapBySeq_id(void) const138 const TSeqInfoMapBySeq_id& GetSeqInfoMapBySeq_id(void) const { 139 return m_SeqMapBySeq_id; 140 } 141 142 // get table object GraphTable(void)143 const CVDBTable& GraphTable(void) { 144 return m_GraphTable; 145 } 146 // get table accessor object for exclusive access 147 CRef<SGraphTableCursor> Graph(void); 148 // return table accessor object for reuse Put(CRef<SGraphTableCursor> & curs)149 void Put(CRef<SGraphTableCursor>& curs) { 150 m_Graph.Put(curs); 151 } 152 153 SSeqInfo GetSeqInfoAtRow(TVDBRowId row); 154 SSeqInfo GetSeqInfo(const CSeq_id_Handle& seq_id); 155 156 private: 157 CVDBMgr m_Mgr; 158 string m_Path; 159 160 CVDBTable m_GraphTable; 161 CVDBTableIndex m_LookupIndex; 162 163 CVDBObjectCache<SGraphTableCursor> m_Graph; 164 165 CMutex m_SeqInfoMutex; 166 TSeqInfoList m_SeqList; // list of cached refseqs' information 167 TSeqInfoMapBySeq_id m_SeqMapBySeq_id; // index for refseq info lookup 168 TSeqInfoMapByFirstRow m_SeqMapByFirstRow; // index for refseq info lookup 169 }; 170 171 172 class CVDBGraphDb : public CRef<CVDBGraphDb_Impl>, public SVDBGraphDb_Base 173 { 174 public: CVDBGraphDb(void)175 CVDBGraphDb(void) 176 { 177 } CVDBGraphDb(CVDBGraphDb_Impl * impl)178 explicit CVDBGraphDb(CVDBGraphDb_Impl* impl) 179 : CRef<CVDBGraphDb_Impl>(impl) 180 { 181 } CVDBGraphDb(CVDBMgr & mgr,CTempString path,ELookupType lookup_type=eLookupDefault)182 CVDBGraphDb(CVDBMgr& mgr, CTempString path, ELookupType lookup_type = eLookupDefault) 183 : CRef<CVDBGraphDb_Impl>(new CVDBGraphDb_Impl(mgr, path, lookup_type)) 184 { 185 } 186 }; 187 188 189 class NCBI_SRAREAD_EXPORT CVDBGraphSeqIterator 190 { 191 public: 192 typedef CVDBGraphDb_Impl::SSeqInfo SSeqInfo; 193 typedef CVDBGraphDb_Impl::SGraphTableCursor SGraphTableCursor; 194 typedef CVDBGraphDb_Impl::TSeqInfoList::const_iterator TSeqInfoIter; 195 CVDBGraphSeqIterator(void)196 CVDBGraphSeqIterator(void) 197 { 198 } 199 explicit CVDBGraphSeqIterator(const CVDBGraphDb& db); 200 CVDBGraphSeqIterator(const CVDBGraphDb& db, 201 const CSeq_id_Handle& seq_id); 202 operator !(void) const203 bool operator!(void) const { 204 return !m_Info.m_RowSize; 205 } operator const void*(void) const206 operator const void*(void) const { 207 return !*this? 0: this; 208 } 209 210 const SSeqInfo& GetInfo(void) const; operator *(void) const211 const SSeqInfo& operator*(void) const { 212 return GetInfo(); 213 } operator ->(void) const214 const SSeqInfo* operator->(void) const { 215 return &GetInfo(); 216 } 217 218 CVDBGraphSeqIterator& operator++(void); 219 GetSeqId(void) const220 const string& GetSeqId(void) const { 221 return GetInfo().m_SeqId; 222 } GetSeq_id_Handle(void) const223 const CSeq_id_Handle& GetSeq_id_Handle(void) const { 224 return GetInfo().m_Seq_id_Handle; 225 } 226 GetSeqLength(void) const227 TSeqPos GetSeqLength(void) const { 228 return GetInfo().m_SeqLength; 229 } 230 231 // Do not mix graphs of different zoom levels 232 enum EContentFlags { 233 // original detailed graph 234 fGraphMain = 1<<0, 235 236 // overview graphs (percentiles) 237 fGraphQ0 = 1<<1, 238 fGraphQ10 = 1<<2, 239 fGraphQ50 = 1<<3, 240 fGraphQ90 = 1<<4, 241 fGraphQ100 = 1<<5, 242 fGraphQAll = (fGraphQ0 | fGraphQ10 | fGraphQ50 | 243 fGraphQ90 | fGraphQ100), 244 245 // main graph representation - either Seq-table or Seq-graph, 246 // set both *As* flags to get more compact representation 247 fGraphMainAsTable = 1<<8, 248 fGraphMainAsGraph = 1<<9, 249 fGraphMainAsBest = (fGraphMainAsTable | fGraphMainAsGraph), 250 251 // zoom graphs if available (percentiles) 252 fGraphZoomQ0 = 1<<11, 253 fGraphZoomQ10 = 1<<12, 254 fGraphZoomQ50 = 1<<13, 255 fGraphZoomQ90 = 1<<14, 256 fGraphZoomQ100 = 1<<15, 257 fGraphZoomQAll = (fGraphZoomQ0 | fGraphZoomQ10 | fGraphZoomQ50 | 258 fGraphZoomQ90 | fGraphZoomQ100), 259 260 // overview graphs by default 261 fDefaultContent = fGraphQAll 262 }; 263 typedef int TContentFlags; 264 265 // Returns annot containing graphs over the specified range 266 // (CRange<TSeqPos> or COpenRange<TSeqPos>). 267 CRef<CSeq_annot> GetAnnot(COpenRange<TSeqPos> range, 268 const string& annot_name = kEmptyStr, 269 TContentFlags content = fDefaultContent) const; 270 271 bool SeqTableIsSmaller(COpenRange<TSeqPos> range) const; 272 273 protected: GetDb(void) const274 CVDBGraphDb_Impl& GetDb(void) const { 275 return m_Db.GetNCObject(); 276 } 277 278 CRef<CSeq_graph> x_MakeGraph(const string& annot_name, 279 CSeq_loc& loc, 280 const SSeqInfo& info, 281 const COpenRange<TSeqPos>& range, 282 TSeqPos step, 283 SGraphTableCursor& cursor, 284 CVDBColumn& column, 285 int level) const; 286 CRef<CSeq_table> x_MakeTable(const string& annot_name, 287 CSeq_loc& loc, 288 const SSeqInfo& info, 289 const COpenRange<TSeqPos>& range, 290 SGraphTableCursor& cursor) const; 291 292 bool x_SeqTableIsSmaller(COpenRange<TSeqPos> range, 293 SGraphTableCursor& cursor) const; 294 295 private: 296 CVDBGraphDb m_Db; 297 CVDBGraphDb_Impl::SSeqInfo m_Info; 298 }; 299 300 301 END_NAMESPACE(objects); 302 END_NCBI_NAMESPACE; 303 304 #endif // SRA__READER__SRA__WGSREAD__HPP 305