1 #ifndef SRA__READER__SRA__WGSREAD__HPP
2 #define SRA__READER__SRA__WGSREAD__HPP
/*  $Id: graphread.hpp 562530 2018-04-23 20:00:53Z vasilche $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Eugene Vasilchenko
 *
 * File Description:
 *   Access to VDB Graph files
 *
 */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbimtx.hpp>
37 #include <util/range.hpp>
38 #include <sra/readers/sra/vdbread.hpp>
39 #include <objects/seq/seq_id_handle.hpp>
40 #include <objects/seqres/Seq_graph.hpp>
41 #include <map>
42 #include <list>
43 
44 BEGIN_NCBI_NAMESPACE;
45 BEGIN_NAMESPACE(objects);
46 
// Forward declarations of types this header refers to by name only.
// CSeq_annot is listed because CVDBGraphSeqIterator::GetAnnot() returns
// CRef<CSeq_annot>; without a visible declaration the header would depend on
// some other include happening to declare it.
class CSeq_annot;
class CSeq_entry;
class CSeq_graph;
class CSeq_table;
class CUser_object;
class CUser_field;
// Named as a friend by CVDBGraphDb_Impl before its own definition appears.
class CVDBGraphSeqIterator;
53 
// Common base of CVDBGraphDb_Impl and CVDBGraphDb; it only provides the
// ELookupType enumeration so that both classes can name its values directly.
struct SVDBGraphDb_Base {
    // Lookup mode accepted by the graph database constructors.
    enum ELookupType {
        // value used when the caller does not pass a lookup type
        eLookupDefault,
        // NOTE(review): presumably selects sequence lookup through in-memory
        // maps - confirm in the implementation file
        eLookupInMemory,
        // NOTE(review): presumably selects sequence lookup through a VDB
        // table index - confirm in the implementation file
        eLookupInVDB
    };
};
61 
62 class NCBI_SRAREAD_EXPORT CVDBGraphDb_Impl : public CObject, public SVDBGraphDb_Base
63 {
64 public:
65     CVDBGraphDb_Impl(CVDBMgr& mgr, CTempString path, ELookupType lookup_type = eLookupDefault);
66     virtual ~CVDBGraphDb_Impl(void);
67 
68     // check if there are graph track of intermediate zoom level
69     bool HasMidZoomGraphs(void);
70 
LookupIsInVDB() const71     bool LookupIsInVDB() const
72     {
73         return m_LookupIndex;
74     }
LookupIsInMemory() const75     bool LookupIsInMemory() const
76     {
77         return !m_LookupIndex;
78     }
79     static bool LookupIsInMemory(ELookupType lookup_type);
80 
81 protected:
82     friend class CVDBGraphSeqIterator;
83 
84     // SSeqTableCursor is helper accessor structure for SEQUENCE table
85     struct SGraphTableCursor : public CObject {
86         SGraphTableCursor(const CVDBTable& table);
87 
88         CVDBCursor m_Cursor;
89 
90         typedef int64_t TGraphV;
91         typedef int64_t TGraphQ;
92 
93         DECLARE_VDB_COLUMN_AS_STRING(SID);
94         DECLARE_VDB_COLUMN_AS(int64_t, START);
95         DECLARE_VDB_COLUMN_AS(uint32_t, LEN);
96         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q0);
97         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q10);
98         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q50);
99         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q90);
100         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_Q100);
101         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q0);
102         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q10);
103         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q50);
104         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q90);
105         DECLARE_VDB_COLUMN_AS(TGraphQ, GR_ZOOM_Q100);
106         DECLARE_VDB_COLUMN_AS(TGraphV, GRAPH);
107         DECLARE_VDB_COLUMN_AS(uint32_t, SCALE);
108         DECLARE_VDB_COLUMN_AS(int64_t, NUM_SWITCHES);
109     };
110 
GetPath(void) const111     const string& GetPath(void) const {
112         return m_Path;
113     }
114 
115     // SSeqInfo holds cached refseq information - ids, len, rows
116     struct SSeqInfo {
SSeqInfoCVDBGraphDb_Impl::SSeqInfo117         SSeqInfo()
118             : m_SeqLength(0),
119               m_RowSize(0),
120               m_RowFirst(0),
121               m_RowLast(0)
122             {
123             }
124 
125         string m_SeqId;
126         CSeq_id_Handle m_Seq_id_Handle;
127         TSeqPos m_SeqLength;
128         TSeqPos m_RowSize;
129         TVDBRowId m_RowFirst, m_RowLast;
130     };
131     typedef list<SSeqInfo> TSeqInfoList;
132     typedef map<CSeq_id_Handle, TSeqInfoList::iterator> TSeqInfoMapBySeq_id;
133     typedef map<TVDBRowId, TSeqInfoList::iterator> TSeqInfoMapByFirstRow;
134 
GetSeqInfoList(void) const135     const TSeqInfoList& GetSeqInfoList(void) const {
136         return m_SeqList;
137     }
GetSeqInfoMapBySeq_id(void) const138     const TSeqInfoMapBySeq_id& GetSeqInfoMapBySeq_id(void) const {
139         return m_SeqMapBySeq_id;
140     }
141 
142     // get table object
GraphTable(void)143     const CVDBTable& GraphTable(void) {
144         return m_GraphTable;
145     }
146     // get table accessor object for exclusive access
147     CRef<SGraphTableCursor> Graph(void);
148     // return table accessor object for reuse
Put(CRef<SGraphTableCursor> & curs)149     void Put(CRef<SGraphTableCursor>& curs) {
150         m_Graph.Put(curs);
151     }
152 
153     SSeqInfo GetSeqInfoAtRow(TVDBRowId row);
154     SSeqInfo GetSeqInfo(const CSeq_id_Handle& seq_id);
155 
156 private:
157     CVDBMgr m_Mgr;
158     string m_Path;
159 
160     CVDBTable m_GraphTable;
161     CVDBTableIndex m_LookupIndex;
162 
163     CVDBObjectCache<SGraphTableCursor> m_Graph;
164 
165     CMutex m_SeqInfoMutex;
166     TSeqInfoList m_SeqList; // list of cached refseqs' information
167     TSeqInfoMapBySeq_id m_SeqMapBySeq_id; // index for refseq info lookup
168     TSeqInfoMapByFirstRow m_SeqMapByFirstRow; // index for refseq info lookup
169 };
170 
171 
172 class CVDBGraphDb : public CRef<CVDBGraphDb_Impl>, public SVDBGraphDb_Base
173 {
174 public:
CVDBGraphDb(void)175     CVDBGraphDb(void)
176         {
177         }
CVDBGraphDb(CVDBGraphDb_Impl * impl)178     explicit CVDBGraphDb(CVDBGraphDb_Impl* impl)
179         : CRef<CVDBGraphDb_Impl>(impl)
180         {
181         }
CVDBGraphDb(CVDBMgr & mgr,CTempString path,ELookupType lookup_type=eLookupDefault)182     CVDBGraphDb(CVDBMgr& mgr, CTempString path, ELookupType lookup_type = eLookupDefault)
183         : CRef<CVDBGraphDb_Impl>(new CVDBGraphDb_Impl(mgr, path, lookup_type))
184         {
185         }
186 };
187 
188 
189 class NCBI_SRAREAD_EXPORT CVDBGraphSeqIterator
190 {
191 public:
192     typedef CVDBGraphDb_Impl::SSeqInfo SSeqInfo;
193     typedef CVDBGraphDb_Impl::SGraphTableCursor SGraphTableCursor;
194     typedef CVDBGraphDb_Impl::TSeqInfoList::const_iterator TSeqInfoIter;
195 
CVDBGraphSeqIterator(void)196     CVDBGraphSeqIterator(void)
197         {
198         }
199     explicit CVDBGraphSeqIterator(const CVDBGraphDb& db);
200     CVDBGraphSeqIterator(const CVDBGraphDb& db,
201                          const CSeq_id_Handle& seq_id);
202 
operator !(void) const203     bool operator!(void) const {
204         return !m_Info.m_RowSize;
205     }
operator const void*(void) const206     operator const void*(void) const {
207         return !*this? 0: this;
208     }
209 
210     const SSeqInfo& GetInfo(void) const;
operator *(void) const211     const SSeqInfo& operator*(void) const {
212         return GetInfo();
213     }
operator ->(void) const214     const SSeqInfo* operator->(void) const {
215         return &GetInfo();
216     }
217 
218     CVDBGraphSeqIterator& operator++(void);
219 
GetSeqId(void) const220     const string& GetSeqId(void) const {
221         return GetInfo().m_SeqId;
222     }
GetSeq_id_Handle(void) const223     const CSeq_id_Handle& GetSeq_id_Handle(void) const {
224         return GetInfo().m_Seq_id_Handle;
225     }
226 
GetSeqLength(void) const227     TSeqPos GetSeqLength(void) const {
228         return GetInfo().m_SeqLength;
229     }
230 
231     // Do not mix graphs of different zoom levels
232     enum EContentFlags {
233         // original detailed graph
234         fGraphMain = 1<<0,
235 
236         // overview graphs (percentiles)
237         fGraphQ0   = 1<<1,
238         fGraphQ10  = 1<<2,
239         fGraphQ50  = 1<<3,
240         fGraphQ90  = 1<<4,
241         fGraphQ100 = 1<<5,
242         fGraphQAll = (fGraphQ0 | fGraphQ10 | fGraphQ50 |
243                       fGraphQ90 | fGraphQ100),
244 
245         // main graph representation - either Seq-table or Seq-graph,
246         // set both *As* flags to get more compact representation
247         fGraphMainAsTable = 1<<8,
248         fGraphMainAsGraph = 1<<9,
249         fGraphMainAsBest = (fGraphMainAsTable | fGraphMainAsGraph),
250 
251         // zoom graphs if available (percentiles)
252         fGraphZoomQ0   = 1<<11,
253         fGraphZoomQ10  = 1<<12,
254         fGraphZoomQ50  = 1<<13,
255         fGraphZoomQ90  = 1<<14,
256         fGraphZoomQ100 = 1<<15,
257         fGraphZoomQAll = (fGraphZoomQ0 | fGraphZoomQ10 | fGraphZoomQ50 |
258                           fGraphZoomQ90 | fGraphZoomQ100),
259 
260         // overview graphs by default
261         fDefaultContent = fGraphQAll
262     };
263     typedef int TContentFlags;
264 
265     // Returns annot containing graphs over the specified range
266     // (CRange<TSeqPos> or COpenRange<TSeqPos>).
267     CRef<CSeq_annot> GetAnnot(COpenRange<TSeqPos> range,
268                               const string& annot_name = kEmptyStr,
269                               TContentFlags content = fDefaultContent) const;
270 
271     bool SeqTableIsSmaller(COpenRange<TSeqPos> range) const;
272 
273 protected:
GetDb(void) const274     CVDBGraphDb_Impl& GetDb(void) const {
275         return m_Db.GetNCObject();
276     }
277 
278     CRef<CSeq_graph> x_MakeGraph(const string& annot_name,
279                                  CSeq_loc& loc,
280                                  const SSeqInfo& info,
281                                  const COpenRange<TSeqPos>& range,
282                                  TSeqPos step,
283                                  SGraphTableCursor& cursor,
284                                  CVDBColumn& column,
285                                  int level) const;
286     CRef<CSeq_table> x_MakeTable(const string& annot_name,
287                                  CSeq_loc& loc,
288                                  const SSeqInfo& info,
289                                  const COpenRange<TSeqPos>& range,
290                                  SGraphTableCursor& cursor) const;
291 
292     bool x_SeqTableIsSmaller(COpenRange<TSeqPos> range,
293                              SGraphTableCursor& cursor) const;
294 
295 private:
296     CVDBGraphDb m_Db;
297     CVDBGraphDb_Impl::SSeqInfo m_Info;
298 };
299 
300 
301 END_NAMESPACE(objects);
302 END_NCBI_NAMESPACE;
303 
304 #endif // SRA__READER__SRA__WGSREAD__HPP
305