1 #ifndef SRA__READER__SRA__WGSRESOLVER_IMPL__HPP
2 #define SRA__READER__SRA__WGSRESOLVER_IMPL__HPP
3 /*  $Id: wgsresolver_impl.hpp 632486 2021-06-02 11:14:23Z ivanov $
4  * ===========================================================================
5  *
6  *                            PUBLIC DOMAIN NOTICE
7  *               National Center for Biotechnology Information
8  *
9  *  This software/database is a "United States Government Work" under the
10  *  terms of the United States Copyright Act.  It was written as part of
11  *  the author's official duties as a United States Government employee and
12  *  thus cannot be copyrighted.  This software/database is freely available
13  *  to the public for use. The National Library of Medicine and the U.S.
14  *  Government have not placed any restriction on its use or reproduction.
15  *
16  *  Although all reasonable efforts have been taken to ensure the accuracy
17  *  and reliability of the software and data, the NLM and the U.S.
18  *  Government do not and cannot warrant the performance or results that
19  *  may be obtained by using this software or data. The NLM and the U.S.
20  *  Government disclaim all warranties, express or implied, including
21  *  warranties of performance, merchantability or fitness for any particular
22  *  purpose.
23  *
24  *  Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors:  Eugene Vasilchenko
29  *
30  * File Description:
31  *   Resolve WGS accessions
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbimtx.hpp>
37 #include <sra/readers/sra/wgsresolver.hpp>
38 #include <sra/readers/sra/vdbread.hpp>
39 #include <util/rangemap.hpp>
40 
41 BEGIN_NCBI_NAMESPACE;
42 BEGIN_NAMESPACE(objects);
43 
// Forward declarations: these types appear in this header only as
// references, pointers or CRef<> parameters, so their full definitions
// are not needed here.
class CDataLoader;
class CDbtag;
class CID2Client;
class CID2_Reply;
class CSeq_id;
class CTextseq_id;
50 
51 class NCBI_SRAREAD_EXPORT CWGSResolver_VDB : public CWGSResolver
52 {
53 public:
54     enum EIndexType {
55         eMainIndex,
56         eSecondIndex,
57         eThirdIndex
58     };
59     explicit CWGSResolver_VDB(const CVDBMgr& mgr,
60                               EIndexType index_type = eMainIndex,
61                               CWGSResolver_VDB* next_resolver = 0);
62     CWGSResolver_VDB(const CVDBMgr& mgr,
63                      const string& path,
64                      CWGSResolver_VDB* next_resolver = 0);
65     ~CWGSResolver_VDB(void);
66 
67     static CRef<CWGSResolver> CreateResolver(const CVDBMgr& mgr);
68 
69     // default path to main index
70     static string GetDefaultWGSIndexPath(EIndexType index_type = eMainIndex);
71     static string GetDefaultWGSIndexAcc(EIndexType index_type = eMainIndex);
72 
73     void Open(const CVDBMgr& mgr, const string& path);
74     void Reopen(void);
75     void Close(void);
76 
GetWGSIndexPath(void) const77     const string& GetWGSIndexPath(void) const {
78         return m_WGSIndexPath;
79     }
GetWGSIndexResolvedPath(void) const80     const string& GetWGSIndexResolvedPath(void) const {
81         return m_WGSIndexResolvedPath;
82     }
83 
IsValid(void) const84     bool IsValid(void) const {
85         return m_Db;
86     }
87 
GetTimestamp(void) const88     const CTime& GetTimestamp(void) const {
89         return m_Timestamp;
90     }
91 
92     // return all WGS accessions that could contain gi or accession
93     virtual TWGSPrefixes GetPrefixes(TGi gi);
94     virtual TWGSPrefixes GetPrefixes(const string& acc);
95 
96     // force update of indexes from files
97     virtual bool Update(void);
98 
99 protected:
100     // helper accessor structures for index tables
101     struct SGiIdxTableCursor;
102     struct SAccIdxTableCursor;
103 
GiIdxTable(void)104     const CVDBTable& GiIdxTable(void) {
105         return m_GiIdxTable;
106     }
AccIdxTable(void)107     const CVDBTable& AccIdxTable(void) {
108         return m_AccIdxTable;
109     }
110 
111     // get table accessor object for exclusive access
112     CRef<SGiIdxTableCursor> GiIdx(TIntId gi = 0);
113     CRef<SAccIdxTableCursor> AccIdx(void);
114     // return table accessor object for reuse
115     void Put(CRef<SGiIdxTableCursor>& curs, TIntId gi = 0);
116     void Put(CRef<SAccIdxTableCursor>& curs);
117 
118     void x_Close(); // unguarded
119     bool x_Update();
120 
121 private:
122     CVDBMgr m_Mgr;
123     typedef CRWLock TDBMutex;
124     TDBMutex m_DBMutex; // for update
125     string m_WGSIndexPath;
126     string m_WGSIndexResolvedPath;
127     CTime m_Timestamp;
128     CVDB m_Db;
129     CVDBTable m_GiIdxTable;
130     CVDBTable m_AccIdxTable;
131     CVDBTableIndex m_AccIndex;
132     bool m_AccIndexIsPrefix;
133     CVDBObjectCache<SGiIdxTableCursor> m_GiIdxCursorCache;
134     CVDBObjectCache<SAccIdxTableCursor> m_AccIdxCursorCache;
135     CRef<CWGSResolver_VDB> m_NextResolver;
136 };
137 
138 
139 class NCBI_SRAREAD_EXPORT CWGSResolver_Ids : public CWGSResolver
140 {
141 public:
142     CWGSResolver_Ids(void);
143     ~CWGSResolver_Ids(void);
144 
145     // return all WGS accessions that could contain gi or accession
146     virtual TWGSPrefixes GetPrefixes(TGi gi);
147     virtual TWGSPrefixes GetPrefixes(const string& acc);
148 
149 protected:
150     string ParseWGSAcc(const string& acc, bool protein) const;
151     string ParseWGSPrefix(const CDbtag& dbtag) const;
152     string ParseWGSPrefix(const CTextseq_id& text_id) const;
153     string ParseWGSPrefix(const CSeq_id& id) const;
154 
155     virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id) = 0;
156 };
157 
158 
159 class NCBI_SRAREAD_EXPORT CWGSResolver_DL : public CWGSResolver_Ids
160 {
161 public:
162     CWGSResolver_DL(void); // find GenBank loader
163     explicit
164     CWGSResolver_DL(CDataLoader* loader);
165     ~CWGSResolver_DL(void);
166 
167     static CRef<CWGSResolver> CreateResolver(void); // find GenBank loader
168     static CRef<CWGSResolver> CreateResolver(CDataLoader* loader);
169 
IsValid(void) const170     bool IsValid(void) const {
171         return m_Loader;
172     }
173 
174 protected:
175     virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id);
176 
177     CRef<CDataLoader> m_Loader;
178 };
179 
180 
181 class NCBI_SRAREAD_EXPORT CWGSResolver_Proc : public CWGSResolver_Ids
182 {
183 public:
184     explicit
185     CWGSResolver_Proc(CID2ProcessorResolver* resolver);
186     ~CWGSResolver_Proc(void);
187 
188     static CRef<CWGSResolver> CreateResolver(CID2ProcessorResolver* resolver);
189 
IsValid(void) const190     bool IsValid(void) const {
191         return m_Resolver;
192     }
193 
194 protected:
195     virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id);
196 
197     CRef<CID2ProcessorResolver> m_Resolver;
198 };
199 
200 
201 //#define WGS_RESOLVER_USE_ID2_CLIENT
202 
203 #ifdef WGS_RESOLVER_USE_ID2_CLIENT
204 class NCBI_SRAREAD_EXPORT CWGSResolver_ID2 : public CWGSResolver_Ids
205 {
206 public:
207     CWGSResolver_ID2(void);
208     ~CWGSResolver_ID2(void);
209 
210     static CRef<CWGSResolver> CreateResolver(void);
211 
IsValid(void) const212     bool IsValid(void) const {
213         return m_ID2Client;
214     }
215 
216     // force update of indexes from files
217     virtual bool Update(void);
218 
219 protected:
220     string ParseWGSPrefix(const CID2_Reply& reply) const;
221 
222     virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id);
223 
224     CMutex m_Mutex; // for cache
225     typedef map<string, string> TCache;
226     TCache m_Cache;
227     CRef<CID2Client> m_ID2Client;
228 };
229 #endif
230 
231 
232 END_NAMESPACE(objects);
233 END_NCBI_NAMESPACE;
234 
235 #endif // SRA__READER__SRA__WGSRESOLVER_IMPL__HPP
236