1 #ifndef SRA__READER__SRA__WGSRESOLVER_IMPL__HPP 2 #define SRA__READER__SRA__WGSRESOLVER_IMPL__HPP 3 /* $Id: wgsresolver_impl.hpp 632486 2021-06-02 11:14:23Z ivanov $ 4 * =========================================================================== 5 * 6 * PUBLIC DOMAIN NOTICE 7 * National Center for Biotechnology Information 8 * 9 * This software/database is a "United States Government Work" under the 10 * terms of the United States Copyright Act. It was written as part of 11 * the author's official duties as a United States Government employee and 12 * thus cannot be copyrighted. This software/database is freely available 13 * to the public for use. The National Library of Medicine and the U.S. 14 * Government have not placed any restriction on its use or reproduction. 15 * 16 * Although all reasonable efforts have been taken to ensure the accuracy 17 * and reliability of the software and data, the NLM and the U.S. 18 * Government do not and cannot warrant the performance or results that 19 * may be obtained by using this software or data. The NLM and the U.S. 20 * Government disclaim all warranties, express or implied, including 21 * warranties of performance, merchantability or fitness for any particular 22 * purpose. 23 * 24 * Please cite the author in any work or product based on this material. 25 * 26 * =========================================================================== 27 * 28 * Authors: Eugene Vasilchenko 29 * 30 * File Description: 31 * Resolve WGS accessions 32 * 33 */ 34 35 #include <corelib/ncbistd.hpp> 36 #include <corelib/ncbimtx.hpp> 37 #include <sra/readers/sra/wgsresolver.hpp> 38 #include <sra/readers/sra/vdbread.hpp> 39 #include <util/rangemap.hpp> 40 41 BEGIN_NCBI_NAMESPACE; 42 BEGIN_NAMESPACE(objects); 43 44 class CSeq_id; 45 class CTextseq_id; 46 class CDbtag; 47 class CID2_Reply; 48 class CID2Client; 49 class CDataLoader; 50 51 class NCBI_SRAREAD_EXPORT CWGSResolver_VDB : public CWGSResolver 52 { 53 public: 54 enum EIndexType { 55 eMainIndex, 56 eSecondIndex, 57 eThirdIndex 58 }; 59 explicit CWGSResolver_VDB(const CVDBMgr& mgr, 60 EIndexType index_type = eMainIndex, 61 CWGSResolver_VDB* next_resolver = 0); 62 CWGSResolver_VDB(const CVDBMgr& mgr, 63 const string& path, 64 CWGSResolver_VDB* next_resolver = 0); 65 ~CWGSResolver_VDB(void); 66 67 static CRef<CWGSResolver> CreateResolver(const CVDBMgr& mgr); 68 69 // default path to main index 70 static string GetDefaultWGSIndexPath(EIndexType index_type = eMainIndex); 71 static string GetDefaultWGSIndexAcc(EIndexType index_type = eMainIndex); 72 73 void Open(const CVDBMgr& mgr, const string& path); 74 void Reopen(void); 75 void Close(void); 76 GetWGSIndexPath(void) const77 const string& GetWGSIndexPath(void) const { 78 return m_WGSIndexPath; 79 } GetWGSIndexResolvedPath(void) const80 const string& GetWGSIndexResolvedPath(void) const { 81 return m_WGSIndexResolvedPath; 82 } 83 IsValid(void) const84 bool IsValid(void) const { 85 return m_Db; 86 } 87 GetTimestamp(void) const88 const CTime& GetTimestamp(void) const { 89 return m_Timestamp; 90 } 91 92 // return all WGS accessions that could contain gi or accession 93 virtual TWGSPrefixes GetPrefixes(TGi gi); 94 virtual TWGSPrefixes GetPrefixes(const string& acc); 95 96 // force update of indexes from files 97 virtual bool Update(void); 98 99 protected: 100 // helper accessor structures for index tables 101 struct SGiIdxTableCursor; 102 struct SAccIdxTableCursor; 103 GiIdxTable(void)104 const CVDBTable& GiIdxTable(void) { 105 return m_GiIdxTable; 106 } AccIdxTable(void)107 const CVDBTable& AccIdxTable(void) { 108 return m_AccIdxTable; 109 } 110 111 // get table accessor object for exclusive access 112 CRef<SGiIdxTableCursor> GiIdx(TIntId gi = 0); 113 CRef<SAccIdxTableCursor> AccIdx(void); 114 // return table accessor object for reuse 115 void Put(CRef<SGiIdxTableCursor>& curs, TIntId gi = 0); 116 void Put(CRef<SAccIdxTableCursor>& curs); 117 118 void x_Close(); // unguarded 119 bool x_Update(); 120 121 private: 122 CVDBMgr m_Mgr; 123 typedef CRWLock TDBMutex; 124 TDBMutex m_DBMutex; // for update 125 string m_WGSIndexPath; 126 string m_WGSIndexResolvedPath; 127 CTime m_Timestamp; 128 CVDB m_Db; 129 CVDBTable m_GiIdxTable; 130 CVDBTable m_AccIdxTable; 131 CVDBTableIndex m_AccIndex; 132 bool m_AccIndexIsPrefix; 133 CVDBObjectCache<SGiIdxTableCursor> m_GiIdxCursorCache; 134 CVDBObjectCache<SAccIdxTableCursor> m_AccIdxCursorCache; 135 CRef<CWGSResolver_VDB> m_NextResolver; 136 }; 137 138 139 class NCBI_SRAREAD_EXPORT CWGSResolver_Ids : public CWGSResolver 140 { 141 public: 142 CWGSResolver_Ids(void); 143 ~CWGSResolver_Ids(void); 144 145 // return all WGS accessions that could contain gi or accession 146 virtual TWGSPrefixes GetPrefixes(TGi gi); 147 virtual TWGSPrefixes GetPrefixes(const string& acc); 148 149 protected: 150 string ParseWGSAcc(const string& acc, bool protein) const; 151 string ParseWGSPrefix(const CDbtag& dbtag) const; 152 string ParseWGSPrefix(const CTextseq_id& text_id) const; 153 string ParseWGSPrefix(const CSeq_id& id) const; 154 155 virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id) = 0; 156 }; 157 158 159 class NCBI_SRAREAD_EXPORT CWGSResolver_DL : public CWGSResolver_Ids 160 { 161 public: 162 CWGSResolver_DL(void); // find GenBank loader 163 explicit 164 CWGSResolver_DL(CDataLoader* loader); 165 ~CWGSResolver_DL(void); 166 167 static CRef<CWGSResolver> CreateResolver(void); // find GenBank loader 168 static CRef<CWGSResolver> CreateResolver(CDataLoader* loader); 169 IsValid(void) const170 bool IsValid(void) const { 171 return m_Loader; 172 } 173 174 protected: 175 virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id); 176 177 CRef<CDataLoader> m_Loader; 178 }; 179 180 181 class NCBI_SRAREAD_EXPORT CWGSResolver_Proc : public CWGSResolver_Ids 182 { 183 public: 184 explicit 185 CWGSResolver_Proc(CID2ProcessorResolver* resolver); 186 ~CWGSResolver_Proc(void); 187 188 static CRef<CWGSResolver> CreateResolver(CID2ProcessorResolver* resolver); 189 IsValid(void) const190 bool IsValid(void) const { 191 return m_Resolver; 192 } 193 194 protected: 195 virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id); 196 197 CRef<CID2ProcessorResolver> m_Resolver; 198 }; 199 200 201 //#define WGS_RESOLVER_USE_ID2_CLIENT 202 203 #ifdef WGS_RESOLVER_USE_ID2_CLIENT 204 class NCBI_SRAREAD_EXPORT CWGSResolver_ID2 : public CWGSResolver_Ids 205 { 206 public: 207 CWGSResolver_ID2(void); 208 ~CWGSResolver_ID2(void); 209 210 static CRef<CWGSResolver> CreateResolver(void); 211 IsValid(void) const212 bool IsValid(void) const { 213 return m_ID2Client; 214 } 215 216 // force update of indexes from files 217 virtual bool Update(void); 218 219 protected: 220 string ParseWGSPrefix(const CID2_Reply& reply) const; 221 222 virtual TWGSPrefixes GetPrefixes(const CSeq_id& seq_id); 223 224 CMutex m_Mutex; // for cache 225 typedef map<string, string> TCache; 226 TCache m_Cache; 227 CRef<CID2Client> m_ID2Client; 228 }; 229 #endif 230 231 232 END_NAMESPACE(objects); 233 END_NCBI_NAMESPACE; 234 235 #endif // SRA__READER__SRA__WGSRESOLVER_IMPL__HPP 236