1 #ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP 2 #define OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP 3 4 /* $Id: bdbloader.hpp 610972 2020-06-26 12:58:17Z grichenk $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Christiam Camacho 30 * 31 * =========================================================================== 32 */ 33 34 /** @file bdbloader.hpp 35 * Data loader implementation that uses the blast databases 36 */ 37 38 #include <corelib/ncbistd.hpp> 39 #include <objmgr/data_loader.hpp> 40 #include <objtools/data_loaders/blastdb/blastdb_adapter.hpp> 41 #include <objects/seqset/Seq_entry.hpp> 42 #include <util/limited_size_map.hpp> 43 44 BEGIN_NCBI_SCOPE 45 BEGIN_SCOPE(objects) 46 47 // Parameter names used by loader factory 48 49 const string kCFParam_BlastDb_DbName = "DbName"; // = string 50 const string kCFParam_BlastDb_DbType = "DbType"; // = EDbType (e.g. "Protein") 51 52 53 class NCBI_XLOADER_BLASTDB_EXPORT CBlastDbDataLoader : public CDataLoader 54 { 55 public: 56 /// Describes the type of blast database to use 57 enum EDbType { 58 eNucleotide = 0, ///< nucleotide database 59 eProtein = 1, ///< protein database 60 eUnknown = 2 ///< protein is attempted first, then nucleotide 61 }; 62 63 struct NCBI_XLOADER_BLASTDB_EXPORT SBlastDbParam 64 { 65 SBlastDbParam(const string& db_name = "nr", 66 EDbType dbtype = eUnknown, 67 bool use_fixed_size_slices = true); 68 69 SBlastDbParam(CRef<CSeqDB> db_handle, 70 bool use_fixed_size_slices = true); 71 72 string m_DbName; 73 EDbType m_DbType; 74 bool m_UseFixedSizeSlices; 75 CRef<CSeqDB> m_BlastDbHandle; 76 }; 77 78 typedef SRegisterLoaderInfo<CBlastDbDataLoader> TRegisterLoaderInfo; 79 static TRegisterLoaderInfo RegisterInObjectManager( 80 CObjectManager& om, 81 const string& dbname = "nr", 82 const EDbType dbtype = eUnknown, 83 bool use_fixed_size_slices = true, 84 CObjectManager::EIsDefault is_default = CObjectManager::eNonDefault, 85 CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet); 86 static TRegisterLoaderInfo RegisterInObjectManager( 87 CObjectManager& om, 88 CRef<CSeqDB> db_handle, 89 bool use_fixed_size_slices = true, 90 CObjectManager::EIsDefault is_default = CObjectManager::eNonDefault, 91 CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet); 92 static string GetLoaderNameFromArgs(CConstRef<CSeqDB> db_handle); 93 static string GetLoaderNameFromArgs(const SBlastDbParam& param); GetLoaderNameFromArgs(const string & dbname="nr",const EDbType dbtype=eUnknown)94 static string GetLoaderNameFromArgs(const string& dbname = "nr", 95 const EDbType dbtype = eUnknown) 96 { 97 return GetLoaderNameFromArgs(SBlastDbParam(dbname, dbtype)); 98 } 99 100 virtual ~CBlastDbDataLoader(); 101 102 virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const; 103 104 105 /// Load TSE 106 virtual TTSE_LockSet GetRecords(const CSeq_id_Handle& idh, EChoice choice); 107 /// Load a description or data chunk. 108 virtual void GetChunk(TChunk chunk); 109 110 virtual TTaxId GetTaxId(const CSeq_id_Handle& idh); 111 virtual void GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret); 112 virtual TSeqPos GetSequenceLength(const CSeq_id_Handle& idh); 113 virtual void GetSequenceLengths(const TIds& ids, TLoaded& loaded, 114 TSequenceLengths& ret); 115 virtual CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle& idh); 116 virtual void GetSequenceTypes(const TIds& ids, TLoaded& loaded, 117 TSequenceTypes& ret); 118 119 /// Gets the blob id for a given sequence. 120 /// 121 /// Given a Seq_id_Handle, this method finds the corresponding top 122 /// level Seq-entry (TSE) and returns a blob corresponding to it. 123 /// The BlobId is initialized with a pointer to that CSeq_entry if 124 /// the sequence is known to this data loader, which will be true 125 /// if GetRecords() was called for this sequence. 126 /// 127 /// @param idh 128 /// Indicates the sequence for which to get a blob id. 129 /// @return 130 /// A TBlobId corresponding to the provided Seq_id_Handle. 131 virtual TBlobId GetBlobId(const CSeq_id_Handle& idh); 132 133 /// Test method for GetBlobById feature. 134 /// 135 /// The caller will use this method to determine whether this data 136 /// loader allows blobs to be managed by ID. 137 /// 138 /// @return 139 /// Returns true to indicate that GetBlobById() is available. 140 virtual bool CanGetBlobById() const; 141 142 /// For a given TBlobId, get the TTSE_Lock. 143 /// 144 /// If the provided TBlobId is known to this code, the 145 /// corresponding TTSE_Lock data will be fetched and returned. 146 /// Otherwise, an empty valued TTSE_Lock is returned. 147 /// 148 /// @param blob_id 149 /// Indicates which data to get. 150 /// @return 151 /// The returned data. 152 virtual TTSE_Lock GetBlobById(const TBlobId& blob_id); 153 154 /// A mapping from sequence identifier to blob ids. 155 typedef limited_size_map<CSeq_id_Handle, int> TIdMap; 156 157 /// @note this is added to temporarily comply with the toolkit's stable 158 /// components rule of having backwards compatible APIs 159 NCBI_DEPRECATED 160 static TRegisterLoaderInfo RegisterInObjectManager( 161 CObjectManager& om, 162 const string& dbname, 163 const EDbType dbtype, 164 CObjectManager::EIsDefault is_default, 165 CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet); 166 /// @note this is added to temporarily comply with the toolkit's stable 167 /// components rule of having backwards compatible APIs 168 NCBI_DEPRECATED 169 static TRegisterLoaderInfo RegisterInObjectManager( 170 CObjectManager& om, 171 CRef<CSeqDB> db_handle, 172 CObjectManager::EIsDefault is_default = CObjectManager::eNonDefault, 173 CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet); 174 protected: 175 /// TPlace is a Seq-id or an integer id, this data loader uses the former. 176 typedef int TBioseq_setId; 177 typedef CSeq_id_Handle TBioseqId; 178 typedef pair<TBioseqId, TBioseq_setId> TPlace; 179 180 typedef CParamLoaderMaker<CBlastDbDataLoader, SBlastDbParam> TMaker; 181 friend class CParamLoaderMaker<CBlastDbDataLoader, SBlastDbParam>; 182 183 /// Default (no-op) constructor CBlastDbDataLoader()184 CBlastDbDataLoader() {} 185 /// Parametrized constructor 186 /// @param loader_name name of this data loader [in] 187 /// @param param parameters to initialize this data loader [in] 188 CBlastDbDataLoader(const string& loader_name, const SBlastDbParam& param); 189 190 /// Prevent automatic copy constructor generation 191 CBlastDbDataLoader(const CBlastDbDataLoader &); 192 193 /// Prevent automatic assignment operator generation 194 CBlastDbDataLoader & operator=(const CBlastDbDataLoader &); 195 196 /// Gets the OID from m_Ids cache or the BLAST databases 197 int x_GetOid(const CSeq_id_Handle& idh); 198 /// Gets the OID from a TBlobId (see typedef in bdbloader.cpp) 199 int x_GetOid(const TBlobId& blob_id) const; 200 201 /// Load sequence data from cache or from the database. 202 /// 203 /// This checks the OID cache and loads the sequence data from 204 /// there or if not found, from the CSeqDB database. When new 205 /// data is built, the sequence is also split into chunks. A 206 /// description of what data is available will be returned in the 207 /// "lock" parameter. 208 /// 209 /// @param idh 210 /// A handle to the sequence identifier. 211 /// @param oid 212 /// Object id in BLAST DB 213 /// @param lock 214 /// Information about the sequence data is returned here. 215 void x_LoadData(const CSeq_id_Handle& idh, int oid, CTSE_LoadLock & lock, 216 int slice_size); 217 218 string m_DBName; ///< Blast database name 219 EDbType m_DBType; ///< Is this database protein or nucleotide? 220 CRef<IBlastDbAdapter> m_BlastDb; ///< The sequence database 221 222 TIdMap m_Ids; ///< ID to OID translation 223 224 /// Configuration value specified to the CCachedSequence 225 bool m_UseFixedSizeSlices; 226 }; 227 228 END_SCOPE(objects) 229 230 231 extern NCBI_XLOADER_BLASTDB_EXPORT const string kDataLoader_BlastDb_DriverName; 232 233 extern "C" 234 { 235 236 NCBI_XLOADER_BLASTDB_EXPORT 237 void NCBI_EntryPoint_DataLoader_BlastDb( 238 CPluginManager<objects::CDataLoader>::TDriverInfoList& info_list, 239 CPluginManager<objects::CDataLoader>::EEntryPointRequest method); 240 241 NCBI_XLOADER_BLASTDB_EXPORT 242 void NCBI_EntryPoint_xloader_blastdb( 243 CPluginManager<objects::CDataLoader>::TDriverInfoList& info_list, 244 CPluginManager<objects::CDataLoader>::EEntryPointRequest method); 245 246 } // extern C 247 248 249 END_NCBI_SCOPE 250 251 #endif /* OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP */ 252