1 #ifndef OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
2 #define OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP
3 
4 /*  $Id: bdbloader.hpp 610972 2020-06-26 12:58:17Z grichenk $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 *  ===========================================================================
28 *
29 *  Author: Christiam Camacho
30 *
31 * ===========================================================================
32 */
33 
34 /** @file bdbloader.hpp
35   * Data loader implementation that uses the blast databases
36   */
37 
38 #include <corelib/ncbistd.hpp>
39 #include <objmgr/data_loader.hpp>
40 #include <objtools/data_loaders/blastdb/blastdb_adapter.hpp>
41 #include <objects/seqset/Seq_entry.hpp>
42 #include <util/limited_size_map.hpp>
43 
44 BEGIN_NCBI_SCOPE
45 BEGIN_SCOPE(objects)
46 
/// Names of the parameters understood by the loader factory.

/// Name of the parameter carrying the database name; its value is a string.
const string kCFParam_BlastDb_DbName("DbName");
/// Name of the parameter carrying the database type; its value is an
/// EDbType (e.g. "Protein").
const string kCFParam_BlastDb_DbType("DbType");
51 
52 
53 class NCBI_XLOADER_BLASTDB_EXPORT CBlastDbDataLoader : public CDataLoader
54 {
55 public:
56     /// Describes the type of blast database to use
57     enum EDbType {
58         eNucleotide = 0,    ///< nucleotide database
59         eProtein = 1,       ///< protein database
60         eUnknown = 2        ///< protein is attempted first, then nucleotide
61     };
62 
63     struct NCBI_XLOADER_BLASTDB_EXPORT SBlastDbParam
64     {
65         SBlastDbParam(const string& db_name = "nr",
66                       EDbType       dbtype = eUnknown,
67                       bool          use_fixed_size_slices = true);
68 
69         SBlastDbParam(CRef<CSeqDB> db_handle,
70                       bool         use_fixed_size_slices = true);
71 
72         string          m_DbName;
73         EDbType         m_DbType;
74         bool            m_UseFixedSizeSlices;
75         CRef<CSeqDB>    m_BlastDbHandle;
76     };
77 
78     typedef SRegisterLoaderInfo<CBlastDbDataLoader> TRegisterLoaderInfo;
79     static TRegisterLoaderInfo RegisterInObjectManager(
80         CObjectManager& om,
81         const string& dbname = "nr",
82         const EDbType dbtype = eUnknown,
83         bool use_fixed_size_slices = true,
84         CObjectManager::EIsDefault is_default = CObjectManager::eNonDefault,
85         CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet);
86     static TRegisterLoaderInfo RegisterInObjectManager(
87         CObjectManager& om,
88         CRef<CSeqDB> db_handle,
89         bool use_fixed_size_slices = true,
90         CObjectManager::EIsDefault is_default = CObjectManager::eNonDefault,
91         CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet);
92     static string GetLoaderNameFromArgs(CConstRef<CSeqDB> db_handle);
93     static string GetLoaderNameFromArgs(const SBlastDbParam& param);
GetLoaderNameFromArgs(const string & dbname="nr",const EDbType dbtype=eUnknown)94     static string GetLoaderNameFromArgs(const string& dbname = "nr",
95                                         const EDbType dbtype = eUnknown)
96     {
97         return GetLoaderNameFromArgs(SBlastDbParam(dbname, dbtype));
98     }
99 
100     virtual ~CBlastDbDataLoader();
101 
102     virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const;
103 
104 
105     /// Load TSE
106     virtual TTSE_LockSet GetRecords(const CSeq_id_Handle& idh, EChoice choice);
107     /// Load a description or data chunk.
108     virtual void GetChunk(TChunk chunk);
109 
110     virtual TTaxId GetTaxId(const CSeq_id_Handle& idh);
111     virtual void GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret);
112     virtual TSeqPos GetSequenceLength(const CSeq_id_Handle& idh);
113     virtual void GetSequenceLengths(const TIds& ids, TLoaded& loaded,
114                                     TSequenceLengths& ret);
115     virtual CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle& idh);
116     virtual void GetSequenceTypes(const TIds& ids, TLoaded& loaded,
117                                   TSequenceTypes& ret);
118 
119     /// Gets the blob id for a given sequence.
120     ///
121     /// Given a Seq_id_Handle, this method finds the corresponding top
122     /// level Seq-entry (TSE) and returns a blob corresponding to it.
123     /// The BlobId is initialized with a pointer to that CSeq_entry if
124     /// the sequence is known to this data loader, which will be true
125     /// if GetRecords() was called for this sequence.
126     ///
127     /// @param idh
128     ///   Indicates the sequence for which to get a blob id.
129     /// @return
130     ///   A TBlobId corresponding to the provided Seq_id_Handle.
131     virtual TBlobId GetBlobId(const CSeq_id_Handle& idh);
132 
133     /// Test method for GetBlobById feature.
134     ///
135     /// The caller will use this method to determine whether this data
136     /// loader allows blobs to be managed by ID.
137     ///
138     /// @return
139     ///   Returns true to indicate that GetBlobById() is available.
140     virtual bool CanGetBlobById() const;
141 
142     /// For a given TBlobId, get the TTSE_Lock.
143     ///
144     /// If the provided TBlobId is known to this code, the
145     /// corresponding TTSE_Lock data will be fetched and returned.
146     /// Otherwise, an empty valued TTSE_Lock is returned.
147     ///
148     /// @param blob_id
149     ///   Indicates which data to get.
150     /// @return
151     ///   The returned data.
152     virtual TTSE_Lock GetBlobById(const TBlobId& blob_id);
153 
154     /// A mapping from sequence identifier to blob ids.
155     typedef limited_size_map<CSeq_id_Handle, int> TIdMap;
156 
157     /// @note this is added to temporarily comply with the toolkit's stable
158     /// components rule of having backwards compatible APIs
159     NCBI_DEPRECATED
160     static TRegisterLoaderInfo RegisterInObjectManager(
161         CObjectManager& om,
162         const string& dbname,
163         const EDbType dbtype,
164         CObjectManager::EIsDefault is_default,
165         CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet);
166     /// @note this is added to temporarily comply with the toolkit's stable
167     /// components rule of having backwards compatible APIs
168     NCBI_DEPRECATED
169     static TRegisterLoaderInfo RegisterInObjectManager(
170         CObjectManager& om,
171         CRef<CSeqDB> db_handle,
172         CObjectManager::EIsDefault is_default = CObjectManager::eNonDefault,
173         CObjectManager::TPriority priority = CObjectManager::kPriority_NotSet);
174 protected:
175     /// TPlace is a Seq-id or an integer id, this data loader uses the former.
176     typedef int TBioseq_setId;
177     typedef CSeq_id_Handle TBioseqId;
178     typedef pair<TBioseqId, TBioseq_setId> TPlace;
179 
180     typedef CParamLoaderMaker<CBlastDbDataLoader, SBlastDbParam> TMaker;
181     friend class CParamLoaderMaker<CBlastDbDataLoader, SBlastDbParam>;
182 
183     /// Default (no-op) constructor
CBlastDbDataLoader()184     CBlastDbDataLoader() {}
185     /// Parametrized constructor
186     /// @param loader_name name of this data loader [in]
187     /// @param param parameters to initialize this data loader [in]
188     CBlastDbDataLoader(const string& loader_name, const SBlastDbParam& param);
189 
190     /// Prevent automatic copy constructor generation
191     CBlastDbDataLoader(const CBlastDbDataLoader &);
192 
193     /// Prevent automatic assignment operator generation
194     CBlastDbDataLoader & operator=(const CBlastDbDataLoader &);
195 
196     /// Gets the OID from m_Ids cache or the BLAST databases
197     int x_GetOid(const CSeq_id_Handle& idh);
198     /// Gets the OID from a TBlobId (see typedef in bdbloader.cpp)
199     int x_GetOid(const TBlobId& blob_id) const;
200 
201     /// Load sequence data from cache or from the database.
202     ///
203     /// This checks the OID cache and loads the sequence data from
204     /// there or if not found, from the CSeqDB database.  When new
205     /// data is built, the sequence is also split into chunks.  A
206     /// description of what data is available will be returned in the
207     /// "lock" parameter.
208     ///
209     /// @param idh
210     ///   A handle to the sequence identifier.
211     /// @param oid
212     ///   Object id in BLAST DB
213     /// @param lock
214     ///   Information about the sequence data is returned here.
215     void x_LoadData(const CSeq_id_Handle& idh, int oid, CTSE_LoadLock & lock,
216                     int slice_size);
217 
218     string          m_DBName;      ///< Blast database name
219     EDbType         m_DBType;      ///< Is this database protein or nucleotide?
220     CRef<IBlastDbAdapter> m_BlastDb;       ///< The sequence database
221 
222     TIdMap          m_Ids;         ///< ID to OID translation
223 
224     /// Configuration value specified to the CCachedSequence
225     bool            m_UseFixedSizeSlices;
226 };
227 
228 END_SCOPE(objects)
229 
230 
231 extern NCBI_XLOADER_BLASTDB_EXPORT const string kDataLoader_BlastDb_DriverName;
232 
233 extern "C"
234 {
235 
236 NCBI_XLOADER_BLASTDB_EXPORT
237 void NCBI_EntryPoint_DataLoader_BlastDb(
238     CPluginManager<objects::CDataLoader>::TDriverInfoList&   info_list,
239     CPluginManager<objects::CDataLoader>::EEntryPointRequest method);
240 
241 NCBI_XLOADER_BLASTDB_EXPORT
242 void NCBI_EntryPoint_xloader_blastdb(
243     CPluginManager<objects::CDataLoader>::TDriverInfoList&   info_list,
244     CPluginManager<objects::CDataLoader>::EEntryPointRequest method);
245 
246 } // extern C
247 
248 
249 END_NCBI_SCOPE
250 
251 #endif /* OBJTOOLS_DATA_LOADERS_BLASTDB___BDBLOADER__HPP */
252