1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBLMDBSET_HPP
2 #define OBJTOOLS_READERS_SEQDB__SEQDBLMDBSET_HPP
3 
4 /*
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Amelia Fong
30  *
31  */
32 
33 /// @file seqdbLMDBset.hpp
34 /// Manages a set of LMDB files.
35 ///
36 /// Defines classes:
37 ///     CSeqDBLMDBSet
38 ///     CSeqDBLMDBEntry
39 ///
40 /// Implemented for: UNIX, MS-Windows
41 
42 #include <objtools/blast/seqdb_reader/impl/seqdb_lmdb.hpp>
43 #include <algo/blast/core/ncbi_std.h>
44 #include "seqdbvolset.hpp"
45 
46 BEGIN_NCBI_SCOPE
47 
48 /// Import definitions from the ncbi::objects namespace.
49 USING_SCOPE(objects);
50 
/// CSeqDBLMDBEntry
///
/// This class controls access to the CSeqDBLMDB class.  It contains
/// data that is not relevant to the internal operation of an LMDB file,
/// but is associated with operations over the volume LMDB set as a
/// whole, such as the starting OID of the LMDB file and masking
/// information (GI and OID lists).
58 
59 class CSeqDBLMDBEntry : public CObject {
60 public:
61 	typedef blastdb::TOid TOid;
62     /// Constructor
63     ///
64     /// This creates a object containing the specified volume object
65     /// pointer.  Although this object owns the pointer, it uses a
66     /// vector, so it does not keep an auto pointer or CRef<>.
67     /// Instead, the destructor of the CSeqDBLMDBSet class deletes the
68     /// volumes by calling Free() in a destructor.  Using indirect
69     /// pointers (CRef<> for example) would require slightly more
70     /// cycles in several performance critical paths.
71     ///
72     /// @param new_vol
73     ///   A pointer to a volume.
74     CSeqDBLMDBEntry(const string & name, TOid start_oid, const vector<string> & vol_names);
75 
76     ~CSeqDBLMDBEntry();
77 
78 
79     /// Get the starting OID in this volume's range.
80     ///
81     /// This returns the first OID in this volume's OID range.
82     ///
83     /// @return The starting OID of the range
GetOIDStart() const84     int GetOIDStart() const { return m_OIDStart; }
85 
86     /// Get the ending OID in this volume's range.
87     ///
88     /// This returns the first OID past the end of this volume's OID
89     /// range.
90     ///
91     /// @return
92     ///   The ending OID of the range
GetOIDEnd() const93     int GetOIDEnd() const { return m_OIDEnd; }
94 
GetLMDBFileName() const95     string GetLMDBFileName() const { return m_LMDBFName; }
96 
97     void AccessionToOids(const string & acc, vector<TOid>  & oids) const;
98 
99     void AccessionsToOids(const vector<string>& accs, vector<TOid>& oids) const;
100 
101     void NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid>& rv) const;
102 
103     void TaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
104 
105     void NegativeTaxIdsToOids(const set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv, vector<TTaxId> & tax_ids_found) const;
106 
107     void GetDBTaxIds(vector<TTaxId> & tax_ids) const;
108 
109     void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
110 
111 private:
112     void x_AdjustOidsOffset(vector<TOid> & oids) const;
113     void x_AdjustOidsOffset_TaxList(vector<TOid> & oids) const;
114 
115     string 				m_LMDBFName;
116     /// The underlying volume object
117     CRef<CSeqDBLMDB>	m_LMDB;
118 
119     /// The start of the OID range.
120     TOid             	m_OIDStart;
121 
122     /// The end of the OID range.
123     TOid             	m_OIDEnd;
124 
125     struct SVolumeInfo {
126     	TOid skipped_oids;
127     	TOid max_oid;
128     	string vol_name;
129     };
130     vector<SVolumeInfo> m_VolInfo;
131     bool m_isPartial;
132 
133 };
134 
135 
/// CSeqDBLMDBSet
///
/// This class stores a set of CSeqDBLMDBEntry objects and defines an
/// interface to control usage of them.  Several methods are provided
/// to look up OIDs and taxonomy IDs by different criteria; these
/// methods perform operations over the set of LMDB files as a whole.
/// The CSeqDBLMDBEntry class, declared earlier in this header,
/// provides some of this abstraction.
144 class CSeqDBLMDBSet {
145 public:
146 	typedef blastdb::TOid TOid;
147     /// Standard Constructor
148     ///
149     ///  An obejct to manage the LMDB file set associted with the
150 	///  input seq db volumes
151     CSeqDBLMDBSet( const CSeqDBVolSet & m_VolSet);
152 
153     /// Default Constructor
154     ///
155     /// An empty volume set will be created; this is in support of the
156     /// CSeqDBExpert class's default constructor.
157     CSeqDBLMDBSet();
158 
159     /// Destructor
160     ///
161     /// The destructor will release all resources still held, but some
162     /// of the resources will probably already be cleaned up via a
163     /// call to the UnLease method.
164     ~CSeqDBLMDBSet();
165 
166     void AccessionToOids(const string & acc, vector<TOid>  & oids) const;
167 
168     void AccessionsToOids(const vector<string>& accs, vector<TOid>& oids) const;
169 
IsBlastDBVersion5() const170     bool IsBlastDBVersion5() const { return (m_LMDBEntrySet.empty()? false:true); }
171 
172     void NegativeSeqIdsToOids(const vector<string>& ids, vector<blastdb::TOid>& rv) const;
173 
174     void TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
175 
176     void NegativeTaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv) const;
177 
178     void GetDBTaxIds(set<TTaxId> & tax_ids) const;
179 
180     void GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids) const;
181 
182 private:
183     vector<CRef<CSeqDBLMDBEntry> >  m_LMDBEntrySet;
184 
185 };
186 
187 END_NCBI_SCOPE
188 
189 #endif // OBJTOOLS_READERS_SEQDB__SEQDBLMDBSET_HPP
190 
191