1 #ifndef ___ASN_CACHE__HPP
2 #define ___ASN_CACHE__HPP
3 
4 /*  $Id: asn_cache.hpp 555826 2018-01-23 19:55:42Z kotliaro $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Mike DiCuccio Cheinan Marks Eyal Mozes
30  *
31  * 2018-01-18: Adding support for hierarchical caches.
32  *
33  */
34 
35 /** @file asn_cache.hpp
 * Contains the class definition for CAsnCache, the main
37  * client class for accessing the ASN cache data.
38  *
39  */
40 
41 #include <corelib/ncbistd.hpp>
42 
43 #include <objtools/data_loaders/asn_cache/asn_cache_iface.hpp>
44 
45 BEGIN_NCBI_SCOPE
46 
47 
// Forward declarations (alphabetical).  None of these types is referenced by
// the declarations visible in this header.
// NOTE(review): presumably retained for code that includes this header --
// confirm before removing any of them.
class CBitVectorWrapper;
class CChunkFile;
class CCompressionIStream;
class CSeqIdChunkFile;
class CSubCacheCreate;
53 
54 /// CAsnCache is used by clients to access the ASN cache data.  The ASN
55 /// cache is a cache of the ID database that is designed for fast access
56 /// and retrieval of CSeq_entry blobs.
57 /// @note Data in the ASN cache can also be accessed via the object manager
58 /// and the ASN cache data loader, CAsnCache_DataLoader.
59 class CAsnCache : public CObject,
60                   public IAsnCacheStore
61 {
62 public:
63     /// Type used to hold raw (unformatted) blob data.
64     using TBuffer = vector<unsigned char>;
65 
66     CAsnCache(const CAsnCache&) = delete;
67     CAsnCache& operator=(const CAsnCache&) = delete;
68 
69     /// Pass in the path to the ASN cache to construct an object.
70     explicit CAsnCache(const string& db_path);
71 
72     /// Return the raw blob in an unformatted buffer.
73     bool GetRaw(const objects::CSeq_id_Handle& id, TBuffer& buffer);
74     bool GetMultipleRaw(const objects::CSeq_id_Handle& id, vector<TBuffer>& buffer);
75 
76     /// Return the cache blob, packed and uninterpreted
77     bool GetBlob(const objects::CSeq_id_Handle& id, objects::CCache_blob& blob);
78     bool GetMultipleBlobs(const objects::CSeq_id_Handle& id,
79                           vector< CRef<objects::CCache_blob> >& blob);
80 
81     ///
82     /// Return the set of seq-ids associated with a given ID. By default, if
83     /// the SeqId index is not available, and the SeqIds can't be retrieved
84     /// cheaply, does nothing and return false. If cheap_only is set to false,
85     /// will always retrieve the SeqIds, by retrieving the full blob if that is
86     /// the only available way.
87     ///
88     bool GetSeqIds(const objects::CSeq_id_Handle& id,
89                    vector<objects::CSeq_id_Handle>& all_ids,
90                    bool cheap_only = true);
91 #if 0 // Is not being used anywhere
92 
93     ///
94     /// Check if the SeqId cache, for efficient retrieval of SeqIds, is
95     /// available
96     ///
97 
98     bool EfficientlyGetSeqIds() const { return m_SeqIdIndex.get(); }
99 #endif
100     /// Return a blob as a CSeq_entry object.
101     CRef<objects::CSeq_entry> GetEntry(const objects::CSeq_id_Handle& id);
102     vector< CRef<objects::CSeq_entry> > GetMultipleEntries(const objects::CSeq_id_Handle& id);
103 
104     /// Return the GI and timestamp for a given seq_id.  This can be a very
105     /// fast way to look up the GI for an accession.version because only the
106     /// index is queried -- the blob is not retrieved.
107     bool GetIdInfo(const objects::CSeq_id_Handle& id,
108                    CAsnIndex::TGi& gi,
109                    time_t& timestamp);
110 
111     /// Return the GI and timestamp for a given seq_id.  This can be a very
112     /// fast way to look up the GI for an accession.version because only the
113     /// index is queried -- the blob is not retrieved.
114     bool GetIdInfo(const objects::CSeq_id_Handle& id,
115                    objects::CSeq_id_Handle& accession,
116                    CAsnIndex::TGi& gi,
117                    time_t& timestamp,
118                    Uint4& sequence_length,
119                    Uint4& tax_id);
120     /// Get the full ASN cache index entry.  This does not retrieve the full
121     /// blob and is very fast.
122     bool GetIndexEntry(const objects::CSeq_id_Handle & id,
123                        CAsnIndex::SIndexInfo &info);
124     bool GetMultipleIndexEntries(const objects::CSeq_id_Handle & id,
125                                  vector<CAsnIndex::SIndexInfo> &info);
126 
127 
128     // AsnCacheStats
129     size_t GetGiCount() const;
130     void EnumSeqIds(IAsnCacheStore::TEnumSeqidCallback cb) const;
131     void EnumIndex(IAsnCacheStore::TEnumIndexCallback cb) const;
132 
133 private:
134     string m_DbPath;
135     std::unique_ptr<IAsnCacheStore> m_Store;
136 };
137 
138 END_NCBI_SCOPE
139 
140 
141 #endif  // ___ASN_CACHE__HPP
142