1 #ifndef ___ASN_CACHE__HPP 2 #define ___ASN_CACHE__HPP 3 4 /* $Id: asn_cache.hpp 555826 2018-01-23 19:55:42Z kotliaro $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Mike DiCuccio Cheinan Marks Eyal Mozes 30 * 31 * 2018-01-18: Adding support for hierarchical caches. 32 * 33 */ 34 35 /** @file asn_cache.hpp 36 * Contains the class definition for CAsnCache, the main 37 * client class for accessing the ASN cache data. 38 * 39 */ 40 41 #include <corelib/ncbistd.hpp> 42 43 #include <objtools/data_loaders/asn_cache/asn_cache_iface.hpp> 44 45 BEGIN_NCBI_SCOPE 46 47 48 class CCompressionIStream; 49 class CSubCacheCreate; 50 class CChunkFile; 51 class CSeqIdChunkFile; 52 class CBitVectorWrapper; 53 54 /// CAsnCache is used by clients to access the ASN cache data. 
The ASN 55 /// cache is a cache of the ID database that is designed for fast access 56 /// and retrieval of CSeq_entry blobs. 57 /// @note Data in the ASN cache can also be accessed via the object manager 58 /// and the ASN cache data loader, CAsnCache_DataLoader. 59 class CAsnCache : public CObject, 60 public IAsnCacheStore 61 { 62 public: 63 /// Type used to hold raw (unformatted) blob data. 64 using TBuffer = vector<unsigned char>; 65 66 CAsnCache(const CAsnCache&) = delete; 67 CAsnCache& operator=(const CAsnCache&) = delete; 68 69 /// Pass in the path to the ASN cache to construct an object. 70 explicit CAsnCache(const string& db_path); 71 72 /// Return the raw blob in an unformatted buffer. 73 bool GetRaw(const objects::CSeq_id_Handle& id, TBuffer& buffer); 74 bool GetMultipleRaw(const objects::CSeq_id_Handle& id, vector<TBuffer>& buffer); 75 76 /// Return the cache blob, packed and uninterpreted 77 bool GetBlob(const objects::CSeq_id_Handle& id, objects::CCache_blob& blob); 78 bool GetMultipleBlobs(const objects::CSeq_id_Handle& id, 79 vector< CRef<objects::CCache_blob> >& blob); 80 81 /// 82 /// Return the set of seq-ids associated with a given ID. By default, if 83 /// the SeqId index is not available, and the SeqIds can't be retrieved 84 /// cheaply, does nothing and return false. If cheap_only is set to false, 85 /// will always retrieve the SeqIds, by retrieving the full blob if that is 86 /// the only available way. 87 /// 88 bool GetSeqIds(const objects::CSeq_id_Handle& id, 89 vector<objects::CSeq_id_Handle>& all_ids, 90 bool cheap_only = true); 91 #if 0 // Is not being used anywhere 92 93 /// 94 /// Check if the SeqId cache, for efficient retrieval of SeqIds, is 95 /// available 96 /// 97 98 bool EfficientlyGetSeqIds() const { return m_SeqIdIndex.get(); } 99 #endif 100 /// Return a blob as a CSeq_entry object. 
101 CRef<objects::CSeq_entry> GetEntry(const objects::CSeq_id_Handle& id); 102 vector< CRef<objects::CSeq_entry> > GetMultipleEntries(const objects::CSeq_id_Handle& id); 103 104 /// Return the GI and timestamp for a given seq_id. This can be a very 105 /// fast way to look up the GI for an accession.version because only the 106 /// index is queried -- the blob is not retrieved. 107 bool GetIdInfo(const objects::CSeq_id_Handle& id, 108 CAsnIndex::TGi& gi, 109 time_t& timestamp); 110 111 /// Return the GI and timestamp for a given seq_id. This can be a very 112 /// fast way to look up the GI for an accession.version because only the 113 /// index is queried -- the blob is not retrieved. 114 bool GetIdInfo(const objects::CSeq_id_Handle& id, 115 objects::CSeq_id_Handle& accession, 116 CAsnIndex::TGi& gi, 117 time_t& timestamp, 118 Uint4& sequence_length, 119 Uint4& tax_id); 120 /// Get the full ASN cache index entry. This does not retrieve the full 121 /// blob and is very fast. 122 bool GetIndexEntry(const objects::CSeq_id_Handle & id, 123 CAsnIndex::SIndexInfo &info); 124 bool GetMultipleIndexEntries(const objects::CSeq_id_Handle & id, 125 vector<CAsnIndex::SIndexInfo> &info); 126 127 128 // AsnCacheStats 129 size_t GetGiCount() const; 130 void EnumSeqIds(IAsnCacheStore::TEnumSeqidCallback cb) const; 131 void EnumIndex(IAsnCacheStore::TEnumIndexCallback cb) const; 132 133 private: 134 string m_DbPath; 135 std::unique_ptr<IAsnCacheStore> m_Store; 136 }; 137 138 END_NCBI_SCOPE 139 140 141 #endif // ___ASN_CACHE__HPP 142