1 #ifndef UTIL___ICACHE__REF__HPP
2 #define UTIL___ICACHE__REF__HPP
3 
4 /*  $Id: cache_ref.hpp 254611 2011-02-16 14:51:38Z kazimird $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Anatoliy Kuznetsov
30  *
31  * File Description: ICache external reference using hash key(CRC32, MD5)
32  *
33  */
34 
35 /// @file cache_ref.hpp
36 /// cache (ICache) external reference using hash key(CRC32, MD5)
37 ///
38 
39 #include <util/cache/icache.hpp>
40 
41 BEGIN_NCBI_SCOPE
42 
43 
44 /// Hashed content cache. This class works with hash-content pair.
45 /// Hash code alone can potentially give false positives (collisions)
46 /// so this implementation compares both hash and content.
47 /// Hash mismatch automatically screens-out content comparison
48 /// (it is presumed that hash cannot give us false negatives).
49 ///
50 /// <pre>
51 /// Comparison order and operation complexity:
52 /// 1. Hash comparison (B-tree search)
53 /// 2.    - if 1 successful
54 ///          -> memory comparison (content length dependent).
55 /// </pre>
56 ///
57 ///
58 /// Note for CGI developers:
59 /// Caching functions can throw exceptions!
/// For many CGIs the cache is optional, so please catch and suppress
/// exceptions in the calling application. Treat an exception as if the
/// cache found nothing.
63 ///
64 class CCacheHashedContent
65 {
66 public:
67     CCacheHashedContent(ICache& cache);
68 
69 
70     /// Store hashed content
71     ///
72     /// @param hash_str
73     ///     Hash key (CRC32 or MD5 or something else).
74     ///     Choose low-collision hash value (like MD5).
75     ///
76     /// @param hashed_content
77     ///     The content string (hash source).
78     /// @param ref_value
79     ///     Content reference value (NetCache key).
80     ///
81     /// @return TRUE if hashed reference can be returned
82     ///
83     void StoreHashedContent(const string& hash_str,
84                             const string& hashed_content,
85                             const string& ref_value);
86 
87     /// Get writer to store hashed content
88     ///
89     /// @return content writer pointer (caller takes ownership)
90     ///
91     IWriter* StoreHashedContent(const string& hash_str,
92                                 const string& hashed_content);
93 
94 
95     /// Store hashed content into intermidiate slot defined by
96     /// hash + application key(mediator).
97     ///
98     /// @param hash_str
99     ///     Hash key (CRC32 or MD5 or something else).
100     /// @param key
101     ///     Application defined unique string (NetSchedule key)
102     /// @param hashed_content
103     ///     The content string (hash source).
104     ///
105     void StoreMediatorKeyContent(const string& hash_str,
106                                  const string& key,
107                                  const string& hashed_content);
108 
109     /// Store hashed content defined by mediator key
110     /// @sa StoreMediatorKeyContent
111     /// After this call mediator becomes invalid
112     ///
113     /// @return Writer or NULL if mediator key not found
114     ///
115     IWriter* StoreHashedContentByMediator(const string& hash_str,
116                                           const string& key);
117 
118 
119     /// Get hashed content
120     /// Method compares both hash value and hashed content.
121     ///
122     /// @param hash_str
123     ///     Hash key (could be CRC32 or MD5 or something else).
124     /// @param hashed_content
125     ///     The content string (hash source).
126     /// @param ref_value
127     ///     Output content reference value (NetCache key).
128     ///
129     /// @return TRUE if hashed reference can be returned
130     ///
131     bool GetHashedContent(const string& hash_str,
132                           const string& hashed_content,
133                           string*       ref_value);
134 
135     /// Return reader interface on cached BLOB
136     /// @return NULL
137     ///    - BLOB not found (caller takes ownership)
138     ///
139     IReader* GetHashedContent(const string& hash_str,
140                               const string& hashed_content);
141 
142     /// Read hash content by mediator key
143     bool GetMediatorKeyContent(const string& hash_str,
144                                const string& key,
145                                string* hashed_content);
146 
147 protected:
148     /// Returns TRUE if hash verification is successfull
149     bool x_CheckHashContent(const string& hash_str,
150                             const string& hashed_content);
151 
152 private:
153     CCacheHashedContent(const CCacheHashedContent&);
154     CCacheHashedContent& operator=(const CCacheHashedContent&);
155 protected:
156     ICache&         m_ICache;
157     const string    m_HashContentSubKey;
158     const string    m_RefValueSubKey;
159 };
160 
161 
162 /////////////////////////////////////////////////////////////////////////////
163 //  IMPLEMENTATION of INLINE functions
164 /////////////////////////////////////////////////////////////////////////////
165 
166 
167 inline
CCacheHashedContent(ICache & cache)168 CCacheHashedContent::CCacheHashedContent(ICache& cache)
169 : m_ICache(cache),
170   m_HashContentSubKey("CONTENT"),
171   m_RefValueSubKey("REF")
172 {}
173 
174 inline
StoreHashedContent(const string & hash_str,const string & hashed_content,const string & ref_value)175 void CCacheHashedContent::StoreHashedContent(const string& hash_str,
176                                              const string& hashed_content,
177                                              const string& ref_value)
178 {
179     // two cache subkey are used here:
180     //  CONTENT for hashed content (used for cache search-comparison)
181     //  REF for the reference itself (reference storage)
182     //
183     const void* data = hashed_content.c_str();
184     m_ICache.Store(hash_str,
185                    0,
186                    m_HashContentSubKey,
187                    data,
188                    hashed_content.length());
189     data = ref_value.c_str();
190     m_ICache.Store(hash_str,
191                    0,
192                    m_RefValueSubKey,
193                    data,
194                    ref_value.length());
195 
196 }
197 
198 inline
199 IWriter*
StoreHashedContent(const string & hash_str,const string & hashed_content)200 CCacheHashedContent::StoreHashedContent(const string& hash_str,
201                                         const string& hashed_content)
202 {
203     const void* data = hashed_content.c_str();
204     m_ICache.Store(hash_str,
205                    0,
206                    m_HashContentSubKey,
207                    data,
208                    hashed_content.length());
209     // TODO: needs ICache change to get Writer for a newly created BLOB
210     // (current spec says ICache returns NULL writer if BLOB not found
211     //
212     IWriter* wrt = m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
213     if (wrt) {
214         return wrt;
215     }
216     // create empty BLOB
217     m_ICache.Store(hash_str,
218                    0,
219                    m_RefValueSubKey,
220                    (const void*)0,
221                    0);
222     return m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
223 }
224 
225 inline
StoreMediatorKeyContent(const string & hash_str,const string & key,const string & hashed_content)226 void CCacheHashedContent::StoreMediatorKeyContent(const string& hash_str,
227                                                   const string& key,
228                                                   const string& hashed_content)
229 {
230     // key mediator is used as ICache subkey
231     const void* data = hashed_content.c_str();
232     m_ICache.Store(hash_str,
233                    0,
234                    key,
235                    data,
236                    hashed_content.length());
237 }
238 
239 inline
GetMediatorKeyContent(const string & hash_str,const string & key,string * hashed_content)240 bool CCacheHashedContent::GetMediatorKeyContent(const string& hash_str,
241                                                 const string& key,
242                                                 string* hashed_content)
243 {
244     // key mediator is used as ICache subkey
245     const size_t buf_size = 4 * 1024;
246     char buf[buf_size];
247 
248     ICache::SBlobAccessDescr blob_access(buf, buf_size);
249     m_ICache.GetBlobAccess(hash_str, 0, key, &blob_access);
250     if (!blob_access.blob_found) {
251         return false;
252     }
253 
254     if (blob_access.reader.get()) {
255         // TODO: implement reader operation
256         return false;
257     } else {
258         if (hashed_content) {
259             hashed_content->resize(0);
260             hashed_content->append(blob_access.buf, blob_access.buf_size);
261         }
262     }
263     return true;
264 }
265 
266 inline
StoreHashedContentByMediator(const string & hash_str,const string & key)267 IWriter* CCacheHashedContent::StoreHashedContentByMediator(const string& hash_str,
268                                                            const string& key)
269 {
270     // key mediator is used as ICache subkey
271     const size_t buf_size = 4 * 1024;
272     char key_buf[buf_size];
273 
274     ICache::SBlobAccessDescr blob_access(key_buf, buf_size);
275     m_ICache.GetBlobAccess(hash_str, 0, key, &blob_access);
276     if (!blob_access.blob_found) {
277         return 0;
278     }
279     m_ICache.Remove(hash_str, 0, key);
280 
281     m_ICache.Store(hash_str,
282                    0,
283                    m_HashContentSubKey,
284                    blob_access.buf,
285                    blob_access.buf_size);
286 
287     // TODO: needs ICache change to get Writer for a newly created BLOB
288     // (current spec says ICache returns NULL writer if BLOB not found
289     //
290     IWriter* wrt = m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
291     if (wrt) {
292         return wrt;
293     }
294     // create empty BLOB
295     m_ICache.Store(hash_str,
296                    0,
297                    m_RefValueSubKey,
298                    (const void*)0,
299                    0);
300     wrt = m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
301     return wrt;
302 }
303 
304 
305 inline
GetHashedContent(const string & hash_str,const string & hashed_content,string * ref_value)306 bool CCacheHashedContent::GetHashedContent(const string& hash_str,
307                                            const string& hashed_content,
308                                            string*       ref_value)
309 {
310     bool hash_ok = x_CheckHashContent(hash_str, hashed_content);
311     if (!hash_ok) {
312         return false;
313     }
314 
315     const size_t buf_size = 4 * 1024;
316     char buf[buf_size];
317 
318     ICache::SBlobAccessDescr blob_access(buf, buf_size);
319 
320     // read the reference
321     //
322 
323     m_ICache.GetBlobAccess(hash_str, 0, m_RefValueSubKey, &blob_access);
324     if (!blob_access.blob_found) {
325         return false;
326     }
327     if (blob_access.reader.get()) {
328         // TODO: implement reader operation
329         return false;
330     } else {
331         if (ref_value) {
332             ref_value->resize(0);
333             ref_value->append(blob_access.buf, blob_access.buf_size);
334         }
335     }
336     return true;
337 }
338 
339 inline
340 IReader*
GetHashedContent(const string & hash_str,const string & hashed_content)341 CCacheHashedContent::GetHashedContent(const string& hash_str,
342                                       const string& hashed_content)
343 {
344     bool hash_ok = x_CheckHashContent(hash_str, hashed_content);
345     if (!hash_ok) {
346         return 0;
347     }
348     return m_ICache.GetReadStream(hash_str, 0, m_RefValueSubKey);
349 }
350 
351 inline
352 bool
x_CheckHashContent(const string & hash_str,const string & hashed_content)353 CCacheHashedContent::x_CheckHashContent(const string& hash_str,
354                                         const string& hashed_content)
355 {
356     const size_t buf_size = 4 * 1024;
357     char buf[buf_size];
358 
359     ICache::SBlobAccessDescr blob_access(buf, buf_size);
360 
361     // read-compare hashed content
362     //
363     m_ICache.GetBlobAccess(hash_str, 0, m_HashContentSubKey, &blob_access);
364     if (!blob_access.blob_found) {
365         return false;
366     }
367     if (blob_access.reader.get()) {
368         // too large...
369         // TODO: implement reader based comparison
370         return false;
371     } else {
372         // BLOB is in memory - memcmp
373         if (hashed_content.length() != blob_access.blob_size) {
374             return false;
375         }
376         int cmp = memcmp(blob_access.buf,
377                          hashed_content.c_str(),
378                          blob_access.blob_size);
379         if (cmp != 0) {
380             return false;
381         }
382     }
383     return true;
384 }
385 
386 
387 END_NCBI_SCOPE
388 
389 #endif  /* UTIL___ICACHE__REF__HPP */
390