1 #ifndef UTIL___ICACHE__REF__HPP
2 #define UTIL___ICACHE__REF__HPP
3
4 /* $Id: cache_ref.hpp 254611 2011-02-16 14:51:38Z kazimird $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Anatoliy Kuznetsov
30 *
31 * File Description: ICache external reference using hash key(CRC32, MD5)
32 *
33 */
34
35 /// @file cache_ref.hpp
36 /// cache (ICache) external reference using hash key(CRC32, MD5)
37 ///
38
39 #include <util/cache/icache.hpp>
40
41 BEGIN_NCBI_SCOPE
42
43
44 /// Hashed content cache. This class works with hash-content pair.
45 /// Hash code alone can potentially give false positives (collisions)
46 /// so this implementation compares both hash and content.
47 /// Hash mismatch automatically screens-out content comparison
48 /// (it is presumed that hash cannot give us false negatives).
49 ///
50 /// <pre>
51 /// Comparison order and operation complexity:
52 /// 1. Hash comparison (B-tree search)
53 /// 2. - if 1 successful
54 /// -> memory comparison (content length dependent).
55 /// </pre>
56 ///
57 ///
58 /// Note for CGI developers:
59 /// Caching functions can throw exceptions!
60 /// For many CGIs cache is optional please catch and supress exceptions
61 /// in the calling application. Treat exception case as if cache found
62 /// nothing.
63 ///
64 class CCacheHashedContent
65 {
66 public:
67 CCacheHashedContent(ICache& cache);
68
69
70 /// Store hashed content
71 ///
72 /// @param hash_str
73 /// Hash key (CRC32 or MD5 or something else).
74 /// Choose low-collision hash value (like MD5).
75 ///
76 /// @param hashed_content
77 /// The content string (hash source).
78 /// @param ref_value
79 /// Content reference value (NetCache key).
80 ///
81 /// @return TRUE if hashed reference can be returned
82 ///
83 void StoreHashedContent(const string& hash_str,
84 const string& hashed_content,
85 const string& ref_value);
86
87 /// Get writer to store hashed content
88 ///
89 /// @return content writer pointer (caller takes ownership)
90 ///
91 IWriter* StoreHashedContent(const string& hash_str,
92 const string& hashed_content);
93
94
95 /// Store hashed content into intermidiate slot defined by
96 /// hash + application key(mediator).
97 ///
98 /// @param hash_str
99 /// Hash key (CRC32 or MD5 or something else).
100 /// @param key
101 /// Application defined unique string (NetSchedule key)
102 /// @param hashed_content
103 /// The content string (hash source).
104 ///
105 void StoreMediatorKeyContent(const string& hash_str,
106 const string& key,
107 const string& hashed_content);
108
109 /// Store hashed content defined by mediator key
110 /// @sa StoreMediatorKeyContent
111 /// After this call mediator becomes invalid
112 ///
113 /// @return Writer or NULL if mediator key not found
114 ///
115 IWriter* StoreHashedContentByMediator(const string& hash_str,
116 const string& key);
117
118
119 /// Get hashed content
120 /// Method compares both hash value and hashed content.
121 ///
122 /// @param hash_str
123 /// Hash key (could be CRC32 or MD5 or something else).
124 /// @param hashed_content
125 /// The content string (hash source).
126 /// @param ref_value
127 /// Output content reference value (NetCache key).
128 ///
129 /// @return TRUE if hashed reference can be returned
130 ///
131 bool GetHashedContent(const string& hash_str,
132 const string& hashed_content,
133 string* ref_value);
134
135 /// Return reader interface on cached BLOB
136 /// @return NULL
137 /// - BLOB not found (caller takes ownership)
138 ///
139 IReader* GetHashedContent(const string& hash_str,
140 const string& hashed_content);
141
142 /// Read hash content by mediator key
143 bool GetMediatorKeyContent(const string& hash_str,
144 const string& key,
145 string* hashed_content);
146
147 protected:
148 /// Returns TRUE if hash verification is successfull
149 bool x_CheckHashContent(const string& hash_str,
150 const string& hashed_content);
151
152 private:
153 CCacheHashedContent(const CCacheHashedContent&);
154 CCacheHashedContent& operator=(const CCacheHashedContent&);
155 protected:
156 ICache& m_ICache;
157 const string m_HashContentSubKey;
158 const string m_RefValueSubKey;
159 };
160
161
162 /////////////////////////////////////////////////////////////////////////////
163 // IMPLEMENTATION of INLINE functions
164 /////////////////////////////////////////////////////////////////////////////
165
166
167 inline
CCacheHashedContent(ICache & cache)168 CCacheHashedContent::CCacheHashedContent(ICache& cache)
169 : m_ICache(cache),
170 m_HashContentSubKey("CONTENT"),
171 m_RefValueSubKey("REF")
172 {}
173
174 inline
StoreHashedContent(const string & hash_str,const string & hashed_content,const string & ref_value)175 void CCacheHashedContent::StoreHashedContent(const string& hash_str,
176 const string& hashed_content,
177 const string& ref_value)
178 {
179 // two cache subkey are used here:
180 // CONTENT for hashed content (used for cache search-comparison)
181 // REF for the reference itself (reference storage)
182 //
183 const void* data = hashed_content.c_str();
184 m_ICache.Store(hash_str,
185 0,
186 m_HashContentSubKey,
187 data,
188 hashed_content.length());
189 data = ref_value.c_str();
190 m_ICache.Store(hash_str,
191 0,
192 m_RefValueSubKey,
193 data,
194 ref_value.length());
195
196 }
197
198 inline
199 IWriter*
StoreHashedContent(const string & hash_str,const string & hashed_content)200 CCacheHashedContent::StoreHashedContent(const string& hash_str,
201 const string& hashed_content)
202 {
203 const void* data = hashed_content.c_str();
204 m_ICache.Store(hash_str,
205 0,
206 m_HashContentSubKey,
207 data,
208 hashed_content.length());
209 // TODO: needs ICache change to get Writer for a newly created BLOB
210 // (current spec says ICache returns NULL writer if BLOB not found
211 //
212 IWriter* wrt = m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
213 if (wrt) {
214 return wrt;
215 }
216 // create empty BLOB
217 m_ICache.Store(hash_str,
218 0,
219 m_RefValueSubKey,
220 (const void*)0,
221 0);
222 return m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
223 }
224
225 inline
StoreMediatorKeyContent(const string & hash_str,const string & key,const string & hashed_content)226 void CCacheHashedContent::StoreMediatorKeyContent(const string& hash_str,
227 const string& key,
228 const string& hashed_content)
229 {
230 // key mediator is used as ICache subkey
231 const void* data = hashed_content.c_str();
232 m_ICache.Store(hash_str,
233 0,
234 key,
235 data,
236 hashed_content.length());
237 }
238
239 inline
GetMediatorKeyContent(const string & hash_str,const string & key,string * hashed_content)240 bool CCacheHashedContent::GetMediatorKeyContent(const string& hash_str,
241 const string& key,
242 string* hashed_content)
243 {
244 // key mediator is used as ICache subkey
245 const size_t buf_size = 4 * 1024;
246 char buf[buf_size];
247
248 ICache::SBlobAccessDescr blob_access(buf, buf_size);
249 m_ICache.GetBlobAccess(hash_str, 0, key, &blob_access);
250 if (!blob_access.blob_found) {
251 return false;
252 }
253
254 if (blob_access.reader.get()) {
255 // TODO: implement reader operation
256 return false;
257 } else {
258 if (hashed_content) {
259 hashed_content->resize(0);
260 hashed_content->append(blob_access.buf, blob_access.buf_size);
261 }
262 }
263 return true;
264 }
265
266 inline
StoreHashedContentByMediator(const string & hash_str,const string & key)267 IWriter* CCacheHashedContent::StoreHashedContentByMediator(const string& hash_str,
268 const string& key)
269 {
270 // key mediator is used as ICache subkey
271 const size_t buf_size = 4 * 1024;
272 char key_buf[buf_size];
273
274 ICache::SBlobAccessDescr blob_access(key_buf, buf_size);
275 m_ICache.GetBlobAccess(hash_str, 0, key, &blob_access);
276 if (!blob_access.blob_found) {
277 return 0;
278 }
279 m_ICache.Remove(hash_str, 0, key);
280
281 m_ICache.Store(hash_str,
282 0,
283 m_HashContentSubKey,
284 blob_access.buf,
285 blob_access.buf_size);
286
287 // TODO: needs ICache change to get Writer for a newly created BLOB
288 // (current spec says ICache returns NULL writer if BLOB not found
289 //
290 IWriter* wrt = m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
291 if (wrt) {
292 return wrt;
293 }
294 // create empty BLOB
295 m_ICache.Store(hash_str,
296 0,
297 m_RefValueSubKey,
298 (const void*)0,
299 0);
300 wrt = m_ICache.GetWriteStream(hash_str, 0, m_RefValueSubKey);
301 return wrt;
302 }
303
304
305 inline
GetHashedContent(const string & hash_str,const string & hashed_content,string * ref_value)306 bool CCacheHashedContent::GetHashedContent(const string& hash_str,
307 const string& hashed_content,
308 string* ref_value)
309 {
310 bool hash_ok = x_CheckHashContent(hash_str, hashed_content);
311 if (!hash_ok) {
312 return false;
313 }
314
315 const size_t buf_size = 4 * 1024;
316 char buf[buf_size];
317
318 ICache::SBlobAccessDescr blob_access(buf, buf_size);
319
320 // read the reference
321 //
322
323 m_ICache.GetBlobAccess(hash_str, 0, m_RefValueSubKey, &blob_access);
324 if (!blob_access.blob_found) {
325 return false;
326 }
327 if (blob_access.reader.get()) {
328 // TODO: implement reader operation
329 return false;
330 } else {
331 if (ref_value) {
332 ref_value->resize(0);
333 ref_value->append(blob_access.buf, blob_access.buf_size);
334 }
335 }
336 return true;
337 }
338
339 inline
340 IReader*
GetHashedContent(const string & hash_str,const string & hashed_content)341 CCacheHashedContent::GetHashedContent(const string& hash_str,
342 const string& hashed_content)
343 {
344 bool hash_ok = x_CheckHashContent(hash_str, hashed_content);
345 if (!hash_ok) {
346 return 0;
347 }
348 return m_ICache.GetReadStream(hash_str, 0, m_RefValueSubKey);
349 }
350
351 inline
352 bool
x_CheckHashContent(const string & hash_str,const string & hashed_content)353 CCacheHashedContent::x_CheckHashContent(const string& hash_str,
354 const string& hashed_content)
355 {
356 const size_t buf_size = 4 * 1024;
357 char buf[buf_size];
358
359 ICache::SBlobAccessDescr blob_access(buf, buf_size);
360
361 // read-compare hashed content
362 //
363 m_ICache.GetBlobAccess(hash_str, 0, m_HashContentSubKey, &blob_access);
364 if (!blob_access.blob_found) {
365 return false;
366 }
367 if (blob_access.reader.get()) {
368 // too large...
369 // TODO: implement reader based comparison
370 return false;
371 } else {
372 // BLOB is in memory - memcmp
373 if (hashed_content.length() != blob_access.blob_size) {
374 return false;
375 }
376 int cmp = memcmp(blob_access.buf,
377 hashed_content.c_str(),
378 blob_access.blob_size);
379 if (cmp != 0) {
380 return false;
381 }
382 }
383 return true;
384 }
385
386
387 END_NCBI_SCOPE
388
389 #endif /* UTIL___ICACHE__REF__HPP */
390