1 /* $Id: pubseq_gateway_cache_utils.cpp 629837 2021-04-22 12:47:49Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Sergey Satskiy
27 *
28 * File Description:
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33
34 #include "pubseq_gateway_cache_utils.hpp"
35 #include "pubseq_gateway.hpp"
36 #include "pubseq_gateway_convert_utils.hpp"
37 #include "insdc_utils.hpp"
38 #include "pending_operation.hpp"
39
40 USING_NCBI_SCOPE;
41
42
43
44 EPSGS_CacheLookupResult
x_LookupBioseqInfo(SBioseqResolution & bioseq_resolution)45 CPSGCache::x_LookupBioseqInfo(SBioseqResolution & bioseq_resolution)
46 {
47 auto app = CPubseqGatewayApp::GetInstance();
48 CPubseqGatewayCache * cache = app->GetLookupCache();
49
50 if (cache == nullptr)
51 return ePSGS_CacheNotHit;
52
53 auto version = bioseq_resolution.m_BioseqInfo.GetVersion();
54 auto seq_id_type = bioseq_resolution.m_BioseqInfo.GetSeqIdType();
55 auto gi = bioseq_resolution.m_BioseqInfo.GetGI();
56
57 CBioseqInfoFetchRequest fetch_request;
58 fetch_request.SetAccession(bioseq_resolution.m_BioseqInfo.GetAccession());
59 if (version >= 0)
60 fetch_request.SetVersion(version);
61 if (seq_id_type >= 0)
62 fetch_request.SetSeqIdType(seq_id_type);
63 if (gi > 0)
64 fetch_request.SetGI(gi);
65
66 auto start = chrono::high_resolution_clock::now();
67 bool cache_hit = false;
68
69 COperationTiming & timing = app->GetTiming();
70 try {
71 if (m_NeedTrace) {
72 m_Reply->SendTrace(
73 "Cache request: " +
74 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
75 m_Request->GetStartTimestamp());
76 }
77
78 auto records = cache->FetchBioseqInfo(fetch_request);
79
80 if (m_NeedTrace) {
81 string msg = to_string(records.size()) + " hit(s)";
82 for (const auto & item : records) {
83 msg += "\n" +
84 ToJson(item, SPSGS_ResolveRequest::fPSGS_AllBioseqFields).
85 Repr(CJsonNode::fStandardJson);
86 }
87 m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
88 }
89
90 switch (records.size()) {
91 case 0:
92 if (IsINSDCSeqIdType(seq_id_type)) {
93 timing.Register(eLookupLmdbBioseqInfo, eOpStatusNotFound, start);
94 app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheMiss);
95 return CPSGCache::x_LookupINSDCBioseqInfo(bioseq_resolution);
96 }
97 cache_hit = false;
98 break;
99 case 1:
100 cache_hit = true;
101 bioseq_resolution.m_BioseqInfo = std::move(records[0]);
102 break;
103 default:
104 // More than one record; may be need to pick the latest version
105 auto ver = records[0].GetVersion();
106 auto date_changed = records[0].GetDateChanged();
107 size_t index_to_pick = 0;
108 for (size_t k = 0; k < records.size(); ++k) {
109 if (records[k].GetVersion() > ver) {
110 index_to_pick = k;
111 ver = records[k].GetVersion();
112 date_changed = records[k].GetDateChanged();
113 } else {
114 if (records[k].GetVersion() == ver) {
115 if (records[k].GetDateChanged() > date_changed) {
116 index_to_pick = k;
117 date_changed = records[k].GetDateChanged();
118 }
119 }
120 }
121 }
122 if (m_NeedTrace) {
123 m_Reply->SendTrace(
124 "Record with max version (and max date changed if "
125 "more than one with max version) selected\n" +
126 ToJson(records[index_to_pick],
127 SPSGS_ResolveRequest::fPSGS_AllBioseqFields).
128 Repr(CJsonNode::fStandardJson),
129 m_Request->GetStartTimestamp());
130 }
131
132 cache_hit = true;
133 bioseq_resolution.m_BioseqInfo = std::move(records[index_to_pick]);
134
135 break;
136 }
137 } catch (const exception & exc) {
138 if (m_NeedTrace)
139 m_Reply->SendTrace("Cache fetch exception. Report failure.",
140 m_Request->GetStartTimestamp());
141 ERR_POST(Critical << "Exception while bioseq info cache lookup: "
142 << exc.what());
143 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
144 return ePSGS_CacheFailure;
145 } catch (...) {
146 if (m_NeedTrace)
147 m_Reply->SendTrace("Cache fetch exception. Report failure.",
148 m_Request->GetStartTimestamp());
149 ERR_POST(Critical << "Unknown exception while bioseq info cache lookup");
150 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
151 return ePSGS_CacheFailure;
152 }
153
154 if (cache_hit) {
155 if (m_NeedTrace)
156 m_Reply->SendTrace("Report cache hit",
157 m_Request->GetStartTimestamp());
158 timing.Register(eLookupLmdbBioseqInfo, eOpStatusFound, start);
159 app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheHit);
160 return ePSGS_CacheHit;
161 }
162
163 if (m_NeedTrace)
164 m_Reply->SendTrace("Report cache no hit",
165 m_Request->GetStartTimestamp());
166 timing.Register(eLookupLmdbBioseqInfo, eOpStatusNotFound, start);
167 app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheMiss);
168 return ePSGS_CacheNotHit;
169 }
170
171
172 EPSGS_CacheLookupResult
x_LookupINSDCBioseqInfo(SBioseqResolution & bioseq_resolution)173 CPSGCache::x_LookupINSDCBioseqInfo(SBioseqResolution & bioseq_resolution)
174 {
175 auto app = CPubseqGatewayApp::GetInstance();
176 CPubseqGatewayCache * cache = app->GetLookupCache();
177
178 auto version = bioseq_resolution.m_BioseqInfo.GetVersion();
179 auto gi = bioseq_resolution.m_BioseqInfo.GetGI();
180
181 CBioseqInfoFetchRequest fetch_request;
182 fetch_request.SetAccession(bioseq_resolution.m_BioseqInfo.GetAccession());
183 if (version >= 0)
184 fetch_request.SetVersion(version);
185 if (gi > 0)
186 fetch_request.SetGI(gi);
187
188 auto start = chrono::high_resolution_clock::now();
189 bool cache_hit = false;
190
191 COperationTiming & timing = app->GetTiming();
192 try {
193 if (m_NeedTrace) {
194 m_Reply->SendTrace(
195 "Cache request for INSDC types: " +
196 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
197 m_Request->GetStartTimestamp());
198 }
199
200 auto records = cache->FetchBioseqInfo(fetch_request);
201 SINSDCDecision decision = DecideINSDC(records, version);
202
203 if (m_NeedTrace) {
204 string msg = to_string(records.size()) +
205 " hit(s); decision status: " + to_string(decision.status);
206 for (const auto & item : records) {
207 msg += "\n" +
208 ToJson(item,
209 SPSGS_ResolveRequest::fPSGS_AllBioseqFields).
210 Repr(CJsonNode::fStandardJson);
211 }
212 m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
213 }
214
215 switch (decision.status) {
216 case CRequestStatus::e200_Ok:
217 cache_hit = true;
218 bioseq_resolution.m_BioseqInfo = std::move(records[decision.index]);
219 break;
220 case CRequestStatus::e404_NotFound:
221 cache_hit = false;
222 break;
223 case CRequestStatus::e500_InternalServerError:
224 // No suitable records
225 cache_hit = false;
226 break;
227 default:
228 // Impossible
229 cache_hit = false;
230 break;
231 }
232 } catch (const exception & exc) {
233 if (m_NeedTrace)
234 m_Reply->SendTrace("Cache fetch for INSDC types exception. "
235 "Report failure.",
236 m_Request->GetStartTimestamp());
237
238 ERR_POST(Critical << "Exception while INSDC bioseq info cache lookup: "
239 << exc.what());
240 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
241 return ePSGS_CacheFailure;
242 } catch (...) {
243 if (m_NeedTrace)
244 m_Reply->SendTrace("Cache fetch for INSDC types exception. "
245 "Report failure.",
246 m_Request->GetStartTimestamp());
247
248 ERR_POST(Critical << "Unknown exception while INSDC bioseq info cache lookup");
249 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
250 return ePSGS_CacheFailure;
251 }
252
253 if (cache_hit) {
254 if (m_NeedTrace)
255 m_Reply->SendTrace("Report cache for INSDC types hit",
256 m_Request->GetStartTimestamp());
257 timing.Register(eLookupLmdbBioseqInfo, eOpStatusFound, start);
258 app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheHit);
259 return ePSGS_CacheHit;
260 }
261
262 if (m_NeedTrace)
263 m_Reply->SendTrace("Report cache for INSDC types no hit",
264 m_Request->GetStartTimestamp());
265 timing.Register(eLookupLmdbBioseqInfo, eOpStatusNotFound, start);
266 app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheMiss);
267 return ePSGS_CacheNotHit;
268 }
269
270
271 EPSGS_CacheLookupResult
x_LookupSi2csi(SBioseqResolution & bioseq_resolution)272 CPSGCache::x_LookupSi2csi(SBioseqResolution & bioseq_resolution)
273 {
274 auto app = CPubseqGatewayApp::GetInstance();
275 CPubseqGatewayCache * cache = app->GetLookupCache();
276
277 if (cache == nullptr)
278 return ePSGS_CacheNotHit;
279
280 auto seq_id_type = bioseq_resolution.m_BioseqInfo.GetSeqIdType();
281
282 CSi2CsiFetchRequest fetch_request;
283 fetch_request.SetSecSeqId(bioseq_resolution.m_BioseqInfo.GetAccession());
284 if (seq_id_type >= 0)
285 fetch_request.SetSecSeqIdType(seq_id_type);
286
287 auto start = chrono::high_resolution_clock::now();
288 bool cache_hit = false;
289
290 try {
291 if (m_NeedTrace) {
292 m_Reply->SendTrace(
293 "Cache request: " +
294 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
295 m_Request->GetStartTimestamp());
296 }
297
298 auto records = cache->FetchSi2Csi(fetch_request);
299
300 if (m_NeedTrace) {
301 string msg = to_string(records.size()) + " hit(s)";
302 for (const auto & item : records) {
303 msg += "\n" + ToJson(item).Repr(CJsonNode::fStandardJson);
304 }
305 m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
306 }
307
308 switch (records.size()) {
309 case 0:
310 cache_hit = false;
311 break;
312 case 1:
313 cache_hit = true;
314 bioseq_resolution.m_BioseqInfo.SetAccession(records[0].GetAccession());
315 bioseq_resolution.m_BioseqInfo.SetVersion(records[0].GetVersion());
316 bioseq_resolution.m_BioseqInfo.SetSeqIdType(records[0].GetSeqIdType());
317 bioseq_resolution.m_BioseqInfo.SetGI(records[0].GetGI());
318 break;
319 default:
320 if (m_NeedTrace) {
321 m_Reply->SendTrace(
322 to_string(records.size()) + " hits. "
323 "Cannot decide what to choose so treat as no hit",
324 m_Request->GetStartTimestamp());
325 }
326
327 // More than one record: there is no basis to choose, so
328 // say that there was no cache hit
329 cache_hit = false;
330 break;
331 }
332 } catch (const exception & exc) {
333 if (m_NeedTrace)
334 m_Reply->SendTrace("Cache fetch exception. Report failure.",
335 m_Request->GetStartTimestamp());
336 ERR_POST(Critical << "Exception while csi cache lookup: "
337 << exc.what());
338 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
339 return ePSGS_CacheFailure;
340 } catch (...) {
341 if (m_NeedTrace)
342 m_Reply->SendTrace("Cache fetch exception. Report failure.",
343 m_Request->GetStartTimestamp());
344 ERR_POST(Critical << "Unknown exception while csi cache lookup");
345 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
346 return ePSGS_CacheFailure;
347 }
348
349 COperationTiming & timing = app->GetTiming();
350 if (cache_hit) {
351 if (m_NeedTrace)
352 m_Reply->SendTrace("Report cache hit",
353 m_Request->GetStartTimestamp());
354 timing.Register(eLookupLmdbSi2csi, eOpStatusFound, start);
355 app->GetCounters().Increment(CPSGSCounters::ePSGS_Si2csiCacheHit);
356 return ePSGS_CacheHit;
357 }
358
359 if (m_NeedTrace)
360 m_Reply->SendTrace("Report cache no hit",
361 m_Request->GetStartTimestamp());
362 timing.Register(eLookupLmdbSi2csi, eOpStatusNotFound, start);
363 app->GetCounters().Increment(CPSGSCounters::ePSGS_Si2csiCacheMiss);
364 return ePSGS_CacheNotHit;
365 }
366
367
x_LookupBlobProp(int sat,int sat_key,int64_t & last_modified,CBlobRecord & blob_record)368 EPSGS_CacheLookupResult CPSGCache::x_LookupBlobProp(
369 int sat,
370 int sat_key,
371 int64_t & last_modified,
372 CBlobRecord & blob_record)
373 {
374 auto app = CPubseqGatewayApp::GetInstance();
375 CPubseqGatewayCache * cache = app->GetLookupCache();
376
377 if (cache == nullptr)
378 return ePSGS_CacheNotHit;
379
380 CBlobFetchRequest fetch_request;
381 fetch_request.SetSat(sat);
382 fetch_request.SetSatKey(sat_key);
383 if (last_modified != INT64_MIN)
384 fetch_request.SetLastModified(last_modified);
385
386 auto start = chrono::high_resolution_clock::now();
387 bool cache_hit = false;
388
389 try {
390 if (m_NeedTrace) {
391 m_Reply->SendTrace(
392 "Cache request: " +
393 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
394 m_Request->GetStartTimestamp());
395 }
396
397 auto records = cache->FetchBlobProp(fetch_request);
398
399 if (m_NeedTrace) {
400 string msg = to_string(records.size()) + " hit(s)";
401 for (const auto & item : records) {
402 msg += "\n" + ToJson(item).Repr(CJsonNode::fStandardJson);
403 }
404 m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
405 }
406
407 switch (records.size()) {
408 case 0:
409 cache_hit = false;
410 break;
411 case 1:
412 cache_hit = true;
413 last_modified = records[0].GetModified();
414 blob_record = std::move(records[0]);
415 break;
416 default:
417 // More than one record: need to choose by last modified
418 cache_hit = true;
419 size_t max_last_modified_index = 0;
420 for (size_t k = 0; k < records.size(); ++k) {
421 if (records[k].GetModified() >
422 records[max_last_modified_index].GetModified())
423 max_last_modified_index = k;
424 }
425 if (m_NeedTrace) {
426 m_Reply->SendTrace(
427 "Record with max last_modified selected\n" +
428 ToJson(records[max_last_modified_index]).
429 Repr(CJsonNode::fStandardJson),
430 m_Request->GetStartTimestamp());
431 }
432
433 last_modified = records[max_last_modified_index].GetModified();
434 blob_record = std::move(records[max_last_modified_index]);
435
436 break;
437 }
438 } catch (const exception & exc) {
439 if (m_NeedTrace)
440 m_Reply->SendTrace("Cache fetch exception. Report failure.",
441 m_Request->GetStartTimestamp());
442 ERR_POST(Critical << "Exception while blob prop cache lookup: "
443 << exc.what());
444 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
445 return ePSGS_CacheFailure;
446 } catch (...) {
447 if (m_NeedTrace)
448 m_Reply->SendTrace("Cache fetch exception. Report failure.",
449 m_Request->GetStartTimestamp());
450 ERR_POST(Critical << "Unknown exception while blob prop cache lookup");
451 app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
452 return ePSGS_CacheFailure;
453 }
454
455 COperationTiming & timing = app->GetTiming();
456 if (cache_hit) {
457 if (m_NeedTrace)
458 m_Reply->SendTrace("Report cache hit",
459 m_Request->GetStartTimestamp());
460 timing.Register(eLookupLmdbBlobProp, eOpStatusFound, start);
461 app->GetCounters().Increment(CPSGSCounters::ePSGS_BlobPropCacheHit);
462 return ePSGS_CacheHit;
463 }
464
465 if (m_NeedTrace)
466 m_Reply->SendTrace("Report cache no hit",
467 m_Request->GetStartTimestamp());
468 timing.Register(eLookupLmdbBlobProp, eOpStatusNotFound, start);
469 app->GetCounters().Increment(CPSGSCounters::ePSGS_BlobPropCacheMiss);
470 return ePSGS_CacheNotHit;
471 }
472
473