1 /*  $Id: pubseq_gateway_cache_utils.cpp 629837 2021-04-22 12:47:49Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Sergey Satskiy
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "pubseq_gateway_cache_utils.hpp"
35 #include "pubseq_gateway.hpp"
36 #include "pubseq_gateway_convert_utils.hpp"
37 #include "insdc_utils.hpp"
38 #include "pending_operation.hpp"
39 
40 USING_NCBI_SCOPE;
41 
42 
43 
44 EPSGS_CacheLookupResult
x_LookupBioseqInfo(SBioseqResolution & bioseq_resolution)45 CPSGCache::x_LookupBioseqInfo(SBioseqResolution &  bioseq_resolution)
46 {
47     auto                    app = CPubseqGatewayApp::GetInstance();
48     CPubseqGatewayCache *   cache = app->GetLookupCache();
49 
50     if (cache == nullptr)
51         return ePSGS_CacheNotHit;
52 
53     auto    version = bioseq_resolution.m_BioseqInfo.GetVersion();
54     auto    seq_id_type = bioseq_resolution.m_BioseqInfo.GetSeqIdType();
55     auto    gi = bioseq_resolution.m_BioseqInfo.GetGI();
56 
57     CBioseqInfoFetchRequest     fetch_request;
58     fetch_request.SetAccession(bioseq_resolution.m_BioseqInfo.GetAccession());
59     if (version >= 0)
60         fetch_request.SetVersion(version);
61     if (seq_id_type >= 0)
62         fetch_request.SetSeqIdType(seq_id_type);
63     if (gi > 0)
64         fetch_request.SetGI(gi);
65 
66     auto        start = chrono::high_resolution_clock::now();
67     bool        cache_hit = false;
68 
69     COperationTiming &      timing = app->GetTiming();
70     try {
71         if (m_NeedTrace) {
72             m_Reply->SendTrace(
73                 "Cache request: " +
74                 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
75                 m_Request->GetStartTimestamp());
76         }
77 
78         auto    records = cache->FetchBioseqInfo(fetch_request);
79 
80         if (m_NeedTrace) {
81             string  msg = to_string(records.size()) + " hit(s)";
82             for (const auto &  item : records) {
83                 msg += "\n" +
84                        ToJson(item, SPSGS_ResolveRequest::fPSGS_AllBioseqFields).
85                             Repr(CJsonNode::fStandardJson);
86             }
87             m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
88         }
89 
90         switch (records.size()) {
91             case 0:
92                 if (IsINSDCSeqIdType(seq_id_type)) {
93                     timing.Register(eLookupLmdbBioseqInfo, eOpStatusNotFound, start);
94                     app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheMiss);
95                     return CPSGCache::x_LookupINSDCBioseqInfo(bioseq_resolution);
96                 }
97                 cache_hit = false;
98                 break;
99             case 1:
100                 cache_hit = true;
101                 bioseq_resolution.m_BioseqInfo = std::move(records[0]);
102                 break;
103             default:
104                 // More than one record; may be need to pick the latest version
105                 auto    ver = records[0].GetVersion();
106                 auto    date_changed = records[0].GetDateChanged();
107                 size_t  index_to_pick = 0;
108                 for (size_t  k = 0; k < records.size(); ++k) {
109                     if (records[k].GetVersion() > ver) {
110                         index_to_pick = k;
111                         ver = records[k].GetVersion();
112                         date_changed = records[k].GetDateChanged();
113                     } else {
114                         if (records[k].GetVersion() == ver) {
115                             if (records[k].GetDateChanged() > date_changed) {
116                                 index_to_pick = k;
117                                 date_changed = records[k].GetDateChanged();
118                             }
119                         }
120                     }
121                 }
122                 if (m_NeedTrace) {
123                     m_Reply->SendTrace(
124                         "Record with max version (and max date changed if "
125                         "more than one with max version) selected\n" +
126                         ToJson(records[index_to_pick],
127                                SPSGS_ResolveRequest::fPSGS_AllBioseqFields).
128                             Repr(CJsonNode::fStandardJson),
129                         m_Request->GetStartTimestamp());
130                 }
131 
132                 cache_hit = true;
133                 bioseq_resolution.m_BioseqInfo = std::move(records[index_to_pick]);
134 
135                 break;
136         }
137     } catch (const exception &  exc) {
138         if (m_NeedTrace)
139             m_Reply->SendTrace("Cache fetch exception. Report failure.",
140                                m_Request->GetStartTimestamp());
141         ERR_POST(Critical << "Exception while bioseq info cache lookup: "
142                           << exc.what());
143         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
144         return ePSGS_CacheFailure;
145     } catch (...) {
146         if (m_NeedTrace)
147             m_Reply->SendTrace("Cache fetch exception. Report failure.",
148                                m_Request->GetStartTimestamp());
149         ERR_POST(Critical << "Unknown exception while bioseq info cache lookup");
150         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
151         return ePSGS_CacheFailure;
152     }
153 
154     if (cache_hit) {
155         if (m_NeedTrace)
156             m_Reply->SendTrace("Report cache hit",
157                                m_Request->GetStartTimestamp());
158         timing.Register(eLookupLmdbBioseqInfo, eOpStatusFound, start);
159         app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheHit);
160         return ePSGS_CacheHit;
161     }
162 
163     if (m_NeedTrace)
164         m_Reply->SendTrace("Report cache no hit",
165                            m_Request->GetStartTimestamp());
166     timing.Register(eLookupLmdbBioseqInfo, eOpStatusNotFound, start);
167     app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheMiss);
168     return ePSGS_CacheNotHit;
169 }
170 
171 
172 EPSGS_CacheLookupResult
x_LookupINSDCBioseqInfo(SBioseqResolution & bioseq_resolution)173 CPSGCache::x_LookupINSDCBioseqInfo(SBioseqResolution &  bioseq_resolution)
174 {
175     auto                    app = CPubseqGatewayApp::GetInstance();
176     CPubseqGatewayCache *   cache = app->GetLookupCache();
177 
178     auto    version = bioseq_resolution.m_BioseqInfo.GetVersion();
179     auto    gi = bioseq_resolution.m_BioseqInfo.GetGI();
180 
181     CBioseqInfoFetchRequest     fetch_request;
182     fetch_request.SetAccession(bioseq_resolution.m_BioseqInfo.GetAccession());
183     if (version >= 0)
184         fetch_request.SetVersion(version);
185     if (gi > 0)
186         fetch_request.SetGI(gi);
187 
188     auto        start = chrono::high_resolution_clock::now();
189     bool        cache_hit = false;
190 
191     COperationTiming &      timing = app->GetTiming();
192     try {
193         if (m_NeedTrace) {
194             m_Reply->SendTrace(
195                     "Cache request for INSDC types: " +
196                     ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
197                     m_Request->GetStartTimestamp());
198         }
199 
200         auto    records = cache->FetchBioseqInfo(fetch_request);
201         SINSDCDecision  decision = DecideINSDC(records, version);
202 
203         if (m_NeedTrace) {
204             string  msg = to_string(records.size()) +
205                           " hit(s); decision status: " + to_string(decision.status);
206             for (const auto &  item : records) {
207                 msg += "\n" +
208                        ToJson(item,
209                               SPSGS_ResolveRequest::fPSGS_AllBioseqFields).
210                                     Repr(CJsonNode::fStandardJson);
211             }
212             m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
213         }
214 
215         switch (decision.status) {
216             case CRequestStatus::e200_Ok:
217                 cache_hit = true;
218                 bioseq_resolution.m_BioseqInfo = std::move(records[decision.index]);
219                 break;
220             case CRequestStatus::e404_NotFound:
221                 cache_hit = false;
222                 break;
223             case CRequestStatus::e500_InternalServerError:
224                 // No suitable records
225                 cache_hit = false;
226                 break;
227             default:
228                 // Impossible
229                 cache_hit = false;
230                 break;
231         }
232     } catch (const exception &  exc) {
233         if (m_NeedTrace)
234             m_Reply->SendTrace("Cache fetch for INSDC types exception. "
235                                "Report failure.",
236                                m_Request->GetStartTimestamp());
237 
238         ERR_POST(Critical << "Exception while INSDC bioseq info cache lookup: "
239                           << exc.what());
240         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
241         return ePSGS_CacheFailure;
242     } catch (...) {
243         if (m_NeedTrace)
244             m_Reply->SendTrace("Cache fetch for INSDC types exception. "
245                                "Report failure.",
246                                m_Request->GetStartTimestamp());
247 
248         ERR_POST(Critical << "Unknown exception while INSDC bioseq info cache lookup");
249         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
250         return ePSGS_CacheFailure;
251     }
252 
253     if (cache_hit) {
254         if (m_NeedTrace)
255             m_Reply->SendTrace("Report cache for INSDC types hit",
256                                m_Request->GetStartTimestamp());
257         timing.Register(eLookupLmdbBioseqInfo, eOpStatusFound, start);
258         app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheHit);
259         return ePSGS_CacheHit;
260     }
261 
262     if (m_NeedTrace)
263         m_Reply->SendTrace("Report cache for INSDC types no hit",
264                            m_Request->GetStartTimestamp());
265     timing.Register(eLookupLmdbBioseqInfo, eOpStatusNotFound, start);
266     app->GetCounters().Increment(CPSGSCounters::ePSGS_BioseqInfoCacheMiss);
267     return ePSGS_CacheNotHit;
268 }
269 
270 
271 EPSGS_CacheLookupResult
x_LookupSi2csi(SBioseqResolution & bioseq_resolution)272 CPSGCache::x_LookupSi2csi(SBioseqResolution &  bioseq_resolution)
273 {
274     auto                    app = CPubseqGatewayApp::GetInstance();
275     CPubseqGatewayCache *   cache = app->GetLookupCache();
276 
277     if (cache == nullptr)
278         return ePSGS_CacheNotHit;
279 
280     auto    seq_id_type = bioseq_resolution.m_BioseqInfo.GetSeqIdType();
281 
282     CSi2CsiFetchRequest     fetch_request;
283     fetch_request.SetSecSeqId(bioseq_resolution.m_BioseqInfo.GetAccession());
284     if (seq_id_type >= 0)
285         fetch_request.SetSecSeqIdType(seq_id_type);
286 
287     auto    start = chrono::high_resolution_clock::now();
288     bool    cache_hit = false;
289 
290     try {
291         if (m_NeedTrace) {
292             m_Reply->SendTrace(
293                 "Cache request: " +
294                 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
295                 m_Request->GetStartTimestamp());
296         }
297 
298         auto    records = cache->FetchSi2Csi(fetch_request);
299 
300         if (m_NeedTrace) {
301             string  msg = to_string(records.size()) + " hit(s)";
302             for (const auto &  item : records) {
303                 msg += "\n" + ToJson(item).Repr(CJsonNode::fStandardJson);
304             }
305             m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
306         }
307 
308         switch (records.size()) {
309             case 0:
310                 cache_hit = false;
311                 break;
312             case 1:
313                 cache_hit = true;
314                 bioseq_resolution.m_BioseqInfo.SetAccession(records[0].GetAccession());
315                 bioseq_resolution.m_BioseqInfo.SetVersion(records[0].GetVersion());
316                 bioseq_resolution.m_BioseqInfo.SetSeqIdType(records[0].GetSeqIdType());
317                 bioseq_resolution.m_BioseqInfo.SetGI(records[0].GetGI());
318                 break;
319             default:
320                 if (m_NeedTrace) {
321                     m_Reply->SendTrace(
322                         to_string(records.size()) + " hits. "
323                         "Cannot decide what to choose so treat as no hit",
324                         m_Request->GetStartTimestamp());
325                 }
326 
327                 // More than one record: there is no basis to choose, so
328                 // say that there was no cache hit
329                 cache_hit = false;
330                 break;
331         }
332     } catch (const exception &  exc) {
333         if (m_NeedTrace)
334             m_Reply->SendTrace("Cache fetch exception. Report failure.",
335                                m_Request->GetStartTimestamp());
336         ERR_POST(Critical << "Exception while csi cache lookup: "
337                           << exc.what());
338         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
339         return ePSGS_CacheFailure;
340     } catch (...) {
341         if (m_NeedTrace)
342             m_Reply->SendTrace("Cache fetch exception. Report failure.",
343                                m_Request->GetStartTimestamp());
344         ERR_POST(Critical << "Unknown exception while csi cache lookup");
345         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
346         return ePSGS_CacheFailure;
347     }
348 
349     COperationTiming &      timing = app->GetTiming();
350     if (cache_hit) {
351         if (m_NeedTrace)
352             m_Reply->SendTrace("Report cache hit",
353                                m_Request->GetStartTimestamp());
354         timing.Register(eLookupLmdbSi2csi, eOpStatusFound, start);
355         app->GetCounters().Increment(CPSGSCounters::ePSGS_Si2csiCacheHit);
356         return ePSGS_CacheHit;
357     }
358 
359     if (m_NeedTrace)
360         m_Reply->SendTrace("Report cache no hit",
361                            m_Request->GetStartTimestamp());
362     timing.Register(eLookupLmdbSi2csi, eOpStatusNotFound, start);
363     app->GetCounters().Increment(CPSGSCounters::ePSGS_Si2csiCacheMiss);
364     return ePSGS_CacheNotHit;
365 }
366 
367 
x_LookupBlobProp(int sat,int sat_key,int64_t & last_modified,CBlobRecord & blob_record)368 EPSGS_CacheLookupResult  CPSGCache::x_LookupBlobProp(
369                                             int  sat,
370                                             int  sat_key,
371                                             int64_t &  last_modified,
372                                             CBlobRecord &  blob_record)
373 {
374     auto                    app = CPubseqGatewayApp::GetInstance();
375     CPubseqGatewayCache *   cache = app->GetLookupCache();
376 
377     if (cache == nullptr)
378         return ePSGS_CacheNotHit;
379 
380     CBlobFetchRequest       fetch_request;
381     fetch_request.SetSat(sat);
382     fetch_request.SetSatKey(sat_key);
383     if (last_modified != INT64_MIN)
384         fetch_request.SetLastModified(last_modified);
385 
386     auto    start = chrono::high_resolution_clock::now();
387     bool    cache_hit = false;
388 
389     try {
390         if (m_NeedTrace) {
391             m_Reply->SendTrace(
392                 "Cache request: " +
393                 ToJson(fetch_request).Repr(CJsonNode::fStandardJson),
394                 m_Request->GetStartTimestamp());
395         }
396 
397         auto    records = cache->FetchBlobProp(fetch_request);
398 
399         if (m_NeedTrace) {
400             string  msg = to_string(records.size()) + " hit(s)";
401             for (const auto &  item : records) {
402                 msg += "\n" + ToJson(item).Repr(CJsonNode::fStandardJson);
403             }
404             m_Reply->SendTrace(msg, m_Request->GetStartTimestamp());
405         }
406 
407         switch (records.size()) {
408             case 0:
409                 cache_hit = false;
410                 break;
411             case 1:
412                 cache_hit = true;
413                 last_modified = records[0].GetModified();
414                 blob_record = std::move(records[0]);
415                 break;
416             default:
417                 // More than one record: need to choose by last modified
418                 cache_hit = true;
419                 size_t      max_last_modified_index = 0;
420                 for (size_t  k = 0; k < records.size(); ++k) {
421                     if (records[k].GetModified() >
422                         records[max_last_modified_index].GetModified())
423                         max_last_modified_index = k;
424                 }
425                 if (m_NeedTrace) {
426                     m_Reply->SendTrace(
427                         "Record with max last_modified selected\n" +
428                         ToJson(records[max_last_modified_index]).
429                             Repr(CJsonNode::fStandardJson),
430                         m_Request->GetStartTimestamp());
431                 }
432 
433                 last_modified = records[max_last_modified_index].GetModified();
434                 blob_record = std::move(records[max_last_modified_index]);
435 
436                 break;
437         }
438     } catch (const exception &  exc) {
439         if (m_NeedTrace)
440             m_Reply->SendTrace("Cache fetch exception. Report failure.",
441                                m_Request->GetStartTimestamp());
442         ERR_POST(Critical << "Exception while blob prop cache lookup: "
443                           << exc.what());
444         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
445         return ePSGS_CacheFailure;
446     } catch (...) {
447         if (m_NeedTrace)
448             m_Reply->SendTrace("Cache fetch exception. Report failure.",
449                                m_Request->GetStartTimestamp());
450         ERR_POST(Critical << "Unknown exception while blob prop cache lookup");
451         app->GetCounters().Increment(CPSGSCounters::ePSGS_LMDBError);
452         return ePSGS_CacheFailure;
453     }
454 
455     COperationTiming &      timing = app->GetTiming();
456     if (cache_hit) {
457         if (m_NeedTrace)
458             m_Reply->SendTrace("Report cache hit",
459                                m_Request->GetStartTimestamp());
460         timing.Register(eLookupLmdbBlobProp, eOpStatusFound, start);
461         app->GetCounters().Increment(CPSGSCounters::ePSGS_BlobPropCacheHit);
462         return ePSGS_CacheHit;
463     }
464 
465     if (m_NeedTrace)
466         m_Reply->SendTrace("Report cache no hit",
467                            m_Request->GetStartTimestamp());
468     timing.Register(eLookupLmdbBlobProp, eOpStatusNotFound, start);
469     app->GetCounters().Increment(CPSGSCounters::ePSGS_BlobPropCacheMiss);
470     return ePSGS_CacheNotHit;
471 }
472 
473