1 /*  $Id: resolve_base.cpp 629837 2021-04-22 12:47:49Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Sergey Satskiy
27  *
28  * File Description: base class for processors which need to resolve seq_id
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 
35 #include <corelib/request_status.hpp>
36 #include <corelib/ncbidiag.hpp>
37 
38 #include "pubseq_gateway.hpp"
39 #include "pubseq_gateway_utils.hpp"
40 #include "pubseq_gateway_cache_utils.hpp"
41 #include "cass_fetch.hpp"
42 #include "psgs_request.hpp"
43 #include "psgs_reply.hpp"
44 #include "insdc_utils.hpp"
45 #include "resolve_base.hpp"
46 
47 #include <objects/seqloc/Seq_id.hpp>
48 #include <objects/general/Dbtag.hpp>
49 #include <objects/general/Object_id.hpp>
50 USING_IDBLOB_SCOPE;
51 USING_SCOPE(objects);
52 
53 using namespace std::placeholders;
54 
55 
CPSGS_ResolveBase()56 CPSGS_ResolveBase::CPSGS_ResolveBase()
57 {}
58 
59 
CPSGS_ResolveBase(shared_ptr<CPSGS_Request> request,shared_ptr<CPSGS_Reply> reply,TSeqIdResolutionFinishedCB finished_cb,TSeqIdResolutionErrorCB error_cb,TSeqIdResolutionStartProcessingCB resolution_start_processing_cb)60 CPSGS_ResolveBase::CPSGS_ResolveBase(shared_ptr<CPSGS_Request> request,
61                                      shared_ptr<CPSGS_Reply> reply,
62                                      TSeqIdResolutionFinishedCB finished_cb,
63                                      TSeqIdResolutionErrorCB error_cb,
64                                      TSeqIdResolutionStartProcessingCB resolution_start_processing_cb) :
65     CPSGS_AsyncResolveBase(request, reply,
66                            bind(&CPSGS_ResolveBase::x_OnSeqIdResolveFinished,
67                                 this, _1),
68                            bind(&CPSGS_ResolveBase::x_OnSeqIdResolveError,
69                                 this, _1, _2, _3, _4),
70                            bind(&CPSGS_ResolveBase::x_OnResolutionGoodData,
71                                 this)),
72     CPSGS_AsyncBioseqInfoBase(request, reply,
73                               bind(&CPSGS_ResolveBase::x_OnSeqIdResolveFinished,
74                                    this, _1),
75                               bind(&CPSGS_ResolveBase::x_OnSeqIdResolveError,
76                                    this, _1, _2, _3, _4)),
77     m_FinalFinishedCB(finished_cb),
78     m_FinalErrorCB(error_cb),
79     m_FinalStartProcessingCB(resolution_start_processing_cb),
80     m_AsyncStarted(false)
81 {}
82 
83 
~CPSGS_ResolveBase()84 CPSGS_ResolveBase::~CPSGS_ResolveBase()
85 {}
86 
87 
88 SPSGS_RequestBase::EPSGS_CacheAndDbUse
x_GetRequestUseCache(void)89 CPSGS_ResolveBase::x_GetRequestUseCache(void)
90 {
91     switch (m_Request->GetRequestType()) {
92         case CPSGS_Request::ePSGS_ResolveRequest:
93             return m_Request->GetRequest<SPSGS_ResolveRequest>().m_UseCache;
94         case CPSGS_Request::ePSGS_BlobBySeqIdRequest:
95             return m_Request->GetRequest<SPSGS_BlobBySeqIdRequest>().m_UseCache;
96         case CPSGS_Request::ePSGS_AnnotationRequest:
97             return m_Request->GetRequest<SPSGS_AnnotRequest>().m_UseCache;
98         default:
99             break;
100     }
101     NCBI_THROW(CPubseqGatewayException, eLogic,
102                "Not handled request type " +
103                to_string(static_cast<int>(m_Request->GetRequestType())));
104 }
105 
106 
107 bool
x_GetEffectiveSeqIdType(const CSeq_id & parsed_seq_id,int16_t & eff_seq_id_type,bool need_trace)108 CPSGS_ResolveBase::x_GetEffectiveSeqIdType(
109                                 const CSeq_id &  parsed_seq_id,
110                                 int16_t &  eff_seq_id_type,
111                                 bool  need_trace)
112 {
113     auto    parsed_seq_id_type = parsed_seq_id.Which();
114     bool    parsed_seq_id_type_found = (parsed_seq_id_type !=
115                                         CSeq_id_Base::e_not_set);
116     auto    request_seq_id_type = GetRequestSeqIdType();
117 
118     if (!parsed_seq_id_type_found && request_seq_id_type < 0) {
119         eff_seq_id_type = -1;
120         return true;
121     }
122 
123     if (!parsed_seq_id_type_found) {
124         eff_seq_id_type = request_seq_id_type;
125         return true;
126     }
127 
128     if (request_seq_id_type < 0) {
129         eff_seq_id_type = parsed_seq_id_type;
130         return true;
131     }
132 
133     // Both found
134     if (parsed_seq_id_type == request_seq_id_type) {
135         eff_seq_id_type = request_seq_id_type;
136         return true;
137     }
138 
139     // The parsed and url explicit seq_id_type do not match
140     if (IsINSDCSeqIdType(parsed_seq_id_type) &&
141         IsINSDCSeqIdType(request_seq_id_type)) {
142         if (need_trace) {
143             m_Reply->SendTrace(
144                 "Seq id type mismatch. Parsed CSeq_id reports seq_id_type as " +
145                 to_string(parsed_seq_id_type) + " while the URL reports " +
146                 to_string(request_seq_id_type) + ". They both belong to INSDC types so "
147                 "CSeq_id provided type " + to_string(parsed_seq_id_type) +
148                 " is taken as an effective one",
149                 m_Request->GetStartTimestamp());
150         }
151         eff_seq_id_type = parsed_seq_id_type;
152         return true;
153     }
154 
155     return false;
156 }
157 
158 
159 EPSGS_SeqIdParsingResult
x_ParseInputSeqId(CSeq_id & seq_id,string & err_msg)160 CPSGS_ResolveBase::x_ParseInputSeqId(CSeq_id &  seq_id,
161                                      string &  err_msg)
162 {
163     bool    need_trace = m_Request->NeedTrace();
164     auto    request_seq_id = GetRequestSeqId();
165     auto    request_seq_id_type = GetRequestSeqIdType();
166 
167     try {
168         seq_id.Set(request_seq_id);
169         if (need_trace)
170             m_Reply->SendTrace("Parsing CSeq_id('" + request_seq_id +
171                              "') succeeded", m_Request->GetStartTimestamp());
172 
173         if (request_seq_id_type <= 0) {
174             if (need_trace)
175                 m_Reply->SendTrace("Parsing CSeq_id finished OK (#1)",
176                                    m_Request->GetStartTimestamp());
177             return ePSGS_ParsedOK;
178         }
179 
180         // Check the parsed type with the given
181         int16_t     eff_seq_id_type;
182         if (x_GetEffectiveSeqIdType(seq_id, eff_seq_id_type, false)) {
183             if (need_trace)
184                 m_Reply->SendTrace("Parsing CSeq_id finished OK (#2)",
185                                    m_Request->GetStartTimestamp());
186             return ePSGS_ParsedOK;
187         }
188 
189         // seq_id_type from URL and from CSeq_id differ
190         CSeq_id_Base::E_Choice  seq_id_type = seq_id.Which();
191 
192         if (need_trace)
193             m_Reply->SendTrace("CSeq_id provided type " + to_string(seq_id_type) +
194                                " and URL provided seq_id_type " +
195                                to_string(request_seq_id_type) + " mismatch",
196                                m_Request->GetStartTimestamp());
197 
198         if (IsINSDCSeqIdType(request_seq_id_type) &&
199             IsINSDCSeqIdType(seq_id_type)) {
200             // Both seq_id_types belong to INSDC
201             if (need_trace) {
202                 m_Reply->SendTrace("Both types belong to INSDC types.\n"
203                                    "Parsing CSeq_id finished OK (#3)",
204                                    m_Request->GetStartTimestamp());
205             }
206             return ePSGS_ParsedOK;
207         }
208 
209         // Type mismatch: form the error message in case of resolution problems
210         err_msg = "Seq_id '" + request_seq_id +
211                   "' possible type mismatch: the URL provides " +
212                   to_string(request_seq_id_type) +
213                   " while the CSeq_Id detects it as " +
214                   to_string(static_cast<int>(seq_id_type));
215     } catch (...) {
216         if (need_trace)
217             m_Reply->SendTrace("Parsing CSeq_id('" + request_seq_id +
218                                "') failed (exception)",
219                                m_Request->GetStartTimestamp());
220     }
221 
222     // Second variation of Set()
223     if (request_seq_id_type > 0) {
224         try {
225             seq_id.Set(CSeq_id::eFasta_AsTypeAndContent,
226                        (CSeq_id_Base::E_Choice)(request_seq_id_type),
227                        request_seq_id);
228             if (need_trace) {
229                 m_Reply->SendTrace("Parsing CSeq_id(eFasta_AsTypeAndContent, " +
230                                    to_string(request_seq_id_type) +
231                                    ", '" + request_seq_id + "') succeeded.\n"
232                                    "Parsing CSeq_id finished OK (#4)",
233                                    m_Request->GetStartTimestamp());
234             }
235             return ePSGS_ParsedOK;
236         } catch (...) {
237             if (need_trace)
238                 m_Reply->SendTrace("Parsing CSeq_id(eFasta_AsTypeAndContent, " +
239                                    to_string(request_seq_id_type) +
240                                    ", '" + request_seq_id + "') failed (exception)",
241                                    m_Request->GetStartTimestamp());
242         }
243     }
244 
245     if (need_trace) {
246         m_Reply->SendTrace("Parsing CSeq_id finished FAILED",
247                            m_Request->GetStartTimestamp());
248     }
249 
250     return ePSGS_ParseFailed;
251 }
252 
253 
254 bool
x_ComposeOSLT(CSeq_id & parsed_seq_id,int16_t & effective_seq_id_type,list<string> & secondary_id_list,string & primary_id)255 CPSGS_ResolveBase::x_ComposeOSLT(CSeq_id &  parsed_seq_id,
256                                  int16_t &  effective_seq_id_type,
257                                  list<string> &  secondary_id_list,
258                                  string &  primary_id)
259 {
260     bool    need_trace = m_Request->NeedTrace();
261 
262     if (!x_GetEffectiveSeqIdType(parsed_seq_id,
263                                  effective_seq_id_type, need_trace)) {
264         if (need_trace) {
265             m_Reply->SendTrace("OSLT has not been tried due to mismatch "
266                              "between the  parsed CSeq_id seq_id_type and "
267                              "the URL provided one",
268                              m_Request->GetStartTimestamp());
269         }
270         return false;
271     }
272 
273     try {
274         primary_id = parsed_seq_id.ComposeOSLT(&secondary_id_list,
275                                                CSeq_id::fGpipeAddSecondary);
276     } catch (...) {
277         if (need_trace) {
278             m_Reply->SendTrace("OSLT call failure (exception)",
279                              m_Request->GetStartTimestamp());
280         }
281         return false;
282     }
283 
284     if (need_trace) {
285         string  trace_msg("OSLT succeeded");
286         trace_msg += "\nOSLT primary id: " + primary_id;
287 
288         if (secondary_id_list.empty()) {
289             trace_msg += "\nOSLT secondary id list is empty";
290         } else {
291             for (const auto &  item : secondary_id_list) {
292                 trace_msg += "\nOSLT secondary id: " + item;
293             }
294         }
295         m_Reply->SendTrace(trace_msg, m_Request->GetStartTimestamp());
296     }
297 
298     return true;
299 }
300 
301 
302 EPSGS_CacheLookupResult
x_ResolvePrimaryOSLTInCache(const string & primary_id,int16_t effective_version,int16_t effective_seq_id_type,SBioseqResolution & bioseq_resolution)303 CPSGS_ResolveBase::x_ResolvePrimaryOSLTInCache(
304                                 const string &  primary_id,
305                                 int16_t  effective_version,
306                                 int16_t  effective_seq_id_type,
307                                 SBioseqResolution &  bioseq_resolution)
308 {
309     EPSGS_CacheLookupResult     bioseq_cache_lookup_result = ePSGS_CacheNotHit;
310 
311     if (!primary_id.empty()) {
312         CPSGCache           psg_cache(true, m_Request, m_Reply);
313 
314         // Try BIOSEQ_INFO
315         bioseq_resolution.m_BioseqInfo.SetAccession(primary_id);
316         bioseq_resolution.m_BioseqInfo.SetVersion(effective_version);
317         bioseq_resolution.m_BioseqInfo.SetSeqIdType(effective_seq_id_type);
318 
319         bioseq_cache_lookup_result = psg_cache.LookupBioseqInfo(
320                                         bioseq_resolution);
321         if (bioseq_cache_lookup_result == ePSGS_CacheHit) {
322             bioseq_resolution.m_ResolutionResult = ePSGS_BioseqCache;
323             return ePSGS_CacheHit;
324         }
325 
326         bioseq_resolution.Reset();
327     }
328     return bioseq_cache_lookup_result;
329 }
330 
331 
332 EPSGS_CacheLookupResult
x_ResolveSecondaryOSLTInCache(const string & secondary_id,int16_t effective_seq_id_type,SBioseqResolution & bioseq_resolution)333 CPSGS_ResolveBase::x_ResolveSecondaryOSLTInCache(
334                                 const string &  secondary_id,
335                                 int16_t  effective_seq_id_type,
336                                 SBioseqResolution &  bioseq_resolution)
337 {
338     bioseq_resolution.m_BioseqInfo.SetAccession(secondary_id);
339     bioseq_resolution.m_BioseqInfo.SetSeqIdType(effective_seq_id_type);
340 
341     CPSGCache   psg_cache(true, m_Request, m_Reply);
342     auto        si2csi_cache_lookup_result =
343                         psg_cache.LookupSi2csi(bioseq_resolution);
344     if (si2csi_cache_lookup_result == ePSGS_CacheHit) {
345         bioseq_resolution.m_ResolutionResult = ePSGS_Si2csiCache;
346         return ePSGS_CacheHit;
347     }
348 
349     bioseq_resolution.Reset();
350 
351     if (si2csi_cache_lookup_result == ePSGS_CacheFailure)
352         return ePSGS_CacheFailure;
353     return ePSGS_CacheNotHit;
354 }
355 
356 
357 EPSGS_CacheLookupResult
x_ResolveAsIsInCache(SBioseqResolution & bioseq_resolution,bool need_as_is)358 CPSGS_ResolveBase::x_ResolveAsIsInCache(
359                                 SBioseqResolution &  bioseq_resolution,
360                                 bool  need_as_is)
361 {
362     EPSGS_CacheLookupResult     cache_lookup_result = ePSGS_CacheNotHit;
363 
364     // Capitalize seq_id
365     string      upper_seq_id = GetRequestSeqId();
366     NStr::ToUpper(upper_seq_id);
367 
368     auto        seq_id_type = GetRequestSeqIdType();
369 
370     // 1. As is
371     if (need_as_is == true) {
372         cache_lookup_result = x_ResolveSecondaryOSLTInCache(
373                                     upper_seq_id, seq_id_type,
374                                     bioseq_resolution);
375     }
376 
377     if (cache_lookup_result == ePSGS_CacheNotHit) {
378         // 2. if there are | at the end => strip all trailing bars
379         //    else => add one | at the end
380         if (upper_seq_id[upper_seq_id.size() - 1] == '|') {
381             string  strip_bar_seq_id(upper_seq_id);
382             while (strip_bar_seq_id[strip_bar_seq_id.size() - 1] == '|')
383                 strip_bar_seq_id.erase(strip_bar_seq_id.size() - 1, 1);
384             cache_lookup_result = x_ResolveSecondaryOSLTInCache(
385                                         strip_bar_seq_id, seq_id_type,
386                                         bioseq_resolution);
387         } else {
388             string      seq_id_added_bar(upper_seq_id);
389             seq_id_added_bar.append(1, '|');
390             cache_lookup_result = x_ResolveSecondaryOSLTInCache(
391                                         seq_id_added_bar, seq_id_type,
392                                         bioseq_resolution);
393         }
394     }
395 
396     if (cache_lookup_result == ePSGS_CacheFailure) {
397         bioseq_resolution.Reset();
398         bioseq_resolution.m_Error.m_ErrorMessage = "Cache lookup failure";
399         bioseq_resolution.m_Error.m_ErrorCode = CRequestStatus::e500_InternalServerError;
400     }
401 
402     return cache_lookup_result;
403 }
404 
405 
406 void
x_ResolveViaComposeOSLTInCache(CSeq_id & parsed_seq_id,int16_t effective_seq_id_type,const list<string> & secondary_id_list,const string & primary_id,SBioseqResolution & bioseq_resolution)407 CPSGS_ResolveBase::x_ResolveViaComposeOSLTInCache(
408                                 CSeq_id &  parsed_seq_id,
409                                 int16_t  effective_seq_id_type,
410                                 const list<string> &  secondary_id_list,
411                                 const string &  primary_id,
412                                 SBioseqResolution &  bioseq_resolution)
413 {
414     const CTextseq_id *  text_seq_id = parsed_seq_id.GetTextseq_Id();
415     int16_t              effective_version = GetEffectiveVersion(text_seq_id);
416     bool                 cache_failure = false;
417 
418     if (!primary_id.empty()) {
419         auto    cache_lookup_result =
420                     x_ResolvePrimaryOSLTInCache(primary_id, effective_version,
421                                                 effective_seq_id_type,
422                                                 bioseq_resolution);
423         if (cache_lookup_result == ePSGS_CacheHit)
424             return;
425         if (cache_lookup_result == ePSGS_CacheFailure)
426             cache_failure = true;
427     }
428 
429     for (const auto &  secondary_id : secondary_id_list) {
430         auto    cache_lookup_result =
431                     x_ResolveSecondaryOSLTInCache(secondary_id,
432                                                   effective_seq_id_type,
433                                                   bioseq_resolution);
434         if (cache_lookup_result == ePSGS_CacheHit)
435             return;
436         if (cache_lookup_result == ePSGS_CacheFailure) {
437             cache_failure = true;
438             break;
439         }
440     }
441 
442     // Try cache as it came from URL
443     // The primary id may match the URL given seq_id so it makes sense to
444     // exclude trying the very same string in x_ResolveAsIsInCache(). The
445     // x_ResolveAsIsInCache() capitalizes the url seq id so the capitalized
446     // versions need to be compared
447     string      upper_seq_id = GetRequestSeqId();
448     NStr::ToUpper(upper_seq_id);
449     bool        need_as_is = primary_id != upper_seq_id;
450     auto        cache_lookup_result =
451                     x_ResolveAsIsInCache(bioseq_resolution, need_as_is);
452     if (cache_lookup_result == ePSGS_CacheHit)
453         return;
454     if (cache_lookup_result == ePSGS_CacheFailure)
455         cache_failure = true;
456 
457     bioseq_resolution.Reset();
458 
459     if (cache_failure) {
460         bioseq_resolution.m_Error.m_ErrorMessage = "Cache lookup failure";
461         bioseq_resolution.m_Error.m_ErrorCode = CRequestStatus::e500_InternalServerError;
462     }
463 }
464 
465 
466 void
ResolveInputSeqId(void)467 CPSGS_ResolveBase::ResolveInputSeqId(void)
468 {
469     SBioseqResolution   bioseq_resolution;
470     auto                app = CPubseqGatewayApp::GetInstance();
471     string              parse_err_msg;
472     CSeq_id             oslt_seq_id;
473     auto                parsing_result = x_ParseInputSeqId(oslt_seq_id,
474                                                            parse_err_msg);
475 
476     // The results of the ComposeOSLT are used in both cache and DB
477     int16_t         effective_seq_id_type;
478     list<string>    secondary_id_list;
479     string          primary_id;
480     bool            composed_ok = false;
481     if (parsing_result == ePSGS_ParsedOK) {
482         composed_ok = x_ComposeOSLT(oslt_seq_id, effective_seq_id_type,
483                                     secondary_id_list, primary_id);
484     }
485 
486     auto    request_use_cache = x_GetRequestUseCache();
487     if (request_use_cache != SPSGS_RequestBase::ePSGS_DbOnly) {
488         // Try cache
489         if (composed_ok)
490             x_ResolveViaComposeOSLTInCache(oslt_seq_id, effective_seq_id_type,
491                                            secondary_id_list, primary_id,
492                                            bioseq_resolution);
493         else
494             x_ResolveAsIsInCache(bioseq_resolution);
495 
496         if (bioseq_resolution.IsValid()) {
497             // Special case for the seq_id like gi|156232
498             bool    continue_with_cassandra = false;
499             if (bioseq_resolution.m_ResolutionResult == ePSGS_Si2csiCache) {
500                 if (!CanSkipBioseqInfoRetrieval(
501                             bioseq_resolution.m_BioseqInfo)) {
502                     // This is an optimization. Try to find the record in the
503                     // BIOSEQ_INFO only if needed.
504                     CPSGCache   psg_cache(true, m_Request, m_Reply);
505                     auto        bioseq_cache_lookup_result =
506                                     psg_cache.LookupBioseqInfo(bioseq_resolution);
507 
508                     if (bioseq_cache_lookup_result != ePSGS_CacheHit) {
509                         // Not found or error
510                         continue_with_cassandra = true;
511                         bioseq_resolution.Reset();
512                     } else {
513                         bioseq_resolution.m_ResolutionResult = ePSGS_BioseqCache;
514 
515                         auto    adj_result = AdjustBioseqAccession(
516                                                             bioseq_resolution);
517                         if (adj_result == ePSGS_LogicError ||
518                             adj_result == ePSGS_SeqIdsEmpty) {
519                             continue_with_cassandra = true;
520                             bioseq_resolution.Reset();
521                         }
522                     }
523                 }
524             } else {
525                 // The result is coming from the BIOSEQ_INFO cache. Need to try
526                 // the adjustment
527                 auto    adj_result = AdjustBioseqAccession(bioseq_resolution);
528                 if (adj_result == ePSGS_LogicError ||
529                     adj_result == ePSGS_SeqIdsEmpty) {
530                     continue_with_cassandra = true;
531                     bioseq_resolution.Reset();
532                 }
533             }
534 
535             if (!continue_with_cassandra) {
536                 x_OnSeqIdResolveFinished(move(bioseq_resolution));
537                 return;
538             }
539         }
540     }
541 
542     if (request_use_cache != SPSGS_RequestBase::ePSGS_CacheOnly) {
543         // Need to initiate async DB resolution
544 
545         // Memorize an error if there was one
546         if (! parse_err_msg.empty() &&
547             ! bioseq_resolution.m_Error.HasError()) {
548             bioseq_resolution.m_Error.m_ErrorMessage = parse_err_msg;
549             bioseq_resolution.m_Error.m_ErrorCode = CRequestStatus::e404_NotFound;
550         }
551 
552         // Async request
553         m_AsyncStarted = true;
554         CPSGS_AsyncResolveBase::Process(
555                 GetEffectiveVersion(oslt_seq_id.GetTextseq_Id()),
556                 effective_seq_id_type,
557                 move(secondary_id_list),
558                 move(primary_id),
559                 composed_ok,
560                 move(bioseq_resolution));
561 
562         // Async resolver will call a callback
563         return;
564     }
565 
566     // Finished with resolution:
567     // - not found
568     // - parsing error
569     // - LMDB error
570     app->GetCounters().Increment(CPSGSCounters::ePSGS_InputSeqIdNotResolved);
571 
572     if (bioseq_resolution.m_Error.HasError()) {
573         x_OnSeqIdResolveError(bioseq_resolution.m_Error.m_ErrorCode,
574                               ePSGS_UnresolvedSeqId,
575                               eDiag_Error,
576                               bioseq_resolution.m_Error.m_ErrorMessage);
577         return;
578     }
579 
580     if (!parse_err_msg.empty()) {
581         x_OnSeqIdResolveError(CRequestStatus::e404_NotFound,
582                               ePSGS_UnresolvedSeqId, eDiag_Error,
583                               parse_err_msg);
584         return;
585     }
586 
587     x_OnSeqIdResolveError(CRequestStatus::e404_NotFound, ePSGS_UnresolvedSeqId,
588                           eDiag_Error,
589                           "Could not resolve seq_id " + GetRequestSeqId());
590 }
591 
592 
593 SBioseqResolution
ResolveTestInputSeqId(void)594 CPSGS_ResolveBase::ResolveTestInputSeqId(void)
595 {
596     // The method is to support the 'health' and 'deep-health' URLs.
597     // The only cache needs to be tried and no writing to the reply is allowed
598     SBioseqResolution   bioseq_resolution;
599     string              parse_err_msg;
600     CSeq_id             oslt_seq_id;
601     auto                parsing_result = x_ParseInputSeqId(oslt_seq_id,
602                                                            parse_err_msg);
603 
604     // The results of the ComposeOSLT are used in both cache and DB
605     int16_t         effective_seq_id_type;
606     list<string>    secondary_id_list;
607     string          primary_id;
608     bool            composed_ok = false;
609     if (parsing_result == ePSGS_ParsedOK) {
610         composed_ok = x_ComposeOSLT(oslt_seq_id, effective_seq_id_type,
611                                     secondary_id_list, primary_id);
612     }
613 
614     // Try cache unconditionally
615     if (composed_ok)
616         x_ResolveViaComposeOSLTInCache(oslt_seq_id, effective_seq_id_type,
617                                        secondary_id_list, primary_id,
618                                        bioseq_resolution);
619     else
620         x_ResolveAsIsInCache(bioseq_resolution);
621 
622 
623     if (!bioseq_resolution.IsValid()) {
624         if (!bioseq_resolution.m_Error.HasError()) {
625             if (!parse_err_msg.empty()) {
626                 bioseq_resolution.m_Error.m_ErrorMessage = parse_err_msg;
627             }
628         }
629     }
630 
631     return bioseq_resolution;
632 }
633 
634 
635 void
x_OnSeqIdResolveError(CRequestStatus::ECode status,int code,EDiagSev severity,const string & message)636 CPSGS_ResolveBase::x_OnSeqIdResolveError(
637                         CRequestStatus::ECode  status,
638                         int  code,
639                         EDiagSev  severity,
640                         const string &  message)
641 {
642     auto    app = CPubseqGatewayApp::GetInstance();
643     if (status == CRequestStatus::e404_NotFound) {
644         app->GetTiming().Register(eResolutionNotFound, eOpStatusNotFound,
645                                   m_Request->GetStartTimestamp());
646         if (m_AsyncStarted)
647             app->GetTiming().Register(eResolutionCass, eOpStatusNotFound,
648                                       GetAsyncResolutionStartTimestamp());
649         else
650             app->GetTiming().Register(eResolutionLmdb, eOpStatusNotFound,
651                                       m_Request->GetStartTimestamp());
652     }
653     else {
654         app->GetTiming().Register(eResolutionError, eOpStatusNotFound,
655                                   m_Request->GetStartTimestamp());
656     }
657 
658     m_FinalErrorCB(status, code, severity, message);
659 }
660 
661 
662 // Called only in case of a success
x_OnSeqIdResolveFinished(SBioseqResolution && bioseq_resolution)663 void CPSGS_ResolveBase::x_OnSeqIdResolveFinished(
664                                     SBioseqResolution &&  bioseq_resolution)
665 {
666     // A few cases here: comes from cache or DB
667     // ePSGS_Si2csiCache, ePSGS_Si2csiDB, ePSGS_BioseqCache, ePSGS_BioseqDB
668     if (bioseq_resolution.m_ResolutionResult == ePSGS_Si2csiDB ||
669         bioseq_resolution.m_ResolutionResult == ePSGS_Si2csiCache) {
670         // We have the following fields at hand:
671         // - accession, version, seq_id_type, gi
672         // May be it is what the user asked for
673         if (!CanSkipBioseqInfoRetrieval(bioseq_resolution.m_BioseqInfo)) {
674             // Need to pull the full bioseq info
675             CPSGCache   psg_cache(m_Request, m_Reply);
676             auto        cache_lookup_result =
677                                 psg_cache.LookupBioseqInfo(bioseq_resolution);
678             if (cache_lookup_result != ePSGS_CacheHit) {
679                 // No cache hit (or not allowed); need to get to DB if allowed
680                 if (x_GetRequestUseCache() != SPSGS_RequestBase::ePSGS_CacheOnly) {
681                     // Async DB query
682 
683                     // To have the proper timing registered in the errors
684                     // handler
685                     m_AsyncStarted = true;
686 
687                     if (bioseq_resolution.m_CassQueryCount == 0) {
688                         // It now became cassandra based so need to memorize
689                         // the start timestamp
690                         SetAsyncResolutionStartTimestamp(
691                                         chrono::high_resolution_clock::now());
692                     }
693 
694                     CPSGS_AsyncBioseqInfoBase::MakeRequest(
695                                                     move(bioseq_resolution));
696                     return;
697                 }
698 
699                 // It is a bioseq inconsistency case
700                 x_OnSeqIdResolveError(
701                                 CRequestStatus::e500_InternalServerError,
702                                 ePSGS_NoBioseqInfo, eDiag_Error,
703                                 "Data inconsistency: the bioseq key info was "
704                                 "resolved for seq_id " + GetRequestSeqId() +
705                                 " but the bioseq info is not found");
706                 return;
707             } else {
708                 bioseq_resolution.m_ResolutionResult = ePSGS_BioseqCache;
709             }
710         }
711     }
712 
713     // All good
714     x_OnResolutionGoodData();
715     x_RegisterSuccessTiming(bioseq_resolution);
716     m_FinalFinishedCB(move(bioseq_resolution));
717 }
718 
719 
720 void
x_RegisterSuccessTiming(const SBioseqResolution & bioseq_resolution)721 CPSGS_ResolveBase::x_RegisterSuccessTiming(
722                                 const SBioseqResolution &  bioseq_resolution)
723 {
724     auto    app = CPubseqGatewayApp::GetInstance();
725 
726     // Overall timing, regardless how it was done
727     app->GetTiming().Register(eResolutionFound, eOpStatusFound,
728                               m_Request->GetStartTimestamp());
729 
730     if (bioseq_resolution.m_CassQueryCount > 0) {
731         // Regardless how many requests
732         app->GetTiming().Register(eResolutionCass, eOpStatusFound,
733                                   GetAsyncResolutionStartTimestamp());
734 
735         // Separated by the number of requests
736         app->GetTiming().Register(eResolutionFoundInCassandra,
737                                   eOpStatusFound,
738                                   GetAsyncResolutionStartTimestamp(),
739                                   bioseq_resolution.m_CassQueryCount);
740     } else {
741         app->GetTiming().Register(eResolutionLmdb, eOpStatusFound,
742                                   m_Request->GetStartTimestamp());
743     }
744 }
745 
746 
x_OnResolutionGoodData(void)747 void CPSGS_ResolveBase::x_OnResolutionGoodData(void)
748 {
749     m_FinalStartProcessingCB();
750 }
751 
752